Esempio n. 1
0
    def _PreprocessSources(self, extraction_engine, session, storage_writer):
        """Runs preprocessing against the sources.

    Args:
      extraction_engine (BaseEngine): engine with which to preprocess
          the sources.
      session (Session): session in which the sources are processed.
      storage_writer (StorageWriter): storage writer.
    """
        logger.debug('Starting preprocessing.')

        try:
            # Build the artifact definitions registry first, then run the
            # preprocessing plugins; an I/O failure in either step is
            # logged rather than propagated.
            registry = engine.BaseEngine.BuildArtifactsRegistry(
                self._artifact_definitions_path, self._custom_artifacts_path)
            extraction_engine.PreprocessSources(
                registry, self._source_path_specs, session, storage_writer,
                resolver_context=self._resolver_context)
        except IOError as exception:
            logger.error(
                'Unable to preprocess with error: {0!s}'.format(exception))

        logger.debug('Preprocessing done.')
Esempio n. 2
0
    def _PreprocessSources(self, extraction_engine):
        """Runs preprocessing against the sources.

    Args:
      extraction_engine (BaseEngine): engine with which to preprocess
          the sources.
    """
        logger.debug('Starting preprocessing.')

        # A throwaway session: preprocessing results are not persisted.
        preprocess_session = sessions.Session()

        try:
            registry = engine.BaseEngine.BuildArtifactsRegistry(
                self._artifact_definitions_path, self._custom_artifacts_path)
            # The storage writer argument is None on purpose so that no
            # preprocessing information is stored.
            extraction_engine.PreprocessSources(
                registry, self._source_path_specs, preprocess_session, None,
                resolver_context=self._resolver_context)
        except IOError as exception:
            logger.error(
                'Unable to preprocess with error: {0!s}'.format(exception))

        logger.debug('Preprocessing done.')
Esempio n. 3
0
  def _GetExpandedParserFilterExpression(self, knowledge_base):
    """Determines the expanded parser filter expression.

    Args:
      knowledge_base (KnowledgeBase): contains information from the source
          data needed for parsing.

    Returns:
      str: expanded parser filter expression.

    Raises:
      BadConfigOption: if presets in the parser filter expression could not
          be expanded or if an invalid parser or plugin name is specified.
    """
    if not self._parser_filter_expression:
      # No explicit expression was supplied; derive one from presets that
      # match the operating system detected during preprocessing.
      os_family = knowledge_base.GetValue('operating_system')
      os_product = knowledge_base.GetValue('operating_system_product')
      os_version = knowledge_base.GetValue('operating_system_version')

      os_artifact = artifacts.OperatingSystemArtifact(
          family=os_family, product=os_product, version=os_version)

      matching_presets = self._presets_manager.GetPresetsByOperatingSystem(
          os_artifact)
      if matching_presets:
        preset_names = [preset.name for preset in matching_presets]
        self._parser_filter_expression = ','.join(preset_names)

        logger.debug('Parser filter expression set to preset: {0:s}'.format(
            self._parser_filter_expression))

    expression_helper = parser_filter.ParserFilterExpressionHelper()

    try:
      expanded_expression = expression_helper.ExpandPresets(
          self._presets_manager, self._parser_filter_expression)
      logger.debug('Parser filter expression set to: {0:s}'.format(
          expanded_expression or 'N/A'))
    except RuntimeError as exception:
      raise errors.BadConfigOption((
          'Unable to expand presets in parser filter expression with '
          'error: {0!s}').format(exception))

    valid_elements, invalid_elements = (
        parsers_manager.ParsersManager.CheckFilterExpression(
            expanded_expression))

    if invalid_elements:
      raise errors.BadConfigOption(
          'Unknown parser or plugin names in element(s): "{0:s}" of '
          'parser filter expression: {1:s}'.format(
              ','.join(invalid_elements), expanded_expression))

    return ','.join(sorted(valid_elements))
Esempio n. 4
0
  def _Preprocess(self, file_system, mount_point):
    """Runs the preprocessing plugins against the image.

    Args:
      file_system (dfvfs.FileSystem): file system to be preprocessed.
      mount_point (dfvfs.PathSpec): mount point path specification that refers
          to the base location of the file system.
    """
    logger.debug('Starting preprocessing.')

    run_plugins = preprocess_manager.PreprocessPluginsManager.RunPlugins
    try:
      run_plugins(
          self._artifacts_registry, file_system, mount_point,
          self._knowledge_base)
    except IOError as exception:
      # Preprocessing failures are logged; processing continues regardless.
      logger.error('Unable to preprocess with error: {0!s}'.format(exception))

    logger.debug('Preprocessing done.')
Esempio n. 5
0
  def _Preprocess(self, file_system, mount_point):
    """Preprocesses the image by running the preprocessing plugins.

    Args:
      file_system (dfvfs.FileSystem): file system to be preprocessed.
      mount_point (dfvfs.PathSpec): mount point path specification that refers
          to the base location of the file system.
    """
    logger.debug('Starting preprocessing.')

    try:
      preprocess_manager.PreprocessPluginsManager.RunPlugins(
          self._artifacts_registry,
          file_system,
          mount_point,
          self._knowledge_base)
    except IOError as exception:
      # A failing plugin run is not fatal; log it and carry on.
      logger.error('Unable to preprocess with error: {0!s}'.format(exception))

    logger.debug('Preprocessing done.')
Esempio n. 6
0
  def _PreprocessSources(self, extraction_engine):
    """Runs preprocessing against the sources.

    Args:
      extraction_engine (BaseEngine): engine with which to preprocess
          the sources.
    """
    logger.debug('Starting preprocessing.')

    try:
      # Build the artifact definitions registry, then hand it to the engine
      # together with the source path specifications.
      registry = engine.BaseEngine.BuildArtifactsRegistry(
          self._artifact_definitions_path, self._custom_artifacts_path)
      extraction_engine.PreprocessSources(
          registry,
          self._source_path_specs,
          resolver_context=self._resolver_context)
    except IOError as exception:
      logger.error('Unable to preprocess with error: {0!s}'.format(exception))

    logger.debug('Preprocessing done.')
Esempio n. 7
0
    def _PreprocessSources(self, extraction_engine):
        """Runs preprocessing against the sources.

    Args:
      extraction_engine (BaseEngine): engine with which to preprocess
          the sources.
    """
        logger.debug('Starting preprocessing.')

        try:
            extraction_engine.PreprocessSources(
                self._artifacts_registry, self._source_path_specs,
                resolver_context=self._resolver_context)
        except IOError as exception:
            # Preprocessing failures are logged; processing continues.
            logger.error(
                'Unable to preprocess with error: {0!s}'.format(exception))

        logger.debug('Preprocessing done.')
Esempio n. 8
0
  def ExtractEventsFromSources(self):
    """Processes the sources and extracts events.

    Raises:
      BadConfigOption: if the storage file path is invalid or the storage
          format not supported.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
    # Validate the storage file path up front; warns when the file already
    # exists.
    self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

    # Determine the kind of source (file, directory, storage media image).
    scan_context = self.ScanSource(self._source_path)
    self._source_type = scan_context.source_type

    self._status_view.SetMode(self._status_view_mode)
    self._status_view.SetSourceInformation(
        self._source_path, self._source_type, filter_file=self._filter_file)

    status_update_callback = (
        self._status_view.GetExtractionStatusUpdateCallback())

    self._output_writer.Write('\n')
    self._status_view.PrintExtractionStatusHeader(None)
    self._output_writer.Write('Processing started.\n')

    # Create a session capturing the settings this extraction runs with.
    session = engine.BaseEngine.CreateSession(
        command_line_arguments=self._command_line_arguments,
        debug_mode=self._debug_mode,
        filter_file=self._filter_file,
        preferred_encoding=self.preferred_encoding,
        preferred_time_zone=self._preferred_time_zone,
        preferred_year=self._preferred_year)

    storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
        self._storage_format, session, self._storage_file_path)
    if not storage_writer:
      raise errors.BadConfigOption(
          'Unsupported storage format: {0:s}'.format(self._storage_format))

    single_process_mode = self._single_process_mode
    if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
      # No need to multi process a single file source.
      single_process_mode = True

    if single_process_mode:
      extraction_engine = single_process_engine.SingleProcessEngine()
    else:
      extraction_engine = multi_process_engine.TaskMultiProcessEngine(
          use_zeromq=self._use_zeromq)

    # If the source is a directory or a storage media image
    # run pre-processing.
    if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
      self._PreprocessSources(extraction_engine)

    configuration = self._CreateProcessingConfiguration(
        extraction_engine.knowledge_base)

    self._SetExtractionParsersAndPlugins(configuration, session)
    self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

    # Build find specifications from the filter file, expanding environment
    # variables determined during preprocessing.
    filter_find_specs = None
    if configuration.filter_file:
      environment_variables = (
          extraction_engine.knowledge_base.GetEnvironmentVariables())
      filter_file_object = filter_file.FilterFile(configuration.filter_file)
      filter_find_specs = filter_file_object.BuildFindSpecs(
          environment_variables=environment_variables)

    processing_status = None
    if single_process_mode:
      logger.debug('Starting extraction in single process mode.')

      processing_status = extraction_engine.ProcessSources(
          self._source_path_specs, storage_writer, self._resolver_context,
          configuration, filter_find_specs=filter_find_specs,
          status_update_callback=status_update_callback)

    else:
      logger.debug('Starting extraction in multi process mode.')

      # NOTE: the multi-process engine takes the session identifier instead
      # of a resolver context and supports worker tuning options.
      processing_status = extraction_engine.ProcessSources(
          session.identifier, self._source_path_specs, storage_writer,
          configuration, enable_sigsegv_handler=self._enable_sigsegv_handler,
          filter_find_specs=filter_find_specs,
          number_of_worker_processes=self._number_of_extraction_workers,
          status_update_callback=status_update_callback,
          worker_memory_limit=self._worker_memory_limit)

    self._status_view.PrintExtractionSummary(processing_status)
Esempio n. 9
0
    def ExtractEventsFromSources(self):
        """Processes the sources and extracts events.

    Raises:
      BadConfigOption: if the storage file path is invalid or the storage
          format not supported or an invalid collection filter was specified.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
        # Validate the storage file path up front; warns when the file
        # already exists.
        self._CheckStorageFile(self._storage_file_path,
                               warn_about_existing=True)

        scan_context = self.ScanSource(self._source_path)
        self._source_type = scan_context.source_type

        # A single file source that is an archive is re-typed so that its
        # contents can be processed.
        is_archive = False
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            is_archive = self._IsArchiveFile(self._source_path_specs[0])
            if is_archive:
                self._source_type = definitions.SOURCE_TYPE_ARCHIVE

        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(
            self._source_path,
            self._source_type,
            artifact_filters=self._artifact_filters,
            filter_file=self._filter_file)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        self._output_writer.Write('\n')
        self._status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write('Processing started.\n')

        # Create a session capturing the settings this extraction runs with.
        session = engine.BaseEngine.CreateSession(
            artifact_filter_names=self._artifact_filters,
            command_line_arguments=self._command_line_arguments,
            debug_mode=self._debug_mode,
            filter_file_path=self._filter_file,
            preferred_encoding=self.preferred_encoding,
            preferred_time_zone=self._preferred_time_zone,
            preferred_year=self._preferred_year,
            text_prepend=self._text_prepend)

        storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
            self._storage_format, session, self._storage_file_path)
        if not storage_writer:
            raise errors.BadConfigOption(
                'Unsupported storage format: {0:s}'.format(
                    self._storage_format))

        # A single (non-archive) file source does not benefit from multiple
        # worker processes.
        single_process_mode = self._single_process_mode
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            if not self._process_archives or not is_archive:
                single_process_mode = True

        if single_process_mode:
            extraction_engine = single_process_engine.SingleProcessEngine()
        else:
            extraction_engine = multi_process_engine.TaskMultiProcessEngine(
                number_of_worker_processes=self._number_of_extraction_workers,
                worker_memory_limit=self._worker_memory_limit,
                worker_timeout=self._worker_timeout)

        # If the source is a storage media image or device, or directory
        # run pre-processing.
        if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine)

        configuration = self._CreateProcessingConfiguration(
            extraction_engine.knowledge_base)

        # Record the effective parser selection on the session for later
        # inspection of the stored results.
        session.enabled_parser_names = (
            configuration.parser_filter_expression.split(','))
        session.parser_filter_expression = self._parser_filter_expression

        self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

        # TODO: set mount path in knowledge base with
        # extraction_engine.knowledge_base.SetMountPath()
        extraction_engine.knowledge_base.SetTextPrepend(self._text_prepend)

        try:
            extraction_engine.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                extraction_engine.knowledge_base, self._artifact_filters,
                self._filter_file)
        except errors.InvalidFilter as exception:
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception))

        processing_status = None
        if single_process_mode:
            logger.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                session,
                self._source_path_specs,
                storage_writer,
                self._resolver_context,
                configuration,
                status_update_callback=status_update_callback)

        else:
            logger.debug('Starting extraction in multi process mode.')

            # The following overrides are needed because pylint 2.6.0 gets confused
            # about which ProcessSources to check against.
            # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
            processing_status = extraction_engine.ProcessSources(
                session,
                self._source_path_specs,
                storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                status_update_callback=status_update_callback)

        self._status_view.PrintExtractionSummary(processing_status)
Esempio n. 10
0
    def ExtractEventsFromSources(self):
        """Processes the sources and extract events.

    This is a stripped down copy of tools/log2timeline.py that doesn't support
    the full set of flags. The defaults for these are hard coded in the
    constructor of this class.

    Raises:
      BadConfigOption: if the storage file path is invalid or the storage
          format not supported or an invalid collection filter was specified.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
        # Validate the storage file path up front; warns when the file
        # already exists.
        self._CheckStorageFile(self._storage_file_path,
                               warn_about_existing=True)

        scan_context = self.ScanSource(self._source_path)
        source_type = scan_context.source_type

        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(
            self._source_path,
            source_type,
            artifact_filters=self._artifact_filters,
            filter_file=self._filter_file)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        self._output_writer.Write('\n')
        self._status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write('Processing started.\n')

        # Create a session capturing the settings this extraction runs with.
        session = engine.BaseEngine.CreateSession(
            artifact_filter_names=self._artifact_filters,
            command_line_arguments=self._command_line_arguments,
            filter_file_path=self._filter_file,
            preferred_encoding=self.preferred_encoding,
            preferred_time_zone=self._preferred_time_zone,
            preferred_year=self._preferred_year)

        storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
            self._storage_format, session, self._storage_file_path)
        if not storage_writer:
            raise errors.BadConfigOption(
                'Unsupported storage format: {0:s}'.format(
                    self._storage_format))

        single_process_mode = self._single_process_mode
        if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            # No need to multi process a single file source.
            single_process_mode = True

        if single_process_mode:
            extraction_engine = single_process_engine.SingleProcessEngine()
        else:
            extraction_engine = multi_process_engine.TaskMultiProcessEngine()

        # If the source is a directory or a storage media image
        # run pre-processing.
        if source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine)

        configuration = self._CreateProcessingConfiguration(
            extraction_engine.knowledge_base)

        self._SetExtractionParsersAndPlugins(configuration, session)
        self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

        try:
            extraction_engine.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                extraction_engine.knowledge_base, self._artifact_filters,
                self._filter_file)
        except errors.InvalidFilter as exception:
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception))

        processing_status = None
        if single_process_mode:
            logger.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                self._source_path_specs,
                storage_writer,
                self._resolver_context,
                configuration,
                status_update_callback=status_update_callback)

        else:
            logger.debug('Starting extraction in multi process mode.')

            # NOTE: the multi-process engine takes the session identifier
            # instead of a resolver context and supports worker tuning
            # options.
            processing_status = extraction_engine.ProcessSources(
                session.identifier,
                self._source_path_specs,
                storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                number_of_worker_processes=self._number_of_extraction_workers,
                status_update_callback=status_update_callback)

        self._status_view.PrintExtractionSummary(processing_status)
Esempio n. 11
0
    def _PromptUserForEncryptedVolumeCredential(self, scan_context,
                                                locked_scan_node, credentials):
        """Prompts the user to provide a credential for an encrypted volume.

    Args:
      scan_context (dfvfs.SourceScannerContext): source scanner context.
      locked_scan_node (dfvfs.SourceScanNode): locked scan node.
      credentials (dfvfs.Credentials): credentials supported by the locked
          scan node.

    Returns:
      bool: True if the volume was unlocked.
    """
        # Local import keeps the hexadecimal decoding fix self-contained;
        # move to the module-level imports if preferred.
        import codecs

        # TODO: print volume description.
        if locked_scan_node.type_indicator == dfvfs_definitions.TYPE_INDICATOR_BDE:
            self._output_writer.Write('Found a BitLocker encrypted volume.\n')
        else:
            self._output_writer.Write('Found an encrypted volume.\n')

        credentials_list = list(credentials.CREDENTIALS)
        credentials_list.append('skip')

        self._output_writer.Write('Supported credentials:\n')
        self._output_writer.Write('\n')
        for index, name in enumerate(credentials_list):
            self._output_writer.Write('  {0:d}. {1:s}\n'.format(index, name))
        self._output_writer.Write('\nNote that you can abort with Ctrl^C.\n\n')

        result = False
        while not result:
            self._output_writer.Write(
                'Select a credential to unlock the volume: ')
            # TODO: add an input reader.
            input_line = self._input_reader.Read()
            input_line = input_line.strip()

            if input_line in credentials_list:
                credential_type = input_line
            else:
                # Allow selecting a credential by its list index as well.
                try:
                    credential_type = int(input_line, 10)
                    credential_type = credentials_list[credential_type]
                except (IndexError, ValueError):
                    self._output_writer.Write(
                        'Unsupported credential: {0:s}\n'.format(input_line))
                    continue

            if credential_type == 'skip':
                break

            getpass_string = 'Enter credential data: '
            if sys.platform.startswith('win') and sys.version_info[0] < 3:
                # For Python 2 on Windows getpass (win_getpass) requires an encoded
                # byte string. For Python 3 we need it to be a Unicode string.
                getpass_string = self._EncodeString(getpass_string)

            credential_data = getpass.getpass(getpass_string)
            self._output_writer.Write('\n')

            if credential_type in self._BINARY_DATA_CREDENTIAL_TYPES:
                try:
                    # str.decode('hex') does not exist on Python 3; use the
                    # codecs module to interpret the input as hexadecimal.
                    # Malformed input raises binascii.Error, a ValueError
                    # subclass, hence the widened except clause.
                    credential_data = codecs.decode(credential_data, 'hex')
                except (TypeError, ValueError):
                    self._output_writer.Write('Unsupported credential data.\n')
                    continue

            try:
                result = self._source_scanner.Unlock(
                    scan_context, locked_scan_node.path_spec, credential_type,
                    credential_data)

            except IOError as exception:
                logger.debug(
                    'Unable to unlock volume with error: {0!s}'.format(
                        exception))
                result = False

            if not result:
                self._output_writer.Write('Unable to unlock volume.\n')
                self._output_writer.Write('\n')

        self._output_writer.Write('\n')

        if result:
            self._AddCredentialConfiguration(locked_scan_node.path_spec,
                                             credential_type, credential_data)

        return result
Esempio n. 12
0
    def _CreateProcessingConfiguration(self, knowledge_base):
        """Creates a processing configuration.

    Args:
      knowledge_base (KnowledgeBase): contains information from the source
          data needed for parsing.

    Returns:
      ProcessingConfiguration: processing configuration.

    Raises:
      BadConfigOption: if presets in the parser filter expression could not
          be expanded or if an invalid parser or plugin name is specified.
    """
        # Without an explicit parser filter expression, derive one from the
        # presets matching the detected operating system. This fallback is
        # skipped in single process mode.
        parser_filter_expression = self._parser_filter_expression
        if not parser_filter_expression and not self._single_process_mode:
            operating_system_family = knowledge_base.GetValue(
                'operating_system')
            operating_system_product = knowledge_base.GetValue(
                'operating_system_product')
            operating_system_version = knowledge_base.GetValue(
                'operating_system_version')

            operating_system_artifact = artifacts.OperatingSystemArtifact(
                family=operating_system_family,
                product=operating_system_product,
                version=operating_system_version)

            preset_definitions = self._presets_manager.GetPresetsByOperatingSystem(
                operating_system_artifact)
            if preset_definitions:
                self._parser_filter_expression = ','.join([
                    preset_definition.name
                    for preset_definition in preset_definitions
                ])

                logger.debug(
                    'Parser filter expression set to preset: {0:s}'.format(
                        self._parser_filter_expression))

        parser_filter_helper = parser_filter.ParserFilterExpressionHelper()

        # Expand preset names in the expression into concrete parser and
        # plugin names.
        try:
            parser_filter_expression = parser_filter_helper.ExpandPresets(
                self._presets_manager, self._parser_filter_expression)
            logger.debug('Parser filter expression set to: {0:s}'.format(
                parser_filter_expression or 'N/A'))
        except RuntimeError as exception:
            raise errors.BadConfigOption(
                ('Unable to expand presets in parser filter expression with '
                 'error: {0!s}').format(exception))

        parser_elements, invalid_parser_elements = (
            parsers_manager.ParsersManager.CheckFilterExpression(
                parser_filter_expression))

        if invalid_parser_elements:
            invalid_parser_names_string = ','.join(invalid_parser_elements)
            raise errors.BadConfigOption(
                'Unknown parser or plugin names in element(s): "{0:s}" of '
                'parser filter expression: {1:s}'.format(
                    invalid_parser_names_string, parser_filter_expression))

        # An empty expression means all (valid) parsers are enabled.
        if not parser_filter_expression:
            parser_filter_expression = ','.join(sorted(parser_elements))

        self._expanded_parser_filter_expression = parser_filter_expression

        # TODO: pass preferred_encoding.
        configuration = configurations.ProcessingConfiguration()
        configuration.artifact_filters = self._artifact_filters
        configuration.credentials = self._credential_configurations
        configuration.debug_output = self._debug_mode
        configuration.extraction.hasher_file_size_limit = (
            self._hasher_file_size_limit)
        configuration.extraction.hasher_names_string = self._hasher_names_string
        configuration.extraction.process_archives = self._process_archives
        configuration.extraction.process_compressed_streams = (
            self._process_compressed_streams)
        configuration.extraction.yara_rules_string = self._yara_rules_string
        configuration.filter_file = self._filter_file
        configuration.log_filename = self._log_file
        configuration.parser_filter_expression = (
            self._expanded_parser_filter_expression)
        configuration.preferred_year = self._preferred_year
        configuration.profiling.directory = self._profiling_directory
        configuration.profiling.sample_rate = self._profiling_sample_rate
        configuration.profiling.profilers = self._profilers
        configuration.task_storage_format = self._task_storage_format
        configuration.temporary_directory = self._temporary_directory

        return configuration
Esempio n. 13
0
  def ExtractEventsFromSources(self):
    """Processes the sources and extract events.

    This is a stripped down copy of tools/log2timeline.py that doesn't support
    the full set of flags. The defaults for these are hard coded in the
    constructor of this class.

    Raises:
      BadConfigOption: if the storage format is not supported.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
    # Validate the storage file path up front; warns when the file already
    # exists.
    self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

    scan_context = self.ScanSource(self._source_path)
    source_type = scan_context.source_type

    self._status_view.SetMode(self._status_view_mode)
    self._status_view.SetSourceInformation(
        self._source_path, source_type,
        artifact_filters=self._artifact_filters,
        filter_file=self._filter_file)

    status_update_callback = (
        self._status_view.GetExtractionStatusUpdateCallback())

    self._output_writer.Write('\n')
    self._status_view.PrintExtractionStatusHeader(None)
    self._output_writer.Write('Processing started.\n')

    # Create a session capturing the settings this extraction runs with.
    session = engine.BaseEngine.CreateSession(
        artifact_filter_names=self._artifact_filters,
        command_line_arguments=self._command_line_arguments,
        filter_file_path=self._filter_file,
        preferred_encoding=self.preferred_encoding,
        preferred_time_zone=self._preferred_time_zone,
        preferred_year=self._preferred_year)

    storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
        self._storage_format, session, self._storage_file_path)
    if not storage_writer:
      raise errors.BadConfigOption(
          'Unsupported storage format: {0:s}'.format(self._storage_format))

    single_process_mode = self._single_process_mode
    if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
      # No need to multi process a single file source.
      single_process_mode = True

    if single_process_mode:
      extraction_engine = single_process_engine.SingleProcessEngine()
    else:
      extraction_engine = multi_process_engine.TaskMultiProcessEngine(
          use_zeromq=self._use_zeromq)

    # If the source is a directory or a storage media image
    # run pre-processing.
    if source_type in self._SOURCE_TYPES_TO_PREPROCESS:
      self._PreprocessSources(extraction_engine)

    configuration = self._CreateProcessingConfiguration(
        extraction_engine.knowledge_base)

    self._SetExtractionParsersAndPlugins(configuration, session)
    self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

    # Build find specifications from the artifact filters and/or filter
    # file, using knowledge base values determined during preprocessing.
    filter_find_specs = engine.BaseEngine.BuildFilterFindSpecs(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)

    processing_status = None
    if single_process_mode:
      logger.debug('Starting extraction in single process mode.')

      processing_status = extraction_engine.ProcessSources(
          self._source_path_specs, storage_writer, self._resolver_context,
          configuration, filter_find_specs=filter_find_specs,
          status_update_callback=status_update_callback)

    else:
      logger.debug('Starting extraction in multi process mode.')

      # NOTE: the multi-process engine takes the session identifier instead
      # of a resolver context and supports worker tuning options.
      processing_status = extraction_engine.ProcessSources(
          session.identifier, self._source_path_specs, storage_writer,
          configuration,
          enable_sigsegv_handler=self._enable_sigsegv_handler,
          filter_find_specs=filter_find_specs,
          number_of_worker_processes=self._number_of_extraction_workers,
          status_update_callback=status_update_callback)

    self._status_view.PrintExtractionSummary(processing_status)
Esempio n. 14
0
    def _ProcessSources(self, session, storage_writer):
        """Processes the sources and extracts events.

        Args:
          session (Session): session in which the sources are processed.
          storage_writer (StorageWriter): storage writer for a session storage.

        Returns:
          ProcessingStatus: processing status.

        Raises:
          BadConfigOption: if an invalid collection filter was specified.
        """
        source_is_archive = False
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            source_is_archive = self._IsArchiveFile(self._source_path_specs[0])
            if source_is_archive:
                self._source_type = definitions.SOURCE_TYPE_ARCHIVE

        use_single_process = self._single_process_mode
        if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and
                (not self._process_archives or not source_is_archive)):
            use_single_process = True

        if use_single_process:
            extraction_engine = single_extraction_engine.SingleProcessEngine()
        else:
            extraction_engine = (
                multi_extraction_engine.ExtractionMultiProcessEngine(
                    number_of_worker_processes=(
                        self._number_of_extraction_workers),
                    worker_memory_limit=self._worker_memory_limit,
                    worker_timeout=self._worker_timeout))

        # Pre-processing is only relevant for directory and storage media
        # image sources.
        if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine, session, storage_writer)

        configuration = self._CreateProcessingConfiguration(
            extraction_engine.knowledge_base)

        session.enabled_parser_names = (
            configuration.parser_filter_expression.split(','))
        session.parser_filter_expression = self._parser_filter_expression

        self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

        # TODO: set mount path in knowledge base with
        # extraction_engine.knowledge_base.SetMountPath()
        extraction_engine.knowledge_base.SetTextPrepend(self._text_prepend)

        try:
            extraction_engine.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                extraction_engine.knowledge_base, self._artifact_filters,
                self._filter_file)
        except errors.InvalidFilter as exception:
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception))

        # TODO: decouple session and storage writer?
        session.source_configurations = (
            extraction_engine.knowledge_base.GetSourceConfigurationArtifacts())

        storage_writer.WriteSessionConfiguration(session)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        if use_single_process:
            # A single non-archive file processed with exactly one enabled
            # parser can be forced onto that parser.
            number_of_parsers = len(
                configuration.parser_filter_expression.split(','))
            force_parser = bool(
                self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and
                not source_is_archive and number_of_parsers == 1)

            logger.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                session, self._source_path_specs, storage_writer,
                self._resolver_context, configuration,
                force_parser=force_parser,
                status_update_callback=status_update_callback)

        else:
            logger.debug('Starting extraction in multi process mode.')

            # pylint 2.6.0 cannot determine which ProcessSources overload is
            # being called here, hence the overrides below.
            # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
            processing_status = extraction_engine.ProcessSources(
                session, self._source_path_specs, storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                status_update_callback=status_update_callback,
                storage_file_path=self._storage_file_path)

        return processing_status
Esempio n. 15
0
    def _ProcessSources(self, session, storage_writer):
        """Processes the sources and extracts events.

        Args:
          session (Session): session in which the sources are processed.
          storage_writer (StorageWriter): storage writer for a session storage.

        Returns:
          ProcessingStatus: processing status.

        Raises:
          BadConfigOption: if an invalid collection filter was specified.
        """
        source_is_archive = False
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            source_is_archive = self._IsArchiveFile(self._source_path_specs[0])
            if source_is_archive:
                self._source_type = definitions.SOURCE_TYPE_ARCHIVE

        use_single_process = self._single_process_mode
        if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and
                (not self._process_archives or not source_is_archive)):
            use_single_process = True

        if use_single_process:
            extraction_engine = single_extraction_engine.SingleProcessEngine()
        else:
            extraction_engine = (
                multi_extraction_engine.ExtractionMultiProcessEngine(
                    number_of_worker_processes=(
                        self._number_of_extraction_workers),
                    worker_memory_limit=self._worker_memory_limit,
                    worker_timeout=self._worker_timeout))

        # Pre-processing is only relevant for directory and storage media
        # image sources.
        if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine, session, storage_writer)

        self._expanded_parser_filter_expression = (
            self._GetExpandedParserFilterExpression(
                extraction_engine.knowledge_base))

        parser_names = self._expanded_parser_filter_expression.split(',')

        # Determine whether a single parser should be forced and whether
        # Windows EventLog resource extraction remains sensible for the
        # enabled parser set. The branch order below is significant.
        force_parser = False
        if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and
                not source_is_archive and len(parser_names) == 1):
            force_parser = True
            self._extract_winevt_resources = False

        elif ('winevt' not in parser_names and
              'winevtx' not in parser_names):
            self._extract_winevt_resources = False

        elif self._extract_winevt_resources and 'pe' not in parser_names:
            logger.warning(
                'A Windows EventLog parser is enabled in combination with '
                'extraction of Windows EventLog resources, but the Portable '
                'Executable (PE) parser is disabled. Therefore Windows EventLog '
                'resources cannot be extracted.')
            self._extract_winevt_resources = False

        configuration = self._CreateExtractionProcessingConfiguration()

        try:
            extraction_engine.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                extraction_engine.knowledge_base, self._artifact_filters,
                self._filter_file)
        except errors.InvalidFilter as exception:
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception))

        session_configuration = self._CreateExtractionSessionConfiguration(
            session, parser_names)

        storage_writer.AddAttributeContainer(session_configuration)

        source_configurations = [
            artifacts.SourceConfigurationArtifact(path_spec=path_spec)
            for path_spec in self._source_path_specs]

        # TODO: improve to detect more than 1 system configurations.
        # TODO: improve to add volumes to system configuration.
        system_configuration = (
            extraction_engine.knowledge_base.GetSystemConfigurationArtifact())
        storage_writer.AddAttributeContainer(system_configuration)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        if use_single_process:
            logger.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                source_configurations, storage_writer, self._resolver_context,
                configuration, force_parser=force_parser,
                status_update_callback=status_update_callback)

        else:
            logger.debug('Starting extraction in multi process mode.')

            # pylint 2.6.0 cannot determine which ProcessSources overload is
            # being called here, hence the overrides below.
            # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
            processing_status = extraction_engine.ProcessSources(
                source_configurations, storage_writer, session.identifier,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                status_update_callback=status_update_callback,
                storage_file_path=self._storage_file_path)

        return processing_status