Example #1
0
    def ParseOptions(cls, options, configuration_object):
        """Parses and validates options.

    Args:
      options (argparse.Namespace): parser options.
      configuration_object (CLITool): object to be configured by the argument
          helper.

    Raises:
      BadConfigObject: when the configuration object is of the wrong type.
      BadConfigOption: when the location of the data files cannot be determined.
    """
        if not isinstance(configuration_object, tools.CLITool):
            raise errors.BadConfigObject(
                'Configuration object is not an instance of CLITool')

        data_location = cls._ParseStringOption(options, 'data_location')
        if not data_location:
            # Determine the source root path, which is 3 directories up from
            # the directory containing the script.
            source_root = cls._PATH
            for _ in range(4):
                source_root = os.path.dirname(source_root)

            # There are multiple options to run a tool e.g. running from
            # source or from an egg file, hence multiple candidates.
            candidate_paths = (
                os.path.join(source_root, 'share', 'plaso'),
                os.path.join(source_root, 'data'))

            data_location = None
            for candidate_path in candidate_paths:
                readme_path = os.path.join(
                    candidate_path, 'plaso-data.README')
                if os.path.exists(candidate_path) and os.path.isfile(
                        readme_path):
                    data_location = candidate_path
                    break

            if not data_location or not os.path.exists(data_location):
                data_location = os.path.join(sys.prefix, 'share', 'plaso')
            if not os.path.exists(data_location):
                data_location = os.path.join(sys.prefix, 'local', 'share',
                                             'plaso')

            # Fall back to the system-wide prefixes when the tool was not
            # installed under /usr.
            if sys.prefix != '/usr':
                if not os.path.exists(data_location):
                    data_location = os.path.join('/usr', 'share', 'plaso')
                if not os.path.exists(data_location):
                    data_location = os.path.join('/usr', 'local', 'share',
                                                 'plaso')

            # The marker file must be present for the location to be valid.
            if not os.path.exists(data_location) or not os.path.isfile(
                    os.path.join(data_location, 'plaso-data.README')):
                data_location = None

        if not data_location:
            raise errors.BadConfigOption(
                'Unable to determine location of data files.')

        logger.info('Determined data location: {0:s}'.format(data_location))

        setattr(configuration_object, '_data_location', data_location)
Example #2
0
    def ParseOptions(self, options):
        """Parses the options and initializes the front-end.

    Args:
      options: the command line arguments (instance of argparse.Namespace).

    Raises:
      BadConfigOption: if the options are invalid.
    """
        # The data location is required to list signatures.
        self._ParseDataLocationOption(options)

        # Check the list options first otherwise required options will raise.
        signature_identifiers = getattr(options, u'signature_identifiers',
                                        None)
        if signature_identifiers == u'list':
            self.list_signature_identifiers = True

        if self.list_signature_identifiers:
            return

        super(ImageExportTool, self).ParseOptions(options)

        format_string = u'%(asctime)s [%(levelname)s] %(message)s'

        if self._debug_mode:
            log_level = logging.DEBUG
        else:
            log_level = logging.INFO

        log_file = getattr(options, u'log_file', None)
        self._ConfigureLogging(filename=log_file,
                               format_string=format_string,
                               log_level=log_level)

        self._destination_path = getattr(options, u'path', u'export')

        self._ParseFilterOptions(options)

        # Either skipping VSS or explicitly including duplicates disables
        # duplicate removal.
        if (getattr(options, u'no_vss', False)
                or getattr(options, u'include_duplicates', False)):
            self._remove_duplicates = False

        date_filters = getattr(options, u'date_filters', None)
        try:
            self._front_end.ParseDateFilters(date_filters)
        except ValueError as exception:
            raise errors.BadConfigOption(
                u'Unable to parse date filters with error: {0!s}'.format(
                    exception))

        extensions_string = getattr(options, u'extensions_string', None)
        self._front_end.ParseExtensionsString(extensions_string)

        names_string = getattr(options, u'names_string', None)
        self._front_end.ParseNamesString(names_string)

        if not self._data_location:
            logging.warning(
                u'Unable to automatically determine data location.')

        signature_identifiers = getattr(options, u'signature_identifiers',
                                        None)
        try:
            self._front_end.ParseSignatureIdentifiers(self._data_location,
                                                      signature_identifiers)
        except (IOError, ValueError) as exception:
            raise errors.BadConfigOption(
                u'Unable to parse signature identifiers with error: '
                u'{0!s}'.format(exception))

        # An explicit filter file implies filtering; otherwise defer to the
        # front-end's own filter state.
        if self._filter_file:
            self.has_filters = True
        else:
            self.has_filters = self._front_end.HasFilters()
Example #3
0
    def ParseOptions(self, options):
        """Parses the options.

    Args:
      options: the command line arguments (instance of argparse.Namespace).

    Raises:
      BadConfigOption: if the options are invalid.
    """
        # Check the list options first otherwise required options will raise.
        self._ParseExtractionOptions(options)
        self._front_end.SetUseOldPreprocess(self._old_preprocess)
        self._ParseTimezoneOption(options)

        self.show_info = getattr(options, u'show_info', False)

        if getattr(options, u'use_markdown', False):
            self._views_format_type = cli_views.ViewsFactory.FORMAT_TYPE_MARKDOWN

        # Bail out early when only a listing or info display was requested,
        # before the required options below are validated.
        if (self.list_hashers or self.list_parsers_and_plugins
                or self.list_timezones or self.show_info):
            return

        super(Log2TimelineTool, self).ParseOptions(options)
        self._ParseOutputOptions(options)
        self._ParseProcessingOptions(options)

        format_string = (
            u'%(asctime)s [%(levelname)s] (%(processName)-10s) PID:%(process)d '
            u'<%(module)s> %(message)s')

        # Debug mode is the most verbose, quiet mode the least.
        if self._debug_mode:
            logging_level = logging.DEBUG
        elif self._quiet_mode:
            logging_level = logging.WARNING
        else:
            logging_level = logging.INFO

        self.ParseLogFileOptions(options)
        self._ConfigureLogging(filename=self._log_file,
                               format_string=format_string,
                               log_level=logging_level)

        if self._debug_mode:
            # Install an additional filter on the root logger in debug mode.
            logging_filter = log2timeline.LoggingFilter()
            root_logger = logging.getLogger()
            root_logger.addFilter(logging_filter)

        self._output = self.ParseStringOption(options, u'output')
        if not self._output:
            raise errors.BadConfigOption(u'No output defined.')

        # TODO: where is this defined?
        self._operating_system = getattr(options, u'os', None)

        # The mount path is only read when an operating system was specified.
        if self._operating_system:
            self._mount_path = getattr(options, u'filename', None)

        self._filter_expression = self.ParseStringOption(options, u'filter')
        if self._filter_expression:
            # TODO: refactor self._filter_object out the tool into the frontend.
            self._filter_object = self._GetMatcher(self._filter_expression)
            if not self._filter_object:
                raise errors.BadConfigOption(
                    u'Invalid filter expression: {0:s}'.format(
                        self._filter_expression))

        self._status_view_mode = getattr(options, u'status_view_mode',
                                         u'linear')
        self._enable_sigsegv_handler = getattr(options, u'sigsegv_handler',
                                               False)
Example #4
0
    def ParseOptions(self, options):
        """Parses tool specific options.

    Args:
      options (argparse.Namespace): command line arguments.

    Raises:
      BadConfigOption: if the options are invalid.
    """
        # The extraction options are dependent on the data location.
        helpers_manager.ArgumentHelperManager.ParseOptions(
            options, self, names=['data_location'])

        self._ReadParserPresetsFromFile()
        self._ReadEventFormatters()

        # The output modules options are dependent on the preferred_language
        # and output_time_zone options.
        self._ParseOutputTimeZoneOption(options)

        argument_helper_names = [
            'artifact_definitions', 'hashers', 'language', 'parsers'
        ]
        helpers_manager.ArgumentHelperManager.ParseOptions(
            options, self, names=argument_helper_names)

        self._ParseTimeZoneOption(options)

        # The sentinel value 'list' requests a listing instead of a run.
        self.list_hashers = self._hasher_names_string == 'list'
        self.list_language_identifiers = self._preferred_language == 'list'
        self.list_parsers_and_plugins = self._parser_filter_expression == 'list'

        self.show_troubleshooting = getattr(options, 'show_troubleshooting',
                                            False)

        self.dependencies_check = getattr(options, 'dependencies_check', True)

        # Check the list options first otherwise required options will raise.
        if (self.list_hashers or self.list_language_identifiers
                or self.list_parsers_and_plugins or self.list_time_zones
                or self.show_troubleshooting):
            return

        # Check output modules after the other listable options, otherwise
        # it could raise with "requires an output file".
        helpers_manager.ArgumentHelperManager.ParseOptions(
            options, self, names=['output_modules'])

        self.list_output_modules = self._output_format == 'list'
        if self.list_output_modules:
            return

        self._ParseInformationalOptions(options)

        argument_helper_names = ['extraction', 'status_view']
        helpers_manager.ArgumentHelperManager.ParseOptions(
            options, self, names=argument_helper_names)

        self._ParseLogFileOptions(options)

        self._ParseStorageMediaOptions(options)

        self._ParsePerformanceOptions(options)
        self._ParseProcessingOptions(options)

        # Generate a storage file name when none was provided.
        self._storage_file_path = getattr(options, 'storage_file', None)
        if not self._storage_file_path:
            self._storage_file_path = self._GenerateStorageFileName()

        self._output_filename = getattr(options, 'write', None)

        if not self._output_filename:
            raise errors.BadConfigOption(
                ('Output format: {0:s} requires an output file '
                 '(-w OUTPUT_FILE)').format(self._output_format))

        # Refuse to overwrite an existing output file.
        if os.path.exists(self._output_filename):
            raise errors.BadConfigOption(
                'Output file already exists: {0:s}.'.format(
                    self._output_filename))

        self._EnforceProcessMemoryLimit(self._process_memory_limit)

        self._output_module = self._CreateOutputModule(options)
Example #5
0
    def ParseOptions(cls, options, output_module):
        """Parses and validates options.

    Args:
      options (argparse.Namespace): parser options.
      output_module (OutputModule): output module to configure.

    Raises:
      BadConfigObject: when the output module object is of the wrong type.
      BadConfigOption: when a configuration parameter fails validation.
    """
        if not isinstance(output_module,
                          shared_elastic.SharedElasticsearchOutputModule):
            raise errors.BadConfigObject(
                'Output module is not an instance of ElasticsearchOutputModule'
            )

        index_name = cls._ParseStringOption(
            options, 'index_name', default_value=cls._DEFAULT_INDEX_NAME)
        flush_interval = cls._ParseNumericOption(
            options,
            'flush_interval',
            default_value=cls._DEFAULT_FLUSH_INTERVAL)

        # Additional fields are appended to the default comma separated list.
        fields = ','.join(cls._DEFAULT_FIELDS)
        additional_fields = cls._ParseStringOption(options,
                                                   'additional_fields')

        if additional_fields:
            fields = ','.join([fields, additional_fields])

        mappings_file_path = cls._ParseStringOption(options,
                                                    'elastic_mappings')
        elastic_user = cls._ParseStringOption(options, 'elastic_user')
        elastic_password = cls._ParseStringOption(options, 'elastic_password')
        use_ssl = getattr(options, 'use_ssl', False)

        ca_certificates_path = cls._ParseStringOption(
            options, 'ca_certificates_file_path')
        elastic_url_prefix = cls._ParseStringOption(options,
                                                    'elastic_url_prefix')

        # Fall back to the environment variable when no password was given
        # on the command line.
        if elastic_password is None:
            elastic_password = os.getenv('PLASO_ELASTIC_PASSWORD', None)

        if elastic_password is not None:
            logger.warning(
                'Note that specifying your Elasticsearch password via '
                '--elastic_password or the environment PLASO_ELASTIC_PASSWORD can '
                'expose the password to other users on the system.')

        # Prompt interactively when a user name was provided without a
        # password, so the password does not appear in process arguments.
        # NOTE(review): this section was garbled in the source ('******'
        # residue); reconstructed so index_name, flush_interval and fields
        # are actually applied — confirm against upstream.
        if elastic_user is not None and elastic_password is None:
            elastic_password = getpass.getpass(
                'Enter your Elasticsearch password: ')

        output_module.SetIndexName(index_name)
        output_module.SetFlushInterval(flush_interval)
        output_module.SetFields([
            field_name.strip() for field_name in fields.split(',')])

        output_module.SetUsername(elastic_user)
        output_module.SetPassword(elastic_password)
        output_module.SetUseSSL(use_ssl)
        output_module.SetCACertificatesPath(ca_certificates_path)
        output_module.SetURLPrefix(elastic_url_prefix)

        # Resolve the mappings file: explicit option, the module's own
        # mappings path, or the data location as a last resort.
        if not mappings_file_path or not os.path.isfile(mappings_file_path):
            mappings_filename = output_module.MAPPINGS_FILENAME

            mappings_path = getattr(output_module, 'MAPPINGS_PATH', None)
            if mappings_path:
                mappings_file_path = os.path.join(mappings_path,
                                                  mappings_filename)
            else:
                data_location = getattr(options, '_data_location',
                                        None) or 'data'
                mappings_file_path = os.path.join(data_location,
                                                  mappings_filename)

        if not mappings_file_path or not os.path.isfile(mappings_file_path):
            raise errors.BadConfigOption(
                'No such Elasticsearch mappings file: {0!s}.'.format(
                    mappings_file_path))

        with open(mappings_file_path, 'r') as file_object:
            mappings_json = json.load(file_object)

        output_module.SetMappings(mappings_json)
Example #6
0
  def ParseOptions(self, options):
    """Parses the options.

    Args:
      options (argparse.Namespace): command line arguments.

    Raises:
      BadConfigOption: if the options are invalid.
    """
    # The extraction options are dependent on the data location.
    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, self, names=['data_location'])

    self._ReadParserPresetsFromFile()

    # Check the list options first otherwise required options will raise.
    argument_helper_names = ['hashers', 'parsers', 'profiling']
    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, self, names=argument_helper_names)

    self._ParseExtractionOptions(options)

    # The sentinel value 'list' requests a listing instead of a run.
    self.list_hashers = self._hasher_names_string == 'list'
    self.list_parsers_and_plugins = self._parser_filter_expression == 'list'
    self.list_profilers = self._profilers == 'list'

    self.show_info = getattr(options, 'show_info', False)
    self.show_troubleshooting = getattr(options, 'show_troubleshooting', False)

    if getattr(options, 'use_markdown', False):
      self._views_format_type = views.ViewsFactory.FORMAT_TYPE_MARKDOWN

    self.dependencies_check = getattr(options, 'dependencies_check', True)

    # Bail out early when only a listing or info display was requested,
    # before the required options below are validated.
    if (self.list_hashers or self.list_language_tags or
        self.list_parsers_and_plugins or self.list_profilers or
        self.list_time_zones or self.show_info or self.show_troubleshooting):
      return

    self._ParseInformationalOptions(options)

    argument_helper_names = [
        'artifact_definitions', 'artifact_filters', 'extraction',
        'filter_file', 'status_view', 'storage_format', 'text_prepend',
        'yara_rules']
    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, self, names=argument_helper_names)

    self._ParseLogFileOptions(options)

    self._ParseStorageMediaOptions(options)

    self._ParsePerformanceOptions(options)
    self._ParseProcessingOptions(options)

    # Generate a storage file name when none was provided.
    self._storage_file_path = self.ParseStringOption(options, 'storage_file')
    if not self._storage_file_path:
      self._storage_file_path = self._GenerateStorageFileName()

    if not self._storage_file_path:
      raise errors.BadConfigOption('Missing storage file option.')

    # Only serializer formats known to the definitions module are accepted.
    serializer_format = getattr(
        options, 'serializer_format', definitions.SERIALIZER_FORMAT_JSON)
    if serializer_format not in definitions.SERIALIZER_FORMATS:
      raise errors.BadConfigOption(
          'Unsupported storage serializer format: {0:s}.'.format(
              serializer_format))
    self._storage_serializer_format = serializer_format

    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, self, names=['status_view'])

    self._enable_sigsegv_handler = getattr(options, 'sigsegv_handler', False)

    self._EnforceProcessMemoryLimit(self._process_memory_limit)
Example #7
0
    def AnalyzeEvents(self):
        """Analyzes events from a plaso storage file and generate a report.

    Raises:
      BadConfigOption: when a configuration parameter fails validation or the
          storage file cannot be opened with read access.
      RuntimeError: if a non-recoverable situation is encountered.
    """
        session = engine.BaseEngine.CreateSession(
            command_line_arguments=self._command_line_arguments,
            preferred_encoding=self.preferred_encoding)

        storage_reader = storage_factory.StorageFactory.CreateStorageReaderForFile(
            self._storage_file_path)
        if not storage_reader:
            raise errors.BadConfigOption(
                'Format of storage file: {0:s} not supported'.format(
                    self._storage_file_path))

        # Count the pre-existing analysis reports; the count is passed to
        # the report details printer at the end of this method.
        self._number_of_analysis_reports = (
            storage_reader.GetNumberOfAnalysisReports())
        storage_reader.Close()

        configuration = self._CreateProcessingConfiguration(
            self._knowledge_base)

        counter = collections.Counter()
        # The 'null' output format skips event export entirely.
        if self._output_format != 'null':
            self._status_view.SetMode(self._status_view_mode)
            self._status_view.SetStorageFileInformation(
                self._storage_file_path)

            status_update_callback = (
                self._status_view.GetAnalysisStatusUpdateCallback())

            # A fresh reader is needed since the previous one was closed.
            storage_reader = (
                storage_factory.StorageFactory.CreateStorageReaderForFile(
                    self._storage_file_path))

            # TODO: add single processing support.
            analysis_engine = psort.PsortMultiProcessEngine(
                worker_memory_limit=self._worker_memory_limit,
                worker_timeout=self._worker_timeout)

            analysis_engine.ExportEvents(
                self._knowledge_base,
                storage_reader,
                self._output_module,
                configuration,
                deduplicate_events=self._deduplicate_events,
                status_update_callback=status_update_callback,
                time_slice=self._time_slice,
                use_time_slicer=self._use_time_slicer)

        # Copy the session's analysis report counters into the local counter.
        for item, value in session.analysis_reports_counter.items():
            counter[item] = value

        if self._quiet_mode:
            return

        self._output_writer.Write('Processing completed.\n')

        table_view = views.ViewsFactory.GetTableView(self._views_format_type,
                                                     title='Counter')
        for element, count in counter.most_common():
            # Render empty counter keys as 'N/A' in the table.
            if not element:
                element = 'N/A'
            table_view.AddRow([element, count])
        table_view.Write(self._output_writer)

        storage_reader = storage_factory.StorageFactory.CreateStorageReaderForFile(
            self._storage_file_path)
        self._PrintAnalysisReportsDetails(storage_reader,
                                          self._number_of_analysis_reports)

        self._output_writer.Write('Storage file is {0:s}\n'.format(
            self._storage_file_path))
Example #8
0
    def ExtractEventsFromSources(self):
        """Processes the sources and extracts events.

    Raises:
      BadConfigOption: if the storage file path is invalid, or the storage
          format not supported, or there was a failure to writing to the
          storage.
      IOError: if the extraction engine could not write to the storage.
      OSError: if the extraction engine could not write to the storage.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
        self._CheckStorageFile(self._storage_file_path,
                               warn_about_existing=True)

        self.ScanSource(self._source_path)

        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(
            self._source_path,
            self._source_type,
            artifact_filters=self._artifact_filters,
            filter_file=self._filter_file)

        self._output_writer.Write('\n')
        self._status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write('Processing started.\n')

        session = engine.BaseEngine.CreateSession(
            artifact_filter_names=self._artifact_filters,
            command_line_arguments=self._command_line_arguments,
            debug_mode=self._debug_mode,
            filter_file_path=self._filter_file,
            preferred_encoding=self.preferred_encoding,
            preferred_time_zone=self._preferred_time_zone,
            preferred_year=self._preferred_year,
            text_prepend=self._text_prepend)

        storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
            self._storage_format)
        if not storage_writer:
            raise errors.BadConfigOption(
                'Unsupported storage format: {0:s}'.format(
                    self._storage_format))

        # Re-raise open failures with the original error message attached.
        try:
            storage_writer.Open(path=self._storage_file_path)
        except IOError as exception:
            raise IOError(
                'Unable to open storage with error: {0!s}'.format(exception))

        processing_status = None

        try:
            storage_writer.WriteSessionStart(session)

            try:
                processing_status = self._ProcessSources(
                    session, storage_writer)

            finally:
                # If processing never produced a status, record the session
                # as aborted; the completion record is written either way.
                session.aborted = getattr(processing_status, 'aborted', True)
                storage_writer.WriteSessionCompletion(session)

        except IOError as exception:
            raise IOError(
                'Unable to write to storage with error: {0!s}'.format(
                    exception))

        finally:
            # Always close the writer, even when processing failed.
            storage_writer.Close()

        self._status_view.PrintExtractionSummary(processing_status)
Example #9
0
  def ParseOptions(cls, options, configuration_object):
    """Parses and validates options.

    Args:
      options (argparse.Namespace): parser options.
      configuration_object (CLITool): object to be configured by the argument
          helper.

    Raises:
      BadConfigObject: when the configuration object is of the wrong type.
      BadConfigOption: if the required artifact definitions are not defined.
    """
    if not isinstance(configuration_object, tools.CLITool):
      raise errors.BadConfigObject(
          'Configuration object is not an instance of CLITool')

    artifacts_path = getattr(options, 'artifact_definitions_path', None)

    if ((not artifacts_path or not os.path.exists(artifacts_path)) and
        configuration_object.data_location):
      # Probe well-known locations in order: next to the data location,
      # inside an active virtual environment, then the installation
      # prefixes.
      base_path = os.path.dirname(configuration_object.data_location)
      search_paths = [os.path.join(base_path, 'artifacts')]

      if 'VIRTUAL_ENV' in os.environ:
        search_paths.append(os.path.join(
            os.environ['VIRTUAL_ENV'], 'share', 'artifacts'))

      search_paths.append(os.path.join(sys.prefix, 'share', 'artifacts'))
      search_paths.append(os.path.join(
          sys.prefix, 'local', 'share', 'artifacts'))

      if sys.prefix != '/usr':
        search_paths.append(os.path.join('/usr', 'share', 'artifacts'))
        search_paths.append(os.path.join(
            '/usr', 'local', 'share', 'artifacts'))

      artifacts_path = None
      for search_path in search_paths:
        if os.path.exists(search_path):
          artifacts_path = search_path
          break

    if not artifacts_path or not os.path.exists(artifacts_path):
      raise errors.BadConfigOption(
          'Unable to determine path to artifact definitions.')

    custom_artifacts_path = getattr(
        options, 'custom_artifact_definitions_path', None)

    if custom_artifacts_path and not os.path.isfile(custom_artifacts_path):
      raise errors.BadConfigOption(
          'No such artifacts filter file: {0:s}.'.format(custom_artifacts_path))

    if custom_artifacts_path:
      logger.info(
          'Custom artifact filter file: {0:s}'.format(custom_artifacts_path))

    registry = artifacts_registry.ArtifactDefinitionsRegistry()
    reader = artifacts_reader.YamlArtifactsReader()

    logger.info(
        'Determined artifact definitions path: {0:s}'.format(artifacts_path))

    try:
      registry.ReadFromDirectory(reader, artifacts_path)
    except (KeyError, artifacts_errors.FormatError) as exception:
      raise errors.BadConfigOption((
          'Unable to read artifact definitions from: {0:s} with error: '
          '{1!s}').format(artifacts_path, exception))

    # Every preprocessing plugin must have a matching artifact definition.
    for name in preprocessors_manager.PreprocessPluginsManager.GetNames():
      if not registry.GetDefinitionByName(name):
        raise errors.BadConfigOption(
            'Missing required artifact definition: {0:s}'.format(name))

    if custom_artifacts_path:
      try:
        registry.ReadFromFile(reader, custom_artifacts_path)
      except (KeyError, artifacts_errors.FormatError) as exception:
        raise errors.BadConfigOption((
            'Unable to read artifact definitions from: {0:s} with error: '
            '{1!s}').format(custom_artifacts_path, exception))

    setattr(configuration_object, '_artifact_definitions_path', artifacts_path)
    setattr(
        configuration_object, '_custom_artifacts_path', custom_artifacts_path)
Example #10
0
    def ExtractEventsFromSources(self):
        """Processes the sources and extracts events.

    Raises:
      BadConfigOption: if the storage file path is invalid or the storage
          format not supported or an invalid collection filter was specified.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
        self._CheckStorageFile(self._storage_file_path,
                               warn_about_existing=True)

        # NOTE(review): ScanSource_CARPE appears to be a project-specific
        # variant of ScanSource that also takes a partition name — confirm.
        scan_context = self.ScanSource_CARPE(self._source_path, self.par_name)
        self._source_type = scan_context.source_type

        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(
            self._source_path,
            self._source_type,
            artifact_filters=self._artifact_filters,
            filter_file=self._filter_file)

        session = engine.BaseEngine.CreateSession(
            artifact_filter_names=self._artifact_filters,
            command_line_arguments=self._command_line_arguments,
            debug_mode=self._debug_mode,
            filter_file_path=self._filter_file,
            preferred_encoding=self.preferred_encoding,
            preferred_time_zone=self._preferred_time_zone,
            preferred_year=self._preferred_year)

        storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
            self._storage_format, session, self._storage_file_path)
        if not storage_writer:
            raise errors.BadConfigOption(
                'Unsupported storage format: {0:s}'.format(
                    self._storage_format))

        single_process_mode = self._single_process_mode
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            # No need to multi process a single file source.
            single_process_mode = True

        if single_process_mode:
            extraction_engine = single_process_engine.SingleProcessEngine()
        else:
            extraction_engine = multi_process_engine.TaskMultiProcessEngine()

        # If the source is a directory or a storage media image
        # run pre-processing.
        if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine)

        configuration = self._CreateProcessingConfiguration(
            extraction_engine.knowledge_base)

        self._SetExtractionParsersAndPlugins(configuration, session)
        self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

        try:
            extraction_engine.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                extraction_engine.knowledge_base, self._artifact_filters,
                self._filter_file)
        except errors.InvalidFilter as exception:
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception))

        # The two engine types take different ProcessSources signatures.
        processing_status = None
        if single_process_mode:
            logger.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                self._source_path_specs, storage_writer,
                self._resolver_context, configuration)

        else:
            logger.debug('Starting extraction in multi process mode.')

            processing_status = extraction_engine.ProcessSources(
                session.identifier,
                self._source_path_specs,
                storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                number_of_worker_processes=self._number_of_extraction_workers,
                worker_memory_limit=self._worker_memory_limit)
0
    def _ProcessSources(self, session, storage_writer):
        """Processes the sources and extract events.

    Args:
      session (Session): session in which the sources are processed.
      storage_writer (StorageWriter): storage writer for a session storage.

    Returns:
      ProcessingStatus: processing status.

    Raises:
      BadConfigOption: if an invalid collection filter was specified.
    """
        # A single-file source that is an archive is re-typed so the files
        # contained in the archive can be processed.
        is_archive = False
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            is_archive = self._IsArchiveFile(self._source_path_specs[0])
            if is_archive:
                self._source_type = definitions.SOURCE_TYPE_ARCHIVE

        # Single-file sources are processed in a single process unless they
        # are archives whose contents should be processed.
        single_process_mode = self._single_process_mode
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            if not self._process_archives or not is_archive:
                single_process_mode = True

        if single_process_mode:
            extraction_engine = single_extraction_engine.SingleProcessEngine()
        else:
            extraction_engine = multi_extraction_engine.ExtractionMultiProcessEngine(
                number_of_worker_processes=self._number_of_extraction_workers,
                worker_memory_limit=self._worker_memory_limit,
                worker_timeout=self._worker_timeout)

        # If the source is a directory or a storage media image
        # run pre-processing.
        if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine, session, storage_writer)

        configuration = self._CreateProcessingConfiguration(
            extraction_engine.knowledge_base)

        # Record the effective parser selection on the session for provenance.
        session.enabled_parser_names = (
            configuration.parser_filter_expression.split(','))
        session.parser_filter_expression = self._parser_filter_expression

        self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

        # TODO: set mount path in knowledge base with
        # extraction_engine.knowledge_base.SetMountPath()
        extraction_engine.knowledge_base.SetTextPrepend(self._text_prepend)

        try:
            extraction_engine.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                extraction_engine.knowledge_base, self._artifact_filters,
                self._filter_file)
        except errors.InvalidFilter as exception:
            # Surface invalid filters as a configuration error to the caller.
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception))

        # TODO: decouple session and storage writer?
        session.source_configurations = (
            extraction_engine.knowledge_base.GetSourceConfigurationArtifacts())

        storage_writer.WriteSessionConfiguration(session)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        if single_process_mode:
            # When a single non-archive file is processed with exactly one
            # parser enabled, that parser is forced — presumably to honor an
            # explicit user choice (NOTE(review): confirm intent).
            force_parser = False
            number_of_parsers = len(
                configuration.parser_filter_expression.split(','))
            if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE
                    and not is_archive and number_of_parsers == 1):
                force_parser = True

            logger.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                session,
                self._source_path_specs,
                storage_writer,
                self._resolver_context,
                configuration,
                force_parser=force_parser,
                status_update_callback=status_update_callback)

        else:
            logger.debug('Starting extraction in multi process mode.')

            # The following overrides are needed because pylint 2.6.0 gets confused
            # about which ProcessSources to check against.
            # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
            processing_status = extraction_engine.ProcessSources(
                session,
                self._source_path_specs,
                storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                status_update_callback=status_update_callback,
                storage_file_path=self._storage_file_path)

        return processing_status
Exemple #12
0
    def ExtractEventsFromSources(self):
        """Processes the sources and extracts events.

    Raises:
      BadConfigOption: if the storage file path is invalid, or the storage
          format not supported, or there was a failure to writing to the
          storage.
      IOError: if the extraction engine could not write to the storage.
      OSError: if the extraction engine could not write to the storage.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
        # Warn up-front when the storage file already exists.
        self._CheckStorageFile(self._storage_file_path,
                               warn_about_existing=True)

        try:
            self.ScanSource(self._source_path)
        except dfvfs_errors.UserAbort as exception:
            # Translate the dfVFS abort into the tool's own exception type.
            raise errors.UserAbort(exception)

        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(
            self._source_path,
            self._source_type,
            artifact_filters=self._artifact_filters,
            filter_file=self._filter_file)

        self._output_writer.Write('\n')
        self._status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write('Processing started.\n')

        # TODO: attach processing configuration to session?
        session = engine.BaseEngine.CreateSession()

        storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
            self._storage_format)
        if not storage_writer:
            raise errors.BadConfigOption(
                'Unsupported storage format: {0:s}'.format(
                    self._storage_format))

        try:
            storage_writer.Open(path=self._storage_file_path)
        except IOError as exception:
            raise IOError(
                'Unable to open storage with error: {0!s}'.format(exception))

        processing_status = None

        try:
            session_start = session.CreateSessionStart()
            storage_writer.AddAttributeContainer(session_start)

            try:
                processing_status = self._ProcessSources(
                    session, storage_writer)

            finally:
                # When processing never produced a status (for example it
                # raised), the session is recorded as aborted.
                session.aborted = getattr(processing_status, 'aborted', True)

                # A session completion is always written, even on failure, so
                # the storage records how the session ended.
                session_completion = session.CreateSessionCompletion()
                storage_writer.AddAttributeContainer(session_completion)

        except IOError as exception:
            raise IOError(
                'Unable to write to storage with error: {0!s}'.format(
                    exception))

        finally:
            storage_writer.Close()

        self._status_view.PrintExtractionSummary(processing_status)
Exemple #13
0
    def _ProcessSources(self, session, storage_writer):
        """Processes the sources and extract events.

    Args:
      session (Session): session in which the sources are processed.
      storage_writer (StorageWriter): storage writer for a session storage.

    Returns:
      ProcessingStatus: processing status.

    Raises:
      BadConfigOption: if an invalid collection filter was specified.
    """
        # A single-file source that is an archive is re-typed so the files
        # contained in the archive can be processed.
        is_archive = False
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            is_archive = self._IsArchiveFile(self._source_path_specs[0])
            if is_archive:
                self._source_type = definitions.SOURCE_TYPE_ARCHIVE

        # Single-file sources are processed in a single process unless they
        # are archives whose contents should be processed.
        single_process_mode = self._single_process_mode
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            if not self._process_archives or not is_archive:
                single_process_mode = True

        if single_process_mode:
            extraction_engine = single_extraction_engine.SingleProcessEngine()
        else:
            extraction_engine = multi_extraction_engine.ExtractionMultiProcessEngine(
                number_of_worker_processes=self._number_of_extraction_workers,
                worker_memory_limit=self._worker_memory_limit,
                worker_timeout=self._worker_timeout)

        # If the source is a directory or a storage media image
        # run pre-processing.
        if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine, session, storage_writer)

        # Expansion can use pre-processing results (for example the detected
        # operating system), hence it happens after _PreprocessSources.
        self._expanded_parser_filter_expression = (
            self._GetExpandedParserFilterExpression(
                extraction_engine.knowledge_base))

        enabled_parser_names = self._expanded_parser_filter_expression.split(
            ',')

        number_of_enabled_parsers = len(enabled_parser_names)

        # When a single non-archive file is processed with exactly one parser
        # enabled, that parser is forced and Windows EventLog resource
        # extraction is disabled (NOTE(review): presumably because the single
        # parser was chosen explicitly — confirm).
        force_parser = False
        if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE
                and not is_archive and number_of_enabled_parsers == 1):
            force_parser = True

            self._extract_winevt_resources = False

        elif ('winevt' not in enabled_parser_names
              and 'winevtx' not in enabled_parser_names):
            # Without an EventLog parser there is nothing to extract
            # resources for.
            self._extract_winevt_resources = False

        elif (self._extract_winevt_resources
              and 'pe' not in enabled_parser_names):
            # Extracting EventLog resources requires the PE parser.
            logger.warning(
                'A Windows EventLog parser is enabled in combination with '
                'extraction of Windows EventLog resources, but the Portable '
                'Executable (PE) parser is disabled. Therefore Windows EventLog '
                'resources cannot be extracted.')

            self._extract_winevt_resources = False

        configuration = self._CreateExtractionProcessingConfiguration()

        try:
            extraction_engine.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                extraction_engine.knowledge_base, self._artifact_filters,
                self._filter_file)
        except errors.InvalidFilter as exception:
            # Surface invalid filters as a configuration error to the caller.
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception))

        session_configuration = self._CreateExtractionSessionConfiguration(
            session, enabled_parser_names)

        storage_writer.AddAttributeContainer(session_configuration)

        # One source configuration artifact per source path specification.
        source_configurations = []
        for path_spec in self._source_path_specs:
            source_configuration = artifacts.SourceConfigurationArtifact(
                path_spec=path_spec)
            source_configurations.append(source_configuration)

        # TODO: improve to detect more than 1 system configurations.
        # TODO: improve to add volumes to system configuration.
        system_configuration = (
            extraction_engine.knowledge_base.GetSystemConfigurationArtifact())
        storage_writer.AddAttributeContainer(system_configuration)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        if single_process_mode:
            logger.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                source_configurations,
                storage_writer,
                self._resolver_context,
                configuration,
                force_parser=force_parser,
                status_update_callback=status_update_callback)

        else:
            logger.debug('Starting extraction in multi process mode.')

            # The following overrides are needed because pylint 2.6.0 gets confused
            # about which ProcessSources to check against.
            # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
            processing_status = extraction_engine.ProcessSources(
                source_configurations,
                storage_writer,
                session.identifier,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                status_update_callback=status_update_callback,
                storage_file_path=self._storage_file_path)

        return processing_status
Exemple #14
0
    def _GetExpandedParserFilterExpression(self, knowledge_base):
        """Determines the expanded parser filter expression.

    Args:
      knowledge_base (KnowledgeBase): contains information from the source
          data needed for parsing.

    Returns:
      str: expanded parser filter expression.

    Raises:
      BadConfigOption: if presets in the parser filter expression could not
          be expanded or if an invalid parser or plugin name is specified.
    """
        parser_filter_expression = self._parser_filter_expression
        if not parser_filter_expression and not self._single_process_mode:
            # No expression was given: select parser presets based on the
            # operating system detected during pre-processing.
            operating_system_family = knowledge_base.GetValue(
                'operating_system')
            operating_system_product = knowledge_base.GetValue(
                'operating_system_product')
            operating_system_version = knowledge_base.GetValue(
                'operating_system_version')

            operating_system_artifact = artifacts.OperatingSystemArtifact(
                family=operating_system_family,
                product=operating_system_product,
                version=operating_system_version)

            preset_definitions = self._presets_manager.GetPresetsByOperatingSystem(
                operating_system_artifact)
            if preset_definitions:
                # NOTE(review): this updates self._parser_filter_expression as
                # a side effect; the attribute is read again below and may be
                # read by other methods — confirm this is intentional.
                self._parser_filter_expression = ','.join([
                    preset_definition.name
                    for preset_definition in preset_definitions
                ])

                logger.debug(
                    'Parser filter expression set to preset: {0:s}'.format(
                        self._parser_filter_expression))

        parser_filter_helper = parser_filter.ParserFilterExpressionHelper()

        try:
            # Expand preset names into the parsers and plugins they stand for.
            parser_filter_expression = parser_filter_helper.ExpandPresets(
                self._presets_manager, self._parser_filter_expression)
            logger.debug('Parser filter expression set to: {0:s}'.format(
                parser_filter_expression or 'N/A'))
        except RuntimeError as exception:
            raise errors.BadConfigOption(
                ('Unable to expand presets in parser filter expression with '
                 'error: {0!s}').format(exception))

        # Validate the expanded expression; unknown parser or plugin names
        # are reported as a configuration error.
        parser_elements, invalid_parser_elements = (
            parsers_manager.ParsersManager.CheckFilterExpression(
                parser_filter_expression))

        if invalid_parser_elements:
            invalid_parser_names_string = ','.join(invalid_parser_elements)
            raise errors.BadConfigOption(
                'Unknown parser or plugin names in element(s): "{0:s}" of '
                'parser filter expression: {1:s}'.format(
                    invalid_parser_names_string, parser_filter_expression))

        # Sorted so the resulting expression is deterministic.
        return ','.join(sorted(parser_elements))
Exemple #15
0
 def ParseOptions(cls, options, unused_config_object):
     """Parses and validates the configuration options.

     Requires the 'dynamic' option to be set to a non-empty value.
     """
     dynamic_value = getattr(options, 'dynamic', u'')
     if not dynamic_value:
         raise errors.BadConfigOption(u'Always set this.')
Exemple #16
0
  def ParseOptions(cls, options, configuration_object):
    """Parses and validates options.

    Args:
      options (argparse.Namespace): parser options.
      configuration_object (CLITool): object to be configured by the argument
          helper.

    Raises:
      BadConfigObject: when the configuration object is of the wrong type.
      BadConfigOption: when a configuration parameter fails validation.
    """
    if not isinstance(configuration_object, tools.CLITool):
      raise errors.BadConfigObject(
          'Configuration object is not an instance of CLITool')

    filter_expression = cls._ParseStringOption(options, 'filter')

    event_filter = None
    if filter_expression:
      # Compile the expression up-front so an invalid filter surfaces as a
      # configuration error instead of failing later during processing.
      event_filter = filters_manager.FiltersManager.GetFilterObject(
          filter_expression)
      if not event_filter:
        raise errors.BadConfigOption('Invalid filter expression: {0:s}'.format(
            filter_expression))

    slice_time_string = getattr(options, 'slice', None)
    slice_duration = getattr(options, 'slice_size', 5)
    use_time_slicer = getattr(options, 'slicer', False)

    # A fixed time slice and the slicer are mutually exclusive modes.
    if slice_time_string and use_time_slicer:
      raise errors.BadConfigOption(
          'Time slice and slicer cannot be used at the same time.')

    slice_event_timestamp = None
    if slice_time_string:
      # A _preferred_time_zone of None represents UTC, hence the fallback.
      preferred_time_zone = getattr(
          configuration_object, '_preferred_time_zone', None) or 'UTC'
      timezone = pytz.timezone(preferred_time_zone)
      slice_event_timestamp = timelib.Timestamp.FromTimeString(
          slice_time_string, timezone=timezone)
      if slice_event_timestamp is None:
        raise errors.BadConfigOption(
            'Unsupported time slice event date and time: {0:s}'.format(
                slice_time_string))

    setattr(configuration_object, '_event_filter_expression', filter_expression)

    if event_filter:
      setattr(configuration_object, '_event_filter', event_filter)

    setattr(configuration_object, '_use_time_slicer', use_time_slicer)

    if slice_event_timestamp is not None or use_time_slicer:
      # The time slicer derives its duration from the time slice.
      # TODO: refactor TimeSlice to filters.
      time_slice = time_slices.TimeSlice(
          slice_event_timestamp, duration=slice_duration)
      setattr(configuration_object, '_time_slice', time_slice)
Exemple #17
0
  def _CreateProcessingConfiguration(self, knowledge_base):
    """Creates a processing configuration.

    Args:
      knowledge_base (KnowledgeBase): contains information from the source
          data needed for parsing.

    Returns:
      ProcessingConfiguration: processing configuration.

    Raises:
      BadConfigOption: if presets in the parser filter expression could not
          be expanded or if an invalid parser or plugin name is specified.
    """
    parser_filter_expression = self._parser_filter_expression
    if not parser_filter_expression:
      # No expression was given: select parser presets based on the
      # operating system detected during pre-processing.
      operating_system_family = knowledge_base.GetValue('operating_system')
      operating_system_product = knowledge_base.GetValue(
          'operating_system_product')
      operating_system_version = knowledge_base.GetValue(
          'operating_system_version')

      operating_system_artifact = artifacts.OperatingSystemArtifact(
          family=operating_system_family, product=operating_system_product,
          version=operating_system_version)

      preset_definitions = self._presets_manager.GetPresetsByOperatingSystem(
          operating_system_artifact)

      if preset_definitions:
        preset_names = [
            preset_definition.name for preset_definition in preset_definitions]
        filter_expression = ','.join(preset_names)

        logger.info('Parser filter expression set to: {0:s}'.format(
            filter_expression))
        parser_filter_expression = filter_expression

    parser_filter_helper = parser_filter.ParserFilterExpressionHelper()

    try:
      # Expand preset names into the parsers and plugins they stand for.
      parser_filter_expression = parser_filter_helper.ExpandPresets(
          self._presets_manager, parser_filter_expression)
    except RuntimeError as exception:
      raise errors.BadConfigOption((
          'Unable to expand presets in parser filter expression with '
          'error: {0!s}').format(exception))

    # Validate the expanded expression; unknown parser or plugin names are
    # reported as a configuration error.
    _, invalid_parser_elements = (
        parsers_manager.ParsersManager.CheckFilterExpression(
            parser_filter_expression))

    if invalid_parser_elements:
      invalid_parser_names_string = ','.join(invalid_parser_elements)
      raise errors.BadConfigOption(
          'Unknown parser or plugin names in element(s): "{0:s}" of '
          'parser filter expression: {1:s}'.format(
              invalid_parser_names_string, parser_filter_expression))

    # TODO: pass preferred_encoding.
    # Copy the tool's settings onto a fresh processing configuration.
    configuration = configurations.ProcessingConfiguration()
    configuration.artifact_filters = self._artifact_filters
    configuration.credentials = self._credential_configurations
    configuration.debug_output = self._debug_mode
    configuration.extraction.hasher_file_size_limit = (
        self._hasher_file_size_limit)
    configuration.extraction.hasher_names_string = self._hasher_names_string
    configuration.extraction.process_archives = self._process_archives
    configuration.extraction.process_compressed_streams = (
        self._process_compressed_streams)
    configuration.extraction.yara_rules_string = self._yara_rules_string
    configuration.filter_file = self._filter_file
    configuration.log_filename = self._log_file
    configuration.parser_filter_expression = parser_filter_expression
    configuration.preferred_year = self._preferred_year
    configuration.profiling.directory = self._profiling_directory
    configuration.profiling.sample_rate = self._profiling_sample_rate
    configuration.profiling.profilers = self._profilers
    configuration.task_storage_format = self._task_storage_format
    configuration.temporary_directory = self._temporary_directory

    return configuration
Exemple #18
0
    def ParseOptions(self, options):
        """Parses the options.

    Args:
      options (argparse.Namespace): command line arguments.

    Raises:
      BadConfigOption: if the options are invalid.
    """
        self._ParseInformationalOptions(options)

        self._verbose = getattr(options, 'verbose', False)

        self._sections = getattr(options, 'sections', '')

        # The literal value 'list' requests a listing of available sections.
        self.list_sections = self._sections == 'list'

        self.show_troubleshooting = getattr(options, 'show_troubleshooting',
                                            False)
        # Listing sections or showing troubleshooting information does not
        # require the remaining options, hence validation stops here.
        if self.list_sections or self.show_troubleshooting:
            return

        # 'all' is kept as a string marker; any other value is treated as a
        # comma-separated list of section names.
        if self._sections != 'all':
            self._sections = self._sections.split(',')

        self._output_filename = getattr(options, 'write', None)

        argument_helper_names = ['process_resources', 'storage_file']
        helpers_manager.ArgumentHelperManager.ParseOptions(
            options, self, names=argument_helper_names)

        # TODO: move check into _CheckStorageFile.
        if not self._storage_file_path:
            raise errors.BadConfigOption('Missing storage file option.')

        if not os.path.isfile(self._storage_file_path):
            raise errors.BadConfigOption('No such storage file: {0:s}.'.format(
                self._storage_file_path))

        compare_storage_file_path = self.ParseStringOption(
            options, 'compare_storage_file')
        if compare_storage_file_path:
            if not os.path.isfile(compare_storage_file_path):
                raise errors.BadConfigOption(
                    'No such storage file: {0:s}.'.format(
                        compare_storage_file_path))

            self._compare_storage_file_path = compare_storage_file_path
            self.compare_storage_information = True

        self._output_format = self.ParseStringOption(options, 'output_format')

        if self._output_filename:
            # Refuse to clobber an existing output file.
            if os.path.exists(self._output_filename):
                raise errors.BadConfigOption(
                    'Output file already exists: {0:s}.'.format(
                        self._output_filename))
            # The file object is intentionally left open here; ownership
            # passes to the output writer (NOTE(review): presumably the
            # writer closes it — confirm).
            output_file_object = open(self._output_filename, 'wb')
            self._output_writer = tools.FileObjectOutputWriter(
                output_file_object)

        self._EnforceProcessMemoryLimit(self._process_memory_limit)
    def _ExtractWithFilter(self,
                           source_path_specs,
                           destination_path,
                           output_writer,
                           artifact_filters,
                           filter_file,
                           artifact_definitions_path,
                           custom_artifacts_path,
                           skip_duplicates=True):
        """Extracts files using a filter expression.

    This method runs the file extraction process on the image and
    potentially on every VSS if that is wanted.

    Args:
      source_path_specs (list[dfvfs.PathSpec]): path specifications to extract.
      destination_path (str): path where the extracted files should be stored.
      output_writer (CLIOutputWriter): output writer.
      artifact_filters (list[str]): names of artifact definitions that are
          used for filtering file system and Windows Registry key paths.
      filter_file (str): path of the file that contains the filter file path
          filters.
      artifact_definitions_path (str): path to artifact definitions file.
      custom_artifacts_path (str): path to custom artifact definitions file.
      skip_duplicates (Optional[bool]): True if files with duplicate content
          should be skipped.

    Raises:
      BadConfigOption: if an invalid collection filter was specified.
    """
        extraction_engine = engine.BaseEngine()

        # If the source is a directory or a storage media image
        # run pre-processing.
        if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine)

        # The collection filters depend only on the filter arguments, not on
        # an individual source path specification, so build them once before
        # iterating over the sources instead of once per source.
        try:
            extraction_engine.BuildCollectionFilters(
                artifact_definitions_path, custom_artifacts_path,
                extraction_engine.knowledge_base, artifact_filters,
                filter_file)
        except errors.InvalidFilter as exception:
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.
                format(exception))

        filters_helper = extraction_engine.collection_filters_helper

        for source_path_spec in source_path_specs:
            file_system, mount_point = self._GetSourceFileSystem(
                source_path_spec, resolver_context=self._resolver_context)

            display_name = path_helper.PathHelper.GetDisplayNameForPathSpec(
                source_path_spec)
            output_writer.Write(
                'Extracting file entries from: {0:s}\n'.format(display_name))

            try:
                searcher = file_system_searcher.FileSystemSearcher(
                    file_system, mount_point)
                for path_spec in searcher.Find(find_specs=(
                        filters_helper.included_file_system_find_specs)):
                    self._ExtractFileEntry(path_spec,
                                           destination_path,
                                           output_writer,
                                           skip_duplicates=skip_duplicates)

            finally:
                # Close the file system even when extraction raises, so file
                # system objects are not leaked on error.
                file_system.Close()
Exemple #20
0
    def ProcessStorage(self):
        """Processes a plaso storage file.

    Raises:
      BadConfigOption: when a configuration parameter fails validation or the
          storage file cannot be opened with read access.
      RuntimeError: if a non-recoverable situation is encountered.
    """
        self._CheckStorageFile(self._storage_file_path)

        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetStorageFileInformation(self._storage_file_path)

        status_update_callback = (
            self._status_view.GetAnalysisStatusUpdateCallback())

        session = engine.BaseEngine.CreateSession(
            command_line_arguments=self._command_line_arguments,
            preferred_encoding=self.preferred_encoding)

        storage_reader = storage_factory.StorageFactory.CreateStorageReaderForFile(
            self._storage_file_path)
        if not storage_reader:
            raise errors.BadConfigOption(
                'Format of storage file: {0:s} not supported'.format(
                    self._storage_file_path))

        # NOTE(review): the loop variable shadows the session created above;
        # after the loop, "session" refers to the last stored session —
        # confirm this is intentional.
        for session in storage_reader.GetSessions():
            if not session.source_configurations:
                # Sessions without source configurations fall back to the
                # system configuration stored in the file.
                storage_reader.ReadSystemConfiguration(self._knowledge_base)
            else:
                for source_configuration in session.source_configurations:
                    self._knowledge_base.ReadSystemConfigurationArtifact(
                        source_configuration.system_configuration,
                        session_identifier=session.identifier)

            self._knowledge_base.SetTextPrepend(session.text_prepend)

        self._number_of_analysis_reports = (
            storage_reader.GetNumberOfAnalysisReports())
        storage_reader.Close()

        configuration = configurations.ProcessingConfiguration()
        configuration.data_location = self._data_location
        configuration.debug_output = self._debug_mode
        configuration.log_filename = self._log_file
        configuration.profiling.directory = self._profiling_directory
        configuration.profiling.sample_rate = self._profiling_sample_rate
        configuration.profiling.profilers = self._profilers

        analysis_counter = None
        if self._analysis_plugins:
            # Analysis plugins write their results back into the storage
            # file, hence a writer is needed here.
            storage_writer = (
                storage_factory.StorageFactory.CreateStorageWriterForFile(
                    session, self._storage_file_path))
            if not storage_writer:
                raise errors.BadConfigOption(
                    'Format of storage file: {0:s} not supported for writing'.
                    format(self._storage_file_path))

            # TODO: add single processing support.
            analysis_engine = psort.PsortMultiProcessEngine(
                worker_memory_limit=self._worker_memory_limit,
                worker_timeout=self._worker_timeout)

            analysis_engine.AnalyzeEvents(
                self._knowledge_base,
                storage_writer,
                self._data_location,
                self._analysis_plugins,
                configuration,
                event_filter=self._event_filter,
                event_filter_expression=self._event_filter_expression,
                status_update_callback=status_update_callback)

            analysis_counter = collections.Counter()
            for item, value in session.analysis_reports_counter.items():
                analysis_counter[item] = value

        # Export events unless the null output format was selected.
        if self._output_format != 'null':
            storage_reader = (
                storage_factory.StorageFactory.CreateStorageReaderForFile(
                    self._storage_file_path))

            # TODO: add single processing support.
            analysis_engine = psort.PsortMultiProcessEngine(
                worker_memory_limit=self._worker_memory_limit,
                worker_timeout=self._worker_timeout)

            analysis_engine.ExportEvents(
                self._knowledge_base,
                storage_reader,
                self._output_module,
                configuration,
                deduplicate_events=self._deduplicate_events,
                event_filter=self._event_filter,
                status_update_callback=status_update_callback,
                time_slice=self._time_slice,
                use_time_slicer=self._use_time_slicer)

            self._output_module.Close()
            self._output_module = None

        if self._quiet_mode:
            return

        self._output_writer.Write('Processing completed.\n')

        if analysis_counter:
            # Summarize generated analysis reports, with 'total' shown last.
            table_view = views.ViewsFactory.GetTableView(
                self._views_format_type, title='Analysis reports generated')
            for element, count in analysis_counter.most_common():
                if element != 'total':
                    table_view.AddRow([element, count])

            table_view.AddRow(['Total', analysis_counter['total']])
            table_view.Write(self._output_writer)

        storage_reader = storage_factory.StorageFactory.CreateStorageReaderForFile(
            self._storage_file_path)
        self._PrintAnalysisReportsDetails(storage_reader)
Exemple #21
0
    def ExtractEventsFromSources(self):
        """Processes the sources and extract events.

    This is a stripped down copy of tools/log2timeline.py that doesn't support
    the full set of flags. The defaults for these are hard coded in the
    constructor of this class.

    Raises:
      BadConfigOption: if the storage file path is invalid or the storage
          format not supported or an invalid collection filter was specified.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
        self._CheckStorageFile(
            self._storage_file_path, warn_about_existing=True)

        scan_result = self.ScanSource(self._source_path)
        scanned_type = scan_result.source_type

        # Configure the status view before any progress is reported.
        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(
            self._source_path, scanned_type,
            artifact_filters=self._artifact_filters,
            filter_file=self._filter_file)

        update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        self._output_writer.Write('\n')
        self._status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write('Processing started.\n')

        extraction_session = engine.BaseEngine.CreateSession(
            artifact_filter_names=self._artifact_filters,
            command_line_arguments=self._command_line_arguments,
            filter_file_path=self._filter_file,
            preferred_encoding=self.preferred_encoding,
            preferred_time_zone=self._preferred_time_zone,
            preferred_year=self._preferred_year)

        writer = storage_factory.StorageFactory.CreateStorageWriter(
            self._storage_format, extraction_session, self._storage_file_path)
        if not writer:
            raise errors.BadConfigOption(
                'Unsupported storage format: {0:s}'.format(
                    self._storage_format))

        # A single file source never benefits from multi processing.
        use_single_process = (
            self._single_process_mode or
            scanned_type == dfvfs_definitions.SOURCE_TYPE_FILE)

        if use_single_process:
            extractor = single_process_engine.SingleProcessEngine()
        else:
            extractor = multi_process_engine.TaskMultiProcessEngine(
                number_of_worker_processes=self._number_of_extraction_workers,
                worker_memory_limit=self._worker_memory_limit,
                worker_timeout=self._worker_timeout)

        # Directory and storage media image sources require pre-processing.
        if scanned_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extractor)

        processing_configuration = self._CreateProcessingConfiguration(
            extractor.knowledge_base)

        self._SetExtractionParsersAndPlugins(
            processing_configuration, extraction_session)
        self._SetExtractionPreferredTimeZone(extractor.knowledge_base)

        # TODO: set mount path in knowledge base with
        # extractor.knowledge_base.SetMountPath()
        extractor.knowledge_base.SetTextPrepend(self._text_prepend)

        try:
            extractor.BuildCollectionFilters(
                self._artifact_definitions_path, self._custom_artifacts_path,
                extractor.knowledge_base, self._artifact_filters,
                self._filter_file)
        except errors.InvalidFilter as exception:
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception))

        if use_single_process:
            logger.debug('Starting extraction in single process mode.')

            status = extractor.ProcessSources(
                extraction_session, self._source_path_specs, writer,
                self._resolver_context, processing_configuration,
                status_update_callback=update_callback)

        else:
            logger.debug('Starting extraction in multi process mode.')

            status = extractor.ProcessSources(
                extraction_session, self._source_path_specs, writer,
                processing_configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                status_update_callback=update_callback)

        self._status_view.PrintExtractionSummary(status)
    def ParseOptions(cls, options, configuration_object):
        """Parses and validates options.

    Args:
      options (argparse.Namespace): parser options.
      configuration_object (CLITool): object to be configured by the argument
          helper.

    Raises:
      BadConfigObject: when the configuration object is of the wrong type.
      BadConfigOption: when a configuration parameter fails validation.
    """
        if not isinstance(configuration_object, tools.CLITool):
            raise errors.BadConfigObject(
                'Configuration object is not an instance of CLITool')

        filter_expression = cls._ParseStringOption(options, 'filter')

        filter_object = None
        if filter_expression:
            filter_object = event_filter.EventObjectFilter()

            try:
                filter_object.CompileFilter(filter_expression)
            except errors.ParseError as exception:
                # Chain the original parse error for easier debugging.
                raise errors.BadConfigOption(
                    ('Unable to compile filter expression with error: '
                     '{0!s}').format(exception)) from exception

        time_slice_event_time_string = getattr(options, 'slice', None)
        time_slice_duration = getattr(options, 'slice_size', 5)
        use_time_slicer = getattr(options, 'slicer', False)

        # The slice and slicer options are mutually exclusive.
        if time_slice_event_time_string and use_time_slicer:
            raise errors.BadConfigOption(
                'Time slice and slicer cannot be used at the same time.')

        time_slice_event_timestamp = None
        if time_slice_event_time_string:
            # A space indicates a date and time separated by a space, which is
            # not supported ISO 8601 here.
            if ' ' in time_slice_event_time_string:
                raise errors.BadConfigOption(
                    'Time slice date and time must be defined in ISO 8601 format, '
                    'for example: 20200619T20:09:23+02:00.')

            date_time = dfdatetime_time_elements.TimeElements()

            try:
                date_time.CopyFromStringISO8601(time_slice_event_time_string)
            except ValueError as exception:
                # Chain the original conversion error for easier debugging.
                raise errors.BadConfigOption((
                    'Unsupported time slice date and time: {0:s}. The date and time '
                    'must be defined in ISO 8601 format, for example: '
                    '20200619T20:09:23+02:00'
                ).format(time_slice_event_time_string)) from exception

            # TODO: directly use dfDateTime objects in time slice.
            time_slice_event_timestamp = date_time.GetPlasoTimestamp()

        setattr(configuration_object, '_event_filter_expression',
                filter_expression)

        if filter_object:
            setattr(configuration_object, '_event_filter', filter_object)

        setattr(configuration_object, '_use_time_slicer', use_time_slicer)

        if time_slice_event_timestamp is not None or use_time_slicer:
            # Note that time slicer uses the time slice to determine the duration.
            # TODO: refactor TimeSlice to filters.
            time_slice = time_slices.TimeSlice(
                time_slice_event_timestamp, duration=time_slice_duration)
            setattr(configuration_object, '_time_slice', time_slice)
    def _ParseStorageMediaImageOptions(self, options):
        """Parses the storage media image options.

    Args:
      options (argparse.Namespace): command line arguments.

    Raises:
      BadConfigOption: if the options are invalid.
    """
        partitions = getattr(options, 'partitions', None)
        self._partitions = self._ParsePartitionsString(partitions)

        partition = getattr(options, 'partition', None)

        if self._partitions and partition is not None:
            raise errors.BadConfigOption(
                ('Option "--partition" can not be used in combination '
                 'with "--partitions".'))

        if not self._partitions and partition is not None:
            self._partitions = self._ParsePartitionsString(partition)

        image_offset_bytes = getattr(options, 'image_offset_bytes', None)

        if self._partitions and image_offset_bytes is not None:
            raise errors.BadConfigOption((
                'Option "--image_offset_bytes" can not be used in combination '
                'with "--partitions" or "--partition".'))

        image_offset = getattr(options, 'image_offset', None)

        if self._partitions and image_offset is not None:
            raise errors.BadConfigOption(
                ('Option "--image_offset" can not be used in combination with '
                 '"--partitions" or "--partition".'))

        if (image_offset_bytes is not None
                and isinstance(image_offset_bytes, py2to3.STRING_TYPES)):
            try:
                image_offset_bytes = int(image_offset_bytes, 10)
            except ValueError as exception:
                raise errors.BadConfigOption(
                    'Invalid image offset bytes: {0:s}.'.format(
                        image_offset_bytes)) from exception

        bytes_per_sector = self._DEFAULT_BYTES_PER_SECTOR
        if image_offset is not None:
            # Convert the sector based offset and sector size whenever
            # "--image_offset" is provided. Previously these conversions were
            # skipped when "--image_offset_bytes" was also set, which caused a
            # NameError (or string repetition) below when image_offset_bytes
            # evaluated to 0.
            bytes_per_sector = getattr(options, 'bytes_per_sector',
                                       self._DEFAULT_BYTES_PER_SECTOR)

            if isinstance(image_offset, py2to3.STRING_TYPES):
                try:
                    image_offset = int(image_offset, 10)
                except ValueError as exception:
                    raise errors.BadConfigOption(
                        'Invalid image offset: {0:s}.'.format(
                            image_offset)) from exception

            if isinstance(bytes_per_sector, py2to3.STRING_TYPES):
                try:
                    bytes_per_sector = int(bytes_per_sector, 10)
                except ValueError as exception:
                    raise errors.BadConfigOption(
                        'Invalid bytes per sector: {0:s}.'.format(
                            bytes_per_sector)) from exception

        # A byte based offset takes precedence over a sector based offset.
        if image_offset_bytes:
            self._partition_offset = image_offset_bytes
        elif image_offset:
            self._partition_offset = image_offset * bytes_per_sector
Exemple #24
0
    def ParseOptions(self, options):
        """Parses tool specific options.

    Args:
      options (argparse.Namespace): command line arguments.

    Raises:
      BadConfigOption: if the options are invalid.
    """

        # The extraction options are dependent on the data location.
        helpers_manager.ArgumentHelperManager.ParseOptions(
            options, self, names=['data_location'])

        # The output modules options are dependent on the preferred language
        # and preferred time zone options.
        self._ParseTimezoneOption(options)

        argument_helper_names = [
            'artifact_definitions', 'hashers', 'language', 'parsers'
        ]
        helpers_manager.ArgumentHelperManager.ParseOptions(
            options, self, names=argument_helper_names)

        if self._preferred_language == 'list':
            self.list_language_identifiers = True

        if self._hasher_names_string == 'list':
            self.list_hashers = True

        if self._parser_filter_expression == 'list':
            self.list_parsers_and_plugins = True

        # Check the list options first otherwise required options will raise.
        # Note: the duplicate "self.list_hashers" test and the redundant
        # second early return on "self.list_timezones" were merged into this
        # single equivalent check.
        if (self.list_language_identifiers or self.list_timezones
                or self.list_hashers or self.list_parsers_and_plugins
                or self.list_output_modules):
            return

        # Check output modules after the other listable options, as otherwise
        # a required argument will raise.
        helpers_manager.ArgumentHelperManager.ParseOptions(
            options, self, names=['output_modules'])

        if self._output_format == 'list':
            self.list_output_modules = True
            return

        self._ParseInformationalOptions(options)

        argument_helper_names = ['extraction', 'status_view']
        helpers_manager.ArgumentHelperManager.ParseOptions(
            options, self, names=argument_helper_names)

        self._ParseLogFileOptions(options)

        self._ParseStorageMediaOptions(options)

        # These arguments are parsed from argparse.Namespace, so we can make
        # tests consistent with the log2timeline/psort ones.
        self._single_process_mode = getattr(options, 'single_process', False)

        self._storage_file_path = getattr(options, 'storage_file', None)
        if not self._storage_file_path:
            self._storage_file_path = self._GenerateStorageFileName()

        self._output_filename = getattr(options, 'write', None)

        if not self._output_filename:
            raise errors.BadConfigOption((
                'Output format: {0:s} requires an output file (-w OUTPUT_FILE)'
            ).format(self._output_format))
        if os.path.exists(self._output_filename):
            raise errors.BadConfigOption(
                'Output file already exists: {0:s}.'.format(
                    self._output_filename))

        self._output_module = self._CreateOutputModule(options)
Exemple #25
0
  def ParseOptions(self, options):
    """Parses the options.

    Args:
      options (argparse.Namespace): command line arguments.

    Raises:
      BadConfigOption: if the options are invalid.
    """
    # The output modules options depend on the preferred language and the
    # preferred time zone, so parse the time zone first.
    self._ParseTimezoneOption(options)

    helper_names = ['analysis_plugins', 'language', 'profiling']
    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, self, names=helper_names)

    self.list_analysis_plugins = bool(self._analysis_plugins == 'list')
    self.list_language_identifiers = bool(self._preferred_language == 'list')
    self.list_profilers = bool(self._profilers == 'list')

    listing_requested = (
        self.list_analysis_plugins or self.list_language_identifiers or
        self.list_profilers or self.list_timezones)
    if listing_requested:
      return

    # Parse output modules after the other listable options, otherwise
    # it could raise with "requires an output file".
    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, self, names=['output_modules'])

    self.list_output_modules = bool(self._output_format == 'list')
    if self.list_output_modules:
      return

    self._ParseInformationalOptions(options)

    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, self, names=['data_location'])

    self._ParseLogFileOptions(options)

    self._ParseProcessingOptions(options)

    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, self, names=['event_filters'])

    self._deduplicate_events = getattr(options, 'dedup', True)

    if not self._data_location:
      logging.warning('Unable to automatically determine data location.')
    else:
      # Update the data location with the calculated value.
      options.data_location = self._data_location

    self._command_line_arguments = self.GetCommandLineArguments()

    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, self, names=['storage_file'])

    # TODO: move check into _CheckStorageFile.
    if not self._storage_file_path:
      raise errors.BadConfigOption('Missing storage file option.')

    if not os.path.isfile(self._storage_file_path):
      raise errors.BadConfigOption(
          'No such storage file: {0:s}.'.format(self._storage_file_path))

    self._analysis_plugins = self._CreateAnalysisPlugins(options)
    self._output_module = self._CreateOutputModule(options)
Exemple #26
0
    def ExtractEventsFromSources(self):
        """Processes the sources and extracts events.

    Raises:
      BadConfigOption: if the storage file path is invalid or the storage
          format not supported.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
        # Warn (rather than fail) when the storage file already exists.
        self._CheckStorageFile(self._storage_file_path,
                               warn_about_existing=True)

        scan_context = self.ScanSource(self._source_path)
        self._source_type = scan_context.source_type

        # Configure the status view before any progress is reported.
        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(self._source_path,
                                               self._source_type,
                                               filter_file=self._filter_file)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        self._output_writer.Write('\n')
        self._status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write('Processing started.\n')

        session = engine.BaseEngine.CreateSession(
            command_line_arguments=self._command_line_arguments,
            debug_mode=self._debug_mode,
            filter_file=self._filter_file,
            preferred_encoding=self.preferred_encoding,
            preferred_time_zone=self._preferred_time_zone,
            preferred_year=self._preferred_year)

        storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
            self._storage_format, session, self._storage_file_path)
        if not storage_writer:
            raise errors.BadConfigOption(
                'Unsupported storage format: {0:s}'.format(
                    self._storage_format))

        single_process_mode = self._single_process_mode
        if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            # No need to multi process a single file source.
            single_process_mode = True

        if single_process_mode:
            extraction_engine = single_process_engine.SingleProcessEngine()
        else:
            extraction_engine = multi_process_engine.TaskMultiProcessEngine(
                use_zeromq=self._use_zeromq)

        # If the source is a directory or a storage media image
        # run pre-processing.
        if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine)

        configuration = self._CreateProcessingConfiguration(
            extraction_engine.knowledge_base)

        self._SetExtractionParsersAndPlugins(configuration, session)
        self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

        # Build file system find specifications from the filter file, if one
        # was provided, so extraction only collects the matching paths.
        filter_find_specs = None
        if configuration.filter_file:
            environment_variables = (
                extraction_engine.knowledge_base.GetEnvironmentVariables())
            filter_file_object = filter_file.FilterFile(
                configuration.filter_file)
            filter_find_specs = filter_file_object.BuildFindSpecs(
                environment_variables=environment_variables)

        processing_status = None
        if single_process_mode:
            logging.debug('Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                self._source_path_specs,
                storage_writer,
                self._resolver_context,
                configuration,
                filter_find_specs=filter_find_specs,
                status_update_callback=status_update_callback)

        else:
            logging.debug('Starting extraction in multi process mode.')

            # NOTE(review): the multi process engine takes the session
            # identifier and worker tuning arguments, while the single process
            # engine takes the resolver context instead — the positional
            # signatures intentionally differ per engine.
            processing_status = extraction_engine.ProcessSources(
                session.identifier,
                self._source_path_specs,
                storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                filter_find_specs=filter_find_specs,
                number_of_worker_processes=self._number_of_extraction_workers,
                status_update_callback=status_update_callback,
                worker_memory_limit=self._worker_memory_limit)

        self._status_view.PrintExtractionSummary(processing_status)
Exemple #27
0
  def ParseOptions(self, options):
    """Parses the options.

    Args:
      options (argparse.Namespace): command line arguments.

    Raises:
      BadConfigOption: if the options are invalid.
    """
    # The extraction options depend on the data location, so parse it first.
    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, self, names=['data_location'])

    # Check the list options first otherwise required options will raise.
    helper_names = ['hashers', 'parsers', 'profiling']
    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, self, names=helper_names)

    self._ParseTimezoneOption(options)

    self.list_hashers = self._hasher_names_string == 'list'
    self.list_parsers_and_plugins = self._parser_filter_expression == 'list'
    self.list_profilers = self._profilers == 'list'

    self.show_info = getattr(options, 'show_info', False)

    if getattr(options, 'use_markdown', False):
      self._views_format_type = views.ViewsFactory.FORMAT_TYPE_MARKDOWN

    self.dependencies_check = getattr(options, 'dependencies_check', True)

    if (self.list_hashers or self.list_parsers_and_plugins or
        self.list_profilers or self.list_timezones or self.show_info):
      return

    self._ParseInformationalOptions(options)

    helper_names = [
        'artifact_definitions', 'extraction', 'filter_file', 'status_view',
        'storage_file', 'storage_format', 'text_prepend']
    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, self, names=helper_names)

    self._ParseLogFileOptions(options)

    self._ParseStorageMediaOptions(options)

    self._ParsePerformanceOptions(options)
    self._ParseProcessingOptions(options)

    log_format = (
        '%(asctime)s [%(levelname)s] (%(processName)-10s) PID:%(process)d '
        '<%(module)s> %(message)s')

    if self._debug_mode:
      level = logging.DEBUG
    elif self._quiet_mode:
      level = logging.WARNING
    else:
      level = logging.INFO

    self._ConfigureLogging(
        filename=self._log_file, format_string=log_format, log_level=level)

    if self._debug_mode:
      # In debug mode, filter noisy log records at the root logger.
      noise_filter = logging_filter.LoggingFilter()
      logging.getLogger().addFilter(noise_filter)

    if not self._storage_file_path:
      raise errors.BadConfigOption('Missing storage file option.')

    serializer = getattr(
        options, 'serializer_format', definitions.SERIALIZER_FORMAT_JSON)
    if serializer not in definitions.SERIALIZER_FORMATS:
      raise errors.BadConfigOption(
          'Unsupported storage serializer format: {0:s}.'.format(serializer))
    self._storage_serializer_format = serializer

    # TODO: where is this defined?
    self._operating_system = getattr(options, 'os', None)

    if self._operating_system:
      self._mount_path = getattr(options, 'filename', None)

    # NOTE(review): 'status_view' is also parsed above as part of
    # helper_names; this second parse is kept to preserve behavior.
    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, self, names=['status_view'])

    self._enable_sigsegv_handler = getattr(options, 'sigsegv_handler', False)
Exemple #28
0
    def ParseOptions(self, options):
        """Parses the options and initializes the front-end.

    Args:
      options: the command line arguments (instance of argparse.Namespace).

    Raises:
      BadConfigOption: if the options are invalid.
    """
        super(ImageExportTool, self).ParseOptions(options)

        format_str = u'%(asctime)s [%(levelname)s] %(message)s'

        debug = getattr(options, u'debug', False)
        if debug:
            logging.basicConfig(level=logging.DEBUG, format=format_str)
        else:
            logging.basicConfig(level=logging.INFO, format=format_str)

        self._destination_path = getattr(options, u'path', u'export')

        filter_file = getattr(options, u'filter', None)
        if filter_file and not os.path.isfile(filter_file):
            raise errors.BadConfigOption(
                u'Unable to proceed, filter file: {0:s} does not exist.'.
                format(filter_file))

        self._filter_file = filter_file

        # Both "--no_vss" and "--include_duplicates" disable duplicate removal.
        if (getattr(options, u'no_vss', False)
                or getattr(options, u'include_duplicates', False)):
            self._remove_duplicates = False

        # TODO: move data location code to a location shared with psort.
        data_location = getattr(options, u'data_location', None)
        if not data_location:
            # Determine if we are running from the source directory.
            data_location = os.path.dirname(__file__)
            data_location = os.path.dirname(data_location)
            data_location = os.path.join(data_location, u'data')

            if not os.path.exists(data_location):
                # Otherwise determine if there is shared plaso data location.
                data_location = os.path.join(sys.prefix, u'share', u'plaso')

            if not os.path.exists(data_location):
                logging.warning(
                    u'Unable to automatically determine data location.')
                data_location = None

        self._data_location = data_location

        date_filters = getattr(options, u'date_filters', None)
        try:
            self._front_end.ParseDateFilters(date_filters)
        except ValueError as exception:
            raise errors.BadConfigOption(exception) from exception

        extensions_string = getattr(options, u'extensions_string', None)
        self._front_end.ParseExtensionsString(extensions_string)

        names_string = getattr(options, u'names_string', None)
        self._front_end.ParseNamesString(names_string)

        signature_identifiers = getattr(options, u'signature_identifiers',
                                        None)
        if signature_identifiers == u'list':
            self.list_signature_identifiers = True
        else:
            try:
                # Fix: this previously referenced "self._frontend", which does
                # not exist; the attribute used throughout this method is
                # "self._front_end".
                self._front_end.ParseSignatureIdentifiers(
                    self._data_location, signature_identifiers)
            except (IOError, ValueError) as exception:
                raise errors.BadConfigOption(exception) from exception

        self.has_filters = self._front_end.HasFilters()
Exemple #29
0
    def _Extract(self,
                 source_path_specs,
                 destination_path,
                 output_writer,
                 artifact_filters,
                 filter_file,
                 artifact_definitions_path,
                 custom_artifacts_path,
                 skip_duplicates=True):
        """Extracts files.

    This method runs the file extraction process on the image and
    potentially on every VSS if that is wanted.

    Args:
      source_path_specs (list[dfvfs.PathSpec]): path specifications to extract.
      destination_path (str): path where the extracted files should be stored.
      output_writer (CLIOutputWriter): output writer.
      artifact_filters (list[str]): names of artifact definitions that are
          used for filtering file system and Windows Registry key paths.
      filter_file (str): path of the file that contains the filter file path
          filters.
      artifact_definitions_path (str): path to artifact definitions file.
      custom_artifacts_path (str): path to custom artifact definitions file.
      skip_duplicates (Optional[bool]): True if files with duplicate content
          should be skipped.

    Raises:
      BadConfigOption: if an invalid collection filter was specified.
    """
        extraction_engine = engine.BaseEngine()

        # If the source is a directory or a storage media image
        # run pre-processing.
        if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
            self._PreprocessSources(extraction_engine)

        try:
            extraction_engine.BuildCollectionFilters(
                artifact_definitions_path, custom_artifacts_path,
                extraction_engine.knowledge_base, artifact_filters,
                filter_file)
        except errors.InvalidFilter as exception:
            raise errors.BadConfigOption(
                'Unable to build collection filters with error: {0!s}'.format(
                    exception))

        filters_helper = extraction_engine.collection_filters_helper

        # The filters helper, when present, supplies include and exclude find
        # specifications built from the collection filters above.
        excluded_find_specs = None
        included_find_specs = None
        if filters_helper:
            excluded_find_specs = filters_helper.excluded_file_system_find_specs
            included_find_specs = filters_helper.included_file_system_find_specs

        output_writer.Write('Extracting file entries.\n')
        path_spec_generator = self._path_spec_extractor.ExtractPathSpecs(
            source_path_specs,
            find_specs=included_find_specs,
            resolver_context=self._resolver_context)

        for path_spec in path_spec_generator:
            file_entry = path_spec_resolver.Resolver.OpenFileEntry(
                path_spec, resolver_context=self._resolver_context)

            if not file_entry:
                logger.warning(
                    'Unable to open file entry for path spec: {0:s}'.format(
                        path_spec.comparable))
                continue

            # A file entry is skipped when any exclusion find spec matches
            # its location.
            skip_file_entry = False
            for find_spec in excluded_find_specs or []:
                skip_file_entry = find_spec.CompareLocation(file_entry)
                if skip_file_entry:
                    break

            if skip_file_entry:
                logger.info(
                    'Skipped: {0:s} because of exclusion filter.'.format(
                        file_entry.path_spec.location))
                continue

            self._ExtractFileEntry(file_entry,
                                   destination_path,
                                   skip_duplicates=skip_duplicates)
Exemple #30
0
    def ProcessStorage(self):
        """Processes a plaso storage file.

    Creates the output module for the selected output format, prompts the
    user for any output module parameters that are still missing, configures
    and optionally runs the requested analysis plugins against the storage,
    and then exports the stored events through the output module. Unless
    quiet mode is enabled a summary counter table and the analysis report
    details are written to the output writer.

    Raises:
      BadConfigOption: when a configuration parameter fails validation.
      RuntimeError: if a non-recoverable situation is encountered.
    """
        output_module = self._front_end.CreateOutputModule(
            self._output_format,
            preferred_encoding=self.preferred_encoding,
            timezone=self._timezone)

        # Linear output modules write to a file: an explicit output filename
        # is required and an existing file is never overwritten.
        if isinstance(output_module, output_interface.LinearOutputModule):
            if not self._output_filename:
                # TODO: Remove "no longer supported" after 1.5 release.
                raise errors.BadConfigOption((
                    u'Output format: {0:s} requires an output file, output to stdout '
                    u'is no longer supported.').format(self._output_format))

            if self._output_filename and os.path.exists(self._output_filename):
                raise errors.BadConfigOption(
                    (u'Output file already exists: {0:s}. Aborting.').format(
                        self._output_filename))

            # NOTE(review): the file object opened here is not explicitly
            # closed in this method; presumably the output writer or output
            # module takes ownership of it — confirm.
            output_file_object = open(self._output_filename, u'wb')
            output_writer = cli_tools.FileObjectOutputWriter(
                output_file_object)

            output_module.SetOutputWriter(output_writer)

        helpers_manager.ArgumentHelperManager.ParseOptions(
            self._options, output_module)

        # Check if there are parameters that have not been defined and need to
        # in order for the output module to continue. Prompt user to supply
        # those that may be missing.
        missing_parameters = output_module.GetMissingArguments()
        while missing_parameters:
            # TODO: refactor this.
            configuration_object = PsortOptions()
            setattr(configuration_object, u'output_format', output_module.NAME)
            for parameter in missing_parameters:
                value = self._PromptUserForInput(
                    u'Missing parameter {0:s} for output module'.format(
                        parameter))
                # A None value means no input could be obtained; warn and move
                # on so the remaining parameters can still be prompted for.
                if value is None:
                    logging.warning(
                        u'Unable to set the missing parameter for: {0:s}'.
                        format(parameter))
                    continue

                setattr(configuration_object, parameter, value)

            # Re-parse the options with the user supplied values and loop
            # until the output module reports no more missing arguments.
            helpers_manager.ArgumentHelperManager.ParseOptions(
                configuration_object, output_module)
            missing_parameters = output_module.GetMissingArguments()

        # Instantiate the requested analysis plugins and let the argument
        # helpers configure each one from the command line options.
        analysis_plugins = self._front_end.GetAnalysisPlugins(
            self._analysis_plugins)
        for analysis_plugin in analysis_plugins:
            helpers_manager.ArgumentHelperManager.ParseOptions(
                self._options, analysis_plugin)

        # Select how processing status is reported: a linear stream of
        # updates, a status window, or no status updates at all.
        if self._status_view_mode == u'linear':
            status_update_callback = self._PrintStatusUpdateStream
        elif self._status_view_mode == u'window':
            status_update_callback = self._PrintStatusUpdate
        else:
            status_update_callback = None

        session = self._front_end.CreateSession(
            command_line_arguments=self._command_line_arguments,
            preferred_encoding=self.preferred_encoding)

        # Record the number of analysis reports already in the storage before
        # any new analysis plugins are run against it.
        storage_reader = self._front_end.CreateStorageReader(
            self._storage_file_path)
        self._number_of_analysis_reports = (
            storage_reader.GetNumberOfAnalysisReports())
        storage_reader.Close()

        if analysis_plugins:
            storage_writer = self._front_end.CreateStorageWriter(
                session, self._storage_file_path)
            # TODO: handle errors.BadConfigOption

            self._front_end.AnalyzeEvents(
                storage_writer,
                analysis_plugins,
                status_update_callback=status_update_callback)

        # Export the events unless the "null" output format was selected.
        counter = collections.Counter()
        if self._output_format != u'null':
            storage_reader = self._front_end.CreateStorageReader(
                self._storage_file_path)

            events_counter = self._front_end.ExportEvents(
                storage_reader,
                output_module,
                deduplicate_events=self._deduplicate_events,
                status_update_callback=status_update_callback,
                time_slice=self._time_slice,
                use_time_slicer=self._use_time_slicer)

            counter += events_counter

        # Merge the session analysis report counters into the overall counter,
        # overwriting any identically named entries.
        for item, value in iter(session.analysis_reports_counter.items()):
            counter[item] = value

        # In quiet mode no summary output is produced.
        if self._quiet_mode:
            return

        self._output_writer.Write(u'Processing completed.\n')

        # Render the counters as a table with the most common entries first;
        # empty counter keys are displayed as "N/A".
        table_view = cli_views.ViewsFactory.GetTableView(
            self._views_format_type, title=u'Counter')
        for element, count in counter.most_common():
            if not element:
                element = u'N/A'
            table_view.AddRow([element, count])
        table_view.Write(self._output_writer)

        storage_reader = self._front_end.CreateStorageReader(
            self._storage_file_path)
        self._PrintAnalysisReportsDetails(storage_reader)