Example #1
0
    def _BuildFindSpecsFromRegistrySourceKey(self, key_path):
        """Build find specifications from a Windows Registry source type.

    Args:
      key_path (str): Windows Registry key path defined by the source.

    Returns:
      list[dfwinreg.FindSpec]: find specifications for the Windows Registry
          source type.
    """
        find_specs = []
        for key_path_glob in path_helper.PathHelper.ExpandGlobStars(
                key_path, '\\'):
            logger.debug('building find spec from key path glob: {0:s}'.format(
                key_path_glob))

            key_path_glob_upper = key_path_glob.upper()
            if key_path_glob_upper.startswith(
                    'HKEY_LOCAL_MACHINE\\SYSTEM\\CURRENTCONTROLSET'):
                # Rewrite CurrentControlSet to ControlSet* for Windows NT.
                key_path_glob = 'HKEY_LOCAL_MACHINE\\System\\ControlSet*{0:s}'.format(
                    key_path_glob[43:])

            elif key_path_glob_upper.startswith('HKEY_USERS\\%%USERS.SID%%'):
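                # Rewrite the per-user HKEY_USERS path to HKEY_CURRENT_USER.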
                key_path_glob = 'HKEY_CURRENT_USER{0:s}'.format(
                    key_path_glob[26:])

            find_spec = registry_searcher.FindSpec(key_path_glob=key_path_glob)
            find_specs.append(find_spec)

        return find_specs
Example #2
0
  def BuildFindSpecs(self, artifact_filter_names, environment_variables=None):
    """Builds find specifications from artifact definitions.

    Args:
      artifact_filter_names (list[str]): names of artifact definitions that are
          used for filtering file system and Windows Registry key paths.
      environment_variables (Optional[list[EnvironmentVariableArtifact]]):
          environment variables.
    """
    find_specs = []
    for name in artifact_filter_names:
      definition = self._artifacts_registry.GetDefinitionByName(name)
      if not definition:
        logger.debug('undefined artifact definition: {0:s}'.format(name))
        continue

      logger.debug('building find spec from artifact definition: {0:s}'.format(
          name))
      artifact_find_specs = self._BuildFindSpecsFromArtifact(
          definition, environment_variables)
      find_specs.extend(artifact_find_specs)

    for find_spec in find_specs:
      if isinstance(find_spec, file_system_searcher.FindSpec):
        self.included_file_system_find_specs.append(find_spec)

      elif isinstance(find_spec, registry_searcher.FindSpec):
        self.registry_find_specs.append(find_spec)

      else:
        logger.warning('Unsupported find specification type: {0!s}'.format(
            type(find_spec)))
Example #3
0
    def _BuildFindSpecsFromRegistrySourceKey(self, key_path):
        """Build find specifications from a Windows Registry source type.

    Args:
      key_path (str): Windows Registry key path defined by the source.

    Returns:
      list[dfwinreg.FindSpec]: find specifications for the Windows Registry
          source type.
    """
        find_specs = []
        for key_path_glob in path_helper.PathHelper.ExpandRecursiveGlobs(
                key_path, '\\'):
            logger.debug('building find spec from key path glob: {0:s}'.format(
                key_path_glob))

            key_path_glob_upper = key_path_glob.upper()
            if key_path_glob_upper.startswith('HKEY_USERS\\%%USERS.SID%%'):
                key_path_glob = 'HKEY_CURRENT_USER{0:s}'.format(
                    key_path_glob[26:])

            find_spec = registry_searcher.FindSpec(key_path_glob=key_path_glob)
            find_specs.append(find_spec)

        return find_specs
Example #4
0
  def BuildFindSpecs(self, artifact_filter_names, environment_variables=None):
    """Builds find specifications from artifact definitions.

    Args:
      artifact_filter_names (list[str]): names of artifact definitions that are
          used for filtering file system and Windows Registry key paths.
      environment_variables (Optional[list[EnvironmentVariableArtifact]]):
          environment variables.
    """
    find_specs = []
    for name in artifact_filter_names:
      definition = self._artifacts_registry.GetDefinitionByName(name)
      if not definition:
        logger.debug('undefined artifact definition: {0:s}'.format(name))
        continue

      logger.debug('building find spec from artifact definition: {0:s}'.format(
          name))
      artifact_find_specs = self._BuildFindSpecsFromArtifact(
          definition, environment_variables)
      find_specs.extend(artifact_find_specs)

    for find_spec in find_specs:
      if isinstance(find_spec, file_system_searcher.FindSpec):
        self.included_file_system_find_specs.append(find_spec)

      elif isinstance(find_spec, registry_searcher.FindSpec):
        self.registry_find_specs.append(find_spec)

      else:
        logger.warning('Unsupported find specification type: {0!s}'.format(
            type(find_spec)))
Example #5
0
    def _ParseFileEntryWithParser(self,
                                  parser_mediator,
                                  parser,
                                  file_entry,
                                  file_object=None):
        """Parses a file entry with a specific parser.

    Args:
      parser_mediator (ParserMediator): parser mediator.
      parser (BaseParser): parser.
      file_entry (dfvfs.FileEntry): file entry.
      file_object (Optional[file]): file-like object to parse.
          If not set the parser will use the parser mediator to open
          the file entry's default data stream as a file-like object.

    Returns:
      int: parse result which is _PARSE_RESULT_FAILURE if the file entry
          could not be parsed, _PARSE_RESULT_SUCCESS if the file entry
          was parsed successfully or _PARSE_RESULT_UNSUPPORTED when
          UnableToParseFile was raised.

    Raises:
      TypeError: if parser object is not a supported parser type.
    """
        if not isinstance(parser, (parsers_interface.FileEntryParser,
                                   parsers_interface.FileObjectParser)):
            raise TypeError('Unsupported parser object type.')

        parser_mediator.ClearParserChain()

        parser_mediator.SampleStartTiming(parser.NAME)

        try:
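            # File entry parsers retrieve the file entry via the parser
            # mediator; file object parsers are passed the file-like object.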
            if isinstance(parser, parsers_interface.FileEntryParser):
                parser.Parse(parser_mediator)
            elif isinstance(parser, parsers_interface.FileObjectParser):
                parser.Parse(parser_mediator, file_object)
            result = self._PARSE_RESULT_SUCCESS

        # We catch IOError so we can determine the parser that generated the error.
        except (IOError, dfvfs_errors.BackEndError) as exception:
            display_name = parser_mediator.GetDisplayName(file_entry)
            logger.warning(
                '{0:s} unable to parse file: {1:s} with error: {2!s}'.format(
                    parser.NAME, display_name, exception))
            result = self._PARSE_RESULT_FAILURE

        except errors.UnableToParseFile as exception:
            display_name = parser_mediator.GetDisplayName(file_entry)
            logger.debug(
                '{0:s} unable to parse file: {1:s} with error: {2!s}'.format(
                    parser.NAME, display_name, exception))
            result = self._PARSE_RESULT_UNSUPPORTED

        finally:
            parser_mediator.SampleStopTiming(parser.NAME)
            parser_mediator.SampleMemoryUsage(parser.NAME)

        return result
Example #6
0
    def _ProcessCompressedStreamTypes(self, mediator, path_spec,
                                      type_indicators):
        """Processes a data stream containing compressed stream types such as: bz2.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      path_spec (dfvfs.PathSpec): path specification.
      type_indicators (list[str]): dfVFS compressed stream type indicators
          found in the data stream.
    """
        number_of_type_indicators = len(type_indicators)
        if number_of_type_indicators == 0:
            return

        self.processing_status = definitions.STATUS_INDICATOR_COLLECTING

        if number_of_type_indicators > 1:
            display_name = mediator.GetDisplayName()
            logger.debug(('Found multiple format type indicators: {0!s} for '
                          'compressed stream file: {1:s}').format(
                              type_indicators, display_name))

        for type_indicator in type_indicators:
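            # Map each supported type indicator onto a compressed stream path
            # specification.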
            if type_indicator == dfvfs_definitions.TYPE_INDICATOR_BZIP2:
                compressed_stream_path_spec = path_spec_factory.Factory.NewPathSpec(
                    dfvfs_definitions.TYPE_INDICATOR_COMPRESSED_STREAM,
                    compression_method=dfvfs_definitions.
                    COMPRESSION_METHOD_BZIP2,
                    parent=path_spec)

            elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_GZIP:
                compressed_stream_path_spec = path_spec_factory.Factory.NewPathSpec(
                    dfvfs_definitions.TYPE_INDICATOR_GZIP, parent=path_spec)

            elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_XZ:
                compressed_stream_path_spec = path_spec_factory.Factory.NewPathSpec(
                    dfvfs_definitions.TYPE_INDICATOR_COMPRESSED_STREAM,
                    compression_method=dfvfs_definitions.COMPRESSION_METHOD_XZ,
                    parent=path_spec)

            else:
                compressed_stream_path_spec = None

                warning_message = (
                    'unsupported compressed stream format type indicators: '
                    '{0:s}').format(type_indicator)
                mediator.ProduceExtractionWarning(warning_message,
                                                  path_spec=path_spec)

            if compressed_stream_path_spec:
                event_source = event_sources.FileEntryEventSource(
                    path_spec=compressed_stream_path_spec)
                event_source.file_entry_type = dfvfs_definitions.FILE_ENTRY_TYPE_FILE
                mediator.ProduceEventSource(event_source)

                self.last_activity_timestamp = time.time()
Example #7
0
    def BuildFilterFindSpecs(cls,
                             artifact_definitions_path,
                             custom_artifacts_path,
                             knowledge_base_object,
                             artifact_filter_names=None,
                             filter_file_path=None):
        """Builds find specifications from artifacts or filter file if available.

    Args:
      artifact_definitions_path (str): path to artifact definitions file.
      custom_artifacts_path (str): path to custom artifact definitions file.
      knowledge_base_object (KnowledgeBase): knowledge base.
      artifact_filter_names (Optional[list[str]]): names of artifact
          definitions that are used for filtering file system and Windows
          Registry key paths.
      filter_file_path (Optional[str]): path of filter file.

    Returns:
      list[dfvfs.FindSpec]: find specifications for the file source type.

    Raises:
      InvalidFilter: if no valid FindSpecs are built.
    """
        environment_variables = knowledge_base_object.GetEnvironmentVariables()
        find_specs = None
        if artifact_filter_names:
            logger.debug(
                'building find specification based on artifacts: {0:s}'.format(
                    ', '.join(artifact_filter_names)))

            artifacts_registry_object = cls.BuildArtifactsRegistry(
                artifact_definitions_path, custom_artifacts_path)
            artifact_filters_object = (
                artifact_filters.ArtifactDefinitionsFilterHelper(
                    artifacts_registry_object, artifact_filter_names,
                    knowledge_base_object))
            artifact_filters_object.BuildFindSpecs(
                environment_variables=environment_variables)
            find_specs = knowledge_base_object.GetValue(
                artifact_filters_object.KNOWLEDGE_BASE_VALUE)[
                    artifact_types.TYPE_INDICATOR_FILE]

        elif filter_file_path:
            logger.debug(
                'building find specification based on filter file: {0:s}'.
                format(filter_file_path))

            filter_file_object = filter_file.FilterFile(filter_file_path)
            find_specs = filter_file_object.BuildFindSpecs(
                environment_variables=environment_variables)

        if (artifact_filter_names or filter_file_path) and not find_specs:
            raise errors.InvalidFilter(
                'Error processing filters, no valid specifications built.')

        return find_specs
Example #8
0
    def _ParseFileEntryWithParsers(self,
                                   parser_mediator,
                                   parser_names,
                                   file_entry,
                                   file_object=None):
        """Parses a file entry with specific parsers.

    Args:
      parser_mediator (ParserMediator): parser mediator.
      parser_names (list[str]): names of parsers.
      file_entry (dfvfs.FileEntry): file entry.
      file_object (Optional[file]): file-like object to parse.
          If not set the parser will use the parser mediator to open
          the file entry's default data stream as a file-like object.

    Returns:
      int: parse result which is _PARSE_RESULT_FAILURE if the file entry
          could not be parsed, _PARSE_RESULT_SUCCESS if the file entry
          was parsed successfully or _PARSE_RESULT_UNSUPPORTED when
          UnableToParseFile was raised or no parser names were provided.

    Raises:
      RuntimeError: if the parser object is missing.
    """
        parse_results = self._PARSE_RESULT_UNSUPPORTED
        for parser_name in parser_names:
            parser = self._parsers.get(parser_name, None)
            if not parser:
                raise RuntimeError(
                    'Parser object missing for parser: {0:s}'.format(
                        parser_name))

            if parser.FILTERS:
                if not self._CheckParserCanProcessFileEntry(
                        parser, file_entry):
                    parse_results = self._PARSE_RESULT_SUCCESS
                    continue

            display_name = parser_mediator.GetDisplayName(file_entry)
            logger.debug((
                '[ParseFileEntryWithParsers] parsing file: {0:s} with parser: '
                '{1:s}').format(display_name, parser_name))

            parse_result = self._ParseFileEntryWithParser(
                parser_mediator, parser, file_entry, file_object=file_object)

            if parse_result == self._PARSE_RESULT_FAILURE:
                return self._PARSE_RESULT_FAILURE

            if parse_result == self._PARSE_RESULT_SUCCESS:
                parse_results = self._PARSE_RESULT_SUCCESS

        return parse_results
Example #9
0
    def PopItem(self):
        """Pops an item off the queue.

    If no ZeroMQ socket has been created, one will be created the first
    time this method is called.

    Returns:
      object: item from the queue.

    Raises:
      KeyboardInterrupt: if the process is sent a KeyboardInterrupt while
          popping an item.
      QueueEmpty: if the queue is empty, and no item could be popped within the
          queue timeout.
      RuntimeError: if terminate event is missing.
      zmq.error.ZMQError: if an error occurs in ZeroMQ.
    """
        if not self._zmq_socket:
            self._CreateZMQSocket()

        if not self._terminate_event:
            raise RuntimeError('Missing terminate event.')

        logger.debug('Pop on {0:s} queue, port {1:d}'.format(
            self.name, self.port))

        last_retry_time = time.time() + self.timeout_seconds
        while not self._terminate_event.is_set():
            try:
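                # Send an empty request to ask the queue for an item.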
                self._zmq_socket.send_pyobj(None)
                break

            except zmq.error.Again:
                # The existing socket is now out of sync, so we need to open a new one.
                self._CreateZMQSocket()
                if time.time() > last_retry_time:
                    logger.warning('{0:s} timeout requesting item'.format(
                        self.name))
                    raise errors.QueueEmpty

                continue

        while not self._terminate_event.is_set():
            try:
                return self._ReceiveItemOnActivity(self._zmq_socket)
            except errors.QueueEmpty:
                continue

            except KeyboardInterrupt:
                self.Close(abort=True)
                raise
Example #10
0
  def _ProcessCompressedStreamTypes(self, mediator, path_spec, type_indicators):
    """Processes a data stream containing compressed stream types such as: bz2.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      path_spec (dfvfs.PathSpec): path specification.
      type_indicators (list[str]): dfVFS compressed stream type indicators
          found in the data stream.
    """
    number_of_type_indicators = len(type_indicators)
    if number_of_type_indicators == 0:
      return

    self.processing_status = definitions.PROCESSING_STATUS_COLLECTING

    if number_of_type_indicators > 1:
      display_name = mediator.GetDisplayName()
      logger.debug((
          'Found multiple format type indicators: {0!s} for '
          'compressed stream file: {1:s}').format(
              type_indicators, display_name))

    for type_indicator in type_indicators:
      if type_indicator == dfvfs_definitions.TYPE_INDICATOR_BZIP2:
        compressed_stream_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_COMPRESSED_STREAM,
            compression_method=dfvfs_definitions.COMPRESSION_METHOD_BZIP2,
            parent=path_spec)

      elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_GZIP:
        compressed_stream_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_GZIP, parent=path_spec)

      else:
        compressed_stream_path_spec = None

        error_message = (
            'unsupported compressed stream format type indicators: '
            '{0:s}').format(type_indicator)
        mediator.ProduceExtractionError(
            error_message, path_spec=path_spec)

      if compressed_stream_path_spec:
        event_source = event_sources.FileEntryEventSource(
            path_spec=compressed_stream_path_spec)
        event_source.file_entry_type = dfvfs_definitions.FILE_ENTRY_TYPE_FILE
        mediator.ProduceEventSource(event_source)

        self.last_activity_timestamp = time.time()
Example #11
0
    def _ZeroMQResponder(self, source_queue):
        """Listens for requests and replies to clients.

    Args:
      source_queue (Queue.Queue): queue to pull items from.

    Raises:
      RuntimeError: if closed or terminate event is missing.
    """
        if not self._closed_event or not self._terminate_event:
            raise RuntimeError('Missing closed or terminate event.')

        logger.debug('{0:s} responder thread started'.format(self.name))

        item = None
        while not self._terminate_event.is_set():
            if not item:
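                # Once the queue has been closed, drain any remaining buffered
                # items without blocking.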
                try:
                    if self._closed_event.is_set():
                        item = source_queue.get_nowait()
                    else:
                        item = source_queue.get(True,
                                                self._buffer_timeout_seconds)

                except Queue.Empty:
                    if self._closed_event.is_set():
                        break

                    continue

            try:
                # We need to receive a request before we can reply with the item.
                self._ReceiveItemOnActivity(self._zmq_socket)

            except errors.QueueEmpty:
                if self._closed_event.is_set() and self._queue.empty():
                    break

                continue

            sent_successfully = self._SendItem(self._zmq_socket, item)
            item = None
            if not sent_successfully:
                logger.error('Queue {0:s} unable to send item.'.format(
                    self.name))
                break

        logger.info('Queue {0:s} responder exiting.'.format(self.name))
        self._zmq_socket.close(self._linger_seconds)
Example #12
0
    def _AnalyzeDataStream(self, file_entry, data_stream_name, display_name,
                           event_data_stream):
        """Analyzes the contents of a specific data stream of a file entry.

    The results of the analyzers are set in the event data stream as
    attributes that are added to produced event objects. Note that some
    file systems allow directories to have data streams, such as NTFS.

    Args:
      file_entry (dfvfs.FileEntry): file entry whose data stream is to be
          analyzed.
      data_stream_name (str): name of the data stream.
      display_name (str): human readable representation of the file entry
          currently being analyzed.
      event_data_stream (EventDataStream): event data stream attribute
           container.

    Raises:
      RuntimeError: if the file-like object cannot be retrieved from
          the file entry.
    """
        logger.debug(
            '[AnalyzeDataStream] analyzing file: {0:s}'.format(display_name))

        if self._processing_profiler:
            self._processing_profiler.StartTiming('analyzing')

        try:
            file_object = file_entry.GetFileObject(
                data_stream_name=data_stream_name)
            if not file_object:
                raise RuntimeError(
                    ('Unable to retrieve file-like object for file entry: '
                     '{0:s}.').format(display_name))

            try:
                self._AnalyzeFileObject(file_object, display_name,
                                        event_data_stream)
            finally:
                file_object.close()

        finally:
            if self._processing_profiler:
                self._processing_profiler.StopTiming('analyzing')

        logger.debug(
            '[AnalyzeDataStream] completed analyzing file: {0:s}'.format(
                display_name))
Example #13
0
  def _ParseFileEntryWithParsers(
      self, parser_mediator, parser_names, file_entry, file_object=None):
    """Parses a file entry with specific parsers.

    Args:
      parser_mediator (ParserMediator): parser mediator.
      parser_names (list[str]): names of parsers.
      file_entry (dfvfs.FileEntry): file entry.
      file_object (Optional[file]): file-like object to parse.
          If not set the parser will use the parser mediator to open
          the file entry's default data stream as a file-like object.

    Returns:
      int: parse result which is _PARSE_RESULT_FAILURE if the file entry
          could not be parsed, _PARSE_RESULT_SUCCESS if the file entry
          was parsed successfully or _PARSE_RESULT_UNSUPPORTED when
          UnableToParseFile was raised or no parser names were provided.

    Raises:
      RuntimeError: if the parser object is missing.
    """
    parse_results = self._PARSE_RESULT_UNSUPPORTED
    for parser_name in parser_names:
      parser = self._parsers.get(parser_name, None)
      if not parser:
        raise RuntimeError(
            'Parser object missing for parser: {0:s}'.format(parser_name))

      if parser.FILTERS:
        if not self._CheckParserCanProcessFileEntry(parser, file_entry):
          parse_results = self._PARSE_RESULT_SUCCESS
          continue

      display_name = parser_mediator.GetDisplayName(file_entry)
      logger.debug((
          '[ParseFileEntryWithParsers] parsing file: {0:s} with parser: '
          '{1:s}').format(display_name, parser_name))

      parse_result = self._ParseFileEntryWithParser(
          parser_mediator, parser, file_entry, file_object=file_object)
      if parse_result == self._PARSE_RESULT_FAILURE:
        return self._PARSE_RESULT_FAILURE

      elif parse_result == self._PARSE_RESULT_SUCCESS:
        parse_results = self._PARSE_RESULT_SUCCESS

    return parse_results
Example #14
0
    def _AnalyzeDataStream(self, mediator, file_entry, data_stream_name):
        """Analyzes the contents of a specific data stream of a file entry.

    The results of the analyzers are set in the parser mediator as attributes
    that are added to produced event objects. Note that some file systems
    allow directories to have data streams, e.g. NTFS.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      file_entry (dfvfs.FileEntry): file entry whose data stream is to be
          analyzed.
      data_stream_name (str): name of the data stream.

    Raises:
      RuntimeError: if the file-like object cannot be retrieved from
          the file entry.
    """
        display_name = mediator.GetDisplayName()
        logger.debug(
            '[AnalyzeDataStream] analyzing file: {0:s}'.format(display_name))

        if self._processing_profiler:
            self._processing_profiler.StartTiming('analyzing')

        try:
            file_object = file_entry.GetFileObject(
                data_stream_name=data_stream_name)
            if not file_object:
                raise RuntimeError(
                    ('Unable to retrieve file-like object for file entry: '
                     '{0:s}.').format(display_name))

            try:
                self._AnalyzeFileObject(mediator, file_object)
            finally:
                file_object.close()

        finally:
            if self._processing_profiler:
                self._processing_profiler.StopTiming('analyzing')

        logger.debug(
            '[AnalyzeDataStream] completed analyzing file: {0:s}'.format(
                display_name))
Example #15
0
    def Close(self, abort=False):
        """Closes the queue.

    Args:
      abort (Optional[bool]): whether the Close is the result of an abort
          condition. If True, queue contents may be lost.

    Raises:
      QueueAlreadyClosed: If the queue is not started, or has already been
          closed.
      RuntimeError: if closed or terminate event is missing.
    """
        if not self._closed_event or not self._terminate_event:
            raise RuntimeError('Missing closed or terminate event.')

        if not abort and self._closed_event.is_set():
            raise errors.QueueAlreadyClosed()

        self._closed_event.set()

        if abort:
            if not self._closed_event.is_set():
                logger.warning(
                    '{0:s} queue aborting. Contents may be lost.'.format(
                        self.name))

            # We can't determine whether there might be an operation being
            # performed on the socket in a separate method or thread, so we'll
            # signal that any such operation should cease.
            self._terminate_event.set()

            self._linger_seconds = 0

            if self._zmq_thread:
                logger.debug('[{0:s}] Waiting for thread to exit.'.format(
                    self.name))
                self._zmq_thread.join(timeout=self.timeout_seconds)
                if self._zmq_thread.is_alive():
                    logger.error((
                        '{0:s} ZMQ responder thread did not exit within timeout'
                    ).format(self.name))
        else:
            logger.debug(
                '{0:s} queue closing, will linger for up to {1:d} seconds'.
                format(self.name, self._linger_seconds))
Example #16
0
  def _AnalyzeDataStream(self, mediator, file_entry, data_stream_name):
    """Analyzes the contents of a specific data stream of a file entry.

    The results of the analyzers are set in the parser mediator as attributes
    that are added to produced event objects. Note that some file systems
    allow directories to have data streams, e.g. NTFS.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      file_entry (dfvfs.FileEntry): file entry whose data stream is to be
          analyzed.
      data_stream_name (str): name of the data stream.

    Raises:
      RuntimeError: if the file-like object cannot be retrieved from
          the file entry.
    """
    display_name = mediator.GetDisplayName()
    logger.debug('[AnalyzeDataStream] analyzing file: {0:s}'.format(
        display_name))

    if self._processing_profiler:
      self._processing_profiler.StartTiming('analyzing')

    try:
      file_object = file_entry.GetFileObject(data_stream_name=data_stream_name)
      if not file_object:
        raise RuntimeError((
            'Unable to retrieve file-like object for file entry: '
            '{0:s}.').format(display_name))

      try:
        self._AnalyzeFileObject(mediator, file_object)
      finally:
        file_object.close()

    finally:
      if self._processing_profiler:
        self._processing_profiler.StopTiming('analyzing')

    logger.debug(
        '[AnalyzeDataStream] completed analyzing file: {0:s}'.format(
            display_name))
Example #17
0
    def _InitializeParserObjects(self, parser_filter_expression=None):
        """Initializes the parser objects.

    Args:
      parser_filter_expression (Optional[str]): the parser filter expression,
          None represents all parsers and plugins.

          The parser filter expression is a comma separated value string that
          denotes a list of parser names to include and/or exclude. Each entry
          can have the value of:

          * An exact match of a list of parsers, or a preset (see
            plaso/parsers/presets.py for a full list of available presets).
          * A name of a single parser (case insensitive), e.g. msiecf.
          * A glob name for a single parser, e.g. '*msie*' (case insensitive).
    """
        self._specification_store, non_sigscan_parser_names = (
            parsers_manager.ParsersManager.GetSpecificationStore(
                parser_filter_expression=parser_filter_expression))

        self._non_sigscan_parser_names = []
        for parser_name in non_sigscan_parser_names:
            if parser_name in ('filestat', 'usnjrnl'):
                continue
            self._non_sigscan_parser_names.append(parser_name)

        self._file_scanner = parsers_manager.ParsersManager.GetScanner(
            self._specification_store)

        self._parsers = parsers_manager.ParsersManager.GetParserObjects(
            parser_filter_expression=parser_filter_expression)

        active_parser_names = ', '.join(sorted(self._parsers.keys()))
        logger.debug('Active parsers: {0:s}'.format(active_parser_names))

        self._filestat_parser = self._parsers.get('filestat', None)
        if 'filestat' in self._parsers:
            del self._parsers['filestat']

        self._mft_parser = self._parsers.get('mft', None)

        self._usnjrnl_parser = self._parsers.get('usnjrnl', None)
        if 'usnjrnl' in self._parsers:
            del self._parsers['usnjrnl']
Example #18
0
    def PushItem(self, item, block=True):
        """Push an item on to the queue.

    If no ZeroMQ socket has been created, one will be created the first time
    this method is called.

    Args:
      item (object): item to push on the queue.
      block (Optional[bool]): whether the push should be performed in blocking
          or non-blocking mode.

    Raises:
      KeyboardInterrupt: if the process is sent a KeyboardInterrupt while
          pushing an item.
      QueueFull: if it was not possible to push the item to the queue
          within the timeout.
      RuntimeError: if terminate event is missing.
      zmq.error.ZMQError: if a ZeroMQ specific error occurs.
    """
        if not self._zmq_socket:
            self._CreateZMQSocket()

        if not self._terminate_event:
            raise RuntimeError('Missing terminate event.')

        logger.debug('Push on {0:s} queue, port {1:d}'.format(
            self.name, self.port))

        last_retry_timestamp = time.time() + self.timeout_seconds
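        # Retry the send until it succeeds, the timeout is exceeded or the
        # queue is terminated.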
        while not self._terminate_event.is_set():
            try:
                send_successful = self._SendItem(self._zmq_socket, item, block)
                if send_successful:
                    break

                if time.time() > last_retry_timestamp:
                    logger.error('{0:s} unable to push item, raising.'.format(
                        self.name))
                    raise errors.QueueFull

            except KeyboardInterrupt:
                self.Close(abort=True)
                raise
Example #19
0
    def BuildFindSpecs(self, environment_variables=None):
        """Builds find specifications from artifact definitions.

    The resulting find specifications are set in the knowledge base.

    Args:
      environment_variables (Optional[list[EnvironmentVariableArtifact]]):
          environment variables.
    """
        find_specs = []
        for name in self._artifacts:
            definition = self._artifacts_registry.GetDefinitionByName(name)
            if not definition:
                logger.debug(
                    'undefined artifact definition: {0:s}'.format(name))
                continue

            logger.debug(
                'building find spec from artifact definition: {0:s}'.format(
                    name))
            artifact_find_specs = self._BuildFindSpecsFromArtifact(
                definition, environment_variables)
            find_specs.extend(artifact_find_specs)

        find_specs_per_source_type = defaultdict(list)
        for find_spec in find_specs:
            if isinstance(find_spec, registry_searcher.FindSpec):
                artifact_list = find_specs_per_source_type[
                    artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_KEY]
                artifact_list.append(find_spec)
                continue

            if isinstance(find_spec, file_system_searcher.FindSpec):
                artifact_list = find_specs_per_source_type[
                    artifact_types.TYPE_INDICATOR_FILE]
                artifact_list.append(find_spec)
                continue

            logger.warning('Unknown find specification type: {0!s}'.format(
                type(find_spec)))

        self._knowledge_base.SetValue(self.KNOWLEDGE_BASE_VALUE,
                                      find_specs_per_source_type)
Example #20
0
  def _InitializeParserObjects(self, parser_filter_expression=None):
    """Initializes the parser objects.

    Args:
      parser_filter_expression (Optional[str]): the parser filter expression,
          None represents all parsers and plugins.

          The parser filter expression is a comma separated value string that
          denotes a list of parser names to include and/or exclude. Each entry
          can have the value of:

          * An exact match of a list of parsers, or a preset (see
            plaso/parsers/presets.py for a full list of available presets).
          * A name of a single parser (case insensitive), e.g. msiecf.
          * A glob name for a single parser, e.g. '*msie*' (case insensitive).
    """
    self._formats_with_signatures, non_sigscan_parser_names = (
        parsers_manager.ParsersManager.GetFormatsWithSignatures(
            parser_filter_expression=parser_filter_expression))

    self._non_sigscan_parser_names = []
    for parser_name in non_sigscan_parser_names:
      if parser_name not in ('filestat', 'usnjrnl'):
        self._non_sigscan_parser_names.append(parser_name)

    self._file_scanner = parsers_manager.ParsersManager.CreateSignatureScanner(
        self._formats_with_signatures)

    self._parsers = parsers_manager.ParsersManager.GetParserObjects(
        parser_filter_expression=parser_filter_expression)

    active_parser_names = ', '.join(sorted(self._parsers.keys()))
    logger.debug('Active parsers: {0:s}'.format(active_parser_names))

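    # Keep direct references to the filestat, MFT and usnjrnl parsers;
    # filestat and usnjrnl are removed from the generic parsers below so
    # they can be handled separately.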
    self._filestat_parser = self._parsers.get('filestat', None)
    if 'filestat' in self._parsers:
      del self._parsers['filestat']

    self._mft_parser = self._parsers.get('mft', None)

    self._usnjrnl_parser = self._parsers.get('usnjrnl', None)
    if 'usnjrnl' in self._parsers:
      del self._parsers['usnjrnl']
Example #21
0
    def _InitializeParserObjects(self, parser_filter_expression=None):
        """Initializes the parser objects.

    Args:
      parser_filter_expression (Optional[str]): parser filter expression,
          where None represents all parsers and plugins.

          The parser filter expression is a comma separated value string that
          denotes a list of parser names to include and/or exclude. Each entry
          can have the value of:

          * A name of a single parser (case insensitive), such as msiecf.
          * A glob name for a single parser, such as '*msie*' (case
            insensitive).
    """
        self._formats_with_signatures, non_sigscan_parser_names = (
            parsers_manager.ParsersManager.GetFormatsWithSignatures(
                parser_filter_expression=parser_filter_expression))

        self._non_sigscan_parser_names = []
        for parser_name in non_sigscan_parser_names:
            if parser_name not in ('filestat', 'usnjrnl'):
                self._non_sigscan_parser_names.append(parser_name)

        self._file_scanner = parsers_manager.ParsersManager.CreateSignatureScanner(
            self._formats_with_signatures)

        self._parsers = parsers_manager.ParsersManager.GetParserObjects(
            parser_filter_expression=parser_filter_expression)

        active_parser_names = ', '.join(sorted(self._parsers.keys()))
        logger.debug('Active parsers: {0:s}'.format(active_parser_names))

        self._filestat_parser = self._parsers.get('filestat', None)
        if 'filestat' in self._parsers:
            del self._parsers['filestat']

        self._mft_parser = self._parsers.get('mft', None)

        self._usnjrnl_parser = self._parsers.get('usnjrnl', None)
        if 'usnjrnl' in self._parsers:
            del self._parsers['usnjrnl']
Example #22
0
    def _ParserFileEntryWithParsers(self,
                                    parser_mediator,
                                    parser_names,
                                    file_entry,
                                    file_object=None):
        """Parses a file entry with specific parsers.

    Args:
      parser_mediator (ParserMediator): parser mediator.
      parser_names (list[str]): names of parsers.
      file_entry (dfvfs.FileEntry): file entry.
      file_object (Optional[file]): file-like object to parse.
          If not set the parser will use the parser mediator to open
          the file entry's default data stream as a file-like object.

    Returns:
      bool: False if the file could not be parsed and UnableToParseFile
          was raised.

    Raises:
      RuntimeError: if the parser object is missing.
    """
        for parser_name in parser_names:
            parser = self._parsers.get(parser_name, None)
            if not parser:
                raise RuntimeError(
                    'Parser object missing for parser: {0:s}'.format(
                        parser_name))

            if parser.FILTERS:
                if not self._CheckParserCanProcessFileEntry(
                        parser, file_entry):
                    continue

            display_name = parser_mediator.GetDisplayName(file_entry)
            logger.debug(('[ParseDataStream] parsing file: {0:s} with parser: '
                          '{1:s}').format(display_name, parser_name))

            self._ParseFileEntryWithParser(parser_mediator,
                                           parser,
                                           file_entry,
                                           file_object=file_object)
Example #23
0
    def _CreateZMQSocket(self):
        """Creates a ZeroMQ socket."""
        logger.debug('Creating socket for {0:s}'.format(self.name))

        if not self._zmq_context:
            self._zmq_context = zmq.Context()

        # The terminate and close threading events need to be created when the
        # socket is opened. Threading events are unpickleable objects and
        # cannot be passed in multiprocessing on Windows.

        if not self._terminate_event:
            self._terminate_event = threading.Event()

        if not self._closed_event:
            self._closed_event = threading.Event()

        if self._zmq_socket:
            logger.debug('Closing old socket for {0:s}'.format(self.name))
            self._zmq_socket.close()
            self._zmq_socket = None

        self._zmq_socket = self._zmq_context.socket(self._SOCKET_TYPE)
        self._SetSocketTimeouts()
        self._SetSocketHighWaterMark()

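        # Connect to or bind the configured port if one was set, otherwise
        # bind to a random port.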
        if self.port:
            address = '{0:s}:{1:d}'.format(self._SOCKET_ADDRESS, self.port)
            if self.SOCKET_CONNECTION_TYPE == self.SOCKET_CONNECTION_CONNECT:
                self._zmq_socket.connect(address)
                logger.debug('{0:s} connected to {1:s}'.format(
                    self.name, address))
            else:
                self._zmq_socket.bind(address)
                logger.debug('{0:s} bound to specified port {1:s}'.format(
                    self.name, address))
        else:
            self.port = self._zmq_socket.bind_to_random_port(
                self._SOCKET_ADDRESS)
            logger.debug('{0:s} bound to random port {1:d}'.format(
                self.name, self.port))
Example #24
0
    def _SendItem(self, zmq_socket, item, block=True):
        """Attempts to send an item to a ZeroMQ socket.

    Args:
      zmq_socket (zmq.Socket): used to send the item.
      item (object): item to send on the queue. Will be pickled prior to
          sending.
      block (Optional[bool]): whether the send should be performed in blocking
          or non-blocking mode.

    Returns:
      bool: whether the item was sent successfully.
    """
        try:
            logger.debug('{0:s} sending item'.format(self.name))
            if block:
                zmq_socket.send_pyobj(item)
            else:
                zmq_socket.send_pyobj(item, zmq.DONTWAIT)
            logger.debug('{0:s} sent item'.format(self.name))
            return True

        except zmq.error.Again:
            logger.debug('{0:s} could not send an item'.format(self.name))

        except zmq.error.ZMQError as exception:
            if exception.errno == errno.EINTR:
                logger.error('ZMQ syscall interrupted in {0:s}.'.format(
                    self.name))

        return False
Example #25
0
    def PopItem(self):
        """Pops an item off the queue.

    If no ZeroMQ socket has been created, one will be created the first
    time this method is called.

    Returns:
      object: item from the queue.

    Raises:
      KeyboardInterrupt: if the process is sent a KeyboardInterrupt while
          popping an item.
      QueueEmpty: if the queue is empty, and no item could be popped within the
          queue timeout.
      RuntimeError: if closed or terminate event is missing.
      zmq.error.ZMQError: if a ZeroMQ error occurs.
    """
        if not self._zmq_socket:
            self._CreateZMQSocket()

        if not self._closed_event or not self._terminate_event:
            raise RuntimeError('Missing closed or terminate event.')

        logger.debug('Pop on {0:s} queue, port {1:d}'.format(
            self.name, self.port))

        last_retry_timestamp = time.time() + self.timeout_seconds
        while (not self._closed_event.is_set() or
               not self._terminate_event.is_set()):
            try:
                return self._ReceiveItemOnActivity(self._zmq_socket)

            except errors.QueueEmpty:
                if time.time() > last_retry_timestamp:
                    raise

            except KeyboardInterrupt:
                self.Close(abort=True)
                raise
Example #26
0
    def _ProcessFileEntry(self, mediator, file_entry):
        """Processes a file entry.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      file_entry (dfvfs.FileEntry): file entry.
    """
        display_name = mediator.GetDisplayName()
        logger.debug('[ProcessFileEntry] processing file entry: {0:s}'.format(
            display_name))

        reference_count = mediator.resolver_context.GetFileObjectReferenceCount(
            file_entry.path_spec)

        try:
            if self._IsMetadataFile(file_entry):
                self._ProcessMetadataFile(mediator, file_entry)

            else:
                file_entry_processed = False
                for data_stream in file_entry.data_streams:
                    if self._abort:
                        break

                    if self._CanSkipDataStream(file_entry, data_stream):
                        logger.debug(
                            ('[ProcessFileEntry] Skipping datastream {0:s} '
                             'for {1:s}: {2:s}').format(
                                 data_stream.name, file_entry.type_indicator,
                                 display_name))
                        continue

                    self._ProcessFileEntryDataStream(mediator, file_entry,
                                                     data_stream)

                    file_entry_processed = True

                if not file_entry_processed:
                    # For when the file entry does not contain a data stream.
                    self._ProcessFileEntryDataStream(mediator, file_entry,
                                                     None)

        finally:
            new_reference_count = (
                mediator.resolver_context.GetFileObjectReferenceCount(
                    file_entry.path_spec))
            if reference_count != new_reference_count:
                # Clean up after parsers that do not call close explicitly.
                if mediator.resolver_context.ForceRemoveFileObject(
                        file_entry.path_spec):
                    logger.warning(
                        'File-object not explicitly closed for file: {0:s}'.
                        format(display_name))

        logger.debug(
            '[ProcessFileEntry] done processing file entry: {0:s}'.format(
                display_name))
Example #27
0
    def _InitializeParserObjects(self, parser_filter_expression=None):
        """Initializes the parser objects.

    Args:
      parser_filter_expression (Optional[str]): parser filter expression,
          where None represents all parsers and plugins.

          A parser filter expression is a comma separated value string that
          denotes which parsers and plugins should be used. See
          filters/parser_filter.py for details of the expression syntax.
    """
        self._formats_with_signatures, non_sigscan_parser_names = (
            parsers_manager.ParsersManager.GetFormatsWithSignatures(
                parser_filter_expression=parser_filter_expression))

        self._non_sigscan_parser_names = []
        for parser_name in non_sigscan_parser_names:
            if parser_name not in ('filestat', 'usnjrnl'):
                self._non_sigscan_parser_names.append(parser_name)

        self._file_scanner = parsers_manager.ParsersManager.CreateSignatureScanner(
            self._formats_with_signatures)

        self._parsers = parsers_manager.ParsersManager.GetParserObjects(
            parser_filter_expression=parser_filter_expression)

        active_parser_names = ', '.join(sorted(self._parsers.keys()))
        logger.debug('Active parsers: {0:s}'.format(active_parser_names))

        self._filestat_parser = self._parsers.get('filestat', None)
        if 'filestat' in self._parsers:
            del self._parsers['filestat']

        self._mft_parser = self._parsers.get('mft', None)

        self._usnjrnl_parser = self._parsers.get('usnjrnl', None)
        if 'usnjrnl' in self._parsers:
            del self._parsers['usnjrnl']
Example #28
0
  def _ExtractMetadataFromFileEntry(self, mediator, file_entry, data_stream):
    """Extracts metadata from a file entry.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      file_entry (dfvfs.FileEntry): file entry to extract metadata from.
      data_stream (dfvfs.DataStream): data stream or None if the file entry
          has no data stream.
    """
    # Do not extract metadata from the root file entry when it is virtual.
    if file_entry.IsRoot() and file_entry.type_indicator not in (
        self._TYPES_WITH_ROOT_METADATA):
      return

    # We always want to extract the file entry metadata but we only want
    # to parse it once per file entry, so we only use it if we are
    # processing the default data stream of regular files.
    if data_stream and not data_stream.IsDefault():
      return

    display_name = mediator.GetDisplayName()
    logger.debug(
        '[ExtractMetadataFromFileEntry] processing file entry: {0:s}'.format(
            display_name))

    self.processing_status = definitions.PROCESSING_STATUS_EXTRACTING

    if self._processing_profiler:
      self._processing_profiler.StartTiming('extracting')

    self._event_extractor.ParseFileEntryMetadata(mediator, file_entry)

    if self._processing_profiler:
      self._processing_profiler.StopTiming('extracting')

    self.processing_status = definitions.PROCESSING_STATUS_RUNNING
Example #29
0
    def _ExtractMetadataFromFileEntry(self, mediator, file_entry, data_stream):
        """Extracts metadata from a file entry.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      file_entry (dfvfs.FileEntry): file entry to extract metadata from.
      data_stream (dfvfs.DataStream): data stream or None if the file entry
          has no data stream.
    """
        # Do not extract metadata from the root file entry when it is virtual.
        if file_entry.IsRoot() and file_entry.type_indicator not in (
                self._TYPES_WITH_ROOT_METADATA):
            return

        # We always want to extract the file entry metadata but we only want
        # to parse it once per file entry, so we only use it if we are
        # processing the default data stream of regular files.
        if data_stream and not data_stream.IsDefault():
            return

        display_name = mediator.GetDisplayName()
        logger.debug(
            '[ExtractMetadataFromFileEntry] processing file entry: {0:s}'.
            format(display_name))

        self.processing_status = definitions.PROCESSING_STATUS_EXTRACTING

        if self._processing_profiler:
            self._processing_profiler.StartTiming('extracting')

        self._event_extractor.ParseFileEntryMetadata(mediator, file_entry)

        if self._processing_profiler:
            self._processing_profiler.StopTiming('extracting')

        self.processing_status = definitions.PROCESSING_STATUS_RUNNING
Example #30
0
  def _BuildFindSpecsFromRegistrySourceKey(self, key_path):
    """Build find specifications from a Windows Registry source type.

    Args:
      key_path (str): Windows Registry key path defined by the source.

    Returns:
      list[dfwinreg.FindSpec]: find specifications for the Windows Registry
          source type.
    """
    find_specs = []
    for key_path_glob in path_helper.PathHelper.ExpandRecursiveGlobs(
        key_path, '\\'):
      logger.debug('building find spec from key path glob: {0:s}'.format(
          key_path_glob))

      key_path_glob_upper = key_path_glob.upper()
      if key_path_glob_upper.startswith('HKEY_USERS\\%%USERS.SID%%'):
        key_path_glob = 'HKEY_CURRENT_USER{0:s}'.format(key_path_glob[26:])

      find_spec = registry_searcher.FindSpec(key_path_glob=key_path_glob)
      find_specs.append(find_spec)

    return find_specs
Example #31
0
  def _ProcessFileEntry(self, mediator, file_entry):
    """Processes a file entry.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      file_entry (dfvfs.FileEntry): file entry.
    """
    display_name = mediator.GetDisplayName()
    logger.debug(
        '[ProcessFileEntry] processing file entry: {0:s}'.format(display_name))

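    # Track the file object reference count so that file objects a parser
    # leaves open can be detected and forcibly removed in the finally block.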
    reference_count = mediator.resolver_context.GetFileObjectReferenceCount(
        file_entry.path_spec)

    try:
      if self._IsMetadataFile(file_entry):
        self._ProcessMetadataFile(mediator, file_entry)

      else:
        file_entry_processed = False
        for data_stream in file_entry.data_streams:
          if self._abort:
            break

          if self._CanSkipDataStream(file_entry, data_stream):
            logger.debug((
                '[ProcessFileEntry] Skipping datastream {0:s} for {1:s}: '
                '{2:s}').format(
                    data_stream.name, file_entry.type_indicator, display_name))
            continue

          self._ProcessFileEntryDataStream(mediator, file_entry, data_stream)

          file_entry_processed = True

        if not file_entry_processed:
          # For when the file entry does not contain a data stream.
          self._ProcessFileEntryDataStream(mediator, file_entry, None)

    finally:
      new_reference_count = (
          mediator.resolver_context.GetFileObjectReferenceCount(
              file_entry.path_spec))
      if reference_count != new_reference_count:
        # Clean up after parsers that do not call close explicitly.
        if mediator.resolver_context.ForceRemoveFileObject(
            file_entry.path_spec):
          logger.warning(
              'File-object not explicitly closed for file: {0:s}'.format(
                  display_name))

    logger.debug(
        '[ProcessFileEntry] done processing file entry: {0:s}'.format(
            display_name))
Example #32
0
    def _BuildFindSpecsFromFileSourcePath(self, source_path, path_separator,
                                          environment_variables,
                                          user_accounts):
        """Builds find specifications from a file source type.

    Args:
      source_path (str): file system path defined by the source.
      path_separator (str): file system path segment separator.
      environment_variables (list[str]): environment variable attributes used
          to dynamically populate environment variables in the path.
      user_accounts (list[str]): identified user accounts stored in the
          knowledge base.

    Returns:
      list[dfvfs.FindSpec]: find specifications for the file source type.
    """
        find_specs = []
        for path_glob in path_helper.PathHelper.ExpandGlobStars(
                source_path, path_separator):
            logger.debug(
                'building find spec from path glob: {0:s}'.format(path_glob))

            for path in path_helper.PathHelper.ExpandUsersVariablePath(
                    path_glob, path_separator, user_accounts):
                logger.debug(
                    'building find spec from path: {0:s}'.format(path))

                if '%' in path:
                    path = path_helper.PathHelper.ExpandWindowsPath(
                        path, environment_variables)
                    logger.debug(
                        'building find spec from expanded path: {0:s}'.format(
                            path))

                if not path.startswith(path_separator):
                    logger.warning((
                        'The path filter must be defined as an absolute path: '
                        '"{0:s}"').format(path))
                    continue

                try:
                    find_spec = file_system_searcher.FindSpec(
                        case_sensitive=False,
                        location_glob=path,
                        location_separator=path_separator)
                except ValueError as exception:
                    logger.error((
                        'Unable to build find specification for path: "{0:s}" with '
                        'error: {1!s}').format(path, exception))
                    continue

                find_specs.append(find_spec)

        return find_specs
Example #33
0
    def _ProcessFileEntry(self, mediator, file_entry):
        """Processes a file entry.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      file_entry (dfvfs.FileEntry): file entry.
    """
        display_name = mediator.GetDisplayName()
        logger.debug('[ProcessFileEntry] processing file entry: {0:s}'.format(
            display_name))

        if self._IsMetadataFile(file_entry):
            self._ProcessMetadataFile(mediator, file_entry)

        else:
            file_entry_processed = False
            for data_stream in file_entry.data_streams:
                if self._abort:
                    break

                if self._CanSkipDataStream(file_entry, data_stream):
                    logger.debug((
                        '[ProcessFileEntry] Skipping datastream {0:s} for {1:s}: '
                        '{2:s}').format(data_stream.name,
                                        file_entry.type_indicator,
                                        display_name))
                    continue

                self._ProcessFileEntryDataStream(mediator, file_entry,
                                                 data_stream)

                file_entry_processed = True

            if not file_entry_processed:
                # For when the file entry does not contain a data stream.
                self._ProcessFileEntryDataStream(mediator, file_entry, None)

        logger.debug(
            '[ProcessFileEntry] done processing file entry: {0:s}'.format(
                display_name))
Example #34
0
  def _BuildFindSpecsFromFileSourcePath(
      self, source_path, path_separator, environment_variables, user_accounts):
    """Builds find specifications from a file source type.

    Args:
      source_path (str): file system path defined by the source.
      path_separator (str): file system path segment separator.
      environment_variables (list[str]): environment variable attributes used
          to dynamically populate environment variables in the path.
      user_accounts (list[str]): identified user accounts stored in the
          knowledge base.

    Returns:
      list[dfvfs.FindSpec]: find specifications for the file source type.
    """
    find_specs = []
    for path_glob in path_helper.PathHelper.ExpandRecursiveGlobs(
        source_path, path_separator):
      logger.debug('building find spec from path glob: {0:s}'.format(
          path_glob))

      for path in path_helper.PathHelper.ExpandUsersVariablePath(
          path_glob, path_separator, user_accounts):
        logger.debug('building find spec from path: {0:s}'.format(path))

        if '%' in path:
          path = path_helper.PathHelper.ExpandWindowsPath(
              path, environment_variables)
          logger.debug('building find spec from expanded path: {0:s}'.format(
              path))

        if not path.startswith(path_separator):
          logger.warning((
              'The path filter must be defined as an absolute path: '
              '"{0:s}"').format(path))
          continue

        # Convert the path filters into a list of path segments and
        # strip the root path segment.
        path_segments = path.split(path_separator)

        # Remove initial root entry
        path_segments.pop(0)

        if not path_segments[-1]:
          logger.warning(
              'Empty last path segment in path filter: "{0:s}"'.format(path))
          path_segments.pop(-1)

        try:
          find_spec = file_system_searcher.FindSpec(
              location_glob=path_segments, case_sensitive=False)
        except ValueError as exception:
          logger.error((
              'Unable to build find specification for path: "{0:s}" with '
              'error: {1!s}').format(path, exception))
          continue

        find_specs.append(find_spec)

    return find_specs
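
This older variant hands the file system searcher a list of path segments rather than a single glob string. The following is a small, self-contained sketch of the segment-splitting and root-stripping step shown above; it assumes nothing Plaso-specific and the function name is illustrative.

# Illustrative sketch of turning an absolute path filter into path segments
# with the root and any trailing empty segment stripped, as the method above
# does before handing the segments to the file system searcher.
def SplitIntoSegments(path, path_separator):
  if not path.startswith(path_separator):
    raise ValueError('path filter must be absolute: "{0:s}"'.format(path))

  path_segments = path.split(path_separator)
  # The leading separator produces an empty first segment: drop it.
  path_segments.pop(0)

  # A trailing separator produces an empty last segment: drop it as well.
  if path_segments and not path_segments[-1]:
    path_segments.pop(-1)

  return path_segments

print(SplitIntoSegments('\\Windows\\System32\\config\\', '\\'))
# ['Windows', 'System32', 'config']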
Example #35
0
  def _ProcessArchiveTypes(self, mediator, path_spec, type_indicators):
    """Processes a data stream containing archive types such as: TAR or ZIP.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      path_spec (dfvfs.PathSpec): path specification.
      type_indicators (list[str]): dfVFS archive type indicators found in
          the data stream.
    """
    number_of_type_indicators = len(type_indicators)
    if number_of_type_indicators == 0:
      return

    self.processing_status = definitions.PROCESSING_STATUS_COLLECTING

    if number_of_type_indicators > 1:
      display_name = mediator.GetDisplayName()
      logger.debug((
          'Found multiple format type indicators: {0!s} for '
          'archive file: {1:s}').format(type_indicators, display_name))

    for type_indicator in type_indicators:
      if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TAR:
        archive_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_TAR, location='/',
            parent=path_spec)

      elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_ZIP:
        archive_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_ZIP, location='/',
            parent=path_spec)

      else:
        archive_path_spec = None

        error_message = (
            'unsupported archive format type indicator: {0:s}').format(
                type_indicator)
        mediator.ProduceExtractionError(
            error_message, path_spec=path_spec)

      if archive_path_spec:
        # Default to the archive path specification in case extraction fails
        # before any path specification was generated.
        generated_path_spec = archive_path_spec
        try:
          path_spec_generator = self._path_spec_extractor.ExtractPathSpecs(
              [archive_path_spec], resolver_context=mediator.resolver_context)

          for generated_path_spec in path_spec_generator:
            if self._abort:
              break

            event_source = event_sources.FileEntryEventSource(
                path_spec=generated_path_spec)
            event_source.file_entry_type = (
                dfvfs_definitions.FILE_ENTRY_TYPE_FILE)
            mediator.ProduceEventSource(event_source)

            self.last_activity_timestamp = time.time()

        except (IOError, errors.MaximumRecursionDepth) as exception:
          error_message = (
              'unable to process archive file with error: {0!s}').format(
                  exception)
          mediator.ProduceExtractionError(
              error_message, path_spec=generated_path_spec)
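
The method above wraps the original path specification in a TAR or ZIP path specification so that entries inside the archive can be enumerated. Below is a toy sketch of that parent-chained path specification idea; the PathSpec dataclass here is a hypothetical stand-in, not the dfVFS path specification classes or factory.

# Illustrative sketch only: a toy stand-in for dfVFS nested path
# specifications, showing how an archive path spec chains to its parent.
# PathSpec here is a hypothetical dataclass, not the dfvfs.PathSpec class.
import dataclasses
from typing import Optional

@dataclasses.dataclass
class PathSpec(object):
  type_indicator: str
  location: str
  parent: Optional['PathSpec'] = None

os_path_spec = PathSpec(type_indicator='OS', location='/cases/evidence.zip')
archive_path_spec = PathSpec(
    type_indicator='ZIP', location='/', parent=os_path_spec)

# Walking the parent chain yields: ZIP -> OS.
current = archive_path_spec
while current:
  print(current.type_indicator, current.location)
  current = current.parent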
Example #36
0
    def _ProcessFileEntryDataStream(self, mediator, file_entry, data_stream):
        """Processes a specific data stream of a file entry.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      file_entry (dfvfs.FileEntry): file entry containing the data stream.
      data_stream (dfvfs.DataStream): data stream or None if the file entry
          has no data stream.
    """
        display_name = mediator.GetDisplayName()
        data_stream_name = getattr(data_stream, 'name', '') or ''
        logger.debug(
            ('[ProcessFileEntryDataStream] processing data stream: "{0:s}" of '
             'file entry: {1:s}').format(data_stream_name, display_name))

        mediator.ClearEventAttributes()

        if data_stream and self._analyzers:
            # Since AnalyzeDataStream generates event attributes it needs to be
            # called before producing events.
            self._AnalyzeDataStream(mediator, file_entry, data_stream.name)

        self._ExtractMetadataFromFileEntry(mediator, file_entry, data_stream)

        # Not every file entry has a data stream. In such cases we want to
        # extract the metadata only.
        if not data_stream:
            return

        # Determine if the content of the file entry should not be extracted.
        skip_content_extraction = self._CanSkipContentExtraction(file_entry)
        if skip_content_extraction:
            display_name = mediator.GetDisplayName()
            logger.debug(
                'Skipping content extraction of: {0:s}'.format(display_name))
            self.processing_status = definitions.PROCESSING_STATUS_IDLE
            return

        path_spec = copy.deepcopy(file_entry.path_spec)
        if data_stream and not data_stream.IsDefault():
            path_spec.data_stream = data_stream.name

        archive_types = []
        compressed_stream_types = []

        if self._process_compressed_streams:
            compressed_stream_types = self._GetCompressedStreamTypes(
                mediator, path_spec)

        if not compressed_stream_types:
            archive_types = self._GetArchiveTypes(mediator, path_spec)

        if archive_types:
            if self._process_archives:
                self._ProcessArchiveTypes(mediator, path_spec, archive_types)

            if dfvfs_definitions.TYPE_INDICATOR_ZIP in archive_types:
                # ZIP files are the base of certain file formats like docx.
                self._ExtractContentFromDataStream(mediator, file_entry,
                                                   data_stream.name)

        elif compressed_stream_types:
            self._ProcessCompressedStreamTypes(mediator, path_spec,
                                               compressed_stream_types)

        else:
            self._ExtractContentFromDataStream(mediator, file_entry,
                                               data_stream.name)
Example #37
0
  def _AnalyzeFileObject(self, mediator, file_object):
    """Processes a file-like object with analyzers.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      file_object (dfvfs.FileIO): file-like object to process.
    """
    maximum_read_size = max([
        analyzer_object.SIZE_LIMIT for analyzer_object in self._analyzers])

    hashers_only = True
    for analyzer_object in self._analyzers:
      if not isinstance(analyzer_object, hashing_analyzer.HashingAnalyzer):
        hashers_only = False
        break

    file_size = file_object.get_size()

    if (hashers_only and self._hasher_file_size_limit and
        file_size > self._hasher_file_size_limit):
      return

    file_object.seek(0, os.SEEK_SET)

    data = file_object.read(maximum_read_size)
    while data:
      if self._abort:
        break

      for analyzer_object in self._analyzers:
        if self._abort:
          break

        if (not analyzer_object.INCREMENTAL_ANALYZER and
            file_size > analyzer_object.SIZE_LIMIT):
          continue

        if (isinstance(analyzer_object, hashing_analyzer.HashingAnalyzer) and
            self._hasher_file_size_limit and
            file_size > self._hasher_file_size_limit):
          continue

        self.processing_status = analyzer_object.PROCESSING_STATUS_HINT

        analyzer_object.Analyze(data)

        self.last_activity_timestamp = time.time()

      data = file_object.read(maximum_read_size)

    display_name = mediator.GetDisplayName()
    for analyzer_object in self._analyzers:
      if self._abort:
        break

      for result in analyzer_object.GetResults():
        logger.debug((
            '[AnalyzeFileObject] attribute {0:s}:{1:s} calculated for '
            'file: {2:s}.').format(
                result.attribute_name, result.attribute_value, display_name))

        mediator.AddEventAttribute(
            result.attribute_name, result.attribute_value)

      analyzer_object.Reset()

    self.processing_status = definitions.PROCESSING_STATUS_RUNNING
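
The analyzer loop above reads the file-like object in chunks sized to the largest analyzer SIZE_LIMIT and feeds every chunk to each analyzer before moving on. Here is a self-contained sketch of that chunked-read pattern using hashlib; the SimpleHashAnalyzer class is an assumption for the example and not part of Plaso.

# Illustrative sketch of the chunked read loop above, using a hashlib-based
# incremental analyzer. SimpleHashAnalyzer is an assumption for the example.
import hashlib
import io

class SimpleHashAnalyzer(object):
  SIZE_LIMIT = 16 * 1024 * 1024

  def __init__(self):
    self._sha256_context = hashlib.sha256()

  def Analyze(self, data):
    self._sha256_context.update(data)

  def GetResult(self):
    return self._sha256_context.hexdigest()

analyzers = [SimpleHashAnalyzer()]
maximum_read_size = max(
    analyzer_object.SIZE_LIMIT for analyzer_object in analyzers)

file_object = io.BytesIO(b'example file content')
file_object.seek(0, io.SEEK_SET)

data = file_object.read(maximum_read_size)
while data:
  for analyzer_object in analyzers:
    analyzer_object.Analyze(data)
  data = file_object.read(maximum_read_size)

print(analyzers[0].GetResult())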
Example #38
0
  def ProcessSources(
      self, source_path_specs, storage_writer, resolver_context,
      processing_configuration, filter_find_specs=None,
      status_update_callback=None):
    """Processes the sources.

    Args:
      source_path_specs (list[dfvfs.PathSpec]): path specifications of
          the sources to process.
      storage_writer (StorageWriter): storage writer for a session storage.
      resolver_context (dfvfs.Context): resolver context.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      filter_find_specs (Optional[list[dfvfs.FindSpec]]): find specifications
          used in path specification extraction.
      status_update_callback (Optional[function]): callback function for status
          updates.

    Returns:
      ProcessingStatus: processing status.
    """
    parser_mediator = parsers_mediator.ParserMediator(
        storage_writer, self.knowledge_base,
        preferred_year=processing_configuration.preferred_year,
        resolver_context=resolver_context,
        temporary_directory=processing_configuration.temporary_directory)

    parser_mediator.SetEventExtractionConfiguration(
        processing_configuration.event_extraction)

    parser_mediator.SetInputSourceConfiguration(
        processing_configuration.input_source)

    extraction_worker = worker.EventExtractionWorker(
        parser_filter_expression=(
            processing_configuration.parser_filter_expression))

    extraction_worker.SetExtractionConfiguration(
        processing_configuration.extraction)

    self._processing_configuration = processing_configuration
    self._status_update_callback = status_update_callback

    logger.debug('Processing started.')

    parser_mediator.StartProfiling(
        self._processing_configuration.profiling, self._name,
        self._process_information)
    self._StartProfiling(self._processing_configuration.profiling)

    if self._processing_profiler:
      extraction_worker.SetProcessingProfiler(self._processing_profiler)

    if self._serializers_profiler:
      storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      storage_writer.SetStorageProfiler(self._storage_profiler)

    storage_writer.Open()
    storage_writer.WriteSessionStart()

    try:
      storage_writer.WritePreprocessingInformation(self.knowledge_base)

      self._ProcessSources(
          source_path_specs, extraction_worker, parser_mediator,
          storage_writer, filter_find_specs=filter_find_specs)

    finally:
      storage_writer.WriteSessionCompletion(aborted=self._abort)

      storage_writer.Close()

      if self._processing_profiler:
        extraction_worker.SetProcessingProfiler(None)

      if self._serializers_profiler:
        storage_writer.SetSerializersProfiler(None)

      if self._storage_profiler:
        storage_writer.SetStorageProfiler(None)

      self._StopProfiling()
      parser_mediator.StopProfiling()

    if self._abort:
      logger.debug('Processing aborted.')
      self._processing_status.aborted = True
    else:
      logger.debug('Processing completed.')

    self._processing_configuration = None
    self._status_update_callback = None

    return self._processing_status
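
ProcessSources above writes the session start record before processing and guarantees that the session completion record is written and the storage is closed in a finally clause, even when processing aborts. A minimal sketch of that bookkeeping pattern follows; FakeStorageWriter is an assumption for the example, not Plaso's StorageWriter interface.

# Illustrative sketch of the session bookkeeping pattern above.
# FakeStorageWriter is an assumption for the example.
class FakeStorageWriter(object):

  def Open(self):
    print('storage opened')

  def WriteSessionStart(self):
    print('session start written')

  def WriteSessionCompletion(self, aborted=False):
    print('session completion written, aborted={0!s}'.format(aborted))

  def Close(self):
    print('storage closed')

def ProcessWithSessionBookkeeping(storage_writer, process_callback):
  storage_writer.Open()
  storage_writer.WriteSessionStart()

  aborted = False
  try:
    process_callback()
  except KeyboardInterrupt:
    aborted = True
  finally:
    # The completion record and the close happen even if processing failed.
    storage_writer.WriteSessionCompletion(aborted=aborted)
    storage_writer.Close()

ProcessWithSessionBookkeeping(
    FakeStorageWriter(), lambda: print('processing sources'))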
Example #39
0
  def _ParseFileEntryWithParser(
      self, parser_mediator, parser, file_entry, file_object=None):
    """Parses a file entry with a specific parser.

    Args:
      parser_mediator (ParserMediator): parser mediator.
      parser (BaseParser): parser.
      file_entry (dfvfs.FileEntry): file entry.
      file_object (Optional[file]): file-like object to parse.
          If not set the parser will use the parser mediator to open
          the file entry's default data stream as a file-like object.

    Returns:
      int: parse result, which is _PARSE_RESULT_FAILURE if the file entry
          could not be parsed, _PARSE_RESULT_SUCCESS if the file entry
          was successfully parsed, or _PARSE_RESULT_UNSUPPORTED when
          UnableToParseFile was raised.

    Raises:
      TypeError: if parser object is not a supported parser type.
    """
    if not isinstance(parser, (
        parsers_interface.FileEntryParser, parsers_interface.FileObjectParser)):
      raise TypeError('Unsupported parser object type.')

    parser_mediator.ClearParserChain()

    reference_count = (
        parser_mediator.resolver_context.GetFileObjectReferenceCount(
            file_entry.path_spec))

    parser_mediator.SampleStartTiming(parser.NAME)

    try:
      if isinstance(parser, parsers_interface.FileEntryParser):
        parser.Parse(parser_mediator)
      elif isinstance(parser, parsers_interface.FileObjectParser):
        parser.Parse(parser_mediator, file_object)
      result = self._PARSE_RESULT_SUCCESS

    # We catch IOError so we can determine the parser that generated the error.
    except (IOError, dfvfs_errors.BackEndError) as exception:
      display_name = parser_mediator.GetDisplayName(file_entry)
      logger.warning(
          '{0:s} unable to parse file: {1:s} with error: {2!s}'.format(
              parser.NAME, display_name, exception))
      result = self._PARSE_RESULT_FAILURE

    except errors.UnableToParseFile as exception:
      display_name = parser_mediator.GetDisplayName(file_entry)
      logger.debug(
          '{0:s} unable to parse file: {1:s} with error: {2!s}'.format(
              parser.NAME, display_name, exception))
      result = self._PARSE_RESULT_UNSUPPORTED

    finally:
      parser_mediator.SampleStopTiming(parser.NAME)
      parser_mediator.SampleMemoryUsage(parser.NAME)

      new_reference_count = (
          parser_mediator.resolver_context.GetFileObjectReferenceCount(
              file_entry.path_spec))
      if reference_count != new_reference_count:
        display_name = parser_mediator.GetDisplayName(file_entry)
        logger.warning((
            '[{0:s}] did not explicitly close file-object for file: '
            '{1:s}.').format(parser.NAME, display_name))

    return result
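
The method above compares the file-object reference count before and after parsing to detect parsers that do not explicitly close what they open. Below is a self-contained sketch of that leak-detection idea; ToyResolverContext and the parser callable are assumptions for the example, not Plaso or dfVFS classes.

# Illustrative sketch of the reference count comparison used above to detect
# parsers that leak file objects. ToyResolverContext is an assumption.
class ToyResolverContext(object):

  def __init__(self):
    self._reference_counts = {}

  def OpenFileObject(self, path_spec):
    self._reference_counts[path_spec] = (
        self._reference_counts.get(path_spec, 0) + 1)

  def CloseFileObject(self, path_spec):
    self._reference_counts[path_spec] -= 1

  def GetFileObjectReferenceCount(self, path_spec):
    return self._reference_counts.get(path_spec, 0)

def leaky_parser(context, path_spec):
  context.OpenFileObject(path_spec)  # never closed

context = ToyResolverContext()
path_spec = '/evidence/file.bin'

reference_count = context.GetFileObjectReferenceCount(path_spec)
leaky_parser(context, path_spec)
new_reference_count = context.GetFileObjectReferenceCount(path_spec)

if reference_count != new_reference_count:
  print('parser did not explicitly close file-object for: {0:s}'.format(
      path_spec))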
Example #40
0
    def _ParseFileEntryWithParser(self,
                                  parser_mediator,
                                  parser,
                                  file_entry,
                                  file_object=None):
        """Parses a file entry with a specific parser.

    Args:
      parser_mediator (ParserMediator): parser mediator.
      parser (BaseParser): parser.
      file_entry (dfvfs.FileEntry): file entry.
      file_object (Optional[file]): file-like object to parse.
          If not set the parser will use the parser mediator to open
          the file entry's default data stream as a file-like object.

    Returns:
      bool: False if the file could not be parsed and UnableToParseFile
          was raised.

    Raises:
      TypeError: if parser object is not a supported parser type.
    """
        if not isinstance(parser, (parsers_interface.FileEntryParser,
                                   parsers_interface.FileObjectParser)):
            raise TypeError('Unsupported parser object type.')

        parser_mediator.ClearParserChain()

        reference_count = (
            parser_mediator.resolver_context.GetFileObjectReferenceCount(
                file_entry.path_spec))

        if self._parsers_profiler:
            self._parsers_profiler.StartTiming(parser.NAME)

        result = True
        try:
            if isinstance(parser, parsers_interface.FileEntryParser):
                parser.Parse(parser_mediator)
            elif isinstance(parser, parsers_interface.FileObjectParser):
                parser.Parse(parser_mediator, file_object)

        # We catch IOError so we can determine the parser that generated the error.
        except (IOError, dfvfs_errors.BackEndError) as exception:
            display_name = parser_mediator.GetDisplayName(file_entry)
            logger.warning(
                '{0:s} unable to parse file: {1:s} with error: {2!s}'.format(
                    parser.NAME, display_name, exception))

        except errors.UnableToParseFile as exception:
            display_name = parser_mediator.GetDisplayName(file_entry)
            logger.debug(
                '{0:s} unable to parse file: {1:s} with error: {2!s}'.format(
                    parser.NAME, display_name, exception))
            result = False

        finally:
            if self._parsers_profiler:
                self._parsers_profiler.StopTiming(parser.NAME)

            new_reference_count = (
                parser_mediator.resolver_context.GetFileObjectReferenceCount(
                    file_entry.path_spec))
            if reference_count != new_reference_count:
                display_name = parser_mediator.GetDisplayName(file_entry)
                logger.warning(
                    ('[{0:s}] did not explicitly close file-object for file: '
                     '{1:s}.').format(parser.NAME, display_name))

        return result
Example #41
0
  def BuildCollectionFilters(
      self, artifact_definitions_path, custom_artifacts_path,
      knowledge_base_object, artifact_filter_names=None, filter_file_path=None):
    """Builds collection filters from artifacts or filter file if available.

    Args:
      artifact_definitions_path (str): path to artifact definitions file.
      custom_artifacts_path (str): path to custom artifact definitions file.
      knowledge_base_object (KnowledgeBase): knowledge base.
      artifact_filter_names (Optional[list[str]]): names of artifact
          definitions that are used for filtering file system and Windows
          Registry key paths.
      filter_file_path (Optional[str]): path of filter file.

    Raises:
      InvalidFilter: if no valid file system find specifications are built.
    """
    environment_variables = knowledge_base_object.GetEnvironmentVariables()
    if artifact_filter_names:
      logger.debug(
          'building find specification based on artifacts: {0:s}'.format(
              ', '.join(artifact_filter_names)))

      artifacts_registry_object = BaseEngine.BuildArtifactsRegistry(
          artifact_definitions_path, custom_artifacts_path)
      self.collection_filters_helper = (
          artifact_filters.ArtifactDefinitionsFiltersHelper(
              artifacts_registry_object, knowledge_base_object))
      self.collection_filters_helper.BuildFindSpecs(
          artifact_filter_names, environment_variables=environment_variables)

      # If the user selected Windows Registry artifacts we have to ensure
      # the Windows Registry files are parsed.
      if self.collection_filters_helper.registry_find_specs:
        self.collection_filters_helper.BuildFindSpecs(
            self._WINDOWS_REGISTRY_FILES_ARTIFACT_NAMES,
            environment_variables=environment_variables)

      if not self.collection_filters_helper.included_file_system_find_specs:
        raise errors.InvalidFilter(
            'No valid file system find specifications were built from '
            'artifacts.')

    elif filter_file_path:
      logger.debug(
          'building find specification based on filter file: {0:s}'.format(
              filter_file_path))

      filter_file_path_lower = filter_file_path.lower()
      if (filter_file_path_lower.endswith('.yaml') or
          filter_file_path_lower.endswith('.yml')):
        filter_file_object = yaml_filter_file.YAMLFilterFile()
      else:
        filter_file_object = filter_file.FilterFile()

      filter_file_path_filters = filter_file_object.ReadFromFile(
          filter_file_path)

      self.collection_filters_helper = (
          path_filters.PathCollectionFiltersHelper())
      self.collection_filters_helper.BuildFindSpecs(
          filter_file_path_filters, environment_variables=environment_variables)

      if (not self.collection_filters_helper.excluded_file_system_find_specs and
          not self.collection_filters_helper.included_file_system_find_specs):
        raise errors.InvalidFilter((
            'No valid file system find specifications were built from filter '
            'file: {0:s}.').format(filter_file_path))
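
The filter-file branch above selects a YAML reader when the path ends in .yaml or .yml and otherwise falls back to the legacy reader. A small sketch of that extension-based dispatch follows; the reader classes here are assumptions for the example, not the plaso.filters classes used in the method.

# Illustrative sketch of the extension-based reader selection above.
# YAMLFilterReader and LegacyFilterReader are assumptions for the example.
class YAMLFilterReader(object):
  pass

class LegacyFilterReader(object):
  pass

def GetFilterFileReader(filter_file_path):
  filter_file_path_lower = filter_file_path.lower()
  if filter_file_path_lower.endswith(('.yaml', '.yml')):
    return YAMLFilterReader()
  return LegacyFilterReader()

print(type(GetFilterFileReader('filters.yaml')).__name__)  # YAMLFilterReader
print(type(GetFilterFileReader('filters.txt')).__name__)   # LegacyFilterReader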
Example #42
0
    def _AnalyzeFileObject(self, mediator, file_object):
        """Processes a file-like object with analyzers.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      file_object (dfvfs.FileIO): file-like object to process.
    """
        maximum_read_size = max([
            analyzer_object.SIZE_LIMIT for analyzer_object in self._analyzers
        ])

        hashers_only = True
        for analyzer_object in self._analyzers:
            if not isinstance(analyzer_object,
                              hashing_analyzer.HashingAnalyzer):
                hashers_only = False
                break

        file_size = file_object.get_size()

        if (hashers_only and self._hasher_file_size_limit
                and file_size > self._hasher_file_size_limit):
            return

        file_object.seek(0, os.SEEK_SET)

        data = file_object.read(maximum_read_size)
        while data:
            if self._abort:
                break

            for analyzer_object in self._analyzers:
                if self._abort:
                    break

                if (not analyzer_object.INCREMENTAL_ANALYZER
                        and file_size > analyzer_object.SIZE_LIMIT):
                    continue

                if (isinstance(analyzer_object,
                               hashing_analyzer.HashingAnalyzer)
                        and self._hasher_file_size_limit
                        and file_size > self._hasher_file_size_limit):
                    continue

                self.processing_status = analyzer_object.PROCESSING_STATUS_HINT

                analyzer_object.Analyze(data)

                self.last_activity_timestamp = time.time()

            data = file_object.read(maximum_read_size)

        display_name = mediator.GetDisplayName()
        for analyzer_object in self._analyzers:
            if self._abort:
                break

            for result in analyzer_object.GetResults():
                logger.debug((
                    '[AnalyzeFileObject] attribute {0:s}:{1:s} calculated for '
                    'file: {2:s}.').format(result.attribute_name,
                                           result.attribute_value,
                                           display_name))

                mediator.AddEventAttribute(result.attribute_name,
                                           result.attribute_value)

            analyzer_object.Reset()

        self.processing_status = definitions.PROCESSING_STATUS_RUNNING
Example #43
0
  def ProcessSources(
      self, source_configurations, storage_writer, resolver_context,
      processing_configuration, force_parser=False,
      status_update_callback=None):
    """Processes the sources.

    Args:
      source_configurations (list[SourceConfigurationArtifact]): configurations
          of the sources to process.
      storage_writer (StorageWriter): storage writer for a session storage.
      resolver_context (dfvfs.Context): resolver context.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      force_parser (Optional[bool]): True if a specified parser should be forced
          to be used to extract events.
      status_update_callback (Optional[function]): callback function for status
          updates.

    Returns:
      ProcessingStatus: processing status.
    """
    parser_mediator = self._CreateParserMediator(
        self.knowledge_base, resolver_context, processing_configuration)
    parser_mediator.SetStorageWriter(storage_writer)

    self._extraction_worker = worker.EventExtractionWorker(
        force_parser=force_parser, parser_filter_expression=(
            processing_configuration.parser_filter_expression))

    self._extraction_worker.SetExtractionConfiguration(
        processing_configuration.extraction)

    self._parser_mediator = parser_mediator
    self._processing_configuration = processing_configuration
    self._resolver_context = resolver_context
    self._status_update_callback = status_update_callback
    self._storage_writer = storage_writer

    logger.debug('Processing started.')

    parser_mediator.StartProfiling(
        self._processing_configuration.profiling, self._name,
        self._process_information)
    self._StartProfiling(self._processing_configuration.profiling)

    if self._analyzers_profiler:
      self._extraction_worker.SetAnalyzersProfiler(self._analyzers_profiler)

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(self._processing_profiler)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(self._storage_profiler)

    self._StartStatusUpdateThread()

    self._parsers_counter = collections.Counter({
        parser_count.name: parser_count
        for parser_count in self._storage_writer.GetAttributeContainers(
            'parser_count')})

    try:
      self._ProcessSources(source_configurations, parser_mediator)

    finally:
      # Stop the status update thread after close of the storage writer
      # so we include the storage sync to disk in the status updates.
      self._StopStatusUpdateThread()

      if self._analyzers_profiler:
        self._extraction_worker.SetAnalyzersProfiler(None)

      if self._processing_profiler:
        self._extraction_worker.SetProcessingProfiler(None)

      if self._serializers_profiler:
        self._storage_writer.SetSerializersProfiler(None)

      if self._storage_profiler:
        self._storage_writer.SetStorageProfiler(None)

      self._StopProfiling()
      parser_mediator.StopProfiling()

    for key, value in parser_mediator.parsers_counter.items():
      parser_count = self._parsers_counter.get(key, None)
      if parser_count:
        parser_count.number_of_events += value
        self._storage_writer.UpdateAttributeContainer(parser_count)
      else:
        parser_count = counts.ParserCount(name=key, number_of_events=value)
        self._parsers_counter[key] = parser_count
        self._storage_writer.AddAttributeContainer(parser_count)

    if self._abort:
      logger.debug('Processing aborted.')
      self._processing_status.aborted = True
    else:
      logger.debug('Processing completed.')

    # Update the status view one last time.
    self._UpdateStatus()

    self._extraction_worker = None
    self._file_system_cache = []
    self._parser_mediator = None
    self._processing_configuration = None
    self._resolver_context = None
    self._status_update_callback = None
    self._storage_writer = None

    return self._processing_status
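
After processing, the per-parser event counts reported by the parser mediator are merged back into attribute containers: existing counts are updated and new ones are added. Here is a short sketch of that merge using collections.Counter; the ParserCount class below is a hypothetical stand-in, not plaso.containers.counts.ParserCount.

# Illustrative sketch of merging per-parser event counts as done above.
# ParserCount here is a hypothetical attribute container stand-in.
import collections

class ParserCount(object):

  def __init__(self, name, number_of_events):
    self.name = name
    self.number_of_events = number_of_events

existing_counts = {'filestat': ParserCount('filestat', 10)}
new_counts = collections.Counter({'filestat': 5, 'winreg': 3})

for key, value in new_counts.items():
  parser_count = existing_counts.get(key, None)
  if parser_count:
    parser_count.number_of_events += value
  else:
    existing_counts[key] = ParserCount(name=key, number_of_events=value)

for parser_count in existing_counts.values():
  print(parser_count.name, parser_count.number_of_events)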
Example #44
0
  def _ProcessFileEntryDataStream(self, mediator, file_entry, data_stream):
    """Processes a specific data stream of a file entry.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      file_entry (dfvfs.FileEntry): file entry containing the data stream.
      data_stream (dfvfs.DataStream): data stream or None if the file entry
          has no data stream.
    """
    display_name = mediator.GetDisplayName()
    data_stream_name = getattr(data_stream, 'name', '') or ''
    logger.debug((
        '[ProcessFileEntryDataStream] processing data stream: "{0:s}" of '
        'file entry: {1:s}').format(data_stream_name, display_name))

    mediator.ClearEventAttributes()

    if data_stream and self._analyzers:
      # Since AnalyzeDataStream generates event attributes it needs to be
      # called before producing events.
      self._AnalyzeDataStream(mediator, file_entry, data_stream.name)

    self._ExtractMetadataFromFileEntry(mediator, file_entry, data_stream)

    # Not every file entry has a data stream. In such cases we want to
    # extract the metadata only.
    if not data_stream:
      return

    # Determine if the content of the file entry should not be extracted.
    skip_content_extraction = self._CanSkipContentExtraction(file_entry)
    if skip_content_extraction:
      display_name = mediator.GetDisplayName()
      logger.debug(
          'Skipping content extraction of: {0:s}'.format(display_name))
      self.processing_status = definitions.PROCESSING_STATUS_IDLE
      return

    path_spec = copy.deepcopy(file_entry.path_spec)
    if data_stream and not data_stream.IsDefault():
      path_spec.data_stream = data_stream.name

    archive_types = []
    compressed_stream_types = []

    if self._process_compressed_streams:
      compressed_stream_types = self._GetCompressedStreamTypes(
          mediator, path_spec)

    if not compressed_stream_types:
      archive_types = self._GetArchiveTypes(mediator, path_spec)

    if archive_types:
      if self._process_archives:
        self._ProcessArchiveTypes(mediator, path_spec, archive_types)

      if dfvfs_definitions.TYPE_INDICATOR_ZIP in archive_types:
        # ZIP files are the base of certain file formats like docx.
        self._ExtractContentFromDataStream(
            mediator, file_entry, data_stream.name)

    elif compressed_stream_types:
      self._ProcessCompressedStreamTypes(
          mediator, path_spec, compressed_stream_types)

    else:
      self._ExtractContentFromDataStream(
          mediator, file_entry, data_stream.name)
Example #45
0
    def _ProcessArchiveTypes(self, mediator, path_spec, type_indicators):
        """Processes a data stream containing archive types such as: TAR or ZIP.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      path_spec (dfvfs.PathSpec): path specification.
      type_indicators (list[str]): dfVFS archive type indicators found in
          the data stream.
    """
        number_of_type_indicators = len(type_indicators)
        if number_of_type_indicators == 0:
            return

        self.processing_status = definitions.PROCESSING_STATUS_COLLECTING

        if number_of_type_indicators > 1:
            display_name = mediator.GetDisplayName()
            logger.debug(
                ('Found multiple format type indicators: {0!s} for '
                 'archive file: {1:s}').format(type_indicators, display_name))

        for type_indicator in type_indicators:
            if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TAR:
                archive_path_spec = path_spec_factory.Factory.NewPathSpec(
                    dfvfs_definitions.TYPE_INDICATOR_TAR,
                    location='/',
                    parent=path_spec)

            elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_ZIP:
                archive_path_spec = path_spec_factory.Factory.NewPathSpec(
                    dfvfs_definitions.TYPE_INDICATOR_ZIP,
                    location='/',
                    parent=path_spec)

            else:
                archive_path_spec = None

                error_message = (
                    'unsupported archive format type indicator: {0:s}'
                ).format(type_indicator)
                mediator.ProduceExtractionError(error_message,
                                                path_spec=path_spec)

            if archive_path_spec:
                # Default to the archive path specification in case extraction
                # fails before any path specification was generated.
                generated_path_spec = archive_path_spec
                try:
                    path_spec_generator = self._path_spec_extractor.ExtractPathSpecs(
                        [archive_path_spec],
                        resolver_context=mediator.resolver_context)

                    for generated_path_spec in path_spec_generator:
                        if self._abort:
                            break

                        event_source = event_sources.FileEntryEventSource(
                            path_spec=generated_path_spec)
                        event_source.file_entry_type = (
                            dfvfs_definitions.FILE_ENTRY_TYPE_FILE)
                        mediator.ProduceEventSource(event_source)

                        self.last_activity_timestamp = time.time()

                except (IOError, errors.MaximumRecursionDepth) as exception:
                    error_message = (
                        'unable to process archive file with error: {0!s}'
                    ).format(exception)
                    mediator.ProduceExtractionError(
                        error_message, path_spec=generated_path_spec)