Beispiel #1
0
    def testBuildFindSpecs(self):
        """Tests the BuildFindSpecs function.

        Writes a temporary filter file, builds find specifications from it and
        checks how many specifications were built and how many path
        specifications they match relative to the current directory.
        """
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            # The file is created with delete=False, so remember its path to
            # be able to remove it once the find specifications are built.
            filter_file_path = temp_file.name
            test_filter_file = filter_file.FilterFile(filter_file_path)
            # 2 hits.
            temp_file.write(b'/test_data/testdir/filter_.+.txt\n')
            # A single hit.
            temp_file.write(b'/test_data/.+evtx\n')
            # A single hit.
            temp_file.write(b'/AUTHORS\n')
            temp_file.write(b'/does_not_exist/some_file_[0-9]+txt\n')
            # Path expansion.
            temp_file.write(b'{systemroot}/Tasks/.+[.]job\n')
            # This should not compile properly, missing file information.
            temp_file.write(b'failing/\n')
            # This should not fail during initial loading, but fail later on.
            temp_file.write(b'bad re (no close on that parenthesis/file\n')

        environment_variable = artifacts.EnvironmentVariableArtifact(
            case_sensitive=False, name='SystemRoot', value='C:\\Windows')

        try:
            find_specs = test_filter_file.BuildFindSpecs(
                environment_variables=[environment_variable])
        finally:
            # Remove the temporary filter file even if building the find
            # specifications failed; a failed removal is only logged.
            try:
                os.remove(filter_file_path)
            except (OSError, IOError) as exception:
                logging.warning(
                    'Unable to remove filter file: {0:s} with error: {1!s}'.format(
                        filter_file_path, exception))

        self.assertEqual(len(find_specs), 5)

        path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location='.')
        file_system = path_spec_resolver.Resolver.OpenFileSystem(path_spec)
        try:
            searcher = file_system_searcher.FileSystemSearcher(
                file_system, path_spec)

            path_spec_generator = searcher.Find(find_specs=find_specs)
            self.assertIsNotNone(path_spec_generator)

            path_specs = list(path_spec_generator)
        finally:
            # Close the file system even when an assertion above fails.
            file_system.Close()

        # Two evtx, one symbolic link to evtx, one AUTHORS, two filter_*.txt files,
        # total 6 path specifications.
        self.assertEqual(len(path_specs), 6)

        with self.assertRaises(IOError):
            test_filter_file = filter_file.FilterFile('thisfiledoesnotexist')
            test_filter_file.BuildFindSpecs()
Beispiel #2
0
    def testBuildFindSpecs(self):
        """Tests the BuildFindSpecs function of the path filters helper."""
        filter_reader = filter_file.FilterFile()
        filter_data = io.StringIO(self._FILTER_FILE_DATA)
        parsed_path_filters = filter_reader._ReadFromFileObject(filter_data)

        system_root = artifacts.EnvironmentVariableArtifact(
            case_sensitive=False, name='SystemRoot', value='C:\\Windows')

        filters_helper = path_filters.PathCollectionFiltersHelper()
        filters_helper.BuildFindSpecs(
            parsed_path_filters, environment_variables=[system_root])

        included_find_specs = filters_helper.included_file_system_find_specs
        self.assertEqual(len(included_find_specs), 5)

        # Search relative to the current directory of the operating system.
        os_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location='.')
        os_file_system = path_spec_resolver.Resolver.OpenFileSystem(
            os_path_spec)
        os_searcher = file_system_searcher.FileSystemSearcher(
            os_file_system, os_path_spec)

        matches = os_searcher.Find(find_specs=included_find_specs)
        self.assertIsNotNone(matches)

        # Exhaust the generator before the file system is closed.
        matched_path_specs = list(matches)

        os_file_system.Close()

        # Expected matches: two evtx files, one symbolic link to an evtx file,
        # one AUTHORS file and two filter_*.txt files, 6 in total.
        self.assertEqual(len(matched_path_specs), 6)
Beispiel #3
0
    def BuildFilterFindSpecs(cls,
                             artifact_definitions_path,
                             custom_artifacts_path,
                             knowledge_base_object,
                             artifact_filter_names=None,
                             filter_file_path=None):
        """Creates find specifications from artifact names or a filter file.

        Artifact filter names take precedence over the filter file. When
        neither is provided no find specifications are built.

        Args:
          artifact_definitions_path (str): path to artifact definitions file.
          custom_artifacts_path (str): path to custom artifact definitions file.
          knowledge_base_object (KnowledgeBase): knowledge base.
          artifact_filter_names (Optional[list[str]]): names of artifact
              definitions that are used for filtering file system and Windows
              Registry key paths.
          filter_file_path (Optional[str]): path of filter file.

        Returns:
          list[dfvfs.FindSpec]: find specifications for the file source type
              or None if neither artifact filter names nor a filter file path
              were provided.

        Raises:
          InvalidFilter: if a filter was requested but no valid find
              specifications were built.
        """
        environment_variables = knowledge_base_object.GetEnvironmentVariables()

        # Nothing to filter on.
        if not artifact_filter_names and not filter_file_path:
            return None

        if artifact_filter_names:
            joined_names = ', '.join(artifact_filter_names)
            logger.debug(
                'building find specification based on artifacts: {0:s}'.format(
                    joined_names))

            registry = cls.BuildArtifactsRegistry(
                artifact_definitions_path, custom_artifacts_path)
            filter_helper = artifact_filters.ArtifactDefinitionsFilterHelper(
                registry, artifact_filter_names, knowledge_base_object)
            filter_helper.BuildFindSpecs(
                environment_variables=environment_variables)

            # The helper's results are read back from the knowledge base.
            find_specs_per_type = knowledge_base_object.GetValue(
                filter_helper.KNOWLEDGE_BASE_VALUE)
            find_specs = find_specs_per_type[
                artifact_types.TYPE_INDICATOR_FILE]

        else:
            logger.debug(
                'building find specification based on filter file: {0:s}'.format(
                    filter_file_path))

            filter_reader = filter_file.FilterFile(filter_file_path)
            find_specs = filter_reader.BuildFindSpecs(
                environment_variables=environment_variables)

        if not find_specs:
            raise errors.InvalidFilter(
                'Error processing filters, no valid specifications built.')

        return find_specs
Beispiel #4
0
    def testReadFromFileObject(self):
        """Tests the _ReadFromFileObject function."""
        path_segments = ['filter_files', 'format_test.txt']
        test_file_path = self._GetTestFilePath(path_segments)
        self._SkipIfPathNotExists(test_file_path)

        filter_reader = filter_file.FilterFile()
        with io.open(test_file_path, 'r', encoding='utf-8') as file_object:
            # Materialize the result while the file object is still open.
            filter_generator = filter_reader._ReadFromFileObject(file_object)
            read_filters = list(filter_generator)

        self.assertEqual(len(read_filters), 1)
Beispiel #5
0
    def testReadFromFile(self):
        """Tests the ReadFromFile function."""
        path_segments = ['filter_files', 'format_test.txt']
        test_file_path = self._GetTestFilePath(path_segments)
        self._SkipIfPathNotExists(test_file_path)

        filter_reader = filter_file.FilterFile()
        read_filters = filter_reader.ReadFromFile(test_file_path)

        self.assertEqual(len(read_filters), 1)

        # The test file is expected to define a single filter with two paths.
        first_filter = read_filters[0]
        self.assertEqual(first_filter.path_separator, '/')

        expected_paths = ['/usr/bin', '/Windows/System32']
        self.assertEqual(first_filter.paths, expected_paths)
    def _ExtractWithFilter(self,
                           source_path_specs,
                           destination_path,
                           output_writer,
                           filter_file_path,
                           skip_duplicates=True):
        """Extracts files using a filter expression.

    This method runs the file extraction process on the image and
    potentially on every VSS if that is wanted.

    Args:
      source_path_specs (list[dfvfs.PathSpec]): path specifications to extract.
      destination_path (str): path where the extracted files should be stored.
      output_writer (CLIOutputWriter): output writer.
      filter_file_path (str): path of the file that contains the filter
          expressions.
      skip_duplicates (Optional[bool]): True if files with duplicate content
          should be skipped.
    """
        for source_path_spec in source_path_specs:
            file_system, mount_point = self._GetSourceFileSystem(
                source_path_spec, resolver_context=self._resolver_context)

            if self._knowledge_base is None:
                self._Preprocess(file_system, mount_point)

            display_name = path_helper.PathHelper.GetDisplayNameForPathSpec(
                source_path_spec)
            output_writer.Write(
                'Extracting file entries from: {0:s}\n'.format(display_name))

            environment_variables = self._knowledge_base.GetEnvironmentVariables(
            )
            filter_file_object = filter_file.FilterFile(filter_file_path)
            find_specs = filter_file_object.BuildFindSpecs(
                environment_variables=environment_variables)

            searcher = file_system_searcher.FileSystemSearcher(
                file_system, mount_point)
            for path_spec in searcher.Find(find_specs=find_specs):
                self._ExtractFileEntry(path_spec,
                                       destination_path,
                                       output_writer,
                                       skip_duplicates=skip_duplicates)

            file_system.Close()
Beispiel #7
0
  def ExtractEventsFromSources(self):
    """Processes the sources and extracts events.

    Scans the source, creates a session and a storage writer, selects a
    single or multi process extraction engine, optionally runs pre-processing
    and builds filter find specifications, processes the sources and finally
    prints the extraction summary.

    Raises:
      BadConfigOption: if the storage file path is invalid or the storage
          format not supported.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
    self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

    # The source type determined by the scan drives the engine selection and
    # pre-processing decisions below.
    scan_context = self.ScanSource(self._source_path)
    self._source_type = scan_context.source_type

    self._status_view.SetMode(self._status_view_mode)
    self._status_view.SetSourceInformation(
        self._source_path, self._source_type, filter_file=self._filter_file)

    status_update_callback = (
        self._status_view.GetExtractionStatusUpdateCallback())

    self._output_writer.Write('\n')
    self._status_view.PrintExtractionStatusHeader(None)
    self._output_writer.Write('Processing started.\n')

    session = engine.BaseEngine.CreateSession(
        command_line_arguments=self._command_line_arguments,
        debug_mode=self._debug_mode,
        filter_file=self._filter_file,
        preferred_encoding=self.preferred_encoding,
        preferred_time_zone=self._preferred_time_zone,
        preferred_year=self._preferred_year)

    # A falsy storage writer is treated as an unsupported storage format.
    storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
        self._storage_format, session, self._storage_file_path)
    if not storage_writer:
      raise errors.BadConfigOption(
          'Unsupported storage format: {0:s}'.format(self._storage_format))

    single_process_mode = self._single_process_mode
    if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
      # No need to multi process a single file source.
      single_process_mode = True

    if single_process_mode:
      extraction_engine = single_process_engine.SingleProcessEngine()
    else:
      extraction_engine = multi_process_engine.TaskMultiProcessEngine(
          use_zeromq=self._use_zeromq)

    # If the source is a directory or a storage media image
    # run pre-processing.
    if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
      self._PreprocessSources(extraction_engine)

    # The configuration is created after the optional pre-processing step and
    # is passed the knowledge base of the extraction engine.
    configuration = self._CreateProcessingConfiguration(
        extraction_engine.knowledge_base)

    self._SetExtractionParsersAndPlugins(configuration, session)
    self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

    # Only build find specifications when a filter file is configured,
    # otherwise None is passed to the extraction engine.
    filter_find_specs = None
    if configuration.filter_file:
      environment_variables = (
          extraction_engine.knowledge_base.GetEnvironmentVariables())
      filter_file_object = filter_file.FilterFile(configuration.filter_file)
      filter_find_specs = filter_file_object.BuildFindSpecs(
          environment_variables=environment_variables)

    # Note that the two engines are called with different ProcessSources
    # arguments.
    processing_status = None
    if single_process_mode:
      logger.debug('Starting extraction in single process mode.')

      processing_status = extraction_engine.ProcessSources(
          self._source_path_specs, storage_writer, self._resolver_context,
          configuration, filter_find_specs=filter_find_specs,
          status_update_callback=status_update_callback)

    else:
      logger.debug('Starting extraction in multi process mode.')

      processing_status = extraction_engine.ProcessSources(
          session.identifier, self._source_path_specs, storage_writer,
          configuration, enable_sigsegv_handler=self._enable_sigsegv_handler,
          filter_find_specs=filter_find_specs,
          number_of_worker_processes=self._number_of_extraction_workers,
          status_update_callback=status_update_callback,
          worker_memory_limit=self._worker_memory_limit)

    self._status_view.PrintExtractionSummary(processing_status)
Beispiel #8
0
    def BuildCollectionFilters(self,
                               artifact_definitions_path,
                               custom_artifacts_path,
                               knowledge_base_object,
                               artifact_filter_names=None,
                               filter_file_path=None):
        """Sets up the collection filters helper from artifacts or a filter file.

        Artifact filter names take precedence over the filter file. When
        neither is provided the collection filters helper is left untouched.

        Args:
          artifact_definitions_path (str): path to artifact definitions file.
          custom_artifacts_path (str): path to custom artifact definitions file.
          knowledge_base_object (KnowledgeBase): knowledge base.
          artifact_filter_names (Optional[list[str]]): names of artifact
              definitions that are used for filtering file system and Windows
              Registry key paths.
          filter_file_path (Optional[str]): path of filter file.

        Raises:
          InvalidFilter: if no valid file system find specifications are built.
        """
        environment_variables = knowledge_base_object.GetEnvironmentVariables()

        if artifact_filter_names:
            joined_names = ', '.join(artifact_filter_names)
            logger.debug(
                'building find specification based on artifacts: {0:s}'.format(
                    joined_names))

            registry = BaseEngine.BuildArtifactsRegistry(
                artifact_definitions_path, custom_artifacts_path)
            helper = artifact_filters.ArtifactDefinitionsFiltersHelper(
                registry, knowledge_base_object)
            self.collection_filters_helper = helper
            helper.BuildFindSpecs(
                artifact_filter_names,
                environment_variables=environment_variables)

            # Windows Registry artifacts can only be processed when the
            # Windows Registry files themselves are collected as well.
            if helper.registry_find_specs:
                helper.BuildFindSpecs(
                    self._WINDOWS_REGISTRY_FILES_ARTIFACT_NAMES,
                    environment_variables=environment_variables)

            if not helper.included_file_system_find_specs:
                raise errors.InvalidFilter(
                    'No valid file system find specifications were built from '
                    'artifacts.')
            return

        if not filter_file_path:
            return

        logger.debug(
            'building find specification based on filter file: {0:s}'.format(
                filter_file_path))

        # The file extension selects the filter file format.
        if filter_file_path.lower().endswith(('.yaml', '.yml')):
            filter_reader = yaml_filter_file.YAMLFilterFile()
        else:
            filter_reader = filter_file.FilterFile()

        read_path_filters = filter_reader.ReadFromFile(filter_file_path)

        helper = path_filters.PathCollectionFiltersHelper()
        self.collection_filters_helper = helper
        helper.BuildFindSpecs(
            read_path_filters, environment_variables=environment_variables)

        if not (helper.excluded_file_system_find_specs
                or helper.included_file_system_find_specs):
            raise errors.InvalidFilter((
                'No valid file system find specifications were built from filter '
                'file: {0:s}.').format(filter_file_path))
Beispiel #9
0
  def ExtractEventsFromSources(self):
    """Processes the sources and extract events.

    This is a stripped down copy of tools/log2timeline.py that doesn't support
    the full set of flags. The defaults for these are hard coded in the
    constructor of this class.

    Scans the source, creates a session and a ZIP storage file writer, selects
    a single or multi process extraction engine, optionally runs
    pre-processing, determines the parser filter expression and time zone,
    builds filter find specifications, processes the sources and finally
    prints the extraction summary.

    Raises:
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
    self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

    # The source type determined by the scan drives the engine selection and
    # pre-processing decisions below.
    scan_context = self.ScanSource(self._source_path)
    source_type = scan_context.source_type

    self._status_view.SetMode(self._status_view_mode)
    self._status_view.SetSourceInformation(
        self._source_path, source_type, filter_file=self._filter_file)

    status_update_callback = (
        self._status_view.GetExtractionStatusUpdateCallback())

    self._output_writer.Write('\n')
    self._status_view.PrintExtractionStatusHeader(None)
    self._output_writer.Write('Processing started.\n')

    session = engine.BaseEngine.CreateSession(
        command_line_arguments=self._command_line_arguments,
        filter_file=self._filter_file,
        preferred_encoding=self.preferred_encoding,
        preferred_time_zone=self._preferred_time_zone,
        preferred_year=self._preferred_year)

    storage_writer = storage_zip_file.ZIPStorageFileWriter(
        session, self._storage_file_path)

    configuration = self._CreateProcessingConfiguration()

    single_process_mode = self._single_process_mode
    if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
      # No need to multi process a single file source.
      single_process_mode = True


    if single_process_mode:
      extraction_engine = single_process_engine.SingleProcessEngine()
    else:
      extraction_engine = multi_process_engine.TaskMultiProcessEngine(
          use_zeromq=self._use_zeromq)

    # If the source is a directory or a storage media image
    # run pre-processing.
    if source_type in self._SOURCE_TYPES_TO_PREPROCESS:
      self._PreprocessSources(extraction_engine)

    # Without an explicit parser filter expression, look up a preset based on
    # the operating system values stored in the knowledge base.
    if not configuration.parser_filter_expression:
      operating_system = extraction_engine.knowledge_base.GetValue(
          'operating_system')
      operating_system_product = extraction_engine.knowledge_base.GetValue(
          'operating_system_product')
      operating_system_version = extraction_engine.knowledge_base.GetValue(
          'operating_system_version')
      parser_filter_expression = (
          self._parsers_manager.GetPresetForOperatingSystem(
              operating_system, operating_system_product,
              operating_system_version))

      if parser_filter_expression:
        logging.info('Parser filter expression changed to: {0:s}'.format(
            parser_filter_expression))

      # The looked up expression is stored in both the configuration and the
      # session, also when no preset was found.
      configuration.parser_filter_expression = parser_filter_expression
      session.enabled_parser_names = list(
          self._parsers_manager.GetParserAndPluginNames(
              parser_filter_expression=configuration.parser_filter_expression))
      session.parser_filter_expression = configuration.parser_filter_expression

    # Note session.preferred_time_zone will default to UTC but
    # self._preferred_time_zone is None when not set.
    if self._preferred_time_zone:
      try:
        extraction_engine.knowledge_base.SetTimeZone(self._preferred_time_zone)
      except ValueError:
        # An unsupported time zone is not fatal, the time zone already set in
        # the knowledge base is reported and kept.
        # pylint: disable=protected-access
        logging.warning(
            'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
                self._preferred_time_zone,
                extraction_engine.knowledge_base._time_zone.zone))

    # Only build find specifications when a filter file is configured,
    # otherwise None is passed to the extraction engine.
    filter_find_specs = None
    if configuration.filter_file:
      environment_variables = (
          extraction_engine.knowledge_base.GetEnvironmentVariables())
      filter_file_object = filter_file.FilterFile(configuration.filter_file)
      filter_find_specs = filter_file_object.BuildFindSpecs(
          environment_variables=environment_variables)

    # Note that the two engines are called with different ProcessSources
    # arguments.
    processing_status = None
    if single_process_mode:
      logging.debug('Starting extraction in single process mode.')

      processing_status = extraction_engine.ProcessSources(
          self._source_path_specs, storage_writer, self._resolver_context,
          configuration, filter_find_specs=filter_find_specs,
          status_update_callback=status_update_callback)

    else:
      logging.debug('Starting extraction in multi process mode.')

      processing_status = extraction_engine.ProcessSources(
          session.identifier, self._source_path_specs, storage_writer,
          configuration,
          enable_sigsegv_handler=self._enable_sigsegv_handler,
          filter_find_specs=filter_find_specs,
          number_of_worker_processes=self._number_of_extraction_workers,
          status_update_callback=status_update_callback)

    self._status_view.PrintExtractionSummary(processing_status)
Beispiel #10
0
    def BuildFilterFindSpecs(self,
                             artifact_definitions_path,
                             custom_artifacts_path,
                             knowledge_base_object,
                             artifact_filter_names=None,
                             filter_file_path=None):
        """Creates find specifications from artifact names or a filter file.

        Artifact filter names take precedence over the filter file. When
        neither is provided no find specifications are built.

        Args:
          artifact_definitions_path (str): path to artifact definitions file.
          custom_artifacts_path (str): path to custom artifact definitions file.
          knowledge_base_object (KnowledgeBase): knowledge base.
          artifact_filter_names (Optional[list[str]]): names of artifact
              definitions that are used for filtering file system and Windows
              Registry key paths.
          filter_file_path (Optional[str]): path of filter file.

        Returns:
          list[dfvfs.FindSpec]: find specifications for the file source type
              or None if neither artifact filter names nor a filter file path
              were provided.

        Raises:
          InvalidFilter: if no valid FindSpecs are built.
        """
        environment_variables = knowledge_base_object.GetEnvironmentVariables()

        if artifact_filter_names:
            joined_names = ', '.join(artifact_filter_names)
            logger.debug(
                'building find specification based on artifacts: {0:s}'.format(
                    joined_names))

            registry = BaseEngine.BuildArtifactsRegistry(
                artifact_definitions_path, custom_artifacts_path)
            helper = artifact_filters.ArtifactDefinitionsFilterHelper(
                registry, knowledge_base_object)
            self._artifacts_filter_helper = helper
            helper.BuildFindSpecs(
                artifact_filter_names,
                environment_variables=environment_variables)

            # Windows Registry artifacts can only be processed when the
            # Windows Registry files themselves are collected as well.
            if helper.registry_find_specs:
                helper.BuildFindSpecs(
                    self._WINDOWS_REGISTRY_FILES_ARTIFACT_NAMES,
                    environment_variables=environment_variables)

            artifact_find_specs = helper.file_system_find_specs
            if not artifact_find_specs:
                raise errors.InvalidFilter(
                    'No valid file system find specifications were built from '
                    'artifacts.')

            return artifact_find_specs

        if filter_file_path:
            logger.debug(
                'building find specification based on filter file: {0:s}'.format(
                    filter_file_path))

            filter_reader = filter_file.FilterFile(filter_file_path)
            filter_file_find_specs = filter_reader.BuildFindSpecs(
                environment_variables=environment_variables)

            if not filter_file_find_specs:
                raise errors.InvalidFilter(
                    'No valid file system find specifications were built from filter '
                    'file.')

            return filter_file_find_specs

        return None