예제 #1
0
    def _ExtractWithExtensions(self, extensions, destination_path):
        """Extracts files using extensions.

        The destination directory is created when it does not exist yet.
        The source is then collected into a single threaded queue, which is
        drained by an ImageExtractorQueueConsumer that is given the
        extensions and the destination path.

        Args:
          extensions: a list of extensions.
          destination_path: the path where the extracted files should be
                            stored.
        """
        # The !s conversion is used because extensions is a list: a non-empty
        # format specification such as :s is rejected with a TypeError by
        # object.__format__ on Python 3.4 and later.
        logging.info(
            u'Finding files with extensions: {0!s}'.format(extensions))

        if not os.path.isdir(destination_path):
            os.makedirs(destination_path)

        input_queue = queue.SingleThreadedQueue()

        # TODO: add support to handle multiple partitions.
        self._source_path_spec = self.GetSourcePathSpec()

        image_collector = collector.Collector(input_queue, self._source_path,
                                              self._source_path_spec)

        image_collector.Collect()

        # NOTE(review): this assigns on the FileSaver name itself rather than
        # on an instance, so presumably it affects every user of FileSaver -
        # confirm.
        FileSaver.calc_md5 = self._remove_duplicates

        input_queue_consumer = ImageExtractorQueueConsumer(
            input_queue, extensions, destination_path)
        input_queue_consumer.ConsumePathSpecs()
예제 #2
0
  def testFileSystemCollection(self):
    """Test collection on the file system.

    Four test files are copied into a temporary directory, the directory is
    collected and the number of consumed path specifications is checked.
    """
    test_file_names = [
        u'syslog.tgz', u'syslog.zip', u'syslog.bz2', u'wtmp.1']
    source_paths = [
        self._GetTestFilePath([file_name]) for file_name in test_file_names]

    with shared_test_lib.TempDirectory() as temp_directory:
      # Populate the temporary directory with copies of the test files.
      for source_path in source_paths:
        shutil.copy(source_path, temp_directory)

      os_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_OS, location=temp_directory)

      path_spec_queue = single_process.SingleProcessQueue()
      collector_object = collector.Collector(
          path_spec_queue, resolver_context=context.Context())
      collector_object.Collect([os_path_spec])

      queue_consumer = TestCollectorQueueConsumer(path_spec_queue)
      queue_consumer.ConsumeItems()

      # One path specification is expected per copied test file.
      self.assertEqual(queue_consumer.number_of_path_specs, 4)
예제 #3
0
    def _Extract(self,
                 source_path_specs,
                 destination_path,
                 remove_duplicates=True):
        """Extracts files.

        The destination directory is created when needed, the source path
        specifications are collected into a single process queue and the
        queue is consumed by an ImageExtractorQueueConsumer.

        Args:
          source_path_specs: list of path specifications (instances of
                             dfvfs.PathSpec) to process.
          destination_path: the path where the extracted files should be
                            stored.
          remove_duplicates: optional boolean value to indicate if files with
                             duplicate content should be removed. The default
                             is True.
        """
        destination_exists = os.path.isdir(destination_path)
        if not destination_exists:
            os.makedirs(destination_path)

        path_spec_queue = single_process.SingleProcessQueue()
        collector.Collector(path_spec_queue).Collect(source_path_specs)

        saver = FileSaver(skip_duplicates=remove_duplicates)
        consumer = ImageExtractorQueueConsumer(
            path_spec_queue, saver, destination_path, self._filter_collection)
        consumer.ConsumeItems()
예제 #4
0
  def _Extract(self, destination_path, remove_duplicates=True):
    """Extracts files.

    The destination directory is created when needed, the source is
    collected into a single process queue and the queue is consumed by an
    ImageExtractorQueueConsumer.

    Args:
      destination_path: the path where the extracted files should be stored.
      remove_duplicates: optional boolean value to indicate if files with
                         duplicate content should be removed. The default
                         is True.
    """
    destination_exists = os.path.isdir(destination_path)
    if not destination_exists:
      os.makedirs(destination_path)

    path_spec_queue = single_process.SingleProcessQueue()

    # TODO: add support to handle multiple partitions.
    self._source_path_spec = self.GetSourcePathSpec()

    collector.Collector(
        path_spec_queue, self._source_path, self._source_path_spec).Collect()

    saver = FileSaver(skip_duplicates=remove_duplicates)
    consumer = ImageExtractorQueueConsumer(
        path_spec_queue, saver, destination_path, self._filter_collection)
    consumer.ConsumeItems()
예제 #5
0
  def testFileSystemWithFilterCollection(self):
    """Test collection on the file system with a filter.

    A filter file with four path expressions is written to a temporary file,
    turned into find specifications and set on a collector that runs on the
    current directory. The test then checks which paths were collected.
    """
    dirname = u'.'
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)

    # delete=False keeps the file on disk once the with block closes it, so
    # that BuildFindSpecsFromFile can read it back by name.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
      filter_name = temp_file.name
      # NamedTemporaryFile opens in binary mode by default, hence the byte
      # string literals.
      temp_file.write(b'/test_data/testdir/filter_.+.txt\n')
      temp_file.write(b'/test_data/.+evtx\n')
      temp_file.write(b'/AUTHORS\n')
      temp_file.write(b'/does_not_exist/some_file_[0-9]+txt\n')

    # The finally clause guarantees the temporary filter file is removed
    # even when the collection raises.
    try:
      test_collection_queue = queue.SingleThreadedQueue()
      test_store = queue.SingleThreadedQueue()
      resolver_context = context.Context()
      test_collector = collector.Collector(
          test_collection_queue, test_store, dirname, path_spec,
          resolver_context=resolver_context)

      find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
      test_collector.SetFilter(find_specs)

      test_collector.Collect()

      test_collector_queue_consumer = TestCollectorQueueConsumer(
          test_collection_queue)
      test_collector_queue_consumer.ConsumePathSpecs()
    finally:
      try:
        os.remove(filter_name)
      except (OSError, IOError) as exception:
        # The exception is formatted with !s since exception objects do not
        # support the :s format specification on Python 3.
        logging.warning((
            u'Unable to remove temporary file: {0:s} with error: {1!s}').format(
                filter_name, exception))

    # Two files with test_data/testdir/filter_*.txt, AUTHORS
    # and test_data/System.evtx.
    self.assertEqual(test_collector_queue_consumer.number_of_path_specs, 4)

    paths = test_collector_queue_consumer.GetFilePaths()

    current_directory = os.getcwd()

    expected_path = os.path.join(
        current_directory, 'test_data', 'testdir', 'filter_1.txt')
    self.assertIn(expected_path, paths)

    # filter_2.txt is the one filter_*.txt file that must not be collected.
    expected_path = os.path.join(
        current_directory, 'test_data', 'testdir', 'filter_2.txt')
    self.assertNotIn(expected_path, paths)

    expected_path = os.path.join(
        current_directory, 'test_data', 'testdir', 'filter_3.txt')
    self.assertIn(expected_path, paths)

    expected_path = os.path.join(
        current_directory, 'AUTHORS')
    self.assertIn(expected_path, paths)
예제 #6
0
  def testFileSystemCollection(self):
    """Test collection on the file system.

    Four test files are copied into a temporary directory, the directory is
    collected and the number of consumed path specifications is checked.
    """
    test_files = [
        self._GetTestFilePath(['syslog.tgz']),
        self._GetTestFilePath(['syslog.zip']),
        self._GetTestFilePath(['syslog.bz2']),
        self._GetTestFilePath(['wtmp.1'])]

    with TempDirectory() as dirname:
      for a_file in test_files:
        shutil.copy(a_file, dirname)

      path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)

      test_collection_queue = queue.SingleThreadedQueue()
      test_store = queue.SingleThreadedQueue()
      resolver_context = context.Context()
      test_collector = collector.Collector(
          test_collection_queue, test_store, dirname, path_spec,
          resolver_context=resolver_context)
      test_collector.Collect()

      test_collector_queue_consumer = TestCollectorQueueConsumer(
          test_collection_queue)
      test_collector_queue_consumer.ConsumePathSpecs()

      # assertEqual is used instead of the deprecated assertEquals alias.
      # One path specification is expected per copied test file.
      self.assertEqual(test_collector_queue_consumer.number_of_path_specs, 4)
예제 #7
0
  def testImageWithFilterCollection(self):
    """Test collection on a storage media image file with a filter.

    A filter file with three path expressions is written to a temporary
    file, turned into find specifications and set on a collector that runs
    on the TSK file system inside the image.dd test file.
    """
    test_file = self._GetTestFilePath(['image.dd'])

    volume_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
        parent=volume_path_spec)

    # delete=False keeps the file on disk once the with block closes it, so
    # that BuildFindSpecsFromFile can read it back by name.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
      filter_name = temp_file.name
      # NamedTemporaryFile opens in binary mode by default, hence the byte
      # string literals.
      temp_file.write(b'/a_directory/.+zip\n')
      temp_file.write(b'/a_directory/another.+\n')
      temp_file.write(b'/passwords.txt\n')

    # The finally clause guarantees the temporary filter file is removed
    # even when the collection raises.
    try:
      test_collection_queue = queue.SingleThreadedQueue()
      test_storage_queue = queue.SingleThreadedQueue()
      test_storage_queue_producer = queue.EventObjectQueueProducer(
          test_storage_queue)
      resolver_context = context.Context()
      test_collector = collector.Collector(
          test_collection_queue, test_storage_queue_producer, test_file,
          path_spec, resolver_context=resolver_context)

      find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
      test_collector.SetFilter(find_specs)

      test_collector.Collect()

      test_collector_queue_consumer = TestCollectorQueueConsumer(
          test_collection_queue)
      test_collector_queue_consumer.ConsumePathSpecs()
    finally:
      try:
        os.remove(filter_name)
      except (OSError, IOError) as exception:
        # The exception is formatted with !s since exception objects do not
        # support the :s format specification on Python 3.
        logging.warning((
            u'Unable to remove temporary file: {0:s} with error: {1!s}').format(
                filter_name, exception))

    self.assertEqual(test_collector_queue_consumer.number_of_path_specs, 2)

    paths = test_collector_queue_consumer.GetFilePaths()

    # path_specs[0]
    # type: TSK
    # file_path: '/a_directory/another_file'
    # container_path: 'test_data/image.dd'
    # image_offset: 0
    self.assertEqual(paths[0], u'/a_directory/another_file')

    # path_specs[1]
    # type: TSK
    # file_path: '/passwords.txt'
    # container_path: 'test_data/image.dd'
    # image_offset: 0
    self.assertEqual(paths[1], u'/passwords.txt')
예제 #8
0
파일: engine.py 프로젝트: cnbird1999/plaso
  def CreateCollector(
      self, include_directory_stat, vss_stores=None, filter_find_specs=None,
      resolver_context=None):
    """Creates and configures a collector object for the source.

       The collector discovers all the files that need to be processed by
       the workers. Once a file is discovered it is added to the process queue
       as a path specification (instance of dfvfs.PathSpec).

    Args:
      include_directory_stat: Boolean value to indicate whether directory
                              stat information should be collected.
      vss_stores: Optional list of VSS stores to include in the collection,
                  where 1 represents the first store. Set to None if no
                  VSS stores should be processed. The default is None.
      filter_find_specs: Optional list of filter find specifications (instances
                         of dfvfs.FindSpec). The default is None.
      resolver_context: Optional resolver context (instance of dfvfs.Context).
                        The default is None. Note that every thread or process
                        must have its own resolver context.

    Returns:
      A collector object (instance of Collector).

    Raises:
      RuntimeError: if source path specification is not set.
    """
    source_path_spec = self._source_path_spec
    if not source_path_spec:
      raise RuntimeError(u'Missing source.')

    new_collector = collector.Collector(
        self._collection_queue, self._source, source_path_spec,
        resolver_context=resolver_context)
    new_collector.SetCollectDirectoryMetadata(include_directory_stat)

    # The optional settings are only applied when a non-empty value is given.
    if vss_stores:
      new_collector.SetVssInformation(vss_stores)
    if filter_find_specs:
      new_collector.SetFilter(filter_find_specs)

    return new_collector
예제 #9
0
  def testImageCollection(self):
    """Test collection on a storage media image file.

    This image has two files:
      + logs/hidden.zip
      + logs/sys.tgz

    The hidden.zip file contains one file, syslog, which is the
    same for sys.tgz.

    The end results should therefore be:
      + logs/hidden.zip (unchanged)
      + logs/hidden.zip:syslog (the text file extracted out)
      + logs/sys.tgz (unchanged)
      + logs/sys.tgz (read as a GZIP file, so not compressed)
      + logs/sys.tgz:syslog.gz (A GZIP file from the TAR container)
      + logs/sys.tgz:syslog.gz:syslog (the extracted syslog file)

    This means that the collection script should collect 6 files in total.

    NOTE(review): the assertion at the end of this test expects 2 path
    specifications, which matches the two files listed for the image rather
    than the 6 entries above - presumably the expansion of the containers
    happens outside the collector; confirm and update this description.
    """
    test_file = self._GetTestFilePath(['syslog_image.dd'])

    volume_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
        parent=volume_path_spec)

    test_collection_queue = queue.SingleThreadedQueue()
    test_storage_queue = queue.SingleThreadedQueue()
    test_storage_queue_producer = queue.EventObjectQueueProducer(
        test_storage_queue)
    resolver_context = context.Context()
    test_collector = collector.Collector(
        test_collection_queue, test_storage_queue_producer, test_file,
        path_spec, resolver_context=resolver_context)
    test_collector.Collect()

    test_collector_queue_consumer = TestCollectorQueueConsumer(
        test_collection_queue)
    test_collector_queue_consumer.ConsumePathSpecs()

    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(test_collector_queue_consumer.number_of_path_specs, 2)
예제 #10
0
파일: engine.py 프로젝트: iwm911/plaso
    def CreateCollector(self,
                        include_directory_stat,
                        vss_stores=None,
                        filter_find_specs=None,
                        resolver_context=None):
        """Creates and configures a collector for the source.

        Args:
          include_directory_stat: Boolean value to indicate whether directory
                                  stat information should be collected.
          vss_stores: Optional list of VSS stores to include in the
                      collection, where 1 represents the first store. Set to
                      None if no VSS stores should be processed. The default
                      is None.
          filter_find_specs: Optional list of filter find specifications
                             (instances of dfvfs.FindSpec). The default is
                             None.
          resolver_context: Optional resolver context (instance of
                            dfvfs.Context). The default is None. Note that
                            every thread or process must have its own
                            resolver context.

        Returns:
          The collector object that was created.

        Raises:
          RuntimeError: if source path specification is not set.
        """
        source_path_spec = self._source_path_spec
        if not source_path_spec:
            raise RuntimeError(u'Missing source.')

        new_collector = collector.Collector(
            self._collection_queue,
            self._storage_queue_producer,
            self._source,
            source_path_spec,
            resolver_context=resolver_context)
        new_collector.collect_directory_metadata = include_directory_stat

        # The optional settings are only applied when a non-empty value is
        # given.
        if vss_stores:
            new_collector.SetVssInformation(vss_stores)
        if filter_find_specs:
            new_collector.SetFilter(filter_find_specs)

        return new_collector
예제 #11
0
    def __init__(self,
                 stop_collector_event,
                 source_path_specs,
                 path_spec_queue,
                 filter_find_specs=None,
                 include_directory_stat=True,
                 **kwargs):
        """Initializes the collector process object.

        Args:
          stop_collector_event: the stop process event (instance of
                                multiprocessing.Event). The collector
                                should exit after this event is set.
          source_path_specs: list of path specifications (instances of
                             dfvfs.PathSpec) to process.
          path_spec_queue: the path specification queue object (instance of
                           MultiProcessingQueue).
          filter_find_specs: Optional list of filter find specifications
                             (instances of dfvfs.FindSpec). The default is
                             None.
          include_directory_stat: Optional boolean value to indicate whether
                                  directory stat information should be
                                  collected. The default is True.
          kwargs: keyword arguments to pass to multiprocessing.Process.
        """
        super(MultiProcessCollectorProcess,
              self).__init__(definitions.PROCESS_TYPE_COLLECTOR, **kwargs)

        self._path_spec_queue = path_spec_queue
        self._source_path_specs = source_path_specs
        self._stop_collector_event = stop_collector_event

        # A fresh resolver context is created for this collector.
        self._collector = collector.Collector(
            path_spec_queue, resolver_context=context.Context())
        self._collector.SetCollectDirectoryMetadata(include_directory_stat)

        if filter_find_specs:
            self._collector.SetFilter(filter_find_specs)
예제 #12
0
파일: image_export.py 프로젝트: f-s-p/plaso
  def _Extract(self, destination_path):
    """Extracts files.

    The destination directory is created when needed, the source is
    collected into a single process queue and the queue is consumed by an
    ImageExtractorQueueConsumer.

    Args:
      destination_path: the path where the extracted files should be stored.
    """
    destination_exists = os.path.isdir(destination_path)
    if not destination_exists:
      os.makedirs(destination_path)

    path_spec_queue = single_process.SingleProcessQueue()

    # TODO: add support to handle multiple partitions.
    self._source_path_spec = self.GetSourcePathSpec()

    collector.Collector(
        path_spec_queue, self._source_path, self._source_path_spec).Collect()

    # NOTE(review): this assigns on the FileSaver name itself rather than on
    # an instance, so presumably it affects every user of FileSaver - confirm.
    FileSaver.calc_md5 = self._remove_duplicates

    consumer = ImageExtractorQueueConsumer(
        path_spec_queue, destination_path, self._filter_collection)
    consumer.ConsumeItems()
예제 #13
0
  def testImageWithPartitionsCollections(self):
    """Test collection on a storage media image file with multiple partitions.

    The image contains 2 partitions (p1 and p2) with NTFS file systems.
    """
    test_file = self._GetTestFilePath([u'multi_partition_image.vmdk'])

    image_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)

    # Location, partition index and start offset of each partition.
    partitions = [
        (u'/p1', 2, 0x00010000),
        (u'/p2', 3, 0x00510000)]

    # Build the file system path specification on top of each partition.
    file_system_path_specs = []
    for location, part_index, start_offset in partitions:
      partition_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION, location=location,
          part_index=part_index, start_offset=start_offset,
          parent=image_path_spec)
      file_system_path_specs.append(path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
          parent=partition_path_spec))

    path_spec_queue = single_process.SingleProcessQueue()
    collector_object = collector.Collector(
        path_spec_queue, resolver_context=context.Context())
    collector_object.Collect(file_system_path_specs)

    queue_consumer = TestCollectorQueueConsumer(path_spec_queue)
    queue_consumer.ConsumeItems()

    paths = queue_consumer.GetFilePaths()

    # Paths that are expected on both partitions.
    shared_paths = [
        u'/$AttrDef',
        u'/$BadClus',
        u'/$Bitmap',
        u'/$Boot',
        u'/$Extend',
        u'/$Extend/$ObjId',
        u'/$Extend/$Quota',
        u'/$Extend/$Reparse',
        u'/$Extend/$RmMetadata',
        u'/$Extend/$RmMetadata/$Repair',
        u'/$Extend/$RmMetadata/$TxfLog',
        u'/$LogFile',
        u'/$MFT',
        u'/$MFTMirr',
        u'/$Secure',
        u'/$UpCase',
        u'/$Volume']

    expected_paths_p1 = shared_paths + [u'/file1.txt', u'/file2.txt']
    expected_paths_p2 = shared_paths + [
        u'/file1_on_part_2.txt', u'/file2_on_part_2.txt']
    expected_paths = expected_paths_p1 + expected_paths_p2

    self.assertEqual(queue_consumer.number_of_path_specs, len(expected_paths))

    # The collection order is not asserted, only the set of paths.
    self.assertEqual(sorted(paths), sorted(expected_paths))