Example #1
File: vfs.py Project: hfakar/grr
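  # Walks the AFF4 VFS breadth-first from start_urns: file-like entries
  # (AFF4Stream) are streamed into the ZIP, containers are queued for the next
  # round, and the archive is emitted incrementally as byte chunks.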
  def _GenerateContent(self, start_urns, prefix, age, token=None):
    archive_generator = utils.StreamingZipGenerator(
        compression=zipfile.ZIP_DEFLATED)
    folders_urns = set(start_urns)

    while folders_urns:
      next_urns = set()
      for _, children in aff4.FACTORY.MultiListChildren(folders_urns):
        for urn in children:
          next_urns.add(urn)

      download_fds = set()
      folders_urns = set()
      for fd in aff4.FACTORY.MultiOpen(next_urns, token=token):
        if isinstance(fd, aff4.AFF4Stream):
          download_fds.add(fd)
        elif "Container" in fd.behaviours:
          folders_urns.add(fd.urn)

      if download_fds:
        if age != aff4.NEWEST_TIME:
          urns = [fd.urn for fd in download_fds]
          # We need to reopen the files with the actual age
          # requested. We can't do this in the call above since
          # indexes are stored with the latest timestamp of an object
          # only so adding the age above will potentially ignore
          # some of the indexes.
          download_fds = list(
              aff4.FACTORY.MultiOpen(urns, age=age, token=token))

        for chunk in self._StreamFds(
            archive_generator, prefix, download_fds, token=token):
          yield chunk

    yield archive_generator.Close()
Example #2
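    # REL_DB variant: collects all non-directory descendant paths, then streams
    # each file's chunks into the ZIP, writing the entry header on the first
    # chunk and the footer on the last.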
    def _GenerateContent(self, client_id, start_paths, timestamp, path_prefix):
        client_paths = []
        for start_path in start_paths:
            path_type, components = rdf_objects.ParseCategorizedPath(
                start_path)
            for pi in data_store.REL_DB.ListDescendantPathInfos(
                    client_id, path_type, components):
                if pi.directory:
                    continue

                client_paths.append(db.ClientPath.FromPathInfo(client_id, pi))

        archive_generator = utils.StreamingZipGenerator(
            compression=zipfile.ZIP_DEFLATED)
        for chunk in file_store.StreamFilesChunks(client_paths,
                                                  max_timestamp=timestamp):
            if chunk.chunk_index == 0:
                content_path = os.path.join(path_prefix,
                                            chunk.client_path.vfs_path)
                # TODO(user): Export meaningful file metadata.
                st = os.stat_result(
                    (0o644, 0, 0, 0, 0, 0, chunk.total_size, 0, 0, 0))
                yield archive_generator.WriteFileHeader(content_path, st=st)

            yield archive_generator.WriteFileChunk(chunk.data)

            if chunk.chunk_index == chunk.total_chunks - 1:
                yield archive_generator.WriteFileFooter()

        yield archive_generator.Close()
Example #3
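  # Streams the collected hunt timelines into a single ZIP: the helper writes
  # the entries through zipgen, and Close() finalizes the archive.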
  def _GenerateArchive(
      self,
      args: ApiGetCollectedHuntTimelinesArgs,
  ) -> Iterator[bytes]:
    zipgen = utils.StreamingZipGenerator()
    yield from self._GenerateHuntTimelines(args, zipgen)
    yield zipgen.Close()
Example #4
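  # Same pattern for one hunt's timelines, parameterized by the requested
  # output format.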
  def _Generate(
      self,
      hunt_id: Text,
      format: ApiGetCollectedTimelineArgs.Format,
  ) -> Iterator[bytes]:
    zipgen = utils.StreamingZipGenerator()
    yield from self._GenerateTimelines(hunt_id, format, zipgen)
    yield zipgen.Close()
Example #5
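  # Variant that takes the output format as an rdf_structs.EnumNamedValue.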
  def _Generate(
      self,
      hunt_id: Text,
      fmt: rdf_structs.EnumNamedValue,
  ) -> Iterator[bytes]:
    zipgen = utils.StreamingZipGenerator()
    yield from self._GenerateTimelines(hunt_id, fmt, zipgen)
    yield zipgen.Close()
Example #6
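  # Exercises the low-level protocol directly: WriteFileHeader, WriteFileChunk
  # and WriteFileFooter per entry, then Close(), and checks that the nested
  # path round-trips through zipfile.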
  def testHierarchy(self):
    archiver = utils.StreamingZipGenerator()
    output = io.BytesIO()

    output.write(archiver.WriteFileHeader("foo/bar/baz"))
    output.write(archiver.WriteFileChunk(b"quux"))
    output.write(archiver.WriteFileFooter())

    output.write(archiver.Close())

    with zipfile.ZipFile(output, mode="r") as zipdesc:
      self.assertEqual(zipdesc.read("foo/bar/baz"), b"quux")
Example #7
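  # WriteFromFD copies an existing file-like object into the archive chunk by
  # chunk, yielding the archive bytes as it goes.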
  def testWriteFromFD(self):
    filedesc = io.BytesIO(b"foobarbaz" * 1024 * 1024)

    archiver = utils.StreamingZipGenerator()
    output = io.BytesIO()

    for chunk in archiver.WriteFromFD(filedesc, "quux"):
      output.write(chunk)

    output.write(archiver.Close())

    with zipfile.ZipFile(output, mode="r") as zipdesc:
      self.assertEqual(zipdesc.read("quux"), filedesc.getvalue())
Example #8
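  # With ZIP_DEFLATED, successive chunks written to one entry are concatenated
  # and stored compressed.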
  def testCompression(self):
    archiver = utils.StreamingZipGenerator(zipfile.ZIP_DEFLATED)
    output = io.BytesIO()

    output.write(archiver.WriteFileHeader("foo"))
    output.write(archiver.WriteFileChunk(b"quux"))
    output.write(archiver.WriteFileChunk(b"norf"))
    output.write(archiver.WriteFileFooter())

    output.write(archiver.Close())

    with zipfile.ZipFile(output, mode="r") as zipdesc:
      self.assertEqual(zipdesc.read("foo"), b"quuxnorf")
Example #9
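    # Constructor that selects a streaming ZIP or TAR.GZ generator based on
    # archive_format and validates the required prefix.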
    def __init__(self,
                 archive_format=ZIP,
                 prefix=None,
                 description=None,
                 predicate=None,
                 client_id=None):
        """CollectionArchiveGenerator constructor.

        Args:
          archive_format: May be ArchiveCollectionGenerator.ZIP or
            ArchiveCollectionGenerator.TAR_GZ. Defaults to ZIP.
          prefix: Name of the folder inside the archive that will contain all the
            generated data.
          description: String describing archive's contents. It will be included
            into the auto-generated MANIFEST file. Defaults to 'Files archive
            collection'.
          predicate: If not None, only the files matching the predicate will be
            archived, all others will be skipped. The predicate receives a
            db.ClientPath as input.
          client_id: The client_id to use when exporting a flow results collection.

        Raises:
          ValueError: if prefix is None.
        """
        super(Aff4CollectionArchiveGenerator, self).__init__()

        if archive_format == self.ZIP:
            self.archive_generator = utils.StreamingZipGenerator(
                compression=zipfile.ZIP_DEFLATED)
        elif archive_format == self.TAR_GZ:
            self.archive_generator = utils.StreamingTarGenerator()
        else:
            raise ValueError("Unknown archive format: %s" % archive_format)

        if not prefix:
            raise ValueError("Prefix can't be None.")
        self.prefix = prefix

        self.description = description or "Files archive collection"

        self.total_files = 0
        self.archived_files = 0
        self.ignored_files = []
        self.failed_files = []

        self.predicate = predicate or (lambda _: True)
        self.client_id = client_id
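The docstring above describes how the constructor is meant to be called. A minimal construction sketch, assuming the class is exposed under the name used in the super() call (Aff4CollectionArchiveGenerator); the actual module path and class name differ across GRR versions:

# Sketch only: class name and argument values are illustrative, not taken
# from the project.
generator = Aff4CollectionArchiveGenerator(
    archive_format=Aff4CollectionArchiveGenerator.ZIP,
    prefix="exported_files",                 # required, must be non-empty
    description="Files archive collection",
    predicate=lambda client_path: True,      # keep every file
    client_id=None)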
Example #10
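  # Newer flow-based constructor: the archive prefix and filename are derived
  # from the flow's client id, flow id and flow class name.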
  def __init__(self, flow: rdf_flow_objects.Flow,
               archive_format: ArchiveFormat):
    self.flow = flow
    self.archive_format = archive_format
    if archive_format == ArchiveFormat.ZIP:
      self.archive_generator = utils.StreamingZipGenerator(
          compression=zipfile.ZIP_DEFLATED)
      extension = "zip"
    elif archive_format == ArchiveFormat.TAR_GZ:
      self.archive_generator = utils.StreamingTarGenerator()
      extension = "tar.gz"
    else:
      raise ValueError(f"Unknown archive format: {archive_format}")

    self.prefix = "{}_{}_{}".format(
        flow.client_id.replace(".", "_"), flow.flow_id, flow.flow_class_name)
    self.filename = f"{self.prefix}.{extension}"
    self.num_archived_files = 0
Example #11
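  # Minimal timeline export: all timelines for the hunt go into one streamed
  # ZIP.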
  def _Generate(self, hunt_id: Text) -> Iterator[bytes]:
    zipgen = utils.StreamingZipGenerator()
    yield from self._GenerateTimelines(hunt_id, zipgen)
    yield zipgen.Close()
Example #12
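  # Initializes the streaming ZIP generator and the export counters before any
  # results are processed.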
  def Start(self):
    self.archive_generator = utils.StreamingZipGenerator(
        compression=zipfile.ZIP_DEFLATED)
    self.export_counts = {}
    return []
Example #13
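  # Same timeline export pattern as above, without type annotations.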
  def _Generate(self, hunt_id):
    zipgen = utils.StreamingZipGenerator()
    yield from self._GenerateTimelines(hunt_id, zipgen)
    yield zipgen.Close()