Example 1
    def ReceiveFileHash(self, responses):
        """Add hash digest to tracker and check with filestore."""
        vfs_urn = responses.request_data["vfs_urn"]
        if not responses.success:
            self.Log("Failed to hash file: %s", responses.status)
            self.state.pending_hashes.pop(vfs_urn, None)
            return

        self.state.files_hashed += 1
        response = responses.First()
        hash_obj = rdfvalue.Hash()

        if (len(response.results) < 1 or
                response.results[0]["name"] != "generic"):
            self.Log("Failed to hash file: %s", str(vfs_urn))
            self.state.pending_hashes.pop(vfs_urn, None)
            return

        result = response.results[0]

        try:
            for hash_type in ["md5", "sha1", "sha256"]:
                value = result.GetItem(hash_type)
                setattr(hash_obj, hash_type, value)
        except AttributeError:
            self.Log("Failed to hash file: %s", str(vfs_urn))
            self.state.pending_hashes.pop(vfs_urn, None)
            return

        self.state.pending_hashes[vfs_urn].hash_obj = hash_obj

        if len(self.state.pending_hashes) >= self.MIN_CALL_TO_FILE_STORE:
            self._CheckHashesWithFileStore()
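
ReceiveFileHash buffers each file's digest in self.state.pending_hashes and only calls _CheckHashesWithFileStore once at least MIN_CALL_TO_FILE_STORE entries are queued, so a single filestore round-trip can cover many files. Below is a minimal, self-contained sketch of that batching pattern, assuming a hypothetical check_filestore callback in place of GRR's filestore; HashBatcher and its names are illustrative, not part of the GRR API.

import hashlib

class HashBatcher(object):
    """Buffers per-file hash digests and flushes them in batches.

    Illustrative only: GRR's flow keeps an equivalent dict in its flow
    state and queries the filestore instead of the check_filestore
    callback used here.
    """

    MIN_CALL_TO_FILE_STORE = 100

    def __init__(self, check_filestore):
        self._check_filestore = check_filestore  # hypothetical callback
        self._pending = {}  # file identifier -> hex digest

    def add_file(self, file_id, data):
        # Record the digest for this file; flush once enough are queued.
        self._pending[file_id] = hashlib.sha256(data).hexdigest()
        if len(self._pending) >= self.MIN_CALL_TO_FILE_STORE:
            self.flush()

    def flush(self):
        if not self._pending:
            return
        # One call covers every queued file, then the buffer is cleared.
        self._check_filestore(dict(self._pending))
        self._pending.clear()

if __name__ == "__main__":
    flushed = []
    batcher = HashBatcher(check_filestore=flushed.append)
    batcher.MIN_CALL_TO_FILE_STORE = 2
    batcher.add_file("file_a", b"hello1")
    batcher.add_file("file_b", b"hello2")  # second entry triggers the flush
    print(flushed)  # one batch containing both digests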
Example 2

    def DownloadCollectionFiles(self, collection, output_writer, prefix):
        """Download all files from the collection and deduplicate along the way."""

        hashes = set()
        for fd_urn_batch in utils.Grouper(self.ResultsToUrns(collection),
                                          self.BATCH_SIZE):
            self.HeartBeat()

            for fd in aff4.FACTORY.MultiOpen(fd_urn_batch, token=self.token):
                self.state.total_files += 1

                # Any file-like object with data in AFF4 should inherit AFF4Stream.
                if isinstance(fd, aff4.AFF4Stream):
                    archive_path = os.path.join(prefix, *fd.urn.Split())
                    self.state.archived_files += 1

                    sha256_hash = fd.Get(fd.Schema.HASH,
                                         rdfvalue.Hash()).sha256
                    content_path = os.path.join(prefix, "hashes",
                                                str(sha256_hash))
                    if sha256_hash not in hashes:
                        # Make sure size of the original file is passed. It's required
                        # when output_writer is StreamingTarWriter.
                        st = os.stat_result(
                            (0644, 0, 0, 0, 0, 0, fd.size, 0, 0, 0))
                        output_writer.WriteFromFD(fd, content_path, st=st)
                        hashes.add(sha256_hash)
                        self.Log("Written contents: " + content_path)

                    up_prefix = "../" * len(fd.urn.Split())
                    output_writer.WriteSymlink(up_prefix + content_path,
                                               archive_path)
                    self.Log("Written symlink %s -> %s", archive_path,
                             up_prefix + content_path)
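
DownloadCollectionFiles stores each unique SHA-256 blob once under <prefix>/hashes/<digest> and then writes a relative symlink from every original file path to that blob, so duplicated contents cost only one copy in the archive. The following is a standalone sketch of the same layout using the standard tarfile module; the write_deduplicated helper and its arguments are assumptions for illustration, not the GRR output_writer API.

import hashlib
import io
import os
import tarfile

def write_deduplicated(archive_path, files, prefix="archive"):
    """Writes {logical_path: bytes} into a tar, storing each unique blob once.

    Illustrative sketch of the layout produced by the generator above:
    content lives under <prefix>/hashes/<sha256>, and every logical path
    becomes a relative symlink pointing back at that blob.
    """
    seen = set()
    with tarfile.open(archive_path, "w:gz") as tar:
        for logical_path, data in files.items():
            digest = hashlib.sha256(data).hexdigest()
            content_path = os.path.join(prefix, "hashes", digest)

            if digest not in seen:
                info = tarfile.TarInfo(content_path)
                info.size = len(data)  # addfile reads exactly info.size bytes
                tar.addfile(info, io.BytesIO(data))
                seen.add(digest)

            # Relative link: climb out of the logical path, descend into hashes/.
            member_path = os.path.join(prefix, logical_path)
            up = "../" * len(logical_path.split("/"))
            link = tarfile.TarInfo(member_path)
            link.type = tarfile.SYMTYPE
            link.linkname = up + content_path
            tar.addfile(link)

if __name__ == "__main__":
    write_deduplicated("export.tar.gz", {
        "C.0000000000000000/fs/os/foo/bar/hello1.txt": b"hello1",
        "C.0000000000000000/fs/os/foo/bar/copy.txt": b"hello1",  # deduplicated
    })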
Example 3
    def setUp(self):
        super(TestExportHuntResultsFilesAsArchive, self).setUp()

        path1 = "aff4:/C.0000000000000000/fs/os/foo/bar/hello1.txt"
        fd = aff4.FACTORY.Create(path1, "AFF4MemoryStream", token=self.token)
        fd.Write("hello1")
        fd.Set(fd.Schema.HASH,
               rdfvalue.Hash(sha256=hashlib.sha256("hello1").digest()))
        fd.Close()

        path2 = u"aff4:/C.0000000000000000/fs/os/foo/bar/中国新闻网新闻中.txt"
        fd = aff4.FACTORY.Create(path2, "AFF4MemoryStream", token=self.token)
        fd.Write("hello2")
        fd.Set(fd.Schema.HASH,
               rdfvalue.Hash(sha256=hashlib.sha256("hello2").digest()))
        fd.Close()

        self.paths = [path1, path2]

        with hunts.GRRHunt.StartHunt(hunt_name="GenericHunt",
                                     regex_rules=[
                                         rdfvalue.ForemanAttributeRegex(
                                             attribute_name="GRR client",
                                             attribute_regex="GRR")
                                     ],
                                     output_plugins=[],
                                     token=self.token) as hunt:

            self.hunt_urn = hunt.urn

            runner = hunt.GetRunner()
            runner.Start()

            with aff4.FACTORY.Create(runner.context.results_collection_urn,
                                     aff4_type="RDFValueCollection",
                                     mode="w",
                                     token=self.token) as collection:

                for path in self.paths:
                    collection.Add(
                        rdfvalue.StatEntry(
                            aff4path=path,
                            pathspec=rdfvalue.PathSpec(
                                path="fs/os/foo/bar/" + path.split("/")[-1],
                                pathtype=rdfvalue.PathSpec.PathType.OS)))
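
This fixture creates two AFF4MemoryStream files with known contents ("hello1" and "hello2", the second under a Unicode path), stores their SHA-256 digests in Schema.HASH, and registers matching StatEntry results in the hunt's results collection, so an archive export over this hunt should contain exactly two hash blobs plus one symlink per path. A small illustrative helper for predicting those member paths in an assertion follows; expected_members and its prefix argument are hypothetical, not part of the test class or the GRR API.

import hashlib

def expected_members(paths_to_contents, prefix):
    """Maps each fixture path to the content blob its archive symlink should target.

    Illustrative only: mirrors the hashes/<sha256> layout used by the
    archive generator in Example 2.
    """
    members = {}
    for aff4_path, data in paths_to_contents.items():
        digest = hashlib.sha256(data).hexdigest()
        members[aff4_path] = "%s/hashes/%s" % (prefix, digest)
    return members

if __name__ == "__main__":
    print(expected_members(
        {"aff4:/C.0000000000000000/fs/os/foo/bar/hello1.txt": b"hello1"},
        prefix="hunt_results"))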