def ReceiveFileHash(self, responses):
  """Add hash digest to tracker and check with filestore."""
  vfs_urn = responses.request_data["vfs_urn"]
  if not responses.success:
    self.Log("Failed to hash file: %s", responses.status)
    self.state.pending_hashes.pop(vfs_urn, None)
    return

  self.state.files_hashed += 1
  response = responses.First()
  hash_obj = rdfvalue.Hash()

  if len(response.results) < 1 or response.results[0]["name"] != "generic":
    self.Log("Failed to hash file: %s", str(vfs_urn))
    self.state.pending_hashes.pop(vfs_urn, None)
    return

  result = response.results[0]

  try:
    for hash_type in ["md5", "sha1", "sha256"]:
      value = result.GetItem(hash_type)
      setattr(hash_obj, hash_type, value)
  except AttributeError:
    self.Log("Failed to hash file: %s", str(vfs_urn))
    self.state.pending_hashes.pop(vfs_urn, None)
    return

  self.state.pending_hashes[vfs_urn].hash_obj = hash_obj

  if len(self.state.pending_hashes) >= self.MIN_CALL_TO_FILE_STORE:
    self._CheckHashesWithFileStore()
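# A minimal standalone sketch (not GRR code) of the batching pattern that
# ReceiveFileHash relies on: successfully hashed files are parked in a
# pending dict, and the comparatively expensive filestore lookup is only
# triggered once enough of them have accumulated. All names below
# (HashTracker, the value of MIN_CALL_TO_FILE_STORE, CheckHashesWithFileStore)
# are illustrative assumptions, not the real flow implementation.


class HashTracker(object):

  MIN_CALL_TO_FILE_STORE = 100  # assumed batch size

  def __init__(self):
    self.pending_hashes = {}  # vfs_urn -> hash digest

  def ReceiveHash(self, vfs_urn, hash_digest):
    """Record one successful hash result and flush the batch if it is full."""
    self.pending_hashes[vfs_urn] = hash_digest
    if len(self.pending_hashes) >= self.MIN_CALL_TO_FILE_STORE:
      self.CheckHashesWithFileStore()

  def CheckHashesWithFileStore(self):
    """Stand-in for the single batched filestore round trip, then reset."""
    batch, self.pending_hashes = self.pending_hashes, {}
    print("checking %d hashes against the filestore" % len(batch))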
def DownloadCollectionFiles(self, collection, output_writer, prefix):
  """Download all files from the collection and deduplicate along the way."""
  hashes = set()
  for fd_urn_batch in utils.Grouper(self.ResultsToUrns(collection),
                                    self.BATCH_SIZE):
    self.HeartBeat()

    for fd in aff4.FACTORY.MultiOpen(fd_urn_batch, token=self.token):
      self.state.total_files += 1

      # Any file-like object with data in AFF4 should inherit AFF4Stream.
      if isinstance(fd, aff4.AFF4Stream):
        archive_path = os.path.join(prefix, *fd.urn.Split())
        self.state.archived_files += 1

        sha256_hash = fd.Get(fd.Schema.HASH, rdfvalue.Hash()).sha256
        content_path = os.path.join(prefix, "hashes", str(sha256_hash))
        if sha256_hash not in hashes:
          # Make sure size of the original file is passed. It's required
          # when output_writer is StreamingTarWriter.
          st = os.stat_result((0644, 0, 0, 0, 0, 0, fd.size, 0, 0, 0))
          output_writer.WriteFromFD(fd, content_path, st=st)
          hashes.add(sha256_hash)
          self.Log("Written contents: " + content_path)

        up_prefix = "../" * len(fd.urn.Split())
        output_writer.WriteSymlink(up_prefix + content_path, archive_path)
        self.Log("Written symlink %s -> %s", archive_path,
                 up_prefix + content_path)
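# A minimal standalone sketch (not GRR code) of the deduplication scheme that
# DownloadCollectionFiles implements: each unique blob is written once under
# <prefix>/hashes/<sha256>, and every original file path in the archive is a
# relative symlink pointing back at that blob. The prefix, file names and data
# are made up; only Python's standard tarfile module is used.
import hashlib
import io
import tarfile

prefix = "archive"
files = {
    "C.0000000000000000/fs/os/foo/a.txt": b"same content",
    "C.0000000000000000/fs/os/foo/b.txt": b"same content",  # duplicate blob
}

with tarfile.open("dedup_example.tar", "w") as tar:
  written = set()
  for rel_path, data in files.items():
    digest = hashlib.sha256(data).hexdigest()
    content_path = "%s/hashes/%s" % (prefix, digest)

    # Write the blob only the first time its hash is seen.
    if digest not in written:
      info = tarfile.TarInfo(content_path)
      info.size = len(data)
      tar.addfile(info, io.BytesIO(data))
      written.add(digest)

    # Every path gets a symlink; one "../" per path component climbs back to
    # the archive root, mirroring the up_prefix computation above.
    components = rel_path.split("/")
    link = tarfile.TarInfo("%s/%s" % (prefix, rel_path))
    link.type = tarfile.SYMTYPE
    link.linkname = "../" * len(components) + content_path
    tar.addfile(link)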
def setUp(self):
  super(TestExportHuntResultsFilesAsArchive, self).setUp()

  # Create two test files with known contents and precomputed SHA-256 hashes.
  path1 = "aff4:/C.0000000000000000/fs/os/foo/bar/hello1.txt"
  fd = aff4.FACTORY.Create(path1, "AFF4MemoryStream", token=self.token)
  fd.Write("hello1")
  fd.Set(fd.Schema.HASH,
         rdfvalue.Hash(sha256=hashlib.sha256("hello1").digest()))
  fd.Close()

  path2 = u"aff4:/C.0000000000000000/fs/os/foo/bar/中国新闻网新闻中.txt"
  fd = aff4.FACTORY.Create(path2, "AFF4MemoryStream", token=self.token)
  fd.Write("hello2")
  fd.Set(fd.Schema.HASH,
         rdfvalue.Hash(sha256=hashlib.sha256("hello2").digest()))
  fd.Close()

  self.paths = [path1, path2]

  # Start a hunt and populate its results collection with stat entries
  # pointing at the two files created above.
  with hunts.GRRHunt.StartHunt(
      hunt_name="GenericHunt",
      regex_rules=[
          rdfvalue.ForemanAttributeRegex(attribute_name="GRR client",
                                         attribute_regex="GRR")
      ],
      output_plugins=[],
      token=self.token) as hunt:
    self.hunt_urn = hunt.urn
    runner = hunt.GetRunner()
    runner.Start()

    with aff4.FACTORY.Create(runner.context.results_collection_urn,
                             aff4_type="RDFValueCollection",
                             mode="w",
                             token=self.token) as collection:
      for path in self.paths:
        collection.Add(
            rdfvalue.StatEntry(
                aff4path=path,
                pathspec=rdfvalue.PathSpec(
                    path="fs/os/foo/bar/" + path.split("/")[-1],
                    pathtype=rdfvalue.PathSpec.PathType.OS)))