def setUp(self):
  super(CollectionArchiveGeneratorTest, self).setUp()

  self.client_id = self.SetupClients(1)[0]

  path1 = self.client_id.Add("fs/os/foo/bar/hello1.txt")
  archive_path1 = (u"test_prefix/%s/fs/os/foo/bar/hello1.txt" %
                   self.client_id.Basename())
  with aff4.FACTORY.Create(
      path1, aff4.AFF4MemoryStream, token=self.token) as fd:
    fd.Write("hello1")
    fd.Set(fd.Schema.HASH,
           rdf_crypto.Hash(sha256=hashlib.sha256("hello1").digest()))

  path2 = self.client_id.Add(u"fs/os/foo/bar/中国新闻网新闻中.txt")
  archive_path2 = (u"test_prefix/%s/fs/os/foo/bar/"
                   u"中国新闻网新闻中.txt") % self.client_id.Basename()
  with aff4.FACTORY.Create(
      path2, aff4.AFF4MemoryStream, token=self.token) as fd:
    fd.Write("hello2")
    fd.Set(fd.Schema.HASH,
           rdf_crypto.Hash(sha256=hashlib.sha256("hello2").digest()))

  self.stat_entries = []
  self.paths = [path1, path2]
  self.archive_paths = [archive_path1, archive_path2]
  for path in self.paths:
    self.stat_entries.append(
        rdf_client.StatEntry(
            pathspec=rdf_paths.PathSpec(
                path="foo/bar/" + str(path).split("/")[-1],
                pathtype=rdf_paths.PathSpec.PathType.OS)))

  self.fd = None
def setUp(self):
  super(TestExportCollectionFilesAsArchive, self).setUp()

  path1 = "aff4:/C.0000000000000000/fs/os/foo/bar/hello1.txt"
  fd = aff4.FACTORY.Create(path1, aff4.AFF4MemoryStream, token=self.token)
  fd.Write("hello1")
  fd.Set(fd.Schema.HASH,
         rdf_crypto.Hash(sha256=hashlib.sha256("hello1").digest()))
  fd.Close()

  path2 = u"aff4:/C.0000000000000000/fs/os/foo/bar/中国新闻网新闻中.txt"
  fd = aff4.FACTORY.Create(path2, aff4.AFF4MemoryStream, token=self.token)
  fd.Write("hello2")
  fd.Set(fd.Schema.HASH,
         rdf_crypto.Hash(sha256=hashlib.sha256("hello2").digest()))
  fd.Close()

  self.collection_urn = aff4.ROOT_URN.Add("hunts/H:ABCDEF/Results")
  self.paths = [path1, path2]
  with aff4.FACTORY.Create(
      self.collection_urn,
      aff4_type=collects.RDFValueCollection,
      mode="w",
      token=self.token) as collection:
    for path in self.paths:
      collection.Add(
          rdf_client.StatEntry(
              aff4path=path,
              pathspec=rdf_paths.PathSpec(
                  path="fs/os/foo/bar/" + path.split("/")[-1],
                  pathtype=rdf_paths.PathSpec.PathType.OS)))
def setUp(self):
  super(CollectionArchiveGeneratorTest, self).setUp()

  path1 = "aff4:/C.0000000000000000/fs/os/foo/bar/hello1.txt"
  with aff4.FACTORY.Create(
      path1, aff4.AFF4MemoryStream.__name__, token=self.token) as fd:
    fd.Write("hello1")
    fd.Set(fd.Schema.HASH,
           rdf_crypto.Hash(sha256=hashlib.sha256("hello1").digest()))

  path2 = u"aff4:/C.0000000000000000/fs/os/foo/bar/中国新闻网新闻中.txt"
  with aff4.FACTORY.Create(
      path2, aff4.AFF4MemoryStream.__name__, token=self.token) as fd:
    fd.Write("hello2")
    fd.Set(fd.Schema.HASH,
           rdf_crypto.Hash(sha256=hashlib.sha256("hello2").digest()))

  self.stat_entries = []
  self.paths = [path1, path2]
  for path in self.paths:
    self.stat_entries.append(
        rdf_client.StatEntry(
            aff4path=path,
            pathspec=rdf_paths.PathSpec(
                path="fs/os/foo/bar/" + path.split("/")[-1],
                pathtype=rdf_paths.PathSpec.PathType.OS)))

  self.fd = None
def testFindPathInfoByPathIDTimestampStatAndHashEntry(self):
  client_id = self.InitializeClient()

  path_info = objects.PathInfo.OS(components=["foo"])
  path_id = objects.PathID(["foo"])

  path_info.stat_entry = rdf_client.StatEntry(st_mode=42)
  path_info.hash_entry = None
  self.db.WritePathInfos(client_id, [path_info])
  timestamp_1 = rdfvalue.RDFDatetime.Now()

  path_info.stat_entry = None
  path_info.hash_entry = rdf_crypto.Hash(md5=b"quux")
  self.db.WritePathInfos(client_id, [path_info])
  timestamp_2 = rdfvalue.RDFDatetime.Now()

  path_info.stat_entry = rdf_client.StatEntry(st_mode=1337)
  path_info.hash_entry = None
  self.db.WritePathInfos(client_id, [path_info])
  timestamp_3 = rdfvalue.RDFDatetime.Now()

  path_info.stat_entry = rdf_client.StatEntry(st_mode=4815162342)
  path_info.hash_entry = rdf_crypto.Hash(md5=b"norf")
  self.db.WritePathInfos(client_id, [path_info])
  timestamp_4 = rdfvalue.RDFDatetime.Now()

  path_info_1 = self.db.FindPathInfoByPathID(
      client_id, objects.PathInfo.PathType.OS, path_id, timestamp=timestamp_1)
  self.assertEqual(path_info_1.stat_entry.st_mode, 42)
  self.assertFalse(path_info_1.HasField("hash_entry"))

  path_info_2 = self.db.FindPathInfoByPathID(
      client_id, objects.PathInfo.PathType.OS, path_id, timestamp=timestamp_2)
  self.assertEqual(path_info_2.stat_entry.st_mode, 42)
  self.assertEqual(path_info_2.hash_entry.md5, b"quux")

  path_info_3 = self.db.FindPathInfoByPathID(
      client_id, objects.PathInfo.PathType.OS, path_id, timestamp=timestamp_3)
  self.assertEqual(path_info_3.stat_entry.st_mode, 1337)
  self.assertEqual(path_info_3.hash_entry.md5, b"quux")

  path_info_4 = self.db.FindPathInfoByPathID(
      client_id, objects.PathInfo.PathType.OS, path_id, timestamp=timestamp_4)
  self.assertEqual(path_info_4.stat_entry.st_mode, 4815162342)
  self.assertEqual(path_info_4.hash_entry.md5, b"norf")
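# The timestamped tests above exercise "as of timestamp" reads: each write is
# stored with its own timestamp, a read returns the merged state of all writes
# made at or before the requested time, and fields left unset by later writes
# keep their earlier values. Below is a minimal standalone sketch of that
# semantics in plain Python -- not the GRR database API; TimestampedStore and
# its methods are hypothetical names used only for illustration, and writes
# are assumed to arrive in timestamp order.

import bisect


class TimestampedStore(object):
  """Keeps every (timestamp, fields) write and merges them on read."""

  def __init__(self):
    self._writes = []  # List of (timestamp, dict_of_fields), in write order.

  def Write(self, timestamp, **fields):
    # Record only the fields that were actually set in this write.
    self._writes.append(
        (timestamp, {k: v for k, v in fields.items() if v is not None}))

  def ReadAsOf(self, timestamp):
    # Merge all writes made at or before `timestamp`, oldest first, so the
    # newest value of each field wins while untouched fields survive.
    idx = bisect.bisect_right([t for t, _ in self._writes], timestamp)
    state = {}
    for _, fields in self._writes[:idx]:
      state.update(fields)
    return state


store = TimestampedStore()
store.Write(1, st_mode=42)
store.Write(2, md5=b"quux")
assert store.ReadAsOf(1) == {"st_mode": 42}
assert store.ReadAsOf(2) == {"st_mode": 42, "md5": b"quux"}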
def Run(self, args):
  hashers = {}
  for t in args.tuples:
    for hash_name in t.hashers:
      hashers[str(hash_name).lower()] = self._hash_types[str(hash_name)]()

  with vfs.VFSOpen(
      args.pathspec, progress_callback=self.Progress) as file_obj:
    # Only read as many bytes as we were told.
    bytes_read = 0
    while bytes_read < args.max_filesize:
      self.Progress()
      data = file_obj.Read(MAX_BUFFER_SIZE)
      if not data:
        break

      for hasher in hashers.values():
        hasher.update(data)

      bytes_read += len(data)

    response = rdf_client.FingerprintResponse(
        pathspec=file_obj.pathspec,
        bytes_read=bytes_read,
        hash=rdf_crypto.Hash(**dict(
            (k, v.digest()) for k, v in hashers.iteritems())))

    self.SendReply(response)
def GetHashObject(self):
  """Returns a `Hash` object with appropriate fields filled-in."""
  hash_object = rdf_crypto.Hash()
  hash_object.num_bytes = self._bytes_read
  for algorithm in self._hashers:
    setattr(hash_object, algorithm, self._hashers[algorithm].digest())
  return hash_object
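# Several of the hashing actions in this section follow the same pattern:
# feed a file to a set of hashlib objects in fixed-size chunks, stop at a
# byte limit, and report both the digests and how many bytes were actually
# hashed. A minimal standalone sketch of that pattern using only hashlib --
# not the GRR action API; multi_hash_file and its parameters are illustrative
# names.

import hashlib


def multi_hash_file(path, algorithms=("md5", "sha1", "sha256"),
                    max_size=2 ** 20, chunk_size=64 * 1024):
  """Hashes up to max_size bytes of a file with several algorithms at once."""
  hashers = {name: hashlib.new(name) for name in algorithms}
  bytes_read = 0
  with open(path, "rb") as fd:
    while bytes_read < max_size:
      # Never read past the byte cap, even on the last chunk.
      data = fd.read(min(chunk_size, max_size - bytes_read))
      if not data:
        break
      for hasher in hashers.values():
        hasher.update(data)
      bytes_read += len(data)
  digests = {name: hasher.digest() for name, hasher in hashers.items()}
  return digests, bytes_read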
def Hash(self, fname, stat_object, policy_max_hash_size,
         oversized_file_policy, resolve_links=True):
  file_size = stat_object.st_size
  if file_size <= policy_max_hash_size:
    max_hash_size = file_size
  else:
    ff_opts = rdf_file_finder.FileFinderHashActionOptions
    if oversized_file_policy == ff_opts.OversizedFilePolicy.SKIP:
      return
    elif oversized_file_policy == ff_opts.OversizedFilePolicy.HASH_TRUNCATED:
      max_hash_size = policy_max_hash_size

  try:
    file_obj = open(fname, "rb")
  except IOError:
    return

  with file_obj:
    hashers, bytes_read = standard_actions.HashFile().HashFile(
        ["md5", "sha1", "sha256"], file_obj, max_hash_size)

  result = rdf_crypto.Hash(**dict(
      (k, v.digest()) for k, v in hashers.iteritems()))
  result.num_bytes = bytes_read
  return result
def Start(self):
  hash_result = rdf_crypto.Hash(
      sha256=("9e8dc93e150021bb4752029ebbff51394aa36f069cf19901578"
              "e4f06017acdb5").decode("hex"),
      sha1="6dd6bee591dfcb6d75eb705405302c3eab65e21a".decode("hex"),
      md5="8b0a15eefe63fd41f8dc9dee01c5cf9a".decode("hex"))
  self.SendReply(hash_result)
def DownloadCollectionFiles(self, collection, output_writer, prefix):
  """Download all files from the collection and deduplicate along the way."""
  hashes = set()
  for fd_urn_batch in utils.Grouper(
      self.ResultsToUrns(collection), self.BATCH_SIZE):
    self.HeartBeat()

    for fd in aff4.FACTORY.MultiOpen(fd_urn_batch, token=self.token):
      self.state.total_files += 1

      # Any file-like object with data in AFF4 should inherit AFF4Stream.
      if isinstance(fd, aff4.AFF4Stream):
        archive_path = os.path.join(prefix, *fd.urn.Split())
        self.state.archived_files += 1

        sha256_hash = fd.Get(fd.Schema.HASH, rdf_crypto.Hash()).sha256
        content_path = os.path.join(prefix, "hashes", str(sha256_hash))
        if sha256_hash not in hashes:
          # Make sure size of the original file is passed. It's required
          # when output_writer is StreamingTarWriter.
          st = os.stat_result((0644, 0, 0, 0, 0, 0, fd.size, 0, 0, 0))
          output_writer.WriteFromFD(fd, content_path, st=st)
          hashes.add(sha256_hash)
          self.Log("Written contents: " + content_path)

        up_prefix = "../" * len(fd.urn.Split())
        output_writer.WriteSymlink(up_prefix + content_path, archive_path)
        self.Log("Written symlink %s -> %s", archive_path,
                 up_prefix + content_path)
def testWritePathInfoHashAndStatEntrySeparateWrites(self):
  client_id = self.InitializeClient()

  stat_entry = rdf_client.StatEntry(st_mode=1337)
  stat_entry_path_info = objects.PathInfo.OS(
      components=["foo"], stat_entry=stat_entry)

  stat_entry_timestamp = rdfvalue.RDFDatetime.Now()
  self.db.WritePathInfos(client_id, [stat_entry_path_info])

  hash_entry = rdf_crypto.Hash(sha256=hashlib.sha256("foo").digest())
  hash_entry_path_info = objects.PathInfo.OS(
      components=["foo"], hash_entry=hash_entry)

  hash_entry_timestamp = rdfvalue.RDFDatetime.Now()
  self.db.WritePathInfos(client_id, [hash_entry_path_info])

  result = self.db.FindPathInfoByPathID(client_id,
                                        objects.PathInfo.PathType.OS,
                                        objects.PathID(["foo"]))

  now = rdfvalue.RDFDatetime.Now()

  self.assertEqual(result.components, ["foo"])
  self.assertTrue(result.HasField("stat_entry"))
  self.assertTrue(result.HasField("hash_entry"))
  self.assertEqual(result.stat_entry, stat_entry)
  self.assertEqual(result.hash_entry, hash_entry)
  self.assertGreater(result.last_stat_entry_timestamp, stat_entry_timestamp)
  self.assertLess(result.last_stat_entry_timestamp, hash_entry_timestamp)
  self.assertGreater(result.last_hash_entry_timestamp, hash_entry_timestamp)
  self.assertLess(result.last_hash_entry_timestamp, now)
def ReceiveFileHash(self, responses):
  """Add hash digest to tracker and check with filestore."""
  # Support old clients which may not have the new client action in place yet.
  # TODO(user): Deprecate once all clients have the HashFile action.
  if not responses.success and responses.request.request.name == "HashFile":
    logging.debug(
        "HashFile action not available, falling back to FingerprintFile.")
    self.CallClient(
        server_stubs.FingerprintFile,
        responses.request.request.payload,
        next_state="ReceiveFileHash",
        request_data=responses.request_data)
    return

  index = responses.request_data["index"]
  if not responses.success:
    self.Log("Failed to hash file: %s", responses.status)
    self.state.pending_hashes.pop(index, None)
    # Report the error.
    self._FileFetchFailed(index, responses.request.request.name)
    return

  self.state.files_hashed += 1
  response = responses.First()
  if response.HasField("hash"):
    hash_obj = response.hash
  else:
    # Deprecate this method of returning hashes.
    hash_obj = rdf_crypto.Hash()

    if len(response.results) < 1 or response.results[0]["name"] != "generic":
      self.Log("Failed to hash file: %s", self.state.indexed_pathspecs[index])
      self.state.pending_hashes.pop(index, None)
      return

    result = response.results[0]

    try:
      for hash_type in ["md5", "sha1", "sha256"]:
        value = result.GetItem(hash_type)
        setattr(hash_obj, hash_type, value)
    except AttributeError:
      self.Log("Failed to hash file: %s", self.state.indexed_pathspecs[index])
      self.state.pending_hashes.pop(index, None)
      return

  try:
    tracker = self.state.pending_hashes[index]
  except KeyError:
    # Hashing the file failed, but we did stat it.
    self._FileFetchFailed(index, responses.request.request.name)
    return

  tracker["hash_obj"] = hash_obj
  tracker["bytes_read"] = response.bytes_read

  self.state.files_hashed_since_check += 1
  if self.state.files_hashed_since_check >= self.MIN_CALL_TO_FILE_STORE:
    self._CheckHashesWithFileStore()
def setUp(self):
  super(TestExportHuntResultsFilesAsArchive, self).setUp()

  path1 = "aff4:/C.0000000000000000/fs/os/foo/bar/hello1.txt"
  fd = aff4.FACTORY.Create(path1, "AFF4MemoryStream", token=self.token)
  fd.Write("hello1")
  fd.Set(fd.Schema.HASH,
         rdf_crypto.Hash(sha256=hashlib.sha256("hello1").digest()))
  fd.Close()

  path2 = u"aff4:/C.0000000000000000/fs/os/foo/bar/中国新闻网新闻中.txt"
  fd = aff4.FACTORY.Create(path2, "AFF4MemoryStream", token=self.token)
  fd.Write("hello2")
  fd.Set(fd.Schema.HASH,
         rdf_crypto.Hash(sha256=hashlib.sha256("hello2").digest()))
  fd.Close()

  self.paths = [path1, path2]

  with hunts.GRRHunt.StartHunt(
      hunt_name="GenericHunt",
      regex_rules=[
          rdf_foreman.ForemanAttributeRegex(
              attribute_name="GRR client", attribute_regex="GRR")
      ],
      output_plugins=[],
      token=self.token) as hunt:

    self.hunt_urn = hunt.urn

    runner = hunt.GetRunner()
    runner.Start()

    with aff4.FACTORY.Create(
        runner.context.results_collection_urn,
        aff4_type="RDFValueCollection",
        mode="w",
        token=self.token) as collection:

      for path in self.paths:
        collection.Add(
            rdf_client.StatEntry(
                aff4path=path,
                pathspec=rdf_paths.PathSpec(
                    path="fs/os/foo/bar/" + path.split("/")[-1],
                    pathtype=rdf_paths.PathSpec.PathType.OS)))
def _RunFileFinderDownloadHello(self, upload, opts=None):
  action = rdf_file_finder.FileFinderAction.Download()
  action.download = opts

  upload.return_value = rdf_client.UploadedFile(
      bytes_uploaded=42, file_id="foo", hash=rdf_crypto.Hash())

  hello_path = os.path.join(self.base_path, "hello.exe")
  return self._RunFileFinder([hello_path], action)
def Generate(self, collection, token=None):
  """Generates archive from a given collection.

  Iterates the collection and generates an archive by yielding contents
  of every referenced AFF4Stream.

  Args:
    collection: Iterable with items that point to aff4 paths.
    token: User's ACLToken.

  Yields:
    Binary chunks comprising the generated archive.
  """
  hashes = set()
  for fd_urn_batch in utils.Grouper(self._ItemsToUrns(collection),
                                    self.BATCH_SIZE):
    for fd in aff4.FACTORY.MultiOpen(fd_urn_batch, token=token):
      self.total_files += 1

      # Any file-like object with data in AFF4 should inherit AFF4Stream.
      if isinstance(fd, aff4.AFF4Stream):
        archive_path = os.path.join(self.prefix, *fd.urn.Split())

        sha256_hash = fd.Get(fd.Schema.HASH, rdf_crypto.Hash()).sha256
        if not sha256_hash:
          continue
        self.archived_files += 1

        content_path = os.path.join(self.prefix, "hashes", str(sha256_hash))
        if sha256_hash not in hashes:
          # Make sure size of the original file is passed. It's required
          # when output_writer is StreamingTarWriter.
          st = os.stat_result((0644, 0, 0, 0, 0, 0, fd.size, 0, 0, 0))
          try:
            for chunk in self.archive_generator.WriteFromFD(
                fd, content_path, st=st):
              yield chunk
            hashes.add(sha256_hash)
          except Exception:  # pylint: disable=broad-except
            self.failed_files += 1
            continue

        up_prefix = "../" * len(fd.urn.Split())
        yield self.archive_generator.WriteSymlink(up_prefix + content_path,
                                                  archive_path)

  for chunk in self._WriteDescription():
    yield chunk

  yield self.archive_generator.Close()
def Handle(self, args, token=None):
  ValidateVfsPath(args.file_path)

  if args.timestamp:
    age = args.timestamp
  else:
    age = aff4.ALL_TIMES

  file_obj = aff4.FACTORY.Open(
      args.client_id.ToClientURN().Add(args.file_path),
      mode="r",
      age=age,
      token=token)

  if data_store.RelationalDBReadEnabled(category="vfs"):
    # These are not really "files" so they cannot be stored in the database
    # but they still can be queried so we need to return something. Sometimes
    # they contain a trailing slash so we need to take care of that.
    #
    # TODO(hanuszczak): Require VFS paths to be normalized so that trailing
    # slash is either forbidden or mandatory.
    if args.file_path.endswith("/"):
      args.file_path = args.file_path[:-1]
    if args.file_path in ["fs", "registry", "temp", "fs/os", "fs/tsk"]:
      api_file = ApiFile()
      api_file.name = api_file.path = args.file_path
      api_file.is_directory = True
      return ApiGetFileDetailsResult(file=api_file)

    path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)

    # TODO(hanuszczak): The tests passed even without support for timestamp
    # filtering. The test suite should be probably improved in that regard.
    path_id = rdf_objects.PathID(components)
    path_info = data_store.REL_DB.FindPathInfoByPathID(
        str(args.client_id), path_type, path_id, timestamp=args.timestamp)

    if path_info:
      stat_entry = path_info.stat_entry
      hash_entry = path_info.hash_entry
    else:
      stat_entry = rdf_client.StatEntry()
      hash_entry = rdf_crypto.Hash()
  else:
    stat_entry = None
    hash_entry = None

  return ApiGetFileDetailsResult(file=ApiFile().InitFromAff4Object(
      file_obj,
      stat_entry=stat_entry,
      hash_entry=hash_entry,
      with_details=True))
def testFindPathInfoByPathIDTimestampHashEntry(self):
  client_id = self.InitializeClient()

  path_info = objects.PathInfo.OS(components=["foo"])
  path_id = objects.PathID(["foo"])

  path_info.hash_entry = rdf_crypto.Hash(md5=b"bar")
  self.db.WritePathInfos(client_id, [path_info])
  bar_timestamp = rdfvalue.RDFDatetime.Now()

  path_info.hash_entry = rdf_crypto.Hash(md5=b"baz")
  self.db.WritePathInfos(client_id, [path_info])
  baz_timestamp = rdfvalue.RDFDatetime.Now()

  path_info.hash_entry = rdf_crypto.Hash(md5=b"quux")
  self.db.WritePathInfos(client_id, [path_info])
  quux_timestamp = rdfvalue.RDFDatetime.Now()

  bar_path_info = self.db.FindPathInfoByPathID(
      client_id, objects.PathInfo.PathType.OS, path_id,
      timestamp=bar_timestamp)
  self.assertEqual(bar_path_info.hash_entry.md5, b"bar")

  baz_path_info = self.db.FindPathInfoByPathID(
      client_id, objects.PathInfo.PathType.OS, path_id,
      timestamp=baz_timestamp)
  self.assertEqual(baz_path_info.hash_entry.md5, b"baz")

  quux_path_info = self.db.FindPathInfoByPathID(
      client_id, objects.PathInfo.PathType.OS, path_id,
      timestamp=quux_timestamp)
  self.assertEqual(quux_path_info.hash_entry.md5, b"quux")
def Run(self, args):
  hash_types = set()
  for t in args.tuples:
    for hash_name in t.hashers:
      hash_types.add(str(hash_name).lower())

  with vfs.VFSOpen(
      args.pathspec, progress_callback=self.Progress) as file_obj:
    hashers, bytes_read = self.HashFile(hash_types, file_obj,
                                        args.max_filesize)

    self.SendReply(
        rdf_client.FingerprintResponse(
            pathspec=file_obj.pathspec,
            bytes_read=bytes_read,
            hash=rdf_crypto.Hash(**dict(
                (k, v.digest()) for k, v in hashers.iteritems()))))
def _CreateFile(self, path, content, hashing=False):
  with aff4.FACTORY.Create(
      path, aff4.AFF4MemoryStream, token=self.token) as fd:
    fd.Write(content)

    if hashing:
      digest = hashlib.sha256(content).digest()
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(sha256=digest))

      if data_store.RelationalDBWriteEnabled():
        client_id, vfs_path = path.Split(2)
        path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

        path_info = rdf_objects.PathInfo()
        path_info.path_type = path_type
        path_info.components = components
        path_info.hash_entry.sha256 = digest
        data_store.REL_DB.WritePathInfos(client_id, [path_info])
def testWritePathInfosHashAndStatEntry(self):
  client_id = self.InitializeClient()

  stat_entry = rdf_client.StatEntry(st_mode=1337)
  hash_entry = rdf_crypto.Hash(md5=hashlib.md5("foo").digest())

  path_info = objects.PathInfo.OS(
      components=["foo", "bar", "baz"],
      stat_entry=stat_entry,
      hash_entry=hash_entry)
  self.db.WritePathInfos(client_id, [path_info])

  result = self.db.FindPathInfoByPathID(
      client_id, objects.PathInfo.PathType.OS,
      objects.PathID(["foo", "bar", "baz"]))

  self.assertEqual(result.components, ["foo", "bar", "baz"])
  self.assertTrue(result.HasField("stat_entry"))
  self.assertTrue(result.HasField("hash_entry"))
  self.assertEqual(result.stat_entry, stat_entry)
  self.assertEqual(result.hash_entry, hash_entry)
def testWritePathInfosHashEntry(self):
  client_id = self.InitializeClient()

  hash_entry = rdf_crypto.Hash()
  hash_entry.sha256 = hashlib.sha256("foo").digest()
  hash_entry.md5 = hashlib.md5("foo").digest()
  hash_entry.num_bytes = len("foo")

  path_info = objects.PathInfo.OS(
      components=["foo", "bar", "baz"], hash_entry=hash_entry)
  self.db.WritePathInfos(client_id, [path_info])

  result = self.db.FindPathInfoByPathID(client_id,
                                        objects.PathInfo.PathType.OS,
                                        objects.PathID(["foo", "bar", "baz"]))

  self.assertEqual(result.components, ["foo", "bar", "baz"])
  self.assertTrue(result.HasField("hash_entry"))
  self.assertFalse(result.HasField("stat_entry"))
  self.assertEqual(result.hash_entry.sha256, hashlib.sha256("foo").digest())
  self.assertEqual(result.hash_entry.md5, hashlib.md5("foo").digest())
  self.assertEqual(result.hash_entry.num_bytes, len("foo"))
def HashObject(self):
  return crypto.Hash(
      sha256=self.hashers["sha256"].digest(),
      sha1=self.hashers["sha1"].digest(),
      md5=self.hashers["md5"].digest())
def ReceiveFileHash(self, responses):
  """Add hash digest to tracker and check with filestore."""
  # Support old clients which may not have the new client action in place yet.
  # TODO(user): Deprecate once all clients have the HashFile action.
  if not responses.success and responses.request.request.name == "HashFile":
    logging.debug(
        "HashFile action not available, falling back to FingerprintFile.")
    self.CallClient(
        "FingerprintFile",
        responses.request.request.payload,
        next_state="ReceiveFileHash",
        request_data=responses.request_data)
    return

  index = responses.request_data["index"]
  if not responses.success:
    self.Log("Failed to hash file: %s", responses.status)
    self.state.pending_hashes.pop(index, None)
    self.FileFetchFailed(
        responses.request.request.payload.pathspec,
        responses.request.request.name,
        request_data=responses.request_data)
    return

  self.state.files_hashed += 1
  response = responses.First()
  if response.HasField("hash"):
    hash_obj = response.hash
  else:
    # Deprecate this method of returning hashes.
    hash_obj = rdf_crypto.Hash()

    if len(response.results) < 1 or response.results[0]["name"] != "generic":
      self.Log("Failed to hash file: %s", self.state.indexed_pathspecs[index])
      self.state.pending_hashes.pop(index, None)
      return

    result = response.results[0]

    try:
      for hash_type in ["md5", "sha1", "sha256"]:
        value = result.GetItem(hash_type)
        setattr(hash_obj, hash_type, value)
    except AttributeError:
      self.Log("Failed to hash file: %s", self.state.indexed_pathspecs[index])
      self.state.pending_hashes.pop(index, None)
      return

  try:
    tracker = self.state.pending_hashes[index]
  except KeyError:
    # TODO(user): implement a test for this and handle the failure
    # gracefully: i.e. maybe we can continue with an empty StatEntry.
    self.Error("Couldn't stat the file, but got the hash (%s): %s" %
               (utils.SmartStr(index), utils.SmartStr(response.pathspec)))
    return

  tracker.hash_obj = hash_obj
  tracker.bytes_read = response.bytes_read

  self.state.files_hashed_since_check += 1
  if self.state.files_hashed_since_check >= self.MIN_CALL_TO_FILE_STORE:
    self._CheckHashesWithFileStore()
def Generate(self, collection, token=None):
  """Generates archive from a given collection.

  Iterates the collection and generates an archive by yielding contents
  of every referenced AFF4Stream.

  Args:
    collection: Iterable with items that point to aff4 paths.
    token: User's ACLToken.

  Yields:
    Binary chunks comprising the generated archive.
  """
  hashes = set()
  for fd_urn_batch in utils.Grouper(self._ItemsToUrns(collection),
                                    self.BATCH_SIZE):
    fds_to_write = {}
    for fd in aff4.FACTORY.MultiOpen(fd_urn_batch, token=token):
      self.total_files += 1

      if not self.predicate(fd):
        self.ignored_files.append(utils.SmartUnicode(fd.urn))
        continue

      # Any file-like object with data in AFF4 should inherit AFF4Stream.
      if isinstance(fd, aff4.AFF4Stream):
        archive_path = os.path.join(self.prefix, *fd.urn.Split())

        sha256_hash = fd.Get(fd.Schema.HASH, rdf_crypto.Hash()).sha256
        if not sha256_hash:
          continue
        self.archived_files += 1

        content_path = os.path.join(self.prefix, "hashes", str(sha256_hash))
        if sha256_hash not in hashes:
          # Make sure size of the original file is passed. It's required
          # when output_writer is StreamingTarWriter.
          st = os.stat_result((0644, 0, 0, 0, 0, 0, fd.size, 0, 0, 0))
          fds_to_write[fd] = (content_path, st)
          hashes.add(sha256_hash)

        up_prefix = "../" * len(fd.urn.Split())
        yield self.archive_generator.WriteSymlink(up_prefix + content_path,
                                                  archive_path)

    if fds_to_write:
      prev_fd = None
      for fd, chunk, exception in aff4.AFF4Stream.MultiStream(fds_to_write):
        if exception:
          logging.exception(exception)

          self.archived_files -= 1
          self.failed_files.append(utils.SmartUnicode(fd.urn))
          continue

        if prev_fd != fd:
          if prev_fd:
            yield self.archive_generator.WriteFileFooter()
          prev_fd = fd

          content_path, st = fds_to_write[fd]
          yield self.archive_generator.WriteFileHeader(content_path, st=st)

        yield self.archive_generator.WriteFileChunk(chunk)

      if self.archive_generator.is_file_write_in_progress:
        yield self.archive_generator.WriteFileFooter()

  for chunk in self._WriteDescription():
    yield chunk

  yield self.archive_generator.Close()
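# Both Generate() variants and DownloadCollectionFiles() above produce the
# same deduplicated layout: each unique file body is stored once under
# <prefix>/hashes/<sha256>, and every logical path in the archive is a
# relative symlink pointing back at that blob. A minimal standalone sketch of
# that layout using the standard tarfile module -- not the GRR streaming
# writers; build_deduplicated_tar and its parameters are illustrative names.

import hashlib
import io
import os
import tarfile


def build_deduplicated_tar(output_path, files, prefix="archive"):
  """Writes {archive_path: content_bytes} into a hash-deduplicated tarball."""
  written_hashes = set()
  with tarfile.open(output_path, "w:gz") as tar:
    for archive_path, content in files.items():
      digest = hashlib.sha256(content).hexdigest()
      content_path = os.path.join(prefix, "hashes", digest)

      if digest not in written_hashes:
        # Store the actual bytes only once per unique hash.
        blob = tarfile.TarInfo(content_path)
        blob.size = len(content)
        tar.addfile(blob, io.BytesIO(content))
        written_hashes.add(digest)

      # Point the logical path at the stored blob via a relative symlink:
      # climb out of the member's directory to the tar root, then descend
      # into <prefix>/hashes/<digest>.
      member_path = os.path.join(prefix, archive_path)
      up_prefix = "../" * len(archive_path.split("/"))
      link = tarfile.TarInfo(member_path)
      link.type = tarfile.SYMTYPE
      link.linkname = up_prefix + content_path
      tar.addfile(link)


build_deduplicated_tar("out.tar.gz", {
    "C.0000000000000000/fs/os/foo/bar/hello1.txt": b"hello1",
    "C.0000000000000000/fs/os/foo/bar/hello2.txt": b"hello1",  # deduplicated
})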
def testWritePathInfosMetadataTimestampUpdate(self):
  now = rdfvalue.RDFDatetime.Now

  client_id = self.InitializeClient()

  timestamp_0 = now()

  self.db.WritePathInfos(client_id, [objects.PathInfo.OS(components=["foo"])])

  result = self.db.FindPathInfoByPathID(client_id,
                                        objects.PathInfo.PathType.OS,
                                        objects.PathID(["foo"]))
  self.assertEqual(result.components, ["foo"])
  self.assertGreater(result.timestamp, timestamp_0)
  self.assertLess(result.timestamp, now())
  self.assertEqual(result.last_stat_entry_timestamp, None)
  self.assertEqual(result.last_hash_entry_timestamp, None)

  timestamp_1 = now()

  stat_entry = rdf_client.StatEntry(st_mode=42)
  self.db.WritePathInfos(
      client_id,
      [objects.PathInfo.OS(components=["foo"], stat_entry=stat_entry)])

  result = self.db.FindPathInfoByPathID(client_id,
                                        objects.PathInfo.PathType.OS,
                                        objects.PathID(["foo"]))
  self.assertEqual(result.components, ["foo"])
  self.assertEqual(result.stat_entry.st_mode, 42)
  self.assertGreater(result.timestamp, timestamp_1)
  self.assertLess(result.timestamp, now())
  self.assertGreater(result.last_stat_entry_timestamp, timestamp_1)
  self.assertLess(result.last_stat_entry_timestamp, now())

  timestamp_2 = now()

  hash_entry = rdf_crypto.Hash(md5=b"foo")
  self.db.WritePathInfos(
      client_id,
      [objects.PathInfo.OS(components=["foo"], hash_entry=hash_entry)])

  result = self.db.FindPathInfoByPathID(client_id,
                                        objects.PathInfo.PathType.OS,
                                        objects.PathID(["foo"]))
  self.assertEqual(result.components, ["foo"])
  self.assertEqual(result.hash_entry.md5, b"foo")
  self.assertGreater(result.timestamp, timestamp_2)
  self.assertLess(result.timestamp, now())
  self.assertGreater(result.last_hash_entry_timestamp, timestamp_2)
  self.assertLess(result.last_hash_entry_timestamp, now())

  timestamp_3 = now()

  self.db.WritePathInfos(
      client_id, [objects.PathInfo.OS(components=["foo"], directory=True)])

  result = self.db.FindPathInfoByPathID(client_id,
                                        objects.PathInfo.PathType.OS,
                                        objects.PathID(["foo"]))
  self.assertEqual(result.components, ["foo"])
  self.assertEqual(result.stat_entry.st_mode, 42)
  self.assertEqual(result.hash_entry.md5, b"foo")
  self.assertTrue(result.directory)
  self.assertGreater(result.timestamp, timestamp_3)
  self.assertLess(result.timestamp, now())
  self.assertGreater(result.last_stat_entry_timestamp, timestamp_1)
  self.assertLess(result.last_stat_entry_timestamp, timestamp_2)
  self.assertGreater(result.last_hash_entry_timestamp, timestamp_2)
  self.assertLess(result.last_hash_entry_timestamp, timestamp_3)

  timestamp_4 = now()

  path_info = objects.PathInfo.OS(components=["foo"])
  path_info.stat_entry.st_mode = 108
  path_info.hash_entry.sha256 = b"norf"
  self.db.WritePathInfos(client_id, [path_info])

  result = self.db.FindPathInfoByPathID(client_id,
                                        objects.PathInfo.PathType.OS,
                                        objects.PathID(["foo"]))
  self.assertEqual(result.components, ["foo"])
  self.assertEqual(result.stat_entry.st_mode, 108)
  self.assertEqual(result.hash_entry.sha256, b"norf")
  self.assertGreater(result.timestamp, timestamp_4)
  self.assertGreater(result.last_stat_entry_timestamp, timestamp_4)
  self.assertGreater(result.last_hash_entry_timestamp, timestamp_4)
  self.assertLess(result.timestamp, now())
  self.assertLess(result.last_stat_entry_timestamp, now())
  self.assertLess(result.last_hash_entry_timestamp, now())