def testDownloadAndHashActionSizeLimitWithHashTruncatedPolicy(self):
  image_path = os.path.join(self.base_path, "test_img.dd")
  # Read a bit more than a typical chunk (600 * 1024).
  expected_size = 750 * 1024

  hash_action = rdf_file_finder.FileFinderAction.Hash(
      max_size=expected_size, oversized_file_policy="HASH_TRUNCATED")
  download_action = rdf_file_finder.FileFinderAction.Download(
      max_size=expected_size, oversized_file_policy="HASH_TRUNCATED")

  for action in [hash_action, download_action]:
    results = self.RunFlow(paths=[image_path], action=action)

    urn = rdfvalue.RDFURN(self.client_id).Add("/fs/os").Add(image_path)
    vfs_file = aff4.FACTORY.Open(urn, token=self.token)
    # Make sure just a VFSFile got written.
    self.assertTrue(isinstance(vfs_file, aff4_grr.VFSFile))

    expected_data = open(image_path, "rb").read(expected_size)
    d = hashlib.sha1()
    d.update(expected_data)
    expected_hash = d.hexdigest()

    hash_entry = data_store_utils.GetFileHashEntry(vfs_file)
    self.assertEqual(hash_entry.sha1, expected_hash)

    flow_reply = results[0]
    self.assertEqual(flow_reply.hash_entry.sha1, expected_hash)
def testDownloadActionSizeLimitWithDownloadTruncatedPolicy(self):
  image_path = os.path.join(self.base_path, "test_img.dd")
  # Read a bit more than a typical chunk (600 * 1024).
  expected_size = 750 * 1024

  action = rdf_file_finder.FileFinderAction.Download(
      max_size=expected_size, oversized_file_policy="DOWNLOAD_TRUNCATED")
  results = self.RunFlow(paths=[image_path], action=action)

  urn = rdfvalue.RDFURN(self.client_id).Add("/fs/os").Add(image_path)
  blobimage = aff4.FACTORY.Open(urn, token=self.token)
  # Make sure a VFSBlobImage got written.
  self.assertTrue(isinstance(blobimage, aff4_grr.VFSBlobImage))

  self.assertEqual(len(blobimage), expected_size)
  data = blobimage.read(100 * expected_size)
  self.assertEqual(len(data), expected_size)

  expected_data = open(image_path, "rb").read(expected_size)
  self.assertEqual(data, expected_data)
  hash_obj = data_store_utils.GetFileHashEntry(blobimage)
  d = hashlib.sha1()
  d.update(expected_data)
  expected_hash = d.hexdigest()
  self.assertEqual(hash_obj.sha1, expected_hash)

  flow_reply = results[0]
  self.assertEqual(flow_reply.hash_entry.sha1, expected_hash)
def testMultiGetFileSizeLimit(self):
  client_mock = action_mocks.MultiGetFileClientMock()
  image_path = os.path.join(self.base_path, "test_img.dd")
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS, path=image_path)

  # Read a bit more than one chunk (600 * 1024).
  expected_size = 750 * 1024
  args = transfer.MultiGetFileArgs(
      pathspecs=[pathspec], file_size=expected_size)
  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      args=args)

  urn = pathspec.AFF4Path(self.client_id)
  blobimage = aff4.FACTORY.Open(urn, token=self.token)
  # Make sure a VFSBlobImage got written.
  self.assertTrue(isinstance(blobimage, aff4_grr.VFSBlobImage))

  self.assertEqual(len(blobimage), expected_size)
  data = blobimage.read(100 * expected_size)
  self.assertEqual(len(data), expected_size)

  expected_data = open(image_path, "rb").read(expected_size)
  self.assertEqual(data, expected_data)
  hash_obj = data_store_utils.GetFileHashEntry(blobimage)
  d = hashlib.sha1()
  d.update(expected_data)
  expected_hash = d.hexdigest()
  self.assertEqual(hash_obj.sha1, expected_hash)
def testClientFileFinderUpload(self):
  paths = [os.path.join(self.base_path, "{**,.}/*.plist")]
  action = rdf_file_finder.FileFinderAction.Download()

  session_id = self._RunClientFileFinder(paths, action)
  collection = flow.GRRFlow.ResultCollectionForFID(session_id)
  results = list(collection)
  self.assertEqual(len(results), 4)
  relpaths = [
      os.path.relpath(p.stat_entry.pathspec.path, self.base_path)
      for p in results
  ]
  self.assertItemsEqual(relpaths, [
      "History.plist", "History.xml.plist", "test.plist",
      "parser_test/com.google.code.grr.plist"
  ])

  for r in results:
    aff4_obj = aff4.FACTORY.Open(
        r.stat_entry.pathspec.AFF4Path(self.client_id), token=self.token)
    data = open(r.stat_entry.pathspec.path, "rb").read()
    self.assertEqual(aff4_obj.Read(100), data[:100])

    hash_obj = data_store_utils.GetFileHashEntry(aff4_obj)
    self.assertEqual(hash_obj.md5, hashlib.md5(data).hexdigest())
    self.assertEqual(hash_obj.sha1, hashlib.sha1(data).hexdigest())
    self.assertEqual(hash_obj.sha256, hashlib.sha256(data).hexdigest())
def testClientFileFinderFilestoreIntegration(self):
  paths = [os.path.join(self.base_path, "{**,.}/*.plist")]
  action = rdf_file_finder.FileFinderAction.Download()

  client_ids = self.SetupClients(2)
  session_ids = {
      c: self._RunClientFileFinder(paths, action, client_id=c)
      for c in client_ids
  }
  collections = {
      c: flow.GRRFlow.ResultCollectionForFID(session_id)
      for c, session_id in iteritems(session_ids)
  }

  for client_id, collection in iteritems(collections):
    results = list(collection)
    self.assertEqual(len(results), 5)
    relpaths = [
        os.path.relpath(p.stat_entry.pathspec.path, self.base_path)
        for p in results
    ]
    self.assertItemsEqual(relpaths, [
        "History.plist", "History.xml.plist", "test.plist",
        "parser_test/com.google.code.grr.plist",
        "parser_test/InstallHistory.plist"
    ])

    for r in results:
      aff4_obj = aff4.FACTORY.Open(
          r.stat_entry.pathspec.AFF4Path(client_id), token=self.token)

      # When files are uploaded to the server they are stored as VFSBlobImage.
      self.assertIsInstance(aff4_obj, aff4_grr.VFSBlobImage)
      # There is a STAT entry.
      self.assertTrue(aff4_obj.Get(aff4_obj.Schema.STAT))

      # Make sure the HashFileStore has references to this file for
      # all hashes.
      hash_entry = data_store_utils.GetFileHashEntry(aff4_obj)
      fs = filestore.HashFileStore
      md5_refs = list(fs.GetReferencesMD5(hash_entry.md5, token=self.token))
      self.assertIn(aff4_obj.urn, md5_refs)
      sha1_refs = list(fs.GetReferencesSHA1(hash_entry.sha1, token=self.token))
      self.assertIn(aff4_obj.urn, sha1_refs)
      sha256_refs = list(
          fs.GetReferencesSHA256(hash_entry.sha256, token=self.token))
      self.assertIn(aff4_obj.urn, sha256_refs)

      # Open the file inside the file store.
      urn, _ = fs(None, token=self.token).CheckHashes([hash_entry]).next()
      filestore_fd = aff4.FACTORY.Open(urn, token=self.token)
      # This is a VFSBlobImage too.
      self.assertIsInstance(filestore_fd, aff4_grr.VFSBlobImage)
      # No STAT object attached.
      self.assertFalse(filestore_fd.Get(filestore_fd.Schema.STAT))
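# The HashFileStore assertions above rely on content-addressed storage: each
# unique file content is stored once, keyed by its hash, and every client
# path that produced that content is recorded as a reference to the single
# stored blob. A toy, stdlib-only sketch of that idea (hypothetical, not
# GRR's actual implementation):

class ToyHashFileStore(object):
  """Stores each unique blob once and tracks which paths reference it."""

  def __init__(self):
    self._blobs = {}  # sha256 hex digest -> file content
    self._refs = {}   # sha256 hex digest -> set of referencing paths

  def Add(self, client_path, data):
    key = hashlib.sha256(data).hexdigest()
    self._blobs[key] = data  # identical contents collapse to one entry
    self._refs.setdefault(key, set()).add(client_path)
    return key

  def GetReferencesSHA256(self, key):
    return self._refs.get(key, set())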
def testMultiGetFileSizeLimit(self):
  client_mock = action_mocks.MultiGetFileClientMock()
  image_path = os.path.join(self.base_path, "test_img.dd")
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS, path=image_path)

  # Read a bit more than one chunk (600 * 1024).
  expected_size = 750 * 1024
  args = transfer.MultiGetFileArgs(
      pathspecs=[pathspec], file_size=expected_size)
  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      args=args)

  expected_data = open(image_path, "rb").read(expected_size)

  if data_store.RelationalDBReadEnabled():
    cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), pathspec)
    fd_rel_db = file_store.OpenFile(cp)

    self.assertEqual(fd_rel_db.size, expected_size)

    data = fd_rel_db.read(2 * expected_size)
    self.assertLen(data, expected_size)

    d = hashlib.sha256()
    d.update(expected_data)
    self.assertEqual(fd_rel_db.hash_id.AsBytes(), d.digest())

    # Check that the SHA-256 hash of the file matches the contents hash
    # and that MD5 and SHA-1 are set.
    history = data_store.REL_DB.ReadPathInfoHistory(
        cp.client_id, cp.path_type, cp.components)
    self.assertEqual(history[-1].hash_entry.sha256,
                     fd_rel_db.hash_id.AsBytes())
    self.assertIsNotNone(history[-1].hash_entry.sha1)
    self.assertIsNotNone(history[-1].hash_entry.md5)
  else:
    urn = pathspec.AFF4Path(self.client_id)
    blobimage = aff4.FACTORY.Open(urn, token=self.token)
    # Make sure a VFSBlobImage got written.
    self.assertIsInstance(blobimage, aff4_grr.VFSBlobImage)

    self.assertLen(blobimage, expected_size)
    data = blobimage.read(100 * expected_size)
    self.assertLen(data, expected_size)

    self.assertEqual(data, expected_data)
    hash_obj = data_store_utils.GetFileHashEntry(blobimage)
    d = hashlib.sha1()
    d.update(expected_data)
    self.assertEqual(hash_obj.sha1, d.digest())
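# Aside: the tests in this file compare hashes both as raw bytes (digest(),
# hash_id.AsBytes()) and as hex strings (hexdigest()). These are two
# renderings of the same value; a stdlib-only illustration:

import binascii
import hashlib

d = hashlib.sha1(b"data")
# The raw digest is 20 bytes; hexdigest() is the same value as 40 hex chars.
assert binascii.hexlify(d.digest()).decode("ascii") == d.hexdigest()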
def _HashFile(self, fd):
  """Look for the required hashes in the file."""
  hashes = data_store_utils.GetFileHashEntry(fd)
  if hashes:
    found_all = True
    for fingerprint_type, hash_types in iteritems(self.HASH_TYPES):
      for hash_type in hash_types:
        if fingerprint_type == "pecoff":
          hash_type = "pecoff_%s" % hash_type
        if not hashes.HasField(hash_type):
          found_all = False
          break
      if not found_all:
        break
    if found_all:
      return hashes

  fingerprinter = fingerprint.Fingerprinter(fd)
  if "generic" in self.HASH_TYPES:
    hashers = self._GetHashers(self.HASH_TYPES["generic"])
    fingerprinter.EvalGeneric(hashers=hashers)
  if "pecoff" in self.HASH_TYPES:
    hashers = self._GetHashers(self.HASH_TYPES["pecoff"])
    if hashers:
      fingerprinter.EvalPecoff(hashers=hashers)

  if not hashes:
    hashes = fd.Schema.HASH()

  for result in fingerprinter.HashIt():
    fingerprint_type = result["name"]
    for hash_type in self.HASH_TYPES[fingerprint_type]:
      if hash_type not in result:
        continue

      if hash_type == "SignedData":
        # There can be several certs in the same file.
        for signed_data in result[hash_type]:
          hashes.signed_data.Append(
              revision=signed_data[0],
              cert_type=signed_data[1],
              certificate=signed_data[2])
        continue

      # Set the hashes in the original object.
      if fingerprint_type == "generic":
        hashes.Set(hash_type, result[hash_type])
      elif fingerprint_type == "pecoff":
        hashes.Set("pecoff_%s" % hash_type, result[hash_type])
      else:
        logging.error("Unknown fingerprint_type %s.", fingerprint_type)

  return hashes
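# _HashFile above assumes a HASH_TYPES mapping and a _GetHashers() helper on
# the same class, neither of which is shown in this excerpt. A minimal sketch
# of plausible class-level definitions (an assumption, not confirmed from the
# source): HASH_TYPES maps a fingerprint type ("generic" or "pecoff") to the
# hash names evaluated for it, and _GetHashers resolves those names to
# hashlib constructors, skipping names hashlib does not provide
# (e.g. "SignedData").

HASH_TYPES = {
    "generic": ["md5", "sha1", "sha256", "SignedData"],
    "pecoff": ["md5", "sha1"],
}

def _GetHashers(self, hash_types):
  """Returns hashlib constructors for the named hash types (sketch)."""
  return [
      getattr(hashlib, hash_type)
      for hash_type in hash_types
      if hasattr(hashlib, hash_type)
  ]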
def testClientFileFinderUpload(self):
  paths = [os.path.join(self.base_path, "{**,.}/*.plist")]
  action = rdf_file_finder.FileFinderAction.Download()

  session_id = self._RunClientFileFinder(paths, action)
  results = flow_test_lib.GetFlowResults(self.client_id, session_id)
  self.assertLen(results, 5)
  relpaths = [
      os.path.relpath(p.stat_entry.pathspec.path, self.base_path)
      for p in results
  ]
  self.assertCountEqual(relpaths, [
      "History.plist", "History.xml.plist", "test.plist",
      "parser_test/com.google.code.grr.plist",
      "parser_test/InstallHistory.plist"
  ])

  for r in results:
    data = open(r.stat_entry.pathspec.path, "rb").read()

    if data_store.AFF4Enabled():
      aff4_obj = aff4.FACTORY.Open(
          r.stat_entry.pathspec.AFF4Path(self.client_id), token=self.token)
      self.assertEqual(aff4_obj.Read(100), data[:100])

      if not data_store.RelationalDBReadEnabled(category="filestore"):
        hash_obj = data_store_utils.GetFileHashEntry(aff4_obj)
        self.assertEqual(hash_obj.sha1, hashlib.sha1(data).digest())
        self.assertEqual(hash_obj.sha256, hashlib.sha256(data).digest())
        self.assertEqual(hash_obj.md5, hashlib.md5(data).digest())

    if data_store.RelationalDBReadEnabled(category="filestore"):
      fd = file_store.OpenFile(
          db.ClientPath.FromPathSpec(self.client_id.Basename(),
                                     r.stat_entry.pathspec))
      self.assertEqual(fd.read(100), data[:100])
      self.assertEqual(fd.hash_id.AsBytes(), hashlib.sha256(data).digest())
def testDownloadAndHashActionSizeLimitWithHashTruncatedPolicy(self):
  image_path = os.path.join(self.base_path, "test_img.dd")
  # Read a bit more than a typical chunk (600 * 1024).
  expected_size = 750 * 1024

  with io.open(image_path, "rb") as fd:
    expected_data = fd.read(expected_size)
  d = hashlib.sha1()
  d.update(expected_data)
  expected_hash = d.hexdigest()

  hash_action = rdf_file_finder.FileFinderAction.Hash(
      max_size=expected_size, oversized_file_policy="HASH_TRUNCATED")
  download_action = rdf_file_finder.FileFinderAction.Download(
      max_size=expected_size, oversized_file_policy="HASH_TRUNCATED")

  for action in [hash_action, download_action]:
    results = self.RunFlow(paths=[image_path], action=action)

    if data_store.RelationalDBReadEnabled("vfs"):
      with self.assertRaises(file_store.FileHasNoContentError):
        self._ReadTestFile(
            ["test_img.dd"], path_type=rdf_objects.PathInfo.PathType.OS)

      path_info = self._ReadTestPathInfo(
          ["test_img.dd"], path_type=rdf_objects.PathInfo.PathType.OS)
      self.assertEqual(path_info.hash_entry.sha1, expected_hash)
      self.assertEqual(path_info.hash_entry.num_bytes, expected_size)
    else:
      urn = rdfvalue.RDFURN(self.client_id).Add("/fs/os").Add(image_path)
      vfs_file = aff4.FACTORY.Open(urn, token=self.token)
      # Make sure just a VFSFile got written.
      self.assertIsInstance(vfs_file, aff4_grr.VFSFile)

      hash_entry = data_store_utils.GetFileHashEntry(vfs_file)
      self.assertEqual(hash_entry.sha1, expected_hash)

      flow_reply = results[0]
      self.assertEqual(flow_reply.hash_entry.sha1, expected_hash)
def testDownloadActionSizeLimitWithDownloadTruncatedPolicy(self):
  image_path = os.path.join(self.base_path, "test_img.dd")
  # Read a bit more than a typical chunk (600 * 1024).
  expected_size = 750 * 1024

  action = rdf_file_finder.FileFinderAction.Download(
      max_size=expected_size, oversized_file_policy="DOWNLOAD_TRUNCATED")
  results = self.RunFlow(paths=[image_path], action=action)

  with io.open(image_path, "rb") as fd:
    expected_data = fd.read(expected_size)
  d = hashlib.sha1()
  d.update(expected_data)
  expected_hash = d.hexdigest()

  if data_store.RelationalDBReadEnabled("vfs"):
    data = self._ReadTestFile(
        ["test_img.dd"], path_type=rdf_objects.PathInfo.PathType.OS)
    self.assertEqual(data, expected_data)

    path_info = self._ReadTestPathInfo(
        ["test_img.dd"], path_type=rdf_objects.PathInfo.PathType.OS)
    self.assertEqual(path_info.hash_entry.sha1, expected_hash)
    self.assertEqual(path_info.hash_entry.num_bytes, expected_size)
  else:
    urn = rdfvalue.RDFURN(self.client_id).Add("/fs/os").Add(image_path)
    blobimage = aff4.FACTORY.Open(urn, token=self.token)
    # Make sure a VFSBlobImage got written.
    self.assertIsInstance(blobimage, aff4_grr.VFSBlobImage)

    self.assertLen(blobimage, expected_size)
    data = blobimage.read(100 * expected_size)
    self.assertEqual(data, expected_data)

    hash_obj = data_store_utils.GetFileHashEntry(blobimage)
    self.assertEqual(hash_obj.sha1, expected_hash)

    flow_reply = results[0]
    self.assertEqual(flow_reply.hash_entry.sha1, expected_hash)
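# Several of the tests above repeat the same truncated-hash computation:
# read the first max_size bytes of the source file and SHA-1 them. A
# hypothetical helper (not part of the original test class) that captures
# the pattern:

def _ExpectedTruncatedSha1(path, size):
  """Returns the hex SHA-1 of the first `size` bytes of the file at `path`."""
  with io.open(path, "rb") as fd:
    return hashlib.sha1(fd.read(size)).hexdigest()

# Usage: expected_hash = _ExpectedTruncatedSha1(image_path, 750 * 1024)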