def testMultiWriteHistoryTwoPaths(self):
  client_id = self.InitializeClient()

  path_info_foo = rdf_objects.PathInfo.OS(
      components=["foo"],
      timestamp=rdfvalue.RDFDatetime.FromHumanReadable("2010-10-10"))
  path_info_bar = rdf_objects.PathInfo.OS(
      components=["bar"],
      timestamp=rdfvalue.RDFDatetime.FromHumanReadable("2011-11-11"))
  self.db.WritePathInfos(client_id, [path_info_foo, path_info_bar])

  hash_entries = {
      path_info_foo: rdf_crypto.Hash(md5=b"foo"),
      path_info_bar: rdf_crypto.Hash(md5=b"bar"),
  }
  self.db.MultiWritePathHistory(client_id, {}, hash_entries)

  path_info = self.db.ReadPathInfo(
      client_id, rdf_objects.PathInfo.PathType.OS, components=("foo",))
  self.assertEqual(path_info.hash_entry.md5, b"foo")
  self.assertEqual(path_info.last_hash_entry_timestamp,
                   rdfvalue.RDFDatetime.FromHumanReadable("2010-10-10"))

  path_info = self.db.ReadPathInfo(
      client_id, rdf_objects.PathInfo.PathType.OS, components=("bar",))
  self.assertEqual(path_info.hash_entry.md5, b"bar")
  self.assertEqual(path_info.last_hash_entry_timestamp,
                   rdfvalue.RDFDatetime.FromHumanReadable("2011-11-11"))
def testReadPathInfoTimestampHashEntry(self):
  client_id = self.InitializeClient()

  path_info = rdf_objects.PathInfo.OS(components=["foo"])

  path_info.hash_entry = rdf_crypto.Hash(md5=b"bar")
  self.db.WritePathInfos(client_id, [path_info])
  bar_timestamp = rdfvalue.RDFDatetime.Now()

  path_info.hash_entry = rdf_crypto.Hash(md5=b"baz")
  self.db.WritePathInfos(client_id, [path_info])
  baz_timestamp = rdfvalue.RDFDatetime.Now()

  path_info.hash_entry = rdf_crypto.Hash(md5=b"quux")
  self.db.WritePathInfos(client_id, [path_info])
  quux_timestamp = rdfvalue.RDFDatetime.Now()

  bar_path_info = self.db.ReadPathInfo(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      components=("foo",),
      timestamp=bar_timestamp)
  self.assertEqual(bar_path_info.hash_entry.md5, b"bar")

  baz_path_info = self.db.ReadPathInfo(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      components=("foo",),
      timestamp=baz_timestamp)
  self.assertEqual(baz_path_info.hash_entry.md5, b"baz")

  quux_path_info = self.db.ReadPathInfo(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      components=("foo",),
      timestamp=quux_timestamp)
  self.assertEqual(quux_path_info.hash_entry.md5, b"quux")
def testMigrateHistory(self):
  datetime = rdfvalue.RDFDatetime.FromHumanReadable

  client_urn = self.SetupClient(0)
  file_urn = client_urn.Add("fs/os").Add("foo")

  with test_lib.FakeTime(datetime("2009-09-09")):
    with self._Aff4Open(file_urn) as filedesc:
      filedesc.Set(filedesc.Schema.STAT, rdf_client_fs.StatEntry(st_size=108))

  with test_lib.FakeTime(datetime("2010-10-10")):
    with self._Aff4Open(file_urn) as filedesc:
      filedesc.Set(filedesc.Schema.STAT, rdf_client_fs.StatEntry(st_size=101))
      filedesc.Set(filedesc.Schema.HASH, rdf_crypto.Hash(sha256=b"quux"))

  with test_lib.FakeTime(datetime("2011-11-11")):
    with self._Aff4Open(file_urn) as filedesc:
      filedesc.Set(filedesc.Schema.HASH, rdf_crypto.Hash(md5=b"norf"))

  with test_lib.FakeTime(datetime("2012-12-12")):
    with self._Aff4Open(file_urn) as filedesc:
      filedesc.Set(filedesc.Schema.STAT, rdf_client_fs.StatEntry(st_size=42))
      filedesc.Set(filedesc.Schema.HASH, rdf_crypto.Hash(md5=b"thud"))

  result = self._RunFlow(client_urn)
  self.assertEqual(result, [])

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("foo",),
      timestamp=datetime("2009-09-09"))
  self.assertEqual(path_info.stat_entry.st_size, 108)
  self.assertFalse(path_info.hash_entry.sha256)

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("foo",),
      timestamp=datetime("2010-10-10"))
  self.assertEqual(path_info.stat_entry.st_size, 101)
  self.assertEqual(path_info.hash_entry.sha256, b"quux")

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("foo",),
      timestamp=datetime("2011-11-11"))
  self.assertEqual(path_info.stat_entry.st_size, 101)
  self.assertEqual(path_info.hash_entry.md5, b"norf")

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("foo",),
      timestamp=datetime("2012-12-12"))
  self.assertEqual(path_info.stat_entry.st_size, 42)
  self.assertEqual(path_info.hash_entry.md5, b"thud")
def testReadPathInfoTimestampStatAndHashEntry(self):
  client_id = self.InitializeClient()

  path_info = rdf_objects.PathInfo.OS(components=["foo"])

  path_info.stat_entry = rdf_client.StatEntry(st_mode=42)
  path_info.hash_entry = None
  self.db.WritePathInfos(client_id, [path_info])
  timestamp_1 = rdfvalue.RDFDatetime.Now()

  path_info.stat_entry = None
  path_info.hash_entry = rdf_crypto.Hash(md5=b"quux")
  self.db.WritePathInfos(client_id, [path_info])
  timestamp_2 = rdfvalue.RDFDatetime.Now()

  path_info.stat_entry = rdf_client.StatEntry(st_mode=1337)
  path_info.hash_entry = None
  self.db.WritePathInfos(client_id, [path_info])
  timestamp_3 = rdfvalue.RDFDatetime.Now()

  path_info.stat_entry = rdf_client.StatEntry(st_mode=4815162342)
  path_info.hash_entry = rdf_crypto.Hash(md5=b"norf")
  self.db.WritePathInfos(client_id, [path_info])
  timestamp_4 = rdfvalue.RDFDatetime.Now()

  path_info_1 = self.db.ReadPathInfo(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      components=("foo",),
      timestamp=timestamp_1)
  self.assertEqual(path_info_1.stat_entry.st_mode, 42)
  self.assertFalse(path_info_1.HasField("hash_entry"))

  path_info_2 = self.db.ReadPathInfo(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      components=("foo",),
      timestamp=timestamp_2)
  self.assertEqual(path_info_2.stat_entry.st_mode, 42)
  self.assertEqual(path_info_2.hash_entry.md5, b"quux")

  path_info_3 = self.db.ReadPathInfo(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      components=("foo",),
      timestamp=timestamp_3)
  self.assertEqual(path_info_3.stat_entry.st_mode, 1337)
  self.assertEqual(path_info_3.hash_entry.md5, b"quux")

  path_info_4 = self.db.ReadPathInfo(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      components=("foo",),
      timestamp=timestamp_4)
  self.assertEqual(path_info_4.stat_entry.st_mode, 4815162342)
  self.assertEqual(path_info_4.hash_entry.md5, b"norf")
def testMultiWriteHistoryDoesNotAllowOverridingHash(self):
  datetime = rdfvalue.RDFDatetime.FromHumanReadable
  client_id = self.InitializeClient()

  path_info = rdf_objects.PathInfo.OS(components=("foo", "bar", "baz"))
  self.db.WritePathInfos(client_id, [path_info])

  path_info.timestamp = datetime("2002-02-02")

  hash_entry = rdf_crypto.Hash(md5=b"quux")
  self.db.MultiWritePathHistory(client_id, {}, {path_info: hash_entry})

  with self.assertRaises(db.Error):
    hash_entry = rdf_crypto.Hash(sha256=b"norf")
    self.db.MultiWritePathHistory(client_id, {}, {path_info: hash_entry})
def _CreateFile(self,
                path,
                content,
                hashing=False,
                aff4_type=aff4.AFF4MemoryStream):
  if hashing:
    digest = hashlib.sha256(content).digest()
  else:
    digest = None

  if data_store.RelationalDBReadEnabled("filestore"):
    self.assertTrue(data_store.RelationalDBWriteEnabled())
    self.assertTrue(hashing)
  else:
    with aff4.FACTORY.Create(path, aff4_type, token=self.token) as fd:
      fd.Write(content)
      if digest:
        fd.Set(fd.Schema.HASH, rdf_crypto.Hash(sha256=digest))

  if data_store.RelationalDBWriteEnabled() and hashing:
    client_id, vfs_path = path.Split(2)
    path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

    path_info = rdf_objects.PathInfo()
    path_info.path_type = path_type
    path_info.components = components

    blob_id = rdf_objects.BlobID.FromBytes(digest)
    data_store.BLOBS.WriteBlobs({blob_id: content})
    hash_id = file_store.AddFileWithUnknownHash([blob_id])
    path_info.hash_entry.sha256 = hash_id.AsBytes()

    data_store.REL_DB.WritePathInfos(client_id, [path_info])
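# A minimal, hypothetical call to the helper above from a test; the client id
# and VFS path are illustrative, not taken from the original code:
#
#   self._CreateFile("aff4:/C.0000000000000001/fs/os/foo", b"bar",
#                    hashing=True)
#
# With hashing enabled, the content ends up in the relational blob store keyed
# by its SHA-256 digest, and a PathInfo row records the hash for the VFS path;
# otherwise the content is written only to the AFF4 stream.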
def testWritePathInfoHashAndStatEntrySeparateWrites(self):
  client_id = self.InitializeClient()

  stat_entry = rdf_client.StatEntry(st_mode=1337)
  stat_entry_path_info = rdf_objects.PathInfo.OS(
      components=["foo"], stat_entry=stat_entry)

  stat_entry_timestamp = rdfvalue.RDFDatetime.Now()
  self.db.WritePathInfos(client_id, [stat_entry_path_info])

  hash_entry = rdf_crypto.Hash(sha256=hashlib.sha256(b"foo").digest())
  hash_entry_path_info = rdf_objects.PathInfo.OS(
      components=["foo"], hash_entry=hash_entry)

  hash_entry_timestamp = rdfvalue.RDFDatetime.Now()
  self.db.WritePathInfos(client_id, [hash_entry_path_info])

  result = self.db.ReadPathInfo(
      client_id, rdf_objects.PathInfo.PathType.OS, components=("foo",))

  now = rdfvalue.RDFDatetime.Now()

  self.assertEqual(result.components, ["foo"])
  self.assertTrue(result.HasField("stat_entry"))
  self.assertTrue(result.HasField("hash_entry"))
  self.assertEqual(result.stat_entry, stat_entry)
  self.assertEqual(result.hash_entry, hash_entry)
  self.assertGreater(result.last_stat_entry_timestamp, stat_entry_timestamp)
  self.assertLess(result.last_stat_entry_timestamp, hash_entry_timestamp)
  self.assertGreater(result.last_hash_entry_timestamp, hash_entry_timestamp)
  self.assertLess(result.last_hash_entry_timestamp, now)
def SetupTestTimeline(self):
  client_id = self.SetupClient(0)
  fixture_test_lib.ClientFixture(client_id)

  # Choose some directory with pathspec in the ClientFixture.
  self.category_path = u"fs/os"
  self.folder_path = self.category_path + u"/Users/中国新闻网新闻中/Shared"
  self.file_path = self.folder_path + u"/a.txt"

  for i in range(0, 5):
    with test_lib.FakeTime(i):
      stat_entry = rdf_client_fs.StatEntry()
      stat_entry.st_mtime = rdfvalue.RDFDatetimeSeconds.Now()
      stat_entry.pathspec.path = self.file_path[len(self.category_path):]
      stat_entry.pathspec.pathtype = rdf_paths.PathSpec.PathType.OS

      sha256 = (
          "0e8dc93e150021bb4752029ebbff51394aa36f069cf19901578e4f06017acdb5")
      hash_entry = rdf_crypto.Hash(sha256=binascii.unhexlify(sha256))

      self.SetupFileMetadata(
          client_id,
          self.file_path,
          stat_entry=stat_entry,
          hash_entry=hash_entry)

  return client_id
def GetHashObject(self):
  """Returns a `Hash` object with appropriate fields filled-in."""
  hash_object = rdf_crypto.Hash()
  hash_object.num_bytes = self._bytes_read
  for algorithm in self._hashers:
    setattr(hash_object, algorithm, self._hashers[algorithm].digest())
  return hash_object
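# `GetHashObject` assumes that `self._hashers` maps a `Hash` field name to a
# hashlib-style object and that `self._bytes_read` tracks how many bytes were
# fed in. A minimal sketch of a class satisfying that contract (the class and
# method names here are hypothetical, not from the original code):
#
#   import hashlib
#
#   class StreamingHasher(object):
#
#     def __init__(self):
#       self._hashers = {
#           name: hashlib.new(name) for name in ["md5", "sha1", "sha256"]
#       }
#       self._bytes_read = 0
#
#     def Update(self, chunk):
#       self._bytes_read += len(chunk)
#       for hasher in self._hashers.values():
#         hasher.update(chunk)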
def Start(self):
  hash_result = rdf_crypto.Hash(
      sha256=binascii.unhexlify(
          "9e8dc93e150021bb4752029ebbff51394aa36f069cf19901578"
          "e4f06017acdb5"),
      sha1=binascii.unhexlify("6dd6bee591dfcb6d75eb705405302c3eab65e21a"),
      md5=binascii.unhexlify("8b0a15eefe63fd41f8dc9dee01c5cf9a"))
  self.SendReply(hash_result)
def SetupTestTimeline(self):
  client_id = self.SetupClient(0)
  fixture_test_lib.ClientFixture(client_id, token=self.token)

  # Choose some directory with pathspec in the ClientFixture.
  self.category_path = "fs/os"
  self.folder_path = self.category_path + "/Users/中国新闻网新闻中/Shared"
  self.file_path = self.folder_path + "/a.txt"

  file_urn = client_id.Add(self.file_path)
  for i in range(0, 5):
    with test_lib.FakeTime(i):
      stat_entry = rdf_client.StatEntry()
      stat_entry.st_mtime = rdfvalue.RDFDatetimeSeconds.Now()
      stat_entry.pathspec.path = self.file_path[len(self.category_path):]
      stat_entry.pathspec.pathtype = rdf_paths.PathSpec.PathType.OS

      hash_entry = rdf_crypto.Hash(
          sha256=binascii.unhexlify(
              "0e8dc93e150021bb4752029ebbff51394aa36f069cf19901578"
              "e4f06017acdb5"))

      with aff4.FACTORY.Create(
          file_urn, aff4_grr.VFSFile, mode="w", token=self.token) as fd:
        fd.Set(fd.Schema.STAT, stat_entry)
        fd.Set(fd.Schema.HASH, hash_entry)

      if data_store.RelationalDBWriteEnabled():
        cid = client_id.Basename()
        path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)
        path_info.hash_entry = hash_entry
        data_store.REL_DB.WritePathInfos(cid, [path_info])

  return client_id
def ReceiveFileHash(self, responses):
  """Add hash digest to tracker and check with filestore."""
  # Support old clients which may not have the new client action in place yet.
  # TODO(user): Deprecate once all clients have the HashFile action.
  if not responses.success and responses.request.request.name == "HashFile":
    logging.debug(
        "HashFile action not available, falling back to FingerprintFile.")
    self.CallClient(
        server_stubs.FingerprintFile,
        responses.request.request.payload,
        next_state="ReceiveFileHash",
        request_data=responses.request_data)
    return

  index = responses.request_data["index"]
  if not responses.success:
    self.Log("Failed to hash file: %s", responses.status)
    self.state.pending_hashes.pop(index, None)
    # Report the error.
    self._FileFetchFailed(index, responses.request.request.name)
    return

  self.state.files_hashed += 1
  response = responses.First()
  if response.HasField("hash"):
    hash_obj = response.hash
  else:
    # Deprecate this method of returning hashes.
    hash_obj = rdf_crypto.Hash()

    if len(response.results) < 1 or response.results[0]["name"] != "generic":
      self.Log("Failed to hash file: %s", self.state.indexed_pathspecs[index])
      self.state.pending_hashes.pop(index, None)
      return

    result = response.results[0]

    try:
      for hash_type in ["md5", "sha1", "sha256"]:
        value = result.GetItem(hash_type)
        setattr(hash_obj, hash_type, value)
    except AttributeError:
      self.Log("Failed to hash file: %s", self.state.indexed_pathspecs[index])
      self.state.pending_hashes.pop(index, None)
      return

  try:
    tracker = self.state.pending_hashes[index]
  except KeyError:
    # Hashing the file failed, but we did stat it.
    self._FileFetchFailed(index, responses.request.request.name)
    return

  tracker["hash_obj"] = hash_obj
  tracker["bytes_read"] = response.bytes_read

  self.state.files_hashed_since_check += 1
  if self.state.files_hashed_since_check >= self.MIN_CALL_TO_FILE_STORE:
    self._CheckHashesWithFileStore()
def testInitPathInfosRetainsIndirectPathHistory(self):
  datetime = rdfvalue.RDFDatetime.FromHumanReadable
  client_id = self.InitializeClient()

  path_info = rdf_objects.PathInfo.OS(components=("foo", "bar"))
  self.db.WritePathInfos(client_id, [path_info])

  parent_path_info = rdf_objects.PathInfo.OS(components=("foo",))

  self.db.WritePathStatHistory(
      client_id, parent_path_info, {
          datetime("2015-05-05"): rdf_client.StatEntry(st_mode=1337),
          datetime("2016-06-06"): rdf_client.StatEntry(st_mode=8888),
      })

  self.db.WritePathHashHistory(
      client_id, parent_path_info, {
          datetime("2016-06-06"): rdf_crypto.Hash(sha256=b"quux"),
          datetime("2017-07-07"): rdf_crypto.Hash(sha256=b"norf"),
      })

  self.db.InitPathInfos(client_id, [path_info])

  history = self.db.ReadPathInfoHistory(
      client_id, rdf_objects.PathInfo.PathType.OS, components=("foo",))

  self.assertEqual(history[0].timestamp, datetime("2015-05-05"))
  self.assertEqual(history[0].stat_entry.st_mode, 1337)

  self.assertEqual(history[1].timestamp, datetime("2016-06-06"))
  self.assertEqual(history[1].stat_entry.st_mode, 8888)
  self.assertEqual(history[1].hash_entry.sha256, b"quux")

  self.assertEqual(history[2].timestamp, datetime("2017-07-07"))
  self.assertEqual(history[2].hash_entry.sha256, b"norf")

  self.db.InitPathInfos(client_id, [parent_path_info])

  history = self.db.ReadPathInfoHistory(
      client_id, rdf_objects.PathInfo.PathType.OS, components=("foo",))
  self.assertEqual(history, [])
def _ReceiveFileHash(self, responses):
  """Add hash digest to tracker and check with filestore."""
  index = responses.request_data["index"]
  if not responses.success:
    self.Log("Failed to hash file: %s", responses.status)
    self.state.pending_hashes.pop(index, None)
    # Report the error.
    self._FileFetchFailed(index, status=responses.status)
    return

  self.state.files_hashed += 1
  response = responses.First()
  if response.HasField("hash"):
    hash_obj = response.hash
  else:
    # Deprecate this method of returning hashes.
    hash_obj = rdf_crypto.Hash()

    if len(response.results) < 1 or response.results[0]["name"] != "generic":
      self.Log("Failed to hash file: %s", self.state.indexed_pathspecs[index])
      self.state.pending_hashes.pop(index, None)
      return

    result = response.results[0]

    try:
      for hash_type in ["md5", "sha1", "sha256"]:
        value = result.GetItem(hash_type)
        setattr(hash_obj, hash_type, value)
    except AttributeError:
      self.Log("Failed to hash file: %s", self.state.indexed_pathspecs[index])
      self.state.pending_hashes.pop(index, None)
      return

  try:
    tracker = self.state.pending_hashes[index]
  except KeyError:
    # Hashing the file failed, but we did stat it.
    self._FileFetchFailed(index, status=responses.status)
    return

  tracker["hash_obj"] = hash_obj
  tracker["bytes_read"] = response.bytes_read

  stat_entry = tracker["stat_entry"]
  request_data = self.state.request_data_list[index]
  self.ReceiveFetchedFileHash(stat_entry, hash_obj, request_data)

  if getattr(self.state, "stop_at_hash", False):
    self._RemoveCompletedPathspec(index)
    return

  self.state.files_hashed_since_check += 1
  if self.state.files_hashed_since_check >= self.MIN_CALL_TO_FILE_STORE:
    self._CheckHashesWithFileStore()
def Handle(self, args, token=None):
  ValidateVfsPath(args.file_path)

  if args.timestamp:
    age = args.timestamp
  else:
    age = aff4.ALL_TIMES

  file_obj = aff4.FACTORY.Open(
      args.client_id.ToClientURN().Add(args.file_path),
      mode="r",
      age=age,
      token=token)

  if data_store.RelationalDBReadEnabled(category="vfs"):
    # These are not really "files" so they cannot be stored in the database
    # but they still can be queried so we need to return something. Sometimes
    # they contain a trailing slash so we need to take care of that.
    #
    # TODO(hanuszczak): Require VFS paths to be normalized so that trailing
    # slash is either forbidden or mandatory.
    if args.file_path.endswith("/"):
      args.file_path = args.file_path[:-1]
    if args.file_path in ["fs", "registry", "temp", "fs/os", "fs/tsk"]:
      api_file = ApiFile()
      api_file.name = api_file.path = args.file_path
      api_file.is_directory = True
      return ApiGetFileDetailsResult(file=api_file)

    path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)

    # TODO(hanuszczak): The tests passed even without support for timestamp
    # filtering. The test suite should probably be improved in that regard.
    path_info = data_store.REL_DB.ReadPathInfo(
        str(args.client_id), path_type, components, timestamp=args.timestamp)

    if path_info:
      stat_entry = path_info.stat_entry
      hash_entry = path_info.hash_entry
    else:
      stat_entry = rdf_client.StatEntry()
      hash_entry = rdf_crypto.Hash()
  else:
    stat_entry = None
    hash_entry = None

  return ApiGetFileDetailsResult(
      file=ApiFile().InitFromAff4Object(
          file_obj,
          stat_entry=stat_entry,
          hash_entry=hash_entry,
          with_details=True))
def CreateFile(client_path, content=b"", token=None): """Creates a file in datastore-agnostic way. Args: client_path: A `ClientPath` instance specifying location of the file. content: A content to write to the file. token: A GRR token for accessing the data store. """ precondition.AssertType(client_path, db.ClientPath) precondition.AssertType(content, bytes) blob_id = rdf_objects.BlobID.FromBlobData(content) stat_entry = rdf_client_fs.StatEntry(pathspec=rdf_paths.PathSpec( pathtype=client_path.path_type, path="/".join(client_path.components)), st_mode=33206, st_size=len(content)) if data_store.RelationalDBEnabled(): data_store.BLOBS.WriteBlobs({blob_id: content}) blob_ref = rdf_objects.BlobReference(size=len(content), offset=0, blob_id=blob_id) hash_id = file_store.AddFileWithUnknownHash(client_path, [blob_ref]) path_info = rdf_objects.PathInfo() path_info.path_type = client_path.path_type path_info.components = client_path.components path_info.hash_entry.num_bytes = len(content) path_info.hash_entry.sha256 = hash_id.AsBytes() path_info.stat_entry = stat_entry data_store.REL_DB.WritePathInfos(client_path.client_id, [path_info]) if data_store.AFF4Enabled(): urn = aff4.ROOT_URN.Add(client_path.client_id).Add( client_path.vfs_path) with aff4.FACTORY.Create(urn, aff4_grr.VFSBlobImage, token=token) as filedesc: bio = io.BytesIO() bio.write(content) bio.seek(0) filedesc.AppendContent(bio) filedesc.Set(filedesc.Schema.STAT, stat_entry) filedesc.Set( filedesc.Schema.HASH, rdf_crypto.Hash(sha256=rdf_objects.SHA256HashID.FromData( content).AsBytes(), num_bytes=len(content))) filedesc.Set(filedesc.Schema.CONTENT_LAST, rdfvalue.RDFDatetime.Now())
def testMigrateClientHashHistory(self):
  datetime = rdfvalue.RDFDatetime.FromHumanReadable

  client_urn = self.SetupClient(0)
  file_urn = client_urn.Add("fs/os").Add("bar")

  with test_lib.FakeTime(datetime("2010-01-01")):
    with self._Aff4Open(file_urn) as fd:
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(md5=b"quux"))

  with test_lib.FakeTime(datetime("2020-01-01")):
    with self._Aff4Open(file_urn) as fd:
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(md5=b"norf"))

  with test_lib.FakeTime(datetime("2030-01-01")):
    with self._Aff4Open(file_urn) as fd:
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(md5=b"blargh"))

  migrator = data_migration.ClientVfsMigrator()
  migrator.MigrateClient(client_urn)

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("bar",),
      timestamp=datetime("2010-12-31"))
  self.assertEqual(path_info.hash_entry.md5, b"quux")

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("bar",),
      timestamp=datetime("2020-12-31"))
  self.assertEqual(path_info.hash_entry.md5, b"norf")

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("bar",),
      timestamp=datetime("2030-12-31"))
  self.assertEqual(path_info.hash_entry.md5, b"blargh")
def testWriteHashHistory(self):
  datetime = rdfvalue.RDFDatetime.FromHumanReadable

  client_id = self.InitializeClient()
  path_info = rdf_objects.PathInfo.OS(components=["foo", "bar"])

  hash_entries = {
      datetime("2000-01-01"): rdf_crypto.Hash(md5=b"quux"),
      datetime("2000-02-01"): rdf_crypto.Hash(md5=b"norf"),
      datetime("2000-03-01"): rdf_crypto.Hash(md5=b"thud"),
  }

  self.db.WritePathInfos(client_id, [path_info])
  self.db.WritePathHashHistory(client_id, path_info, hash_entries)

  path_info_1 = self.db.ReadPathInfo(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      components=("foo", "bar"),
      timestamp=datetime("2000-01-20"))
  self.assertEqual(path_info_1.hash_entry.md5, b"quux")

  path_info_2 = self.db.ReadPathInfo(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      components=("foo", "bar"),
      timestamp=datetime("2000-02-20"))
  self.assertEqual(path_info_2.hash_entry.md5, b"norf")

  path_info_3 = self.db.ReadPathInfo(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      components=("foo", "bar"),
      timestamp=datetime("2000-03-20"))
  self.assertEqual(path_info_3.hash_entry.md5, b"thud")

  path_info = self.db.ReadPathInfo(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      components=("foo", "bar"))
  self.assertEqual(path_info.hash_entry.md5, b"thud")
  self.assertEqual(path_info.last_hash_entry_timestamp,
                   datetime("2000-03-01"))
def testInitPathInfosClearsHashHistory(self):
  datetime = rdfvalue.RDFDatetime.FromHumanReadable
  client_id = self.InitializeClient()

  path_info = rdf_objects.PathInfo.OS(components=("foo",))
  self.db.WritePathInfos(client_id, [path_info])

  self.db.WritePathHashHistory(
      client_id, path_info, {
          datetime("2011-01-01"): rdf_crypto.Hash(md5=b"quux"),
          datetime("2012-02-02"): rdf_crypto.Hash(md5=b"norf"),
          datetime("2013-03-03"): rdf_crypto.Hash(md5=b"thud"),
      })

  self.db.InitPathInfos(client_id, [path_info])

  history = self.db.ReadPathInfoHistory(
      client_id, rdf_objects.PathInfo.PathType.OS, components=("foo",))
  self.assertEqual(history, [])
def testTimelineEntriesWithHashOnlyAreIgnoredOnBodyExport(self):
  client_id = self.SetupClient(1)

  hash_entry = rdf_crypto.Hash(sha256=b"quux")
  self.SetupFileMetadata(
      client_id, u"fs/os/foo/bar", stat_entry=None, hash_entry=hash_entry)

  args = vfs_plugin.ApiGetVfsTimelineAsCsvArgs(
      client_id=client_id,
      file_path=u"fs/os/foo",
      format=vfs_plugin.ApiGetVfsTimelineAsCsvArgs.Format.BODY)
  result = self.handler.Handle(args, context=self.context)

  content = b"".join(result.GenerateContent())
  self.assertEqual(content, b"")
def testMigrateHashEntries(self):
  client_urn = self.SetupClient(0)

  with self._Aff4Open(client_urn.Add("fs/os").Add("foo")) as filedesc:
    filedesc.Set(filedesc.Schema.HASH, rdf_crypto.Hash(md5=b"quux"))

  result = self._RunFlow(client_urn)
  self.assertEqual(result, [])

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("foo",))
  self.assertEqual(path_info.hash_entry.md5, b"quux")
def testHashEntryFromSimpleFile(self):
  client_urn = self.SetupClient(0)

  with self._Aff4Open(client_urn.Add("fs/os").Add("foo")) as fd:
    hash_entry = rdf_crypto.Hash(md5=b"bar", sha256=b"baz")
    fd.Set(fd.Schema.HASH, hash_entry)

  data_migration.MigrateClientVfs(client_urn)

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("foo",))
  self.assertEqual(path_info.hash_entry.md5, b"bar")
  self.assertEqual(path_info.hash_entry.sha256, b"baz")
def _GenSampleResult(self):
  return rdf_file_finder.CollectSingleFileResult(
      stat=rdf_client_fs.StatEntry(
          pathspec=rdf_paths.PathSpec.OS(path="/etc/hosts"),
          st_mode=33184,
          st_size=4242,
          st_atime=1336469177,
          st_mtime=1336129892,
          st_ctime=1336129892,
      ),
      hash=rdf_crypto.Hash(
          sha256=binascii.unhexlify(
              "9e8dc93e150021bb4752029ebbff51394aa36f069cf19901578e4f06017acdb5"
          ),
          sha1=binascii.unhexlify("6dd6bee591dfcb6d75eb705405302c3eab65e21a"),
          md5=binascii.unhexlify("8b0a15eefe63fd41f8dc9dee01c5cf9a")))
def testMigrateAllClients(self):
  client_urns = list(map(self.SetupClient, range(25)))
  for client_urn in client_urns:
    with self._Aff4Open(client_urn.Add("registry").Add("quux")) as fd:
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(md5=b"norf"))

  migrator = data_migration.ClientVfsMigrator()
  migrator.MigrateAllClients()

  for client_urn in client_urns:
    path_info = data_store.REL_DB.ReadPathInfo(
        client_id=client_urn.Basename(),
        path_type=rdf_objects.PathInfo.PathType.REGISTRY,
        components=("quux",))
    self.assertEqual(path_info.hash_entry.md5, b"norf")
def testMigrateAllClientsSharded(self):
  client_urns = list(map(self.SetupClient, range(31)))
  for client_urn in client_urns:
    with self._Aff4Open(client_urn.Add("fs/os").Add("bar")) as fd:
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(sha256=b"baz"))

  migrator = data_migration.ClientVfsMigrator()
  for i in range(3):
    migrator.MigrateAllClients(shard_number=(i + 1), shard_count=3)

  for client_urn in client_urns:
    path_info = data_store.REL_DB.ReadPathInfo(
        client_id=client_urn.Basename(),
        path_type=rdf_objects.PathInfo.PathType.OS,
        components=("bar",))
    self.assertEqual(path_info.hash_entry.sha256, b"baz")
def testStatAndHashEntryFromSimpleFile(self):
  client_urn = self.SetupClient(0)

  with self._Aff4Open(client_urn.Add("fs/os").Add("foo")) as fd:
    stat_entry = rdf_client.StatEntry(st_mode=108)
    fd.Set(fd.Schema.STAT, stat_entry)

    hash_entry = rdf_crypto.Hash(sha256=b"quux")
    fd.Set(fd.Schema.HASH, hash_entry)

  data_migration.MigrateClientVfs(client_urn)

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("foo",))
  self.assertEqual(path_info.stat_entry.st_mode, 108)
  self.assertEqual(path_info.hash_entry.sha256, b"quux")
def _CreateFile(self, path, content, hashing=False):
  with aff4.FACTORY.Create(
      path, aff4.AFF4MemoryStream, token=self.token) as fd:
    fd.Write(content)

    if hashing:
      digest = hashlib.sha256(content).digest()
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(sha256=digest))

      if data_store.RelationalDBWriteEnabled():
        client_id, vfs_path = path.Split(2)
        path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

        path_info = rdf_objects.PathInfo()
        path_info.path_type = path_type
        path_info.components = components
        path_info.hash_entry.sha256 = digest
        data_store.REL_DB.WritePathInfos(client_id, [path_info])
def testMigrateClientsSmallThreadCount(self):
  client_urns = list(map(self.SetupClient, range(25)))

  for i, client_urn in enumerate(client_urns):
    with self._Aff4Open(client_urn.Add("fs/os").Add("foo").Add(str(i))) as fd:
      fd.Set(fd.Schema.STAT, rdf_client.StatEntry(st_size=i + 42))
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(md5=b"bar"))

  migrator = data_migration.ClientVfsMigrator()
  migrator.thread_count = 3
  migrator.MigrateClients(client_urns)

  for i, client_urn in enumerate(client_urns):
    path_info = data_store.REL_DB.ReadPathInfo(
        client_id=client_urn.Basename(),
        path_type=rdf_objects.PathInfo.PathType.OS,
        components=("foo", str(i)))
    self.assertEqual(path_info.hash_entry.md5, b"bar")
    self.assertEqual(path_info.stat_entry.st_size, i + 42)
def testWritePathInfosHashAndStatEntry(self):
  client_id = self.InitializeClient()

  stat_entry = rdf_client.StatEntry(st_mode=1337)
  hash_entry = rdf_crypto.Hash(md5=hashlib.md5(b"foo").digest())

  path_info = rdf_objects.PathInfo.OS(
      components=["foo", "bar", "baz"],
      stat_entry=stat_entry,
      hash_entry=hash_entry)
  self.db.WritePathInfos(client_id, [path_info])

  result = self.db.ReadPathInfo(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      components=("foo", "bar", "baz"))

  self.assertEqual(result.components, ["foo", "bar", "baz"])
  self.assertTrue(result.HasField("stat_entry"))
  self.assertTrue(result.HasField("hash_entry"))
  self.assertEqual(result.stat_entry, stat_entry)
  self.assertEqual(result.hash_entry, hash_entry)
def testTimelineInBodyFormatWithHashCorrectlyReturned(self):
  client_id = self.SetupClient(1)

  stat_entry = rdf_client_fs.StatEntry(st_size=1337)
  stat_entry.pathspec.path = u"foo/bar"
  stat_entry.pathspec.pathtype = rdf_paths.PathSpec.PathType.OS
  hash_entry = rdf_crypto.Hash(md5=b"quux", sha256=b"norf")
  self.SetupFileMetadata(
      client_id,
      u"fs/os/foo/bar",
      stat_entry=stat_entry,
      hash_entry=hash_entry)

  args = vfs_plugin.ApiGetVfsTimelineAsCsvArgs(
      client_id=client_id,
      file_path=u"fs/os/foo",
      format=vfs_plugin.ApiGetVfsTimelineAsCsvArgs.Format.BODY)
  result = self.handler.Handle(args, context=self.context)

  content = b"".join(result.GenerateContent())
  expected_csv = u"71757578|fs/os/foo/bar|0|----------|0|0|1337|0|0|0|0\n"
  self.assertEqual(content, expected_csv.encode("utf-8"))