def testMergePathInfoLastUpdate(self):
  components = ["usr", "local", "bin"]
  dest = rdf_objects.PathInfo(components=components)
  self.assertIsNone(dest.last_stat_entry_timestamp)

  dest.UpdateFrom(
      rdf_objects.PathInfo(
          components=components,
          last_stat_entry_timestamp=rdfvalue.RDFDatetime.FromHumanReadable(
              "2017-01-01")))
  self.assertEqual(dest.last_stat_entry_timestamp,
                   rdfvalue.RDFDatetime.FromHumanReadable("2017-01-01"))

  # Merging in a record without last_stat_entry_timestamp shouldn't change
  # it.
  dest.UpdateFrom(rdf_objects.PathInfo(components=components))
  self.assertEqual(dest.last_stat_entry_timestamp,
                   rdfvalue.RDFDatetime.FromHumanReadable("2017-01-01"))

  # Merging in a record with an earlier last_stat_entry_timestamp shouldn't
  # change it.
  dest.UpdateFrom(
      rdf_objects.PathInfo(
          components=components,
          last_stat_entry_timestamp=rdfvalue.RDFDatetime.FromHumanReadable(
              "2016-01-01")))
  self.assertEqual(dest.last_stat_entry_timestamp,
                   rdfvalue.RDFDatetime.FromHumanReadable("2017-01-01"))

  # Merging in a record with a later last_stat_entry_timestamp should change
  # it.
  dest.UpdateFrom(
      rdf_objects.PathInfo(
          components=components,
          last_stat_entry_timestamp=rdfvalue.RDFDatetime.FromHumanReadable(
              "2018-01-01")))
  self.assertEqual(dest.last_stat_entry_timestamp,
                   rdfvalue.RDFDatetime.FromHumanReadable("2018-01-01"))
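# The merge rule exercised above amounts to "keep the later timestamp".
# A minimal sketch of that rule, assuming UpdateFrom only ever moves
# last_stat_entry_timestamp forward; the helper name is illustrative, not
# the actual implementation.
def _MergeLastStatEntryTimestamp(dest_ts, src_ts):
  """Keeps the later of the two timestamps; an unset source never wins."""
  if src_ts is None:
    return dest_ts
  if dest_ts is None or src_ts > dest_ts:
    return src_ts
  return dest_ts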
def testUpdateFromDirectory(self):
  dest = rdf_objects.PathInfo(components=["usr", "local", "bin"])
  self.assertFalse(dest.directory)
  dest.UpdateFrom(
      rdf_objects.PathInfo(components=["usr", "local", "bin"], directory=True))
  self.assertTrue(dest.directory)
def testUpdateFromStatEntryRetain(self):
  stat_entry = rdf_client.StatEntry(st_mode=707)
  dst = objects.PathInfo(components=["foo", "bar"], stat_entry=stat_entry)

  src = objects.PathInfo(components=["foo", "bar"])

  dst.UpdateFrom(src)
  self.assertEqual(dst.stat_entry.st_mode, 707)
def testUpdateFromStatEntryUpdate(self):
  dst = objects.PathInfo(components=["foo", "bar"])

  stat_entry = rdf_client.StatEntry(st_mode=1337)
  src = objects.PathInfo(components=["foo", "bar"], stat_entry=stat_entry)

  dst.UpdateFrom(src)
  self.assertEqual(dst.stat_entry.st_mode, 1337)
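# Taken together, the two tests above pin down the stat_entry merge rule:
# a set source entry overwrites the destination, an unset one retains it.
# A minimal sketch under that assumption (illustrative helper, not the
# actual UpdateFrom implementation):
def _MergeStatEntry(dest_stat, src_stat):
  """Set source stat entries win; unset ones retain the destination."""
  return src_stat if src_stat is not None else dest_stat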
def __init__(self, path_type, components):
  self._path_info = rdf_objects.PathInfo(
      path_type=path_type, components=components)

  self._stat_entries = {}
  self._hash_entries = {}
  self._children = set()
def testGetAncestorsOrder(self):
  path_info = objects.PathInfo(components=["foo", "bar", "baz", "quux"])

  results = list(path_info.GetAncestors())
  self.assertEqual(len(results), 4)
  self.assertEqual(results[0].components, ["foo", "bar", "baz"])
  self.assertEqual(results[1].components, ["foo", "bar"])
  self.assertEqual(results[2].components, ["foo"])
  self.assertEqual(results[3].components, [])
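# The ordering asserted above (immediate parent first, root last) can be
# captured over raw component lists. A sketch, assuming GetAncestors is
# equivalent to successively dropping the last component (illustrative only):
def _AncestorComponents(components):
  """Yields ancestor component lists, from immediate parent down to root."""
  for size in range(len(components) - 1, -1, -1):
    yield components[:size]

# list(_AncestorComponents(["foo", "bar", "baz", "quux"]))
# == [["foo", "bar", "baz"], ["foo", "bar"], ["foo"], []]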
def testSwitchingBetweenFilesRefreshesFileHashes(self):
  # Create 2 files and set their HASH attributes to different values.
  # Note that a string passed to the fd.Schema.HASH constructor will be
  # printed as a hexadecimal bytestring. Thus "111" will become "313131"
  # and "222" will become "323232".
  urn_a = rdfvalue.RDFURN("%s/fs/os/c/Downloads/a.txt" % self.client_id)
  with aff4.FACTORY.Open(urn_a, mode="rw") as fd:
    fd.Set(fd.Schema.HASH(sha256="111"))

  urn_b = rdfvalue.RDFURN("%s/fs/os/c/Downloads/b.txt" % self.client_id)
  with aff4.FACTORY.Open(urn_b, mode="rw") as fd:
    fd.Set(fd.Schema.HASH(sha256="222"))

  if data_store.RelationalDBWriteEnabled():
    path_info_a = rdf_objects.PathInfo()
    path_info_a.path_type = rdf_objects.PathInfo.PathType.OS
    path_info_a.components = ["c", "Downloads", "a.txt"]
    path_info_a.hash_entry.sha256 = b"111"

    path_info_b = rdf_objects.PathInfo()
    path_info_b.path_type = rdf_objects.PathInfo.PathType.OS
    path_info_b.components = ["c", "Downloads", "b.txt"]
    path_info_b.hash_entry.sha256 = b"222"

    data_store.REL_DB.WritePathInfos(self.client_id,
                                     [path_info_a, path_info_b])

  # Open a URL pointing to file "a".
  self.Open("/#/clients/%s/vfs/fs/os/c/Downloads/a.txt?tab=download" %
            self.client_id)
  self.WaitUntil(self.IsElementPresent,
                 "css=tr:contains('Sha256') td:contains('313131')")

  # Click on a file table row with file "b". Information in the download
  # tab should get rerendered and we should see the Sha256 value
  # corresponding to file "b".
  self.Click("css=tr:contains(\"b.txt\")")
  self.WaitUntil(self.IsElementPresent,
                 "css=tr:contains('Sha256') td:contains('323232')")
def MigrateClientVfs(client_urn):
  """Migrates entire VFS of given client to the relational data store."""
  vfs = ListVfs(client_urn)

  path_infos = []
  for vfs_urn in vfs:
    _, vfs_path = vfs_urn.Split(2)
    path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

    path_info = rdf_objects.PathInfo(
        path_type=path_type, components=components)
    path_infos.append(path_info)

  data_store.REL_DB.WritePathInfos(client_urn.Basename(), path_infos)

  for vfs_group in utils.Grouper(vfs, _VFS_GROUP_SIZE):
    stat_entries = dict()
    hash_entries = dict()

    for fd in aff4.FACTORY.MultiOpen(vfs_group, age=aff4.ALL_TIMES):
      _, vfs_path = fd.urn.Split(2)
      path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
      path_info = rdf_objects.PathInfo(
          path_type=path_type, components=components)

      for stat_entry in fd.GetValuesForAttribute(fd.Schema.STAT):
        stat_path_info = path_info.Copy()
        stat_path_info.timestamp = stat_entry.age
        stat_entries[stat_path_info] = stat_entry

      for hash_entry in fd.GetValuesForAttribute(fd.Schema.HASH):
        hash_path_info = path_info.Copy()
        hash_path_info.timestamp = hash_entry.age
        hash_entries[hash_path_info] = hash_entry

    data_store.REL_DB.MultiWritePathHistory(client_urn.Basename(),
                                            stat_entries, hash_entries)
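# Throughout these helpers a categorized VFS path is split into a path type
# and plain components. An illustrative call, inferred from the "fs/os/..."
# fixtures used elsewhere in this section (not an exhaustive description of
# ParseCategorizedPath); assumes rdf_objects is imported as in the
# surrounding code.
path_type, components = rdf_objects.ParseCategorizedPath(
    "fs/os/c/Downloads/a.txt")
assert path_type == rdf_objects.PathInfo.PathType.OS
assert components == ["c", "Downloads", "a.txt"]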
def CreateFileVersions(self, client_id, file_path):
  """Add a new version for a file."""
  path_type, components = rdf_objects.ParseCategorizedPath(file_path)

  with test_lib.FakeTime(self.time_1):
    token = access_control.ACLToken(username="******")
    fd = aff4.FACTORY.Create(
        client_id.Add(file_path),
        aff4.AFF4MemoryStream,
        mode="w",
        token=token)
    fd.Write("Hello World")
    fd.Close()

    if data_store.RelationalDBWriteEnabled():
      path_info = rdf_objects.PathInfo()
      path_info.path_type = path_type
      path_info.components = components
      path_info.directory = False

      data_store.REL_DB.WritePathInfos(client_id.Basename(), [path_info])

  with test_lib.FakeTime(self.time_2):
    fd = aff4.FACTORY.Create(
        client_id.Add(file_path),
        aff4.AFF4MemoryStream,
        mode="w",
        token=token)
    fd.Write("Goodbye World")
    fd.Close()

    if data_store.RelationalDBWriteEnabled():
      path_info = rdf_objects.PathInfo()
      path_info.path_type = path_type
      path_info.components = components
      path_info.directory = False

      data_store.REL_DB.WritePathInfos(client_id.Basename(), [path_info])
def CreateFolder(client_id, path, timestamp, token=None):
  """Creates a VFS folder."""
  with test_lib.FakeTime(timestamp):
    with aff4.FACTORY.Create(
        client_id.Add(path),
        aff4_type=aff4_standard.VFSDirectory,
        mode="w",
        token=token) as _:
      pass

    if data_store.RelationalDBWriteEnabled():
      path_type, components = rdf_objects.ParseCategorizedPath(path)

      path_info = rdf_objects.PathInfo()
      path_info.path_type = path_type
      path_info.components = components
      path_info.directory = True

      data_store.REL_DB.WritePathInfos(client_id.Basename(), [path_info])
def _CreateFile(self, path, content, hashing=False):
  with aff4.FACTORY.Create(
      path, aff4.AFF4MemoryStream, token=self.token) as fd:
    fd.Write(content)

    if hashing:
      digest = hashlib.sha256(content).digest()
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(sha256=digest))

      if data_store.RelationalDBWriteEnabled():
        client_id, vfs_path = path.Split(2)
        path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

        path_info = rdf_objects.PathInfo()
        path_info.path_type = path_type
        path_info.components = components
        path_info.hash_entry.sha256 = digest
        data_store.REL_DB.WritePathInfos(client_id, [path_info])
def CreateFileVersion(client_id, path, content="", timestamp=None, token=None):
  """Add a new version for a file."""
  if timestamp is None:
    timestamp = rdfvalue.RDFDatetime.Now()

  with test_lib.FakeTime(timestamp):
    with aff4.FACTORY.Create(
        client_id.Add(path), aff4_type=aff4_grr.VFSFile, mode="w",
        token=token) as fd:
      fd.Write(content)
      fd.Set(fd.Schema.CONTENT_LAST, rdfvalue.RDFDatetime.Now())

    if data_store.RelationalDBWriteEnabled():
      path_type, components = rdf_objects.ParseCategorizedPath(path)

      path_info = rdf_objects.PathInfo()
      path_info.path_type = path_type
      path_info.components = components
      path_info.directory = False

      data_store.REL_DB.WritePathInfos(client_id.Basename(), [path_info])
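# A hedged usage example for the helper above. The client URN, path, and
# timestamp values are hypothetical (the URN format matches the
# "C.<16 hex digits>" ids used in these tests); the token is assumed to be
# supplied by the calling test.
def _ExampleCreateFileVersion(token):
  CreateFileVersion(
      rdf_client.ClientURN("C.1000000000000000"),
      "fs/os/c/Downloads/a.txt",
      content="Hello World",
      timestamp=rdfvalue.RDFDatetime.FromHumanReadable("2017-01-01"),
      token=token)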
def testWritePathInfosValidatesPathType(self):
  path = ["usr", "local"]
  client_id = "C.bbbbbbbbbbbbbbbb"
  with self.assertRaises(ValueError):
    self.db.WritePathInfos(client_id, [objects.PathInfo(components=path)])
def CreateClientObject(self, vfs_fixture):
  """Make a new client object."""

  # First remove the old fixture just in case it's still there.
  aff4.FACTORY.Delete(self.client_id, token=self.token)

  # Create the fixture at a fixed time.
  with test_lib.FakeTime(self.age):
    for path, (aff4_type, attributes) in vfs_fixture:
      path %= self.args

      aff4_object = aff4.FACTORY.Create(
          self.client_id.Add(path), aff4_type, mode="rw", token=self.token)

      if data_store.RelationalDBWriteEnabled():
        data_store.REL_DB.WriteClientMetadata(
            self.client_id.Basename(), fleetspeak_enabled=False)

        components = [component for component in path.split("/") if component]
        if components[0:2] == ["fs", "os"]:
          path_info = rdf_objects.PathInfo()
          path_info.path_type = rdf_objects.PathInfo.PathType.OS
          path_info.components = components[2:]
          if aff4_type in [aff4_grr.VFSFile, aff4_grr.VFSMemoryFile]:
            path_info.directory = False
          elif aff4_type == aff4_standard.VFSDirectory:
            path_info.directory = True
          else:
            raise ValueError("Incorrect AFF4 type: %s" % aff4_type)
          data_store.REL_DB.WritePathInfos(
              client_id=self.client_id.Basename(), path_infos=[path_info])

      for attribute_name, value in attributes.items():
        attribute = aff4.Attribute.PREDICATES[attribute_name]
        if isinstance(value, (str, unicode)):
          # Interpolate the value.
          value %= self.args

        # Is this supposed to be an RDFValue array?
        if aff4.issubclass(attribute.attribute_type,
                           rdf_protodict.RDFValueArray):
          rdfvalue_object = attribute()
          for item in value:
            new_object = rdfvalue_object.rdf_type.FromTextFormat(
                utils.SmartStr(item))
            rdfvalue_object.Append(new_object)

        # It is a text serialized protobuf.
        elif aff4.issubclass(attribute.attribute_type,
                             rdf_structs.RDFProtoStruct):
          # Use the alternate constructor - we always write protobufs in
          # textual form:
          rdfvalue_object = attribute.attribute_type.FromTextFormat(
              utils.SmartStr(value))

        elif aff4.issubclass(attribute.attribute_type, rdfvalue.RDFInteger):
          rdfvalue_object = attribute(int(value))
        else:
          rdfvalue_object = attribute(value)

        # If we don't already have a pathspec, try and get one from the stat.
        if aff4_object.Get(aff4_object.Schema.PATHSPEC) is None:
          # If the attribute was a stat, it has a pathspec nested in it.
          # We should add that pathspec as an attribute.
          if attribute.attribute_type == rdf_client.StatEntry:
            stat_object = attribute.attribute_type.FromTextFormat(
                utils.SmartStr(value))
            if stat_object.pathspec:
              pathspec_attribute = aff4.Attribute(
                  "aff4:pathspec", rdf_paths.PathSpec,
                  "The pathspec used to retrieve "
                  "this object from the client.", "pathspec")
              aff4_object.AddAttribute(pathspec_attribute,
                                       stat_object.pathspec)

        if attribute == "aff4:content":
          # For AFF4MemoryStreams we need to call Write() instead of
          # directly setting the contents.
          aff4_object.Write(rdfvalue_object)
        else:
          aff4_object.AddAttribute(attribute, rdfvalue_object)

        if (isinstance(rdfvalue_object, rdf_client.StatEntry) and
            rdfvalue_object.pathspec.pathtype != "UNSET"):
          if data_store.RelationalDBWriteEnabled():
            client_id = self.client_id.Basename()
            path_info = rdf_objects.PathInfo.FromStatEntry(rdfvalue_object)
            data_store.REL_DB.WritePathInfos(client_id, [path_info])

      # Populate the KB from the client attributes.
      if aff4_type == aff4_grr.VFSGRRClient:
        kb = rdf_client.KnowledgeBase()
        artifact.SetCoreGRRKnowledgeBaseValues(kb, aff4_object)
        aff4_object.Set(aff4_object.Schema.KNOWLEDGE_BASE, kb)

      # Make sure we do not actually close the object here - we only want to
      # sync back its attributes, not run any finalization code.
      aff4_object.Flush()
      if aff4_type == aff4_grr.VFSGRRClient:
        index = client_index.CreateClientIndex(token=self.token)
        index.AddClient(aff4_object)
def AddFile(self, fd):
  """Adds a file to the hash file store.

  We take a file in the client space:
    aff4:/C.123123123/fs/os/usr/local/blah

  Hash it, update the hash in the original file if it's different to the one
  calculated on the client, and copy the original AFF4 object to

    aff4:/files/hash/generic/sha256/123123123 (canonical reference)

  We then create symlinks for all other hash types:

    aff4:/files/hash/generic/sha1/345345345
    aff4:/files/hash/generic/md5/456456456
    aff4:/files/hash/pecoff/md5/aaaaaaaa (only for PEs)
    aff4:/files/hash/pecoff/sha1/bbbbbbbb (only for PEs)

  When present in PE files, the signing data (revision, cert_type,
  certificate) is added to the original object.

  This can't be done simply in the FileStore.Write() method with fixed hash
  buffer sizes because the authenticode hashes need to track hashing of
  different-sized regions based on the signature information.

  Args:
    fd: File open for reading.

  Raises:
    IOError: If there was an error writing the file.
  """
  hashes = self._HashFile(fd)

  # The empty file is very common, we don't keep the back references for it
  # in the DB since it just takes up too much space.
  empty_hash = ("e3b0c44298fc1c149afbf4c8996fb924"
                "27ae41e4649b934ca495991b7852b855")
  if hashes.sha256 == empty_hash:
    return

  # Update the hashes field now that we have calculated them all.
  fd.Set(fd.Schema.HASH, hashes)
  fd.Flush()

  if data_store.RelationalDBWriteEnabled():
    client_id, vfs_path = fd.urn.Split(2)
    path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
    path_info = rdf_objects.PathInfo(
        path_type=path_type, components=components, hash_entry=hashes)
    data_store.REL_DB.WritePathInfos(client_id, [path_info])

  # sha256 is the canonical location.
  canonical_urn = self.PATH.Add("generic/sha256").Add(str(hashes.sha256))
  if not list(aff4.FACTORY.Stat(canonical_urn)):
    aff4.FACTORY.Copy(fd.urn, canonical_urn)
    # Remove the STAT entry, it makes no sense to copy it between clients.
    with aff4.FACTORY.Open(
        canonical_urn, mode="rw", token=self.token) as new_fd:
      new_fd.Set(new_fd.Schema.STAT(None))

  self._AddToIndex(canonical_urn, fd.urn)

  for hash_type, hash_digest in hashes.ListSetFields():
    # Determine fingerprint type.
    hash_type = hash_type.name
    # No need to create a symlink for sha256, it's the canonical location.
    if hash_type == "sha256":
      continue

    hash_digest = str(hash_digest)
    fingerprint_type = "generic"
    if hash_type.startswith("pecoff_"):
      fingerprint_type = "pecoff"
      hash_type = hash_type[len("pecoff_"):]
    if hash_type not in self.HASH_TYPES[fingerprint_type]:
      continue

    file_store_urn = self.PATH.Add(fingerprint_type).Add(hash_type).Add(
        hash_digest)

    with aff4.FACTORY.Create(
        file_store_urn, aff4.AFF4Symlink, token=self.token) as symlink:
      symlink.Set(symlink.Schema.SYMLINK_TARGET, canonical_urn)

  # We do not want to be externally written here.
  return None
def testValidateEmptyComponent(self):
  with self.assertRaisesRegexp(ValueError, "Empty"):
    rdf_objects.PathInfo(components=["foo", "", "bar"])
def testValidateDoubleDotComponent(self):
  with self.assertRaisesRegexp(ValueError, "Incorrect"):
    rdf_objects.PathInfo(components=["..", "foo", "bar"])
def testUpdateFromValidatesComponents(self):
  with self.assertRaises(ValueError):
    rdf_objects.PathInfo(components=["usr", "local", "bin"]).UpdateFrom(
        rdf_objects.PathInfo(components=["usr", "local", "bin", "protoc"]))
def testUpdateFromValidatesType(self):
  with self.assertRaises(TypeError):
    rdf_objects.PathInfo(components=["usr", "local", "bin"]).UpdateFrom(
        "/usr/local/bin")
def testGetAncestorsRoot(self):
  path_info = rdf_objects.PathInfo(components=["foo"])

  results = list(path_info.GetAncestors())
  self.assertEqual(len(results), 1)
  self.assertEqual(results[0].components, [])
def testGetAncestorsEmpty(self):
  path_info = rdf_objects.PathInfo(components=[], directory=True)
  self.assertEqual(list(path_info.GetAncestors()), [])