def Handle(self, args, token=None):
  ValidateVfsPath(args.file_path)

  if args.timestamp:
    age = args.timestamp
  else:
    age = aff4.ALL_TIMES

  file_obj = aff4.FACTORY.Open(
      args.client_id.ToClientURN().Add(args.file_path),
      mode="r",
      age=age,
      token=token)

  if data_store.RelationalDBReadEnabled(category="vfs"):
    # These are not really "files" so they cannot be stored in the database
    # but they still can be queried so we need to return something. Sometimes
    # they contain a trailing slash so we need to take care of that.
    #
    # TODO(hanuszczak): Require VFS paths to be normalized so that trailing
    # slash is either forbidden or mandatory.
    if args.file_path.endswith("/"):
      args.file_path = args.file_path[:-1]

    if args.file_path in ["fs", "registry", "temp", "fs/os", "fs/tsk"]:
      api_file = ApiFile()
      api_file.name = api_file.path = args.file_path
      api_file.is_directory = True
      return ApiGetFileDetailsResult(file=api_file)

    path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)

    # TODO(hanuszczak): The tests passed even without support for timestamp
    # filtering. The test suite should probably be improved in that regard.
    path_id = rdf_objects.PathID(components)
    path_info = data_store.REL_DB.FindPathInfoByPathID(
        str(args.client_id), path_type, path_id, timestamp=args.timestamp)

    if path_info:
      stat_entry = path_info.stat_entry
      hash_entry = path_info.hash_entry
    else:
      stat_entry = rdf_client.StatEntry()
      hash_entry = rdf_crypto.Hash()
  else:
    stat_entry = None
    hash_entry = None

  return ApiGetFileDetailsResult(file=ApiFile().InitFromAff4Object(
      file_obj,
      stat_entry=stat_entry,
      hash_entry=hash_entry,
      with_details=True))

def GetUrnHashEntry(urn, token=None):
  """Returns an `rdf_crypto.Hash` instance for given URN of an AFF4 file."""
  if data_store.RelationalDBReadEnabled(category="vfs"):
    client_id, vfs_path = urn.Split(2)
    path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
    path_id = rdf_objects.PathID(components)

    path_info = data_store.REL_DB.FindPathInfoByPathID(client_id, path_type,
                                                       path_id)
    return path_info.hash_entry
  else:
    with aff4.FACTORY.Open(urn, token=token) as fd:
      return GetFileHashEntry(fd)

def MigrateClientVfs(client_urn):
  """Migrates entire VFS of given client to the relational data store."""
  vfs = ListVfs(client_urn)

  path_infos = []
  for vfs_urn in vfs:
    _, vfs_path = vfs_urn.Split(2)
    path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
    path_info = rdf_objects.PathInfo(
        path_type=path_type, components=components)
    path_infos.append(path_info)

  data_store.REL_DB.WritePathInfos(client_urn.Basename(), path_infos)

  for vfs_group in utils.Grouper(vfs, _VFS_GROUP_SIZE):
    stat_entries = dict()
    hash_entries = dict()

    for fd in aff4.FACTORY.MultiOpen(vfs_group, age=aff4.ALL_TIMES):
      _, vfs_path = fd.urn.Split(2)
      path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
      path_info = rdf_objects.PathInfo(
          path_type=path_type, components=components)

      for stat_entry in fd.GetValuesForAttribute(fd.Schema.STAT):
        stat_path_info = path_info.Copy()
        stat_path_info.timestamp = stat_entry.age
        stat_entries[stat_path_info] = stat_entry

      for hash_entry in fd.GetValuesForAttribute(fd.Schema.HASH):
        hash_path_info = path_info.Copy()
        hash_path_info.timestamp = hash_entry.age
        hash_entries[hash_path_info] = hash_entry

    data_store.REL_DB.MultiWritePathHistory(client_urn.Basename(),
                                            stat_entries, hash_entries)

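# `MigrateClientVfs` batches URNs through `utils.Grouper` so that
# `aff4.FACTORY.MultiOpen` never has to open the whole VFS at once. As a
# rough illustration of the assumed contract (successive chunks of at most
# `size` items), here is a minimal sketch; it is an illustration of the
# batching idea, not the actual `utils.Grouper` implementation:
import itertools


def _GrouperSketch(iterable, size):
  """Yields successive lists of at most `size` items from `iterable`."""
  iterator = iter(iterable)
  while True:
    chunk = list(itertools.islice(iterator, size))
    if not chunk:
      return
    yield chunk


# Example: list(_GrouperSketch(range(5), 2)) == [[0, 1], [2, 3], [4]]
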
def CreateFolder(client_id, path, timestamp, token=None):
  """Creates a VFS folder."""
  with test_lib.FakeTime(timestamp):
    with aff4.FACTORY.Create(
        client_id.Add(path),
        aff4_type=aff4_standard.VFSDirectory,
        mode="w",
        token=token) as _:
      pass

    if data_store.RelationalDBWriteEnabled():
      path_type, components = rdf_objects.ParseCategorizedPath(path)

      path_info = rdf_objects.PathInfo()
      path_info.path_type = path_type
      path_info.components = components
      path_info.directory = True

      data_store.REL_DB.WritePathInfos(client_id.Basename(), [path_info])

def _CreateFile(self, path, content, hashing=False):
  with aff4.FACTORY.Create(
      path, aff4.AFF4MemoryStream, token=self.token) as fd:
    fd.Write(content)

    if hashing:
      digest = hashlib.sha256(content).digest()
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(sha256=digest))

      if data_store.RelationalDBWriteEnabled():
        client_id, vfs_path = path.Split(2)
        path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

        path_info = rdf_objects.PathInfo()
        path_info.path_type = path_type
        path_info.components = components
        path_info.hash_entry.sha256 = digest
        data_store.REL_DB.WritePathInfos(client_id, [path_info])

def CreateFileVersion(client_id, path, content="", timestamp=None, token=None): """Add a new version for a file.""" if timestamp is None: timestamp = rdfvalue.RDFDatetime.Now() with test_lib.FakeTime(timestamp): with aff4.FACTORY.Create(client_id.Add(path), aff4_type=aff4_grr.VFSFile, mode="w", token=token) as fd: fd.Write(content) fd.Set(fd.Schema.CONTENT_LAST, rdfvalue.RDFDatetime.Now()) if data_store.RelationalDBWriteEnabled(): path_type, components = rdf_objects.ParseCategorizedPath(path) path_info = rdf_objects.PathInfo() path_info.path_type = path_type path_info.components = components path_info.directory = False data_store.REL_DB.WritePathInfos(client_id.Basename(), [path_info])
def CreateFileVersions(self, client_id, file_path):
  """Adds two versions of a file."""
  path_type, components = rdf_objects.ParseCategorizedPath(file_path)

  with test_lib.FakeTime(self.time_1):
    token = access_control.ACLToken(username="******")
    fd = aff4.FACTORY.Create(
        client_id.Add(file_path), aff4.AFF4MemoryStream, mode="w", token=token)
    fd.Write("Hello World")
    fd.Close()

    if data_store.RelationalDBWriteEnabled():
      path_info = rdf_objects.PathInfo()
      path_info.path_type = path_type
      path_info.components = components
      path_info.directory = False
      data_store.REL_DB.WritePathInfos(client_id.Basename(), [path_info])

  with test_lib.FakeTime(self.time_2):
    fd = aff4.FACTORY.Create(
        client_id.Add(file_path), aff4.AFF4MemoryStream, mode="w", token=token)
    fd.Write("Goodbye World")
    fd.Close()

    if data_store.RelationalDBWriteEnabled():
      path_info = rdf_objects.PathInfo()
      path_info.path_type = path_type
      path_info.components = components
      path_info.directory = False
      data_store.REL_DB.WritePathInfos(client_id.Basename(), [path_info])

def AddFile(self, fd):
  """Adds a file to the hash file store.

  We take a file in the client space:

    aff4:/C.123123123/fs/os/usr/local/blah

  Hash it, update the hash in the original file if it's different to the one
  calculated on the client, and copy the original AFF4 object to

    aff4:/files/hash/generic/sha256/123123123 (canonical reference)

  We then create symlinks for all other hash types:

    aff4:/files/hash/generic/sha1/345345345
    aff4:/files/hash/generic/md5/456456456
    aff4:/files/hash/pecoff/md5/aaaaaaaa (only for PEs)
    aff4:/files/hash/pecoff/sha1/bbbbbbbb (only for PEs)

  When present in PE files, the signing data (revision, cert_type,
  certificate) is added to the original object.

  This can't be done simply in the FileStore.Write() method with fixed hash
  buffer sizes because the authenticode hashes need to track hashing of
  different-sized regions based on the signature information.

  Args:
    fd: File open for reading.

  Raises:
    IOError: If there was an error writing the file.
  """
  hashes = self._HashFile(fd)

  # The empty file is very common, we don't keep the back references for it
  # in the DB since it just takes up too much space.
  empty_hash = ("e3b0c44298fc1c149afbf4c8996fb924"
                "27ae41e4649b934ca495991b7852b855")
  if hashes.sha256 == empty_hash:
    return

  # Update the hashes field now that we have calculated them all.
  fd.Set(fd.Schema.HASH, hashes)
  fd.Flush()

  if data_store.RelationalDBWriteEnabled():
    client_id, vfs_path = fd.urn.Split(2)
    path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
    path_info = rdf_objects.PathInfo(
        path_type=path_type, components=components, hash_entry=hashes)
    data_store.REL_DB.WritePathInfos(client_id, [path_info])

  # sha256 is the canonical location.
  canonical_urn = self.PATH.Add("generic/sha256").Add(str(hashes.sha256))
  if not list(aff4.FACTORY.Stat(canonical_urn)):
    aff4.FACTORY.Copy(fd.urn, canonical_urn)
    # Remove the STAT entry, it makes no sense to copy it between clients.
    with aff4.FACTORY.Open(
        canonical_urn, mode="rw", token=self.token) as new_fd:
      new_fd.Set(new_fd.Schema.STAT(None))

  self._AddToIndex(canonical_urn, fd.urn)

  for hash_type, hash_digest in hashes.ListSetFields():
    # Determine fingerprint type.
    hash_type = hash_type.name
    # No need to create a symlink for sha256, it's the canonical location.
    if hash_type == "sha256":
      continue

    hash_digest = str(hash_digest)
    fingerprint_type = "generic"
    if hash_type.startswith("pecoff_"):
      fingerprint_type = "pecoff"
      hash_type = hash_type[len("pecoff_"):]
    if hash_type not in self.HASH_TYPES[fingerprint_type]:
      continue

    file_store_urn = self.PATH.Add(fingerprint_type).Add(hash_type).Add(
        hash_digest)

    with aff4.FACTORY.Create(
        file_store_urn, aff4.AFF4Symlink, token=self.token) as symlink:
      symlink.Set(symlink.Schema.SYMLINK_TARGET, canonical_urn)

  # We do not want to be externally written here.
  return None

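# The `empty_hash` constant used in `AddFile` above is simply the hex
# SHA-256 digest of zero bytes; this can be verified standalone:
import hashlib

assert hashlib.sha256(b"").hexdigest() == ("e3b0c44298fc1c149afbf4c8996fb924"
                                           "27ae41e4649b934ca495991b7852b855")
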
def _HandleRelational(self, args, token=None):
  client_id = args.client_id.ToClientURN()

  if not args.file_path or args.file_path == "/":
    return self._GetRootChildren(args, token=token)

  if args.file_path == "fs":
    return self._GetFilesystemChildren(args)

  path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)
  path_id = rdf_objects.PathID(components)

  child_path_ids = data_store.REL_DB.FindDescendentPathIDs(
      client_id=client_id.Basename(),
      path_type=path_type,
      path_id=path_id,
      max_depth=1)
  child_path_infos = data_store.REL_DB.FindPathInfosByPathIDs(
      client_id=client_id.Basename(),
      path_type=path_type,
      path_ids=child_path_ids).values()

  items = []

  for child_path_info in child_path_infos:
    if args.directories_only and not child_path_info.directory:
      continue

    child_item = ApiFile()
    child_item.name = child_path_info.basename

    if path_type == rdf_objects.PathInfo.PathType.OS:
      prefix = "fs/os/"
    elif path_type == rdf_objects.PathInfo.PathType.TSK:
      prefix = "fs/tsk/"
    elif path_type == rdf_objects.PathInfo.PathType.REGISTRY:
      prefix = "registry/"
    elif path_type == rdf_objects.PathInfo.PathType.TEMP:
      prefix = "temp/"

    child_item.path = prefix + "/".join(child_path_info.components)

    # TODO(hanuszczak): `PathInfo#directory` tells us whether a given path
    # has ever been observed as a directory. Is this what we want here or
    # should we use `st_mode` information instead?
    child_item.is_directory = child_path_info.directory
    child_item.stat = child_path_info.stat_entry

    # The `age` field collides with the RDF `age` pseudo-property, so we use
    # `Set` to assign the right field.
    child_item.Set("age", child_path_info.timestamp)

    items.append(child_item)

  # TODO(hanuszczak): Instead of getting the whole list from the database and
  # then filtering the results we should do the filtering directly in the
  # database query.
  if args.filter:
    pattern = re.compile(args.filter, re.IGNORECASE)
    is_matching = lambda item: pattern.search(item.name)
    items = filter(is_matching, items)

  items.sort(key=lambda item: item.path)

  if args.count:
    items = items[args.offset:args.offset + args.count]
  else:
    items = items[args.offset:]

  return ApiListFilesResult(items=items)

def testParseIncorrect(self):
  with self.assertRaisesRegexp(ValueError, "path"):
    rdf_objects.ParseCategorizedPath("foo/bar")

  with self.assertRaisesRegexp(ValueError, "path"):
    rdf_objects.ParseCategorizedPath("fs")

def testParseTskExtraSlashes(self):
  path_type, components = rdf_objects.ParseCategorizedPath(
      "/fs///tsk/foo///bar")
  self.assertEqual(path_type, rdf_objects.PathInfo.PathType.TSK)
  self.assertEqual(components, ["foo", "bar"])

def testParseOsRoot(self):
  path_type, components = rdf_objects.ParseCategorizedPath("fs/os")
  self.assertEqual(path_type, rdf_objects.PathInfo.PathType.OS)
  self.assertEqual(components, [])

def testParseTemp(self):
  path_type, components = rdf_objects.ParseCategorizedPath(
      "temp/os/registry")
  self.assertEqual(path_type, rdf_objects.PathInfo.PathType.TEMP)
  self.assertEqual(components, ["os", "registry"])

def testParseRegistry(self):
  path_type, components = rdf_objects.ParseCategorizedPath(
      "registry/thud/blargh")
  self.assertEqual(path_type, rdf_objects.PathInfo.PathType.REGISTRY)
  self.assertEqual(components, ["thud", "blargh"])

def testParseTsk(self):
  path_type, components = rdf_objects.ParseCategorizedPath(
      "fs/tsk/quux/norf")
  self.assertEqual(path_type, rdf_objects.PathInfo.PathType.TSK)
  self.assertEqual(components, ["quux", "norf"])

def testParseOs(self):
  path_type, components = rdf_objects.ParseCategorizedPath(
      "fs/os/foo/bar")
  self.assertEqual(path_type, rdf_objects.PathInfo.PathType.OS)
  self.assertEqual(components, ["foo", "bar"])

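# Taken together, the tests above pin down the observable behaviour of
# `rdf_objects.ParseCategorizedPath`: empty components produced by repeated
# slashes are dropped, the leading category ("fs/os", "fs/tsk", "registry",
# "temp") selects the path type, and anything else raises ValueError. A
# minimal sketch consistent with these tests (an illustration, not the
# actual GRR implementation):
def _ParseCategorizedPathSketch(path):
  """Splits a categorized VFS path into a (path type, components) pair."""
  components = [component for component in path.split("/") if component]
  if components[:2] == ["fs", "os"]:
    return rdf_objects.PathInfo.PathType.OS, components[2:]
  elif components[:2] == ["fs", "tsk"]:
    return rdf_objects.PathInfo.PathType.TSK, components[2:]
  elif components[:1] == ["registry"]:
    return rdf_objects.PathInfo.PathType.REGISTRY, components[1:]
  elif components[:1] == ["temp"]:
    return rdf_objects.PathInfo.PathType.TEMP, components[1:]
  else:
    raise ValueError("Incorrect path: '%s'" % path)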