def CreateFileVersion(client_id, path, content=b"", timestamp=None, token=None):
  """Add a new version for a file."""
  if timestamp is None:
    timestamp = rdfvalue.RDFDatetime.Now()

  with test_lib.FakeTime(timestamp):
    with aff4.FACTORY.Create(
        client_id.Add(path), aff4_type=aff4_grr.VFSFile, mode="w",
        token=token) as fd:
      fd.Write(content)
      fd.Set(fd.Schema.CONTENT_LAST, rdfvalue.RDFDatetime.Now())

    if data_store.RelationalDBWriteEnabled():
      path_type, components = rdf_objects.ParseCategorizedPath(path)

      path_info = rdf_objects.PathInfo()
      path_info.path_type = path_type
      path_info.components = components
      path_info.directory = False
      data_store.REL_DB.WritePathInfos(client_id.Basename(), [path_info])

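Every snippet in this section funnels a VFS path such as "fs/os/var/log/syslog" through rdf_objects.ParseCategorizedPath to obtain a (path_type, components) pair. As a rough, purely illustrative sketch of that mapping (inferred from the prefix checks and path-type branches in the handlers below; the real function returns PathInfo.PathType enum values, not strings, and its exact error message may differ):

def ParseCategorizedPathSketch(path):
  """Illustrative approximation of the categorized-path convention."""
  components = tuple(c for c in path.split("/") if c)
  if components[0:2] == ("fs", "os"):
    return "OS", components[2:]
  elif components[0:2] == ("fs", "tsk"):
    return "TSK", components[2:]
  elif components[0:2] == ("fs", "ntfs"):  # assumed: present in newer GRR versions
    return "NTFS", components[2:]
  elif components[0:1] == ("registry",):
    return "REGISTRY", components[1:]
  elif components[0:1] == ("temp",):
    return "TEMP", components[1:]
  else:
    # Uncategorized paths such as "fs" raise ValueError, which some handlers
    # below catch explicitly.
    raise ValueError("Incorrect path: %r" % path)
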
def _GenerateContent(self, client_id, start_paths, timestamp, path_prefix):
  client_paths = []
  for start_path in start_paths:
    path_type, components = rdf_objects.ParseCategorizedPath(start_path)
    for pi in data_store.REL_DB.ListDescendentPathInfos(
        client_id, path_type, components):
      if pi.directory:
        continue

      client_paths.append(db.ClientPath.FromPathInfo(client_id, pi))

  archive_generator = utils.StreamingZipGenerator(
      compression=zipfile.ZIP_DEFLATED)
  for chunk in file_store.StreamFilesChunks(
      client_paths, max_timestamp=timestamp):
    if chunk.chunk_index == 0:
      content_path = os.path.join(path_prefix, chunk.client_path.vfs_path)
      # TODO(user): Export meaningful file metadata.
      st = os.stat_result((0o644, 0, 0, 0, 0, 0, chunk.total_size, 0, 0, 0))
      yield archive_generator.WriteFileHeader(content_path, st=st)

    yield archive_generator.WriteFileChunk(chunk.data)

    if chunk.chunk_index == chunk.total_chunks - 1:
      yield archive_generator.WriteFileFooter()

  yield archive_generator.Close()

def Handle(self, args, token=None):
  ValidateVfsPath(args.file_path)

  path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)

  client_path = db.ClientPath(str(args.client_id), path_type, components)

  # TODO: Raise FileNotFoundError if the file does not exist in VFS.
  try:
    file_obj = file_store.OpenFile(client_path, max_timestamp=args.timestamp)
  except file_store.FileHasNoContentError:
    raise FileContentNotFoundError(args.client_id, path_type, components,
                                   args.timestamp)

  size = max(0, file_obj.size - args.offset)
  if args.length and args.length < size:
    size = args.length

  generator = self._GenerateFile(file_obj, args.offset, size)

  return api_call_handler_base.ApiBinaryStream(
      filename=components[-1],
      content_generator=generator,
      content_length=size)

def Touch(self, vfs_path, content=b""):
  path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
  client_path = db.ClientPath(
      client_id=self.client_id.Basename(),
      path_type=path_type,
      components=components)
  vfs_test_lib.CreateFile(client_path, content=content)

def _WrapContentGenerator(self, generator, args, username):
  if args.file_path:
    path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)
    vfs_file_ref = rdf_objects.VfsFileReference(
        client_id=args.client_id,
        path_type=path_type,
        path_components=components)
  else:
    vfs_file_ref = rdf_objects.VfsFileReference(client_id=args.client_id)

  object_reference = rdf_objects.ObjectReference(
      reference_type=rdf_objects.ObjectReference.Type.VFS_FILE,
      vfs_file=vfs_file_ref)
  try:
    for item in generator:
      yield item

    notification.Notify(
        username,
        rdf_objects.UserNotification.Type.TYPE_FILE_ARCHIVE_GENERATED,
        "Downloaded an archive of folder %s from client %s." %
        (args.file_path, args.client_id), object_reference)
  except Exception as e:
    notification.Notify(
        username,
        rdf_objects.UserNotification.Type.TYPE_FILE_ARCHIVE_GENERATION_FAILED,
        "Archive generation failed for folder %s on client %s: %s" %
        (args.file_path, args.client_id, e), object_reference)

    raise

def Handle(self, args, token=None):
  ValidateVfsPath(args.file_path)

  path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)

  client_path = db.ClientPath(str(args.client_id), path_type, components)

  try:
    fd = file_store.OpenFile(client_path, max_timestamp=args.timestamp)
  except file_store.FileHasNoContentError:
    raise FileContentNotFoundError(args.client_id, path_type, components,
                                   args.timestamp)

  fd.seek(args.offset)

  # No need to protect against args.length == 0 case and large files:
  # file_store logic has all necessary checks in place.
  byte_content = fd.read(args.length or None)

  if args.encoding:
    encoding = args.encoding.name.lower()
  else:
    encoding = ApiGetFileTextArgs.Encoding.UTF_8.name.lower()

  text_content = self._Decode(encoding, byte_content)

  return ApiGetFileTextResult(total_size=fd.size, content=text_content)

def GetFileHashEntry(fd):
  """Returns an `rdf_crypto.Hash` instance for given AFF4 file descriptor."""
  # Hash file store is not migrated to RELDB just yet, hence the first check.
  client_id, vfs_path = fd.urn.Split(2)
  path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
  path_info = data_store.REL_DB.ReadPathInfo(client_id, path_type, components)
  return path_info.hash_entry

def CreateFileVersion(client_id, path, content=b"", timestamp=None):
  """Add a new version for a file."""
  if timestamp is None:
    timestamp = rdfvalue.RDFDatetime.Now()

  with test_lib.FakeTime(timestamp):
    path_type, components = rdf_objects.ParseCategorizedPath(path)
    client_path = db.ClientPath(client_id, path_type, components)
    vfs_test_lib.CreateFile(client_path, content=content)

def Handle(self, args, token=None):
  decoder = decoders.FACTORY.Create(args.decoder_name)

  path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)
  client_path = db.ClientPath(str(args.client_id), path_type, components)

  fd = file_store.OpenFile(client_path)
  return api_call_handler_base.ApiBinaryStream(
      filename=client_path.components[-1],
      content_generator=decoder.Decode(fd))

def CreateFileVersions(self, client_id, file_path):
  """Add a new version for a file."""
  path_type, components = rdf_objects.ParseCategorizedPath(file_path)
  client_path = db.ClientPath(client_id, path_type, components)

  with test_lib.FakeTime(self.time_1):
    vfs_test_lib.CreateFile(client_path, "Hello World".encode("utf-8"))

  with test_lib.FakeTime(self.time_2):
    vfs_test_lib.CreateFile(client_path, "Goodbye World".encode("utf-8"))

def Handle(self, args, token=None):
  ValidateVfsPath(args.file_path)

  if args.timestamp:
    age = args.timestamp
  else:
    age = aff4.ALL_TIMES

  file_obj = aff4.FACTORY.Open(
      args.client_id.ToClientURN().Add(args.file_path),
      mode="r",
      age=age,
      token=token)

  if data_store.RelationalDBReadEnabled(category="vfs"):
    # These are not really "files" so they cannot be stored in the database
    # but they still can be queried so we need to return something. Sometimes
    # they contain a trailing slash so we need to take care of that.
    #
    # TODO(hanuszczak): Require VFS paths to be normalized so that trailing
    # slash is either forbidden or mandatory.
    if args.file_path.endswith("/"):
      args.file_path = args.file_path[:-1]
    if args.file_path in ["fs", "registry", "temp", "fs/os", "fs/tsk"]:
      api_file = ApiFile()
      api_file.name = api_file.path = args.file_path
      api_file.is_directory = True
      return ApiGetFileDetailsResult(file=api_file)

    path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)

    # TODO(hanuszczak): The tests passed even without support for timestamp
    # filtering. The test suite should be probably improved in that regard.
    path_info = data_store.REL_DB.ReadPathInfo(
        str(args.client_id), path_type, components, timestamp=args.timestamp)

    if path_info:
      stat_entry = path_info.stat_entry
      hash_entry = path_info.hash_entry
    else:
      stat_entry = rdf_client.StatEntry()
      hash_entry = rdf_crypto.Hash()
  else:
    stat_entry = None
    hash_entry = None

  return ApiGetFileDetailsResult(
      file=ApiFile().InitFromAff4Object(
          file_obj,
          stat_entry=stat_entry,
          hash_entry=hash_entry,
          with_details=True))

def GetUrnHashEntry(urn, token=None):
  """Returns an `rdf_crypto.Hash` instance for given URN of an AFF4 file."""
  if data_store.RelationalDBReadEnabled(category="vfs"):
    client_id, vfs_path = urn.Split(2)

    path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
    path_info = data_store.REL_DB.ReadPathInfo(client_id, path_type,
                                               components)
    return path_info.hash_entry
  else:
    with aff4.FACTORY.Open(urn, token=token) as fd:
      return GetFileHashEntry(fd)

def CreateFolder(client_id, path, timestamp):
  """Creates a VFS folder."""
  with test_lib.FakeTime(timestamp):
    path_type, components = rdf_objects.ParseCategorizedPath(path)

    path_info = rdf_objects.PathInfo()
    path_info.path_type = path_type
    path_info.components = components
    path_info.directory = True
    data_store.REL_DB.WritePathInfos(client_id, [path_info])

def Handle(self, args, context=None):
  if not args.hunt_id:
    raise ValueError("ApiGetHuntFileArgs.hunt_id can't be unset")

  if not args.client_id:
    raise ValueError("ApiGetHuntFileArgs.client_id can't be unset")

  if not args.vfs_path:
    raise ValueError("ApiGetHuntFileArgs.vfs_path can't be unset")

  if not args.timestamp:
    raise ValueError("ApiGetHuntFileArgs.timestamp can't be unset")

  api_vfs.ValidateVfsPath(args.vfs_path)

  path_type, components = rdf_objects.ParseCategorizedPath(args.vfs_path)
  expected_client_path = db.ClientPath(
      str(args.client_id), path_type, components)

  results = data_store.REL_DB.ReadHuntResults(
      str(args.hunt_id),
      offset=0,
      count=self.MAX_RECORDS_TO_CHECK,
      with_timestamp=args.timestamp)
  for item in results:
    try:
      # Do not pass the client id we got from the caller. This will
      # get filled automatically from the hunt results and we check
      # later that the aff4_path we get is the same as the one that
      # was requested.
      client_path = export.CollectionItemToClientPath(item, client_id=None)
    except export.ItemNotExportableError:
      continue

    if client_path != expected_client_path:
      continue

    try:
      # TODO(user): this effectively downloads the latest version of
      # the file and always disregards the timestamp. Reconsider this logic
      # after AFF4 implementation is gone. We also most likely don't need
      # the MAX_RECORDS_TO_CHECK logic in the new implementation.
      file_obj = file_store.OpenFile(client_path)
      return api_call_handler_base.ApiBinaryStream(
          "%s_%s" % (args.client_id, os.path.basename(file_obj.Path())),
          content_generator=self._GenerateFile(file_obj),
          content_length=file_obj.size)
    except (file_store.FileHasNoContentError, file_store.FileNotFoundError):
      break

  raise HuntFileNotFoundError(
      "File %s with timestamp %s and client %s "
      "wasn't found among the results of hunt %s" %
      (args.vfs_path, args.timestamp, args.client_id, args.hunt_id))

def _InitVfsUrnGroup(self, vfs_urns):
  """Writes initial path information for a group of VFS URNs."""
  path_infos = dict()

  for vfs_urn in vfs_urns:
    client_id, vfs_path = vfs_urn.Split(2)

    path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
    path_info = rdf_objects.PathInfo(
        path_type=path_type, components=components)
    path_infos.setdefault(client_id, []).append(path_info)

  data_store.REL_DB.MultiInitPathInfos(path_infos)

def CreateFileVersions(self, client_id, file_path):
  """Add a new version for a file."""
  path_type, components = rdf_objects.ParseCategorizedPath(file_path)
  client_path = db.ClientPath(client_id.Basename(), path_type, components)

  token = access_control.ACLToken(username="******")

  with test_lib.FakeTime(self.time_1):
    vfs_test_lib.CreateFile(
        client_path, "Hello World".encode("utf-8"), token=token)

  with test_lib.FakeTime(self.time_2):
    vfs_test_lib.CreateFile(
        client_path, "Goodbye World".encode("utf-8"), token=token)

def MigrateClient(self, client_urn):
  """Migrates entire VFS of given client to the relational data store."""
  vfs = ListVfs(client_urn)

  path_infos = []

  for vfs_urn in vfs:
    _, vfs_path = vfs_urn.Split(2)
    path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

    path_info = rdf_objects.PathInfo(
        path_type=path_type, components=components)
    path_infos.append(path_info)

  data_store.REL_DB.InitPathInfos(client_urn.Basename(), path_infos)

  for vfs_group in utils.Grouper(vfs, self.vfs_group_size):
    stat_entries = dict()
    hash_entries = dict()

    for fd in aff4.FACTORY.MultiOpen(vfs_group, age=aff4.ALL_TIMES):
      _, vfs_path = fd.urn.Split(2)
      path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)
      path_info = rdf_objects.PathInfo(
          path_type=path_type, components=components)

      for stat_entry in fd.GetValuesForAttribute(fd.Schema.STAT):
        stat_path_info = path_info.Copy()
        stat_path_info.timestamp = stat_entry.age
        stat_entries[stat_path_info] = stat_entry

      for hash_entry in fd.GetValuesForAttribute(fd.Schema.HASH):
        hash_path_info = path_info.Copy()
        hash_path_info.timestamp = hash_entry.age
        hash_entries[hash_path_info] = hash_entry

    data_store.REL_DB.MultiWritePathHistory(client_urn.Basename(),
                                            stat_entries, hash_entries)

def testIsDirectoryFlag(self):
  # Set up a directory.
  dir_path = "fs/os/Random/Directory"
  path_type, components = rdf_objects.ParseCategorizedPath(dir_path)
  client_path = db.ClientPath(self.client_id, path_type, components)
  vfs_test_lib.CreateDirectory(client_path)

  args = vfs_plugin.ApiGetFileDetailsArgs(
      client_id=self.client_id, file_path=self.file_path)
  result = self.handler.Handle(args, context=self.context)
  self.assertFalse(result.file.is_directory)

  args = vfs_plugin.ApiGetFileDetailsArgs(
      client_id=self.client_id, file_path=dir_path)
  result = self.handler.Handle(args, context=self.context)
  self.assertTrue(result.file.is_directory)

def Handle(self, args, token=None):
  ValidateVfsPath(args.file_path)

  try:
    path_type, components = rdf_objects.ParseCategorizedPath(
        args.file_path.rstrip("/"))
  except ValueError:
    # If the path does not point to a file (i.e. "fs"), just return an
    # empty response.
    return ApiGetFileVersionTimesResult(times=[])

  history = data_store.REL_DB.ReadPathInfoHistory(
      str(args.client_id), path_type, components)
  times = reversed([pi.timestamp for pi in history])

  return ApiGetFileVersionTimesResult(times=times)

def Handle(self, args, token=None):
  ValidateVfsPath(args.file_path)

  path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)

  client_path = db.ClientPath(str(args.client_id), path_type, components)
  file_obj = file_store.OpenFile(client_path, max_timestamp=args.timestamp)

  size = max(0, file_obj.size - args.offset)
  if args.length and args.length < size:
    size = args.length

  generator = self._GenerateFile(file_obj, args.offset, size)

  return api_call_handler_base.ApiBinaryStream(
      filename=components[-1],
      content_generator=generator,
      content_length=size)

def _CreateFile(self, client_id, vfs_path, content):
  digest = hashlib.sha256(content).digest()
  path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

  path_info = rdf_objects.PathInfo()
  path_info.path_type = path_type
  path_info.components = components

  blob_id = rdf_objects.BlobID.FromSerializedBytes(digest)
  data_store.BLOBS.WriteBlobs({blob_id: content})
  blob_ref = rdf_objects.BlobReference(
      offset=0, size=len(content), blob_id=blob_id)
  hash_id = file_store.AddFileWithUnknownHash(
      db.ClientPath.FromPathInfo(client_id, path_info), [blob_ref])
  path_info.hash_entry.sha256 = hash_id.AsBytes()

  data_store.REL_DB.WritePathInfos(client_id, [path_info])

def _FindPathspec(self, args):
  path_type, components = rdf_objects.ParseCategorizedPath(
      args.file_path.rstrip("/"))

  components_copy = components[:]
  all_components = []
  while components_copy:
    all_components.append(components_copy)
    components_copy = components_copy[:-1]

  res = data_store.REL_DB.ReadPathInfos(
      str(args.client_id), path_type, all_components)

  for k in sorted(res, key=len, reverse=True):
    path_info = res[k]
    if path_info is None:
      raise FileNotFoundError(args.client_id, path_type, components)
    if path_info.stat_entry and path_info.stat_entry.pathspec:
      ps = path_info.stat_entry.pathspec

      if len(k) < len(components):
        new_path = utils.JoinPath(*components[len(k):])
        ps.Append(
            rdf_paths.PathSpec(path=new_path, pathtype=ps.last.pathtype))
      return ps

  # We don't have any pathspec in the database so we just send the path we
  # have with the correct path type and hope for the best.
  pathspec = rdf_paths.PathSpec(path="/" + "/".join(components))
  if path_type == rdf_objects.PathInfo.PathType.TSK:
    pathspec.pathtype = pathspec.PathType.TSK
  elif path_type == rdf_objects.PathInfo.PathType.NTFS:
    pathspec.pathtype = pathspec.PathType.NTFS
  elif path_type == rdf_objects.PathInfo.PathType.OS:
    pathspec.pathtype = pathspec.PathType.OS
  elif path_type == rdf_objects.PathInfo.PathType.REGISTRY:
    pathspec.pathtype = pathspec.PathType.REGISTRY
  elif path_type == rdf_objects.PathInfo.PathType.TEMP:
    pathspec.pathtype = pathspec.PathType.TMPFILE
  else:
    raise ValueError("Invalid path_type: %r" % path_type)

  return pathspec

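The `while components_copy` loop above enumerates the requested path and every ancestor prefix, longest first, so the handler can fall back to the deepest ancestor that already has a stored pathspec and append the remaining components to it. A minimal standalone sketch of that enumeration (plain tuples instead of GRR's component lists, helper name hypothetical, purely illustrative):

def EnumeratePrefixes(components):
  """Returns the components and all ancestor prefixes, longest first."""
  prefixes = []
  current = tuple(components)
  while current:
    prefixes.append(current)
    current = current[:-1]
  return prefixes


# For "fs/os/var/log/syslog" the parsed components are ("var", "log", "syslog"),
# so ReadPathInfos would be queried with all of these candidates:
assert EnumeratePrefixes(("var", "log", "syslog")) == [
    ("var", "log", "syslog"),
    ("var", "log"),
    ("var",),
]
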
def _CreateFile(self, path, content, hashing=False):
  with aff4.FACTORY.Create(
      path, aff4.AFF4MemoryStream, token=self.token) as fd:
    fd.Write(content)

    if hashing:
      digest = hashlib.sha256(content).digest()
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(sha256=digest))

      if data_store.RelationalDBWriteEnabled():
        client_id, vfs_path = path.Split(2)
        path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

        path_info = rdf_objects.PathInfo()
        path_info.path_type = path_type
        path_info.components = components
        path_info.hash_entry.sha256 = digest
        data_store.REL_DB.WritePathInfos(client_id, [path_info])

def testIsDirectoryFlag(self):
  # Set up a directory.
  dir_path = "fs/os/Random/Directory"
  path_type, components = rdf_objects.ParseCategorizedPath(dir_path)
  client_path = db.ClientPath(self.client_id.Basename(), path_type, components)
  token = access_control.ACLToken(username="******")
  vfs_test_lib.CreateDirectory(client_path, token=token)

  args = vfs_plugin.ApiGetFileDetailsArgs(
      client_id=self.client_id, file_path=self.file_path)
  result = self.handler.Handle(args, token=self.token)
  self.assertFalse(result.file.is_directory)

  args = vfs_plugin.ApiGetFileDetailsArgs(
      client_id=self.client_id, file_path=dir_path)
  result = self.handler.Handle(args, token=self.token)
  self.assertTrue(result.file.is_directory)

def CreateFolder(client_id, path, timestamp, token=None):
  """Creates a VFS folder."""
  with test_lib.FakeTime(timestamp):
    with aff4.FACTORY.Create(
        client_id.Add(path),
        aff4_type=aff4_standard.VFSDirectory,
        mode="w",
        token=token) as _:
      pass

    if data_store.RelationalDBWriteEnabled():
      path_type, components = rdf_objects.ParseCategorizedPath(path)

      path_info = rdf_objects.PathInfo()
      path_info.path_type = path_type
      path_info.components = components
      path_info.directory = True
      data_store.REL_DB.WritePathInfos(client_id.Basename(), [path_info])

def Handle(self, args, token=None):
  path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)

  path_info = data_store.REL_DB.ReadPathInfo(
      str(args.client_id), path_type, components)
  if (not path_info or not path_info.stat_entry or
      not path_info.stat_entry.pathspec):
    raise FileNotFoundError("Unable to download file %s." % args.file_path)

  flow_args = transfer.MultiGetFileArgs(
      pathspecs=[path_info.stat_entry.pathspec])
  flow_id = flow.StartFlow(
      client_id=str(args.client_id),
      flow_cls=transfer.MultiGetFile,
      flow_args=flow_args,
      creator=token.username)

  return ApiUpdateVfsFileContentResult(operation_id=flow_id)

def _MigrateVfsUrnGroup(self, vfs_urns):
  """Migrates history of given group of VFS URNs."""
  client_path_histories = dict()

  for fd in aff4.FACTORY.MultiOpen(vfs_urns, age=aff4.ALL_TIMES):
    client_id, vfs_path = fd.urn.Split(2)
    path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

    client_path = db.ClientPath(client_id, path_type, components)
    client_path_history = db.ClientPathHistory()

    for stat_entry in fd.GetValuesForAttribute(fd.Schema.STAT):
      client_path_history.AddStatEntry(stat_entry.age, stat_entry)

    for hash_entry in fd.GetValuesForAttribute(fd.Schema.HASH):
      client_path_history.AddHashEntry(hash_entry.age, hash_entry)

    client_path_histories[client_path] = client_path_history

  data_store.REL_DB.MultiWritePathHistory(client_path_histories)

def Handle(self, args, token=None):
  result = ApiGetFileDecodersResult()

  path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)
  client_path = db.ClientPath(
      client_id=str(args.client_id),
      path_type=path_type,
      components=components)

  for decoder_name in decoders.FACTORY.Names():
    decoder = decoders.FACTORY.Create(decoder_name)

    filedesc = file_store.OpenFile(client_path)
    filectx = context.NullContext(filedesc)

    with filectx as filedesc:
      if decoder.Check(filedesc):
        result.decoder_names.append(decoder_name)

  return result

def _WrapContentGenerator(self, generator, args, username):
  try:
    for item in generator:
      yield item
  except Exception as e:
    path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)
    vfs_file_ref = rdf_objects.VfsFileReference(
        client_id=args.client_id,
        path_type=path_type,
        path_components=components)
    object_reference = rdf_objects.ObjectReference(
        reference_type=rdf_objects.ObjectReference.Type.VFS_FILE,
        vfs_file=vfs_file_ref)

    # Note: format arguments are ordered path-first to match the message text.
    notification.Notify(
        username,
        rdf_objects.UserNotification.Type.TYPE_FILE_BLOB_FETCH_FAILED,
        "File blob fetch failed for path %s on client %s: %s" %
        (args.file_path, args.client_id, e), object_reference)
    raise

def _CreateFile(self, path, content, hashing=False,
                aff4_type=aff4.AFF4MemoryStream):
  if hashing:
    digest = hashlib.sha256(content).digest()
  else:
    digest = None

  if data_store.RelationalDBReadEnabled("filestore"):
    self.assertTrue(data_store.RelationalDBWriteEnabled())
    self.assertTrue(hashing)
  else:
    with aff4.FACTORY.Create(path, aff4_type, token=self.token) as fd:
      fd.Write(content)
      if digest:
        fd.Set(fd.Schema.HASH, rdf_crypto.Hash(sha256=digest))

  if data_store.RelationalDBWriteEnabled() and hashing:
    client_id, vfs_path = path.Split(2)
    path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

    path_info = rdf_objects.PathInfo()
    path_info.path_type = path_type
    path_info.components = components

    blob_id = rdf_objects.BlobID.FromBytes(digest)
    data_store.BLOBS.WriteBlobs({blob_id: content})
    blob_ref = rdf_objects.BlobReference(
        offset=0, size=len(content), blob_id=blob_id)
    hash_id = file_store.AddFileWithUnknownHash(
        db.ClientPath.FromPathInfo(client_id, path_info), [blob_ref])
    path_info.hash_entry.sha256 = hash_id.AsBytes()

    data_store.REL_DB.WritePathInfos(client_id, [path_info])
