def CreateClientObject(self, vfs_fixture):
  """Make a new client object."""

  # First remove the old fixture just in case it's still there.
  aff4.FACTORY.Delete(self.client_id, token=self.token)

  # Create the fixture at a fixed time.
  with test_lib.FakeTime(self.age):
    if data_store.RelationalDBWriteEnabled():
      # Constructing a client snapshot from the aff4 fixture is only possible
      # using aff4. Using a serialized string instead.
      data_store.REL_DB.WriteClientMetadata(
          self.client_id, fleetspeak_enabled=False)

      snapshot = rdf_objects.ClientSnapshot.FromSerializedString(
          SERIALIZED_CLIENT.decode("hex"))
      snapshot.client_id = self.client_id
      snapshot.knowledge_base.fqdn = "Host%s" % self.client_id

      data_store.REL_DB.WriteClientSnapshot(snapshot)
      client_index.ClientIndex().AddClient(snapshot)

    for path, (aff4_type, attributes) in vfs_fixture:
      path %= self.args

      if data_store.AFF4Enabled():
        aff4_object = aff4.FACTORY.Create(
            self.client_urn.Add(path), aff4_type, mode="rw", token=self.token)

      path_info = None

      if data_store.RelationalDBWriteEnabled():
        components = [
            component for component in path.split("/") if component
        ]
        if (len(components) > 1 and components[0] == "fs" and
            components[1] in ["os", "tsk"]):
          path_info = rdf_objects.PathInfo()
          if components[1] == "os":
            path_info.path_type = rdf_objects.PathInfo.PathType.OS
          else:
            path_info.path_type = rdf_objects.PathInfo.PathType.TSK
          path_info.components = components[2:]
          if aff4_type in [aff4_grr.VFSFile, aff4_grr.VFSMemoryFile]:
            path_info.directory = False
          elif aff4_type == aff4_standard.VFSDirectory:
            path_info.directory = True
          else:
            raise ValueError("Incorrect AFF4 type: %s" % aff4_type)

      for attribute_name, value in iteritems(attributes):
        attribute = aff4.Attribute.PREDICATES[attribute_name]
        if isinstance(value, (bytes, Text)):
          # Interpolate the value.
          value %= self.args

        # Is this supposed to be an RDFValue array?
        if issubclass(attribute.attribute_type, rdf_protodict.RDFValueArray):
          rdfvalue_object = attribute()
          for item in value:
            new_object = rdfvalue_object.rdf_type.FromTextFormat(
                utils.SmartStr(item))
            rdfvalue_object.Append(new_object)

        # It is a text serialized protobuf.
        elif issubclass(attribute.attribute_type, rdf_structs.RDFProtoStruct):
          # Use the alternate constructor - we always write protobufs in
          # textual form.
          rdfvalue_object = attribute.attribute_type.FromTextFormat(
              utils.SmartStr(value))

        elif issubclass(attribute.attribute_type, rdfvalue.RDFInteger):
          rdfvalue_object = attribute(int(value))
        else:
          rdfvalue_object = attribute(value)

        if data_store.AFF4Enabled():
          # If we don't already have a pathspec, try and get one from the
          # stat.
          if aff4_object.Get(aff4_object.Schema.PATHSPEC) is None:
            # If the attribute was a stat, it has a pathspec nested in it.
            # We should add that pathspec as an attribute.
            if attribute.attribute_type == rdf_client_fs.StatEntry:
              stat_object = attribute.attribute_type.FromTextFormat(
                  utils.SmartStr(value))

              if stat_object.pathspec:
                pathspec_attribute = aff4.Attribute(
                    "aff4:pathspec", rdf_paths.PathSpec,
                    "The pathspec used to retrieve "
                    "this object from the client.", "pathspec")

                aff4_object.AddAttribute(pathspec_attribute,
                                         stat_object.pathspec)

        if attribute == "aff4:content":
          content = rdfvalue_object.AsBytes()

          if data_store.AFF4Enabled():
            # For AFF4MemoryStreams we need to call Write() instead of
            # directly setting the contents.
            aff4_object.Write(content)

          if path_info is not None:
            blob_id = rdf_objects.BlobID.FromBlobData(content)
            data_store.BLOBS.WriteBlobs({blob_id: content})
            blob_ref = rdf_objects.BlobReference(
                offset=0, size=len(content), blob_id=blob_id)
            hash_id = file_store.AddFileWithUnknownHash(
                db.ClientPath.FromPathInfo(self.client_id, path_info),
                [blob_ref])
            path_info.hash_entry.num_bytes = len(content)
            path_info.hash_entry.sha256 = hash_id.AsBytes()

        elif data_store.AFF4Enabled():
          aff4_object.AddAttribute(attribute, rdfvalue_object)

        if (isinstance(rdfvalue_object, rdf_client_fs.StatEntry) and
            rdfvalue_object.pathspec.pathtype != "UNSET"):
          if data_store.RelationalDBWriteEnabled():
            path_info = rdf_objects.PathInfo.FromStatEntry(rdfvalue_object)
            data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

      # Populate the KB from the client attributes.
      if aff4_type == aff4_grr.VFSGRRClient:
        if data_store.AFF4Enabled():
          kb = rdf_client.KnowledgeBase()
          artifact.SetCoreGRRKnowledgeBaseValues(kb, aff4_object)
          aff4_object.Set(aff4_object.Schema.KNOWLEDGE_BASE, kb)
          aff4_object.Flush()

          index = client_index.CreateClientIndex(token=self.token)
          index.AddClient(aff4_object)

      if path_info is not None:
        data_store.REL_DB.WritePathInfos(
            client_id=self.client_id, path_infos=[path_info])

      if data_store.AFF4Enabled():
        aff4_object.Flush()
def StoreBlobsAsTmpFile(self, responses):
  """Stores bytes retrieved from the client in the VFS tmp folder."""
  if not responses.success:
    raise flow_base.FlowError(responses.status)

  file_size = 0
  file_hash_from_client = None  # Hash of the last buffer reference.
  blob_refs = []

  smallest_offset = None
  biggest_offset = 0
  for response in responses:
    file_size += response.blob.length
    if smallest_offset is None or response.blob.offset < smallest_offset:
      smallest_offset = response.blob.offset
    if response.blob.offset >= biggest_offset:
      biggest_offset = response.blob.offset
      file_hash_from_client = response.accumulated_hash

    blob_refs.append(
        rdf_objects.BlobReference(
            offset=response.blob.offset,
            size=response.blob.length,
            blob_id=rdf_objects.BlobID.FromSerializedBytes(
                response.blob.data)))

  if file_size < self.args.length:
    self.Log(f"Read fewer bytes than requested ({file_size} < "
             f"{self.args.length}). The file is probably smaller than the "
             "requested read length.")
  elif file_size > self.args.length:
    raise flow_base.FlowError(
        f"Read more bytes than requested ({file_size} > {self.args.length}).")

  # This raw data is not necessarily a file, but any data from the device.
  # We artificially create a filename to refer to it in our file store.
  alphanumeric_only = "".join(c for c in self.args.path if c.isalnum())
  # TODO: Remove client_id from `tmp_filename` when bug is fixed.
  tmp_filename = f"{self.client_id}_{self.rdf_flow.flow_id}_{alphanumeric_only}"
  tmp_filepath = db.ClientPath.Temp(self.client_id, [tmp_filename])

  # Store the blobs under this name in the file store.
  file_hash_from_store = file_store.AddFileWithUnknownHash(
      tmp_filepath, blob_refs, use_external_stores=False)

  # Check whether the file hashes match, and log if they don't.
  file_hash_id_from_client = rdf_objects.SHA256HashID.FromSerializedBytes(
      file_hash_from_client.AsBytes())
  if file_hash_id_from_client != file_hash_from_store:
    logging.warning(
        "Flow %s (%s): mismatch between the file hash id in the file store "
        "(%s) and the one reported by the client (%s)",
        self.rdf_flow.protobuf.flow_id, self.client_id, file_hash_from_store,
        file_hash_from_client)

  path_info = rdf_objects.PathInfo.Temp(components=[tmp_filename])
  path_info.hash_entry.sha256 = file_hash_from_store.AsBytes()
  path_info.hash_entry.num_bytes = file_size
  path_info.hash_entry.source_offset = smallest_offset

  # Store the file reference for this client in the data store.
  data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

  result = rdf_read_low_level.ReadLowLevelFlowResult(path=tmp_filename)
  self.SendReply(result)
def _WriteBuffer(self, responses):
  """Writes the file to the file store once all its blob hashes arrived."""
  index = responses.request_data["index"]
  if index not in self.state.pending_files:
    return

  # Failed to read the file - ignore it.
  if not responses.success:
    self._FileFetchFailed(index, status=responses.status)
    return

  response = responses.First()
  file_tracker = self.state.pending_files.get(index)
  if not file_tracker:
    return

  blob_dict = file_tracker.setdefault("blobs", {})
  blob_index = responses.request_data["blob_index"]
  blob_dict[blob_index] = (response.data, response.length)

  if len(blob_dict) != file_tracker["expected_chunks"]:
    # We need more data before we can write the file.
    return

  # Write the file to the data store.
  stat_entry = file_tracker["stat_entry"]
  path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)

  blob_refs = []
  offset = 0
  for index in sorted(blob_dict):
    digest, size = blob_dict[index]
    blob_refs.append(
        rdf_objects.BlobReference(
            offset=offset,
            size=size,
            blob_id=rdf_objects.BlobID.FromSerializedBytes(digest)))
    offset += size

  hash_obj = file_tracker["hash_obj"]

  client_path = db.ClientPath.FromPathInfo(self.client_id, path_info)
  hash_id = file_store.AddFileWithUnknownHash(
      client_path,
      blob_refs,
      use_external_stores=self.state.use_external_stores)

  # If the hash that we've calculated matches what we got from the client,
  # then simply store the full hash entry.
  # Otherwise store just the hash that we've calculated.
  if hash_id.AsBytes() == hash_obj.sha256:
    path_info.hash_entry = hash_obj
  else:
    path_info.hash_entry.sha256 = hash_id.AsBytes()
  path_info.hash_entry.num_bytes = offset

  data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

  # Save some space.
  del file_tracker["blobs"]
  del file_tracker["hash_list"]

  # File done, remove from the store and close it.
  self._ReceiveFetchedFile(file_tracker)

  self.state.files_fetched += 1

  if not self.state.files_fetched % 100:
    self.Log("Fetched %d of %d files.", self.state.files_fetched,
             self.state.files_to_fetch)
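# The chunk-assembly loop in _WriteBuffer above (and in WriteBuffer/ReadBuffer
# below) follows one recurring pattern: walk the chunk indices in order and
# turn each (digest, size) pair into a BlobReference at a running offset. A
# minimal standalone sketch of that pattern; the helper name is hypothetical
# and not part of GRR.
def _BlobRefsFromBlobDict(blob_dict):
  """Builds contiguous BlobReferences from a {chunk_index: (digest, size)} dict."""
  blob_refs = []
  offset = 0
  for chunk_index in sorted(blob_dict):
    digest, size = blob_dict[chunk_index]
    blob_refs.append(
        rdf_objects.BlobReference(
            offset=offset,
            size=size,
            blob_id=rdf_objects.BlobID.FromSerializedBytes(digest)))
    offset += size
  return blob_refs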
"""Tests for signed-binary DB functionality.""" from __future__ import absolute_import from __future__ import division from __future__ import unicode_literals from grr_response_server.databases import db from grr_response_server.rdfvalues import objects as rdf_objects _test_id1 = rdf_objects.SignedBinaryID( binary_type=rdf_objects.SignedBinaryID.BinaryType.EXECUTABLE, path="linux/test/hello") _test_id2 = rdf_objects.SignedBinaryID( binary_type=rdf_objects.SignedBinaryID.BinaryType.PYTHON_HACK, path="windows/test/hello") _test_references1 = rdf_objects.BlobReferences(items=[ rdf_objects.BlobReference(offset=0, size=2, blob_id=b"\xaa" * 32), rdf_objects.BlobReference(offset=2, size=3, blob_id=b"\xbb" * 32), ]) _test_references2 = rdf_objects.BlobReferences(items=[ rdf_objects.BlobReference(offset=0, size=3, blob_id=b"\xcc" * 32), rdf_objects.BlobReference(offset=3, size=2, blob_id=b"\xdd" * 32), ]) class DatabaseTestSignedBinariesMixin(object): """Mixin that adds tests for signed binary DB functionality.""" def testReadSignedBinaryReferences(self): self.db.WriteSignedBinaryReferences(_test_id1, _test_references1) stored_hash_id, stored_timestamp = self.db.ReadSignedBinaryReferences( _test_id1) self.assertEqual(stored_hash_id, _test_references1)
def testRaisesIfOneOfTwoBlobsIsNotFound(self):
  blob_ref = rdf_objects.BlobReference(
      offset=0, size=0, blob_id=rdf_objects.BlobID.FromBlobData(b""))
  with self.assertRaises(file_store.BlobNotFoundError):
    file_store.AddFileWithUnknownHash(self.client_path,
                                      [self.blob_refs[0], blob_ref])
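# Happy-path counterpart to the test above, sketched on the assumption that
# blobs written via data_store.BLOBS.WriteBlobs (as in the fixture code
# elsewhere in this file set) are visible to the file store; the test name and
# data are illustrative only.
def testSucceedsIfAllBlobsArePresent(self):
  blob_data = b"some blob data"
  blob_id = rdf_objects.BlobID.FromBlobData(blob_data)
  data_store.BLOBS.WriteBlobs({blob_id: blob_data})
  blob_ref = rdf_objects.BlobReference(
      offset=0, size=len(blob_data), blob_id=blob_id)
  hash_id = file_store.AddFileWithUnknownHash(self.client_path, [blob_ref])
  # AddFileWithUnknownHash returns a SHA-256 based hash id (32 bytes), which
  # the flows above compare against the client-reported hash.
  self.assertEqual(len(hash_id.AsBytes()), 32)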
def WriteBuffer(self, responses):
  """Write the hash received to the blob image."""
  index = responses.request_data["index"]
  if index not in self.state.pending_files:
    return

  # Failed to read the file - ignore it.
  if not responses.success:
    self._FileFetchFailed(index)
    return

  response = responses.First()
  file_tracker = self.state.pending_files.get(index)
  if not file_tracker:
    return

  blob_dict = file_tracker.setdefault("blobs", {})
  blob_index = responses.request_data["blob_index"]
  blob_dict[blob_index] = (response.data, response.length)

  if len(blob_dict) != len(file_tracker["hash_list"]):
    # We need more data before we can write the file.
    return

  # Write the file to the data store.
  stat_entry = file_tracker["stat_entry"]
  urn = stat_entry.pathspec.AFF4Path(self.client_urn)

  if data_store.AFF4Enabled():
    with aff4.FACTORY.Create(
        urn, aff4_grr.VFSBlobImage, mode="w", token=self.token) as fd:
      fd.SetChunksize(self.CHUNK_SIZE)
      fd.Set(fd.Schema.STAT(stat_entry))
      fd.Set(fd.Schema.PATHSPEC(stat_entry.pathspec))
      fd.Set(fd.Schema.CONTENT_LAST(rdfvalue.RDFDatetime().Now()))

      for index in sorted(blob_dict):
        digest, length = blob_dict[index]
        fd.AddBlob(rdf_objects.BlobID.FromBytes(digest), length)

  if data_store.RelationalDBEnabled():
    path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)

    blob_refs = []
    offset = 0
    for index in sorted(blob_dict):
      digest, size = blob_dict[index]
      blob_refs.append(
          rdf_objects.BlobReference(
              offset=offset,
              size=size,
              blob_id=rdf_objects.BlobID.FromBytes(digest)))
      offset += size

    hash_obj = file_tracker["hash_obj"]
    client_path = db.ClientPath.FromPathInfo(self.client_id, path_info)
    hash_id = file_store.AddFileWithUnknownHash(
        client_path,
        blob_refs,
        use_external_stores=self.state.use_external_stores)

    # If the hash that we've calculated matches what we got from the client,
    # then simply store the full hash entry.
    # Otherwise store just the hash that we've calculated.
    if hash_id.AsBytes() == hash_obj.sha256:
      path_info.hash_entry = hash_obj
    else:
      path_info.hash_entry.sha256 = hash_id.AsBytes()

    data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

  # Save some space.
  del file_tracker["blobs"]
  del file_tracker["hash_list"]

  # File done, remove from the store and close it.
  self._ReceiveFetchedFile(file_tracker)

  self.state.files_fetched += 1

  if not self.state.files_fetched % 100:
    self.Log("Fetched %d of %d files.", self.state.files_fetched,
             self.state.files_to_fetch)
def ReadBuffer(self, responses):
  """Read the buffer and write to the file."""
  # Did it work?
  if not responses.success:
    return

  response = responses.First()
  if not response:
    raise IOError("Missing hash response.")

  if response.offset <= self.state.max_chunk_number * self.CHUNK_SIZE:
    # Response.data is the hash of the block (32 bytes) and
    # response.length is the length of the block.
    self.state.blobs.append((response.data, response.length))
    self.Log("Received blob hash %s", response.data.encode("hex"))

    # Add one more chunk to the window.
    self.FetchWindow(1)

  if response.offset + response.length >= self.state.file_size:
    # File is complete.
    stat_entry = self.state.stat_entry
    urn = self.state.stat_entry.AFF4Path(self.client_urn)

    # TODO(user): when all the code can read files from REL_DB,
    # protect this with:
    # if not data_store.RelationalDBEnabled():
    if data_store.AFF4Enabled():
      with aff4.FACTORY.Create(
          urn, aff4_grr.VFSBlobImage, token=self.token) as fd:
        fd.SetChunksize(self.CHUNK_SIZE)
        fd.Set(fd.Schema.STAT(stat_entry))

        for data, length in self.state.blobs:
          fd.AddBlob(rdf_objects.BlobID.FromBytes(data), length)

        fd.Set(fd.Schema.CONTENT_LAST, rdfvalue.RDFDatetime.Now())

    if data_store.RelationalDBEnabled():
      path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)

      # Adding the file to the file store requires reading the blob data back
      # from the relational DB.
      blob_refs = []
      offset = 0
      for data, size in self.state.blobs:
        blob_refs.append(
            rdf_objects.BlobReference(
                offset=offset,
                size=size,
                blob_id=rdf_objects.BlobID.FromBytes(data)))
        offset += size

      client_path = db.ClientPath.FromPathInfo(self.client_id, path_info)
      hash_id = file_store.AddFileWithUnknownHash(client_path, blob_refs)
      path_info.hash_entry.sha256 = hash_id.AsBytes()

      data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

    # Save some space.
    del self.state["blobs"]

    self.state.success = True
def CreateClientObject(self, vfs_fixture):
  """Make a new client object."""

  # Constructing a client snapshot from the legacy fixture is hard, so we use
  # a serialized string instead.
  data_store.REL_DB.WriteClientMetadata(
      self.client_id, fleetspeak_enabled=False)

  snapshot = rdf_objects.ClientSnapshot.FromSerializedBytes(
      binascii.unhexlify(SERIALIZED_CLIENT))
  snapshot.client_id = self.client_id
  snapshot.knowledge_base.fqdn = "Host%s" % self.client_id
  # The client version number may affect flow behavior, so it is important to
  # keep it current so that flow tests exercise the most recent logic.
  snapshot.startup_info.client_info.client_version = config.CONFIG[
      "Source.version_numeric"]

  data_store.REL_DB.WriteClientSnapshot(snapshot)
  client_index.ClientIndex().AddClient(snapshot)

  for path, (typ, attributes) in vfs_fixture:
    path %= self.args

    path_info = None

    components = [component for component in path.split("/") if component]
    if (len(components) > 1 and components[0] == "fs" and
        components[1] in ["os", "tsk", "ntfs"]):
      path_info = rdf_objects.PathInfo()
      if components[1] == "os":
        path_info.path_type = rdf_objects.PathInfo.PathType.OS
      elif components[1] == "ntfs":
        path_info.path_type = rdf_objects.PathInfo.PathType.NTFS
      else:
        path_info.path_type = rdf_objects.PathInfo.PathType.TSK
      path_info.components = components[2:]
      if typ == "File":
        path_info.directory = False
      elif typ == "Directory":
        path_info.directory = True
      else:
        raise ValueError("Incorrect object type: %s" % typ)

    for attribute_name in attributes:
      if attribute_name not in ["stat", "content"]:
        raise ValueError("Unknown attribute: " + attribute_name)

    stat = attributes.get("stat", None)
    if stat:
      stat_entry = rdf_client_fs.StatEntry.FromTextFormat(stat % self.args)
      if stat_entry.pathspec.pathtype != "UNSET":
        path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)

    content = attributes.get("content", None)
    if content:
      blob_id = rdf_objects.BlobID.FromBlobData(content)
      data_store.BLOBS.WriteBlobs({blob_id: content})
      blob_ref = rdf_objects.BlobReference(
          offset=0, size=len(content), blob_id=blob_id)
      hash_id = file_store.AddFileWithUnknownHash(
          db.ClientPath.FromPathInfo(self.client_id, path_info), [blob_ref])
      path_info.hash_entry.num_bytes = len(content)
      path_info.hash_entry.sha256 = hash_id.AsBytes()

    if path_info is not None:
      data_store.REL_DB.WritePathInfos(
          client_id=self.client_id, path_infos=[path_info])
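# The content-handling branch above repeats a pattern also used in the legacy
# fixture earlier in this file set: write the raw bytes to the blob store,
# register them with the file store, and record the resulting hash on the
# PathInfo. A minimal sketch of that pattern as a standalone helper; the
# helper name is hypothetical and not part of GRR.
def _WriteFixtureContent(client_id, path_info, content):
  """Writes `content` for `path_info` and fills in its hash entry."""
  blob_id = rdf_objects.BlobID.FromBlobData(content)
  data_store.BLOBS.WriteBlobs({blob_id: content})
  blob_ref = rdf_objects.BlobReference(
      offset=0, size=len(content), blob_id=blob_id)
  hash_id = file_store.AddFileWithUnknownHash(
      db.ClientPath.FromPathInfo(client_id, path_info), [blob_ref])
  path_info.hash_entry.num_bytes = len(content)
  path_info.hash_entry.sha256 = hash_id.AsBytes()
  return hash_id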