def _WriteFileContent(self, response, mutation_pool=None):
  """Writes a transferred file to an AFF4 blob image and, if enabled, REL_DB.

  Args:
    response: An object exposing `stat_entry` and `transferred_file`; the
      latter provides `chunk_size` and `chunks` (each chunk has `offset`,
      `digest` and `length`).
    mutation_pool: Optional mutation pool forwarded to `aff4.FACTORY.Create`.
  """
  stat_entry = response.stat_entry
  urn = stat_entry.pathspec.AFF4Path(self.client_urn)

  blob_image = aff4.FACTORY.Create(
      urn,
      aff4_grr.VFSBlobImage,
      token=self.token,
      mutation_pool=mutation_pool)

  with blob_image:
    transferred = response.transferred_file
    blob_image.SetChunksize(transferred.chunk_size)
    blob_image.Set(blob_image.Schema.STAT, stat_entry)

    # Blobs are appended in file order, hence the sort by offset.
    chunks = sorted(transferred.chunks, key=lambda chunk: chunk.offset)
    for chunk in chunks:
      blob_image.AddBlob(chunk.digest, chunk.length)

    blob_image.Set(blob_image.Schema.CONTENT_LAST, rdfvalue.RDFDatetime.Now())

  if not data_store.RelationalDBWriteEnabled():
    return

  path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)

  # Adding files to filestore requires reading data from RELDB,
  # thus protecting this code with a filestore-read-enabled check.
  if data_store.RelationalDBReadEnabled("filestore"):
    blob_ids = [rdf_objects.BlobID.FromBytes(c.digest) for c in chunks]
    hash_id = file_store.AddFileWithUnknownHash(blob_ids)
    path_info.hash_entry.sha256 = hash_id.AsBytes()

  data_store.REL_DB.WritePathInfos(self.client_id, [path_info])
def CreateFile(client_path, content=b""):
  """Creates a file in datastore-agnostic way.

  The content is stored as a single blob, registered in the file store and
  referenced from a path info written to the relational database.

  Args:
    client_path: A `ClientPath` instance specifying location of the file.
    content: A content to write to the file.
  """
  precondition.AssertType(client_path, db.ClientPath)
  precondition.AssertType(content, bytes)

  size = len(content)
  blob_id = rdf_objects.BlobID.FromBlobData(content)

  pathspec = rdf_paths.PathSpec(
      pathtype=client_path.path_type, path="/".join(client_path.components))
  # 33206 == 0o100666.
  stat_entry = rdf_client_fs.StatEntry(
      pathspec=pathspec, st_mode=33206, st_size=size)

  data_store.BLOBS.WriteBlobs({blob_id: content})

  # The whole file is a single blob starting at offset zero.
  whole_file_ref = rdf_objects.BlobReference(
      size=size, offset=0, blob_id=blob_id)
  hash_id = file_store.AddFileWithUnknownHash(client_path, [whole_file_ref])

  path_info = rdf_objects.PathInfo()
  path_info.path_type = client_path.path_type
  path_info.components = client_path.components
  path_info.hash_entry.num_bytes = size
  path_info.hash_entry.sha256 = hash_id.AsBytes()
  path_info.stat_entry = stat_entry

  data_store.REL_DB.WritePathInfos(client_path.client_id, [path_info])
def _CreateFile(self,
                path,
                content,
                hashing=False,
                aff4_type=aff4.AFF4MemoryStream):
  """Creates a test file in AFF4 and/or the relational filestore.

  Args:
    path: URN of the file; split into client id and VFS path for REL_DB.
    content: Bytes to store.
    hashing: Whether a SHA-256 digest of the content should be recorded.
    aff4_type: AFF4 class used when the file is written through AFF4.
  """
  digest = hashlib.sha256(content).digest() if hashing else None

  if data_store.RelationalDBReadEnabled("filestore"):
    # Reading from the relational filestore only makes sense if the file is
    # also written there, which in turn needs the hash.
    self.assertTrue(data_store.RelationalDBWriteEnabled())
    self.assertTrue(hashing)
  else:
    with aff4.FACTORY.Create(path, aff4_type, token=self.token) as fd:
      fd.Write(content)
      if digest:
        fd.Set(fd.Schema.HASH, rdf_crypto.Hash(sha256=digest))

  if not (data_store.RelationalDBWriteEnabled() and hashing):
    return

  client_id, vfs_path = path.Split(2)
  path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

  path_info = rdf_objects.PathInfo()
  path_info.path_type = path_type
  path_info.components = components

  blob_id = rdf_objects.BlobID.FromBytes(digest)
  data_store.BLOBS.WriteBlobs({blob_id: content})
  hash_id = file_store.AddFileWithUnknownHash([blob_id])
  path_info.hash_entry.sha256 = hash_id.AsBytes()

  data_store.REL_DB.WritePathInfos(client_id, [path_info])
def testAddsFileToExternalFileStore(self, add_file_mock):
  """Checks that the mocked external store receives the added file once."""
  hash_id = file_store.AddFileWithUnknownHash(self.client_path, self.blob_refs)

  add_file_mock.assert_called_once()

  # First positional argument of the only call is indexed by hash id.
  positional = add_file_mock.call_args_list[0][0]
  entry = positional[0][hash_id]
  self.assertEqual(entry.client_path, self.client_path)
  self.assertEqual(positional[0][hash_id].blob_refs, self.blob_refs)
def setUp(self):
  """Writes six blobs and registers two three-blob files in the file store."""
  super(OpenFileTest, self).setUp()
  self.client_id = self.SetupClient(0).Basename()
  self.client_path = db.ClientPath.OS(self.client_id, ("foo", "bar"))

  self.blob_size = 10
  # Slice one-byte pieces instead of iterating the bytes object: on Python 3
  # iteration yields ints, and `int * blob_size` would produce numbers rather
  # than `blob_size`-long byte strings.
  letters = b"abcdef"
  self.blob_data = [
      letters[i:i + 1] * self.blob_size for i in range(len(letters))
  ]
  self.blob_ids = [
      rdf_objects.BlobID.FromBlobData(bd) for bd in self.blob_data
  ]
  data_store.BLOBS.WriteBlobs(dict(zip(self.blob_ids, self.blob_data)))

  # First file: blobs "a", "b", "c"; second file: blobs "d", "e", "f".
  self.hash_id = file_store.AddFileWithUnknownHash(self.blob_ids[:3])
  self.data = b"".join(self.blob_data[:3])
  self.other_hash_id = file_store.AddFileWithUnknownHash(self.blob_ids[3:])

  self.invalid_hash_id = rdf_objects.SHA256HashID.FromData(b"")
def ReadBuffer(self, responses):
  """Read the buffer and write to the file.

  Records the blob hash carried by the response and, once the end of the file
  has been reached, writes the AFF4 blob image and (if enabled) the REL_DB
  path info.

  Args:
    responses: Flow responses; `First()` is expected to carry `offset`,
      `length` and `data` (the hash of the block).

  Raises:
    IOError: If the call succeeded but carried no response.
  """
  import binascii  # Local import: only needed for logging the blob hash.

  # Did it work?
  if not responses.success:
    return

  response = responses.First()
  if not response:
    # `response` may be None here, so its offset cannot be read directly;
    # doing so used to mask this error with an AttributeError.
    raise IOError("Missing hash for offset %s missing" %
                  getattr(response, "offset", None))

  if response.offset <= self.state.max_chunk_number * self.CHUNK_SIZE:
    # Response.data is the hash of the block (32 bytes) and
    # response.length is the length of the block.
    self.state.blobs.append((response.data, response.length))
    # `bytes.encode("hex")` only exists on Python 2; hexlify works on both.
    hex_data = binascii.hexlify(response.data).decode("ascii")
    self.Log("Received blob hash %s", hex_data)

    # Add one more chunk to the window.
    self.FetchWindow(1)

  if response.offset + response.length >= self.state.file_size:
    # File is complete.
    stat_entry = self.state.stat_entry
    urn = self.state.stat_entry.AFF4Path(self.client_urn)

    # TODO(user): when all the code can read files from REL_DB,
    # protect this with:
    # if not data_store.RelationalDBReadEnabled(category="filestore"):
    with aff4.FACTORY.Create(
        urn, aff4_grr.VFSBlobImage, token=self.token) as fd:
      fd.SetChunksize(self.CHUNK_SIZE)
      fd.Set(fd.Schema.STAT(stat_entry))

      for data, length in self.state.blobs:
        fd.AddBlob(data, length)

      # Stamp the content time once, after all blobs have been attached.
      fd.Set(fd.Schema.CONTENT_LAST, rdfvalue.RDFDatetime.Now())

    if data_store.RelationalDBWriteEnabled():
      path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)

      # Adding files to filestore requires reading data from RELDB,
      # thus protecting this code with a filestore-read-enabled check.
      if data_store.RelationalDBReadEnabled("filestore"):
        blob_ids = [
            rdf_objects.BlobID.FromBytes(data) for data, _ in self.state.blobs
        ]

        hash_id = file_store.AddFileWithUnknownHash(blob_ids)
        path_info.hash_entry.sha256 = hash_id.AsBytes()

      data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

    # Save some space.
    del self.state["blobs"]
    self.state.success = True
def _WriteFile(self, client_path, blobs_range=None):
  """Writes an OS path info, optionally backed by a slice of the test blobs.

  Args:
    client_path: Path whose components and client id are used for the write.
    blobs_range: Optional `(start, end)` pair slicing `self.blob_ids`; when
      given, the slice is added to the file store and its hash is recorded.
  """
  path_info = rdf_objects.PathInfo.OS(components=client_path.components)

  if blobs_range:
    start, end = blobs_range[0], blobs_range[1]
    selected_blob_ids = self.blob_ids[start:end]
    hash_id = file_store.AddFileWithUnknownHash(selected_blob_ids)
    path_info.hash_entry.sha256 = hash_id.AsBytes()

  data_store.REL_DB.WritePathInfos(client_path.client_id, [path_info])
def testAddsFileToExternalFileStore(self, add_file_mock):
  """Checks the arguments with which the mocked external store is called."""
  hash_id = file_store.AddFileWithUnknownHash(self.client_path, self.blob_ids)

  add_file_mock.assert_called_once()

  # Positional arguments of the only call: path, hash id, blob references.
  call_positional = add_file_mock.call_args_list[0][0]
  self.assertEqual(call_positional[0], self.client_path)
  self.assertEqual(call_positional[1], hash_id)

  passed_blob_ids = [blob_ref.blob_id for blob_ref in call_positional[2]]
  self.assertEqual(passed_blob_ids, self.blob_ids)
def CreateFile(client_path, content=b"", token=None):
  """Creates a file in datastore-agnostic way.

  Depending on which backends are enabled, the file is written to the
  relational database, to AFF4, or to both.

  Args:
    client_path: A `ClientPath` instance specifying location of the file.
    content: A content to write to the file.
    token: A GRR token for accessing the data store.
  """
  precondition.AssertType(client_path, db.ClientPath)
  precondition.AssertType(content, bytes)

  size = len(content)
  blob_id = rdf_objects.BlobID.FromBlobData(content)

  pathspec = rdf_paths.PathSpec(
      pathtype=client_path.path_type, path="/".join(client_path.components))
  # 33206 == 0o100666.
  stat_entry = rdf_client_fs.StatEntry(
      pathspec=pathspec, st_mode=33206, st_size=size)

  if data_store.RelationalDBEnabled():
    data_store.BLOBS.WriteBlobs({blob_id: content})

    # The whole file is a single blob starting at offset zero.
    whole_file_ref = rdf_objects.BlobReference(
        size=size, offset=0, blob_id=blob_id)
    hash_id = file_store.AddFileWithUnknownHash(client_path, [whole_file_ref])

    path_info = rdf_objects.PathInfo()
    path_info.path_type = client_path.path_type
    path_info.components = client_path.components
    path_info.hash_entry.num_bytes = size
    path_info.hash_entry.sha256 = hash_id.AsBytes()
    path_info.stat_entry = stat_entry

    data_store.REL_DB.WritePathInfos(client_path.client_id, [path_info])

  if data_store.AFF4Enabled():
    urn = aff4.ROOT_URN.Add(client_path.client_id).Add(client_path.vfs_path)
    with aff4.FACTORY.Create(
        urn, aff4_grr.VFSBlobImage, token=token) as filedesc:
      # A stream positioned at its start, holding the content.
      filedesc.AppendContent(io.BytesIO(content))

      filedesc.Set(filedesc.Schema.STAT, stat_entry)

      sha256 = rdf_objects.SHA256HashID.FromData(content).AsBytes()
      filedesc.Set(filedesc.Schema.HASH,
                   rdf_crypto.Hash(sha256=sha256, num_bytes=size))

      filedesc.Set(filedesc.Schema.CONTENT_LAST, rdfvalue.RDFDatetime.Now())
def setUp(self):
  """Writes six blobs and registers two files for the same client path."""
  super(OpenFileTest, self).setUp()
  self.client_id = self.SetupClient(0)
  self.client_path = db.ClientPath.OS(self.client_id, ("foo", "bar"))

  blob_size = 10

  # Store every blob that the files below refer to.
  all_data, all_refs = _GenerateBlobRefs(blob_size, b"abcdef")
  all_ids = [ref.blob_id for ref in all_refs]
  data_store.BLOBS.WriteBlobs(dict(zip(all_ids, all_data)))

  def_data, def_refs = _GenerateBlobRefs(blob_size, b"def")
  self.hash_id = file_store.AddFileWithUnknownHash(self.client_path, def_refs)
  self.data = b"".join(def_data)

  _, abc_refs = _GenerateBlobRefs(blob_size, b"abc")
  self.other_hash_id = file_store.AddFileWithUnknownHash(
      self.client_path, abc_refs)

  self.invalid_hash_id = rdf_objects.SHA256HashID.FromData(b"")
def _WriteFile(self, client_path, blobs_range=None):
  """Writes an OS path info, optionally backed by generated blob references.

  Args:
    client_path: Path whose components and client id are used for the write.
    blobs_range: Optional `(start, end)` pair slicing the bytes `b"abcdef"`;
      blob references are generated for the selected characters.
  """
  path_info = rdf_objects.PathInfo.OS(components=client_path.components)

  if blobs_range:
    start, end = blobs_range[0], blobs_range[1]
    _, blob_refs = _GenerateBlobRefs(self.blob_size, b"abcdef"[start:end])
    hash_id = file_store.AddFileWithUnknownHash(client_path, blob_refs)
    path_info.hash_entry.sha256 = hash_id.AsBytes()

  data_store.REL_DB.WritePathInfos(client_path.client_id, [path_info])
def ReadBuffer(self, responses):
  """Read the buffer and write to the file.

  Records the blob hash carried by the response and, once the end of the file
  has been reached, adds the file to the file store and writes its path info.

  Args:
    responses: Flow responses; `First()` is expected to carry `offset`,
      `length` and `data` (the hash of the block).

  Raises:
    IOError: If the call succeeded but carried no response.
  """
  # Did it work?
  if not responses.success:
    return

  response = responses.First()
  if not response:
    # `response` may be None here, so its offset cannot be read directly;
    # doing so used to mask this error with an AttributeError.
    raise IOError("Missing hash for offset %s missing" %
                  getattr(response, "offset", None))

  if response.offset <= self.state.max_chunk_number * self.CHUNK_SIZE:
    # Response.data is the hash of the block (32 bytes) and
    # response.length is the length of the block.
    self.state.blobs.append((response.data, response.length))
    hex_data = binascii.hexlify(response.data).decode("ascii")
    self.Log("Received blob hash %s", hex_data)

    # Add one more chunk to the window.
    self.FetchWindow(1)

  if response.offset + response.length >= self.state.file_size:
    # File is complete.
    stat_entry = self.state.stat_entry

    path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)

    # Blobs are laid out back to back, so each offset is the running total
    # of the sizes seen so far.
    blob_refs = []
    offset = 0
    for data, size in self.state.blobs:
      blob_refs.append(
          rdf_objects.BlobReference(
              offset=offset,
              size=size,
              blob_id=rdf_objects.BlobID.FromBytes(data)))
      offset += size

    client_path = db.ClientPath.FromPathInfo(self.client_id, path_info)
    hash_id = file_store.AddFileWithUnknownHash(client_path, blob_refs)

    path_info.hash_entry.sha256 = hash_id.AsBytes()
    # After the loop `offset` equals the total number of bytes.
    path_info.hash_entry.num_bytes = offset

    data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

    # Save some space.
    del self.state["blobs"]
    self.state.success = True
def _WriteFile(self, path: str, data: bytes) -> None:
  """Stores `data` as a single-blob OS file at `path` for the test client.

  Args:
    path: Slash-separated path; each segment becomes a path component.
    data: File content, written as one blob.
  """
  path_components = tuple(path.split("/"))

  blob_id = data_store.BLOBS.WriteBlobWithUnknownHash(blob_data=data)
  # The whole file is a single blob starting at offset zero.
  whole_file_ref = rdf_objects.BlobReference(
      offset=0, size=len(data), blob_id=blob_id)

  path_info = rdf_objects.PathInfo.OS(components=path_components)
  # The blob id bytes are recorded as the file's SHA-256.
  path_info.hash_entry.sha256 = blob_id.AsBytes()
  data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

  client_path = db.ClientPath.OS(
      client_id=self.client_id, components=path_components)
  file_store.AddFileWithUnknownHash(client_path, [whole_file_ref])
def _CreateFile(self, client_id, vfs_path, content):
  """Stores `content` as a single-blob file at `vfs_path` for `client_id`.

  Args:
    client_id: Client that owns the file.
    vfs_path: Categorized VFS path, parsed into path type and components.
    content: Bytes to store.
  """
  path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

  path_info = rdf_objects.PathInfo()
  path_info.path_type = path_type
  path_info.components = components

  # The blob id is built from the SHA-256 digest of the content.
  sha256_digest = hashlib.sha256(content).digest()
  blob_id = rdf_objects.BlobID.FromSerializedBytes(sha256_digest)
  data_store.BLOBS.WriteBlobs({blob_id: content})

  whole_file_ref = rdf_objects.BlobReference(
      offset=0, size=len(content), blob_id=blob_id)
  client_path = db.ClientPath.FromPathInfo(client_id, path_info)
  hash_id = file_store.AddFileWithUnknownHash(client_path, [whole_file_ref])
  path_info.hash_entry.sha256 = hash_id.AsBytes()

  data_store.REL_DB.WritePathInfos(client_id, [path_info])
def testFilesWithOneBlobAreStillReadToEnsureBlobExists(self):
  """Checks that blobs are read once per call, even for single-blob files."""
  _, long_blob_refs = vfs_test_lib.GenerateBlobRefs(self.blob_size, "cd")
  _, short_blob_refs1 = vfs_test_lib.GenerateBlobRefs(self.blob_size, "a")
  _, short_blob_refs2 = vfs_test_lib.GenerateBlobRefs(self.blob_size, "b")

  path1 = db.ClientPath.OS(self.client_id, ["foo"])
  path2 = db.ClientPath.OS(self.client_id, ["bar"])
  path3 = db.ClientPath.OS(self.client_id, ["baz"])

  def SpyOnReadBlobs():
    # Wraps the real method so calls are recorded and still executed.
    return mock.patch.object(
        data_store.BLOBS, "ReadBlobs", wraps=data_store.BLOBS.ReadBlobs)

  # One small file, blob is still read.
  with SpyOnReadBlobs() as p:
    file_store.AddFileWithUnknownHash(path1, short_blob_refs1)
    p.assert_called_once()

  # Same for multiple small files.
  with SpyOnReadBlobs() as p:
    file_store.AddFilesWithUnknownHashes({
        path1: short_blob_refs1,
        path2: short_blob_refs2
    })
    p.assert_called_once()

  # One large file and two small ones result in a single read for the
  # all three blobs.
  with SpyOnReadBlobs() as p:
    file_store.AddFilesWithUnknownHashes({
        path1: short_blob_refs1,
        path2: short_blob_refs2,
        path3: long_blob_refs
    })
    p.assert_called_once()
    self.assertLen(p.call_args[POSITIONAL_ARGS], 1)
    self.assertEmpty(p.call_args[KEYWORD_ARGS])

    every_ref = itertools.chain(short_blob_refs1, short_blob_refs2,
                                long_blob_refs)
    self.assertCountEqual(p.call_args[0][0], [r.blob_id for r in every_ref])
def testOptimizationForSmallFiles(self):
  """Checks that blobs of single-blob files are not read when adding files."""
  _, long_blob_refs = _GenerateBlobRefs(self.blob_size, b"ab")
  _, short_blob_refs1 = _GenerateBlobRefs(self.blob_size, b"a")
  _, short_blob_refs2 = _GenerateBlobRefs(self.blob_size, b"b")

  path1 = db.ClientPath.OS(self.client_id, ["foo"])
  path2 = db.ClientPath.OS(self.client_id, ["bar"])
  path3 = db.ClientPath.OS(self.client_id, ["baz"])

  def SpyOnReadBlobs():
    # Wraps the real method so calls are recorded and still executed.
    return mock.patch.object(
        data_store.BLOBS, "ReadBlobs", wraps=data_store.BLOBS.ReadBlobs)

  # One small file, no need to read blobs.
  with SpyOnReadBlobs() as p:
    file_store.AddFileWithUnknownHash(path1, short_blob_refs1)
    p.assert_not_called()

  # Same for multiple small files.
  with SpyOnReadBlobs() as p:
    file_store.AddFilesWithUnknownHashes({
        path1: short_blob_refs1,
        path2: short_blob_refs2
    })
    p.assert_not_called()

  # One large file and two small ones result in a single read for the two
  # blobs of the large file only.
  with SpyOnReadBlobs() as p:
    file_store.AddFilesWithUnknownHashes({
        path1: short_blob_refs1,
        path2: short_blob_refs2,
        path3: long_blob_refs
    })
    p.assert_called_once()
    self.assertLen(p.call_args[POSITIONAL_ARGS], 1)
    self.assertEmpty(p.call_args[KEYWORD_ARGS])

    expected_blob_ids = [r.blob_id for r in long_blob_refs]
    self.assertCountEqual(p.call_args[0][0], expected_blob_ids)
def _AddFileToFileStore(self):
  """Adds the collected blobs to the file store and writes the path info.

  Reads `(blob id bytes, size)` pairs from `self.state.blobs` and removes
  that state entry afterwards.
  """
  path_info = rdf_objects.PathInfo.FromStatEntry(self.state.stat_entry)

  # Blobs are laid out back to back: every reference starts where the
  # previous one ended.
  blob_refs = []
  total_size = 0
  for blob_id_bytes, blob_size in self.state.blobs:
    reference = rdf_objects.BlobReference(
        offset=total_size,
        size=blob_size,
        blob_id=rdf_objects.BlobID.FromSerializedBytes(blob_id_bytes))
    blob_refs.append(reference)
    total_size += blob_size

  client_path = db.ClientPath.FromPathInfo(self.client_id, path_info)
  hash_id = file_store.AddFileWithUnknownHash(client_path, blob_refs)

  path_info.hash_entry.sha256 = hash_id.AsBytes()
  path_info.hash_entry.num_bytes = total_size

  data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

  # Save some space.
  del self.state["blobs"]
def testRaisesIfSingleBlobIsNotFound(self):
  """Adding a file whose only blob was never written raises BlobNotFound."""
  # Blob data is bytes; a text literal cannot be hashed on Python 3.
  blob_id = rdf_objects.BlobID.FromBlobData(b"")
  with self.assertRaises(file_store.BlobNotFound):
    file_store.AddFileWithUnknownHash([blob_id])
def testAddsFileWithTwoBlobs(self):
  """Checks the hash id of a two-blob file against the joined blob data."""
  hash_id = file_store.AddFileWithUnknownHash(self.client_path, self.blob_refs)

  joined_data = b"".join(self.blob_data)
  expected = rdf_objects.SHA256HashID.FromData(joined_data)
  self.assertEqual(hash_id.AsBytes(), expected)
def WriteBuffer(self, responses):
  """Write the hash received to the blob image.

  Stores the received blob hash in the pending-file tracker; once a hash has
  arrived for every entry of the tracker's hash list, the file is written to
  the enabled data stores.

  Args:
    responses: Flow responses whose `request_data` carries the pending-file
      `index` and the `blob_index` of the received blob.
  """
  index = responses.request_data["index"]
  if index not in self.state.pending_files:
    return

  # Failed to read the file - ignore it.
  if not responses.success:
    self._FileFetchFailed(index, responses.request.request.name)
    return

  response = responses.First()
  file_tracker = self.state.pending_files.get(index)
  if not file_tracker:
    return

  blob_dict = file_tracker.setdefault("blobs", {})
  blob_index = responses.request_data["blob_index"]
  blob_dict[blob_index] = (response.data, response.length)

  if len(blob_dict) != len(file_tracker["hash_list"]):
    # We need more data before we can write the file.
    return

  # Write the file to the data store.
  stat_entry = file_tracker["stat_entry"]
  urn = stat_entry.pathspec.AFF4Path(self.client_urn)

  if data_store.AFF4Enabled():
    with aff4.FACTORY.Create(
        urn, aff4_grr.VFSBlobImage, mode="w", token=self.token) as fd:
      fd.SetChunksize(self.CHUNK_SIZE)
      fd.Set(fd.Schema.STAT(stat_entry))
      fd.Set(fd.Schema.PATHSPEC(stat_entry.pathspec))
      fd.Set(fd.Schema.CONTENT_LAST(rdfvalue.RDFDatetime().Now()))

      # Blobs are attached in the order of their indices.
      for position in sorted(blob_dict):
        digest, length = blob_dict[position]
        fd.AddBlob(rdf_objects.BlobID.FromBytes(digest), length)

  if data_store.RelationalDBEnabled():
    path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)

    # Offsets are the running total of the sizes of preceding blobs.
    blob_refs = []
    offset = 0
    for position in sorted(blob_dict):
      digest, size = blob_dict[position]
      blob_refs.append(
          rdf_objects.BlobReference(
              offset=offset,
              size=size,
              blob_id=rdf_objects.BlobID.FromBytes(digest)))
      offset += size

    hash_obj = file_tracker["hash_obj"]

    client_path = db.ClientPath.FromPathInfo(self.client_id, path_info)
    hash_id = file_store.AddFileWithUnknownHash(
        client_path,
        blob_refs,
        use_external_stores=self.state.use_external_stores)

    # If the hash that we've calculated matches what we got from the
    # client, then simply store the full hash entry.
    # Otherwise store just the hash that we've calculated.
    if hash_id.AsBytes() == hash_obj.sha256:
      path_info.hash_entry = hash_obj
    else:
      path_info.hash_entry.sha256 = hash_id.AsBytes()

    data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

  if (not data_store.RelationalDBEnabled() and
      self.state.use_external_stores):
    # Publish the new file event to cause the file to be added to the
    # filestore.
    events.Events.PublishEvent(
        "LegacyFileStore.AddFileToStore", urn, token=self.token)

  # Save some space.
  del file_tracker["blobs"]
  del file_tracker["hash_list"]

  # File done, remove from the store and close it.
  self._ReceiveFetchedFile(file_tracker)

  self.state.files_fetched += 1

  if self.state.files_fetched % 100 == 0:
    self.Log("Fetched %d of %d files.", self.state.files_fetched,
             self.state.files_to_fetch)
def CreateClientObject(self, vfs_fixture):
  """Make a new client object.

  Writes client metadata and a snapshot to the relational database and then
  writes a path info for every fixture entry that maps to an OS or TSK path
  or that carries a stat entry with a set pathspec type.

  Args:
    vfs_fixture: Iterable of `(path, (typ, attributes))` entries; `typ` is
      "File" or "Directory" for VFS paths, and `attributes` may contain the
      keys "stat" and "content".

  Raises:
    ValueError: On an unsupported object type or an unknown attribute name.
  """
  import binascii  # Local import: only needed to decode the snapshot.

  # Constructing a client snapshot from the legacy fixture is hard, we are
  # using a serialized string instead.
  data_store.REL_DB.WriteClientMetadata(
      self.client_id, fleetspeak_enabled=False)

  # `str.decode("hex")` only exists on Python 2; unhexlify works on both.
  snapshot = rdf_objects.ClientSnapshot.FromSerializedString(
      binascii.unhexlify(SERIALIZED_CLIENT))
  snapshot.client_id = self.client_id
  snapshot.knowledge_base.fqdn = "Host%s" % self.client_id

  data_store.REL_DB.WriteClientSnapshot(snapshot)
  client_index.ClientIndex().AddClient(snapshot)

  for path, (typ, attributes) in vfs_fixture:
    path %= self.args

    path_info = None

    components = [component for component in path.split("/") if component]
    if (len(components) > 1 and components[0] == "fs" and
        components[1] in ["os", "tsk"]):
      path_info = rdf_objects.PathInfo()
      if components[1] == "os":
        path_info.path_type = rdf_objects.PathInfo.PathType.OS
      else:
        path_info.path_type = rdf_objects.PathInfo.PathType.TSK
      # Drop the leading "fs/<type>" prefix.
      path_info.components = components[2:]
      if typ == "File":
        path_info.directory = False
      elif typ == "Directory":
        path_info.directory = True
      else:
        raise ValueError("Incorrect object type: %s" % typ)

    for attribute_name in attributes:
      if attribute_name not in ["stat", "content"]:
        raise ValueError("Unknown attribute: " + attribute_name)

    stat = attributes.get("stat", None)
    if stat:
      stat_entry = rdf_client_fs.StatEntry.FromTextFormat(stat % self.args)
      # A stat entry with a set pathspec type replaces the path info that
      # was derived from the fixture path.
      if stat_entry.pathspec.pathtype != "UNSET":
        path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)

    content = attributes.get("content", None)
    if content:
      blob_id = rdf_objects.BlobID.FromBlobData(content)
      data_store.BLOBS.WriteBlobs({blob_id: content})
      blob_ref = rdf_objects.BlobReference(
          offset=0, size=len(content), blob_id=blob_id)
      hash_id = file_store.AddFileWithUnknownHash(
          db.ClientPath.FromPathInfo(self.client_id, path_info), [blob_ref])
      path_info.hash_entry.num_bytes = len(content)
      path_info.hash_entry.sha256 = hash_id.AsBytes()

    if path_info is not None:
      data_store.REL_DB.WritePathInfos(
          client_id=self.client_id, path_infos=[path_info])
def CreateClientObject(self, vfs_fixture):
  """Make a new client object.

  Deletes any existing fixture for the client and then, under a fake time of
  `self.age`, creates one AFF4 object per fixture entry and sets its
  attributes. When relational writes are enabled, matching path infos are
  written to REL_DB as well.

  Args:
    vfs_fixture: Iterable of `(path, (aff4_type, attributes))` entries, where
      `attributes` maps AFF4 attribute names to their textual values.

  Raises:
    ValueError: If an "fs/os" or "fs/tsk" entry has an AFF4 type that is
      neither a file nor a directory type.
  """
  # First remove the old fixture just in case its still there.
  aff4.FACTORY.Delete(self.client_id, token=self.token)

  # Create the fixture at a fixed time.
  with test_lib.FakeTime(self.age):
    for path, (aff4_type, attributes) in vfs_fixture:
      path %= self.args

      aff4_object = aff4.FACTORY.Create(
          self.client_id.Add(path), aff4_type, mode="rw", token=self.token)

      # Stays None unless the entry maps to an OS/TSK path or carries a stat
      # entry with a set pathspec type (see below).
      path_info = None

      if data_store.RelationalDBWriteEnabled():
        data_store.REL_DB.WriteClientMetadata(
            self.client_id.Basename(), fleetspeak_enabled=False)

        components = [
            component for component in path.split("/") if component
        ]
        if (len(components) > 1 and components[0] == "fs" and
            components[1] in ["os", "tsk"]):
          path_info = rdf_objects.PathInfo()
          if components[1] == "os":
            path_info.path_type = rdf_objects.PathInfo.PathType.OS
          else:
            path_info.path_type = rdf_objects.PathInfo.PathType.TSK
          # Drop the leading "fs/<type>" prefix.
          path_info.components = components[2:]
          if aff4_type in [aff4_grr.VFSFile, aff4_grr.VFSMemoryFile]:
            path_info.directory = False
          elif aff4_type == aff4_standard.VFSDirectory:
            path_info.directory = True
          else:
            raise ValueError("Incorrect AFF4 type: %s" % aff4_type)

      for attribute_name, value in iteritems(attributes):
        attribute = aff4.Attribute.PREDICATES[attribute_name]
        if isinstance(value, (str, unicode)):
          # Interpolate the value
          value %= self.args

        # Is this supposed to be an RDFValue array?
        if aff4.issubclass(attribute.attribute_type,
                           rdf_protodict.RDFValueArray):
          rdfvalue_object = attribute()
          for item in value:
            new_object = rdfvalue_object.rdf_type.FromTextFormat(
                utils.SmartStr(item))
            rdfvalue_object.Append(new_object)

        # It is a text serialized protobuf.
        elif aff4.issubclass(attribute.attribute_type,
                             rdf_structs.RDFProtoStruct):
          # Use the alternate constructor - we always write protobufs in
          # textual form:
          rdfvalue_object = attribute.attribute_type.FromTextFormat(
              utils.SmartStr(value))

        elif aff4.issubclass(attribute.attribute_type, rdfvalue.RDFInteger):
          rdfvalue_object = attribute(int(value))
        else:
          rdfvalue_object = attribute(value)

        # If we don't already have a pathspec, try and get one from the stat.
        if aff4_object.Get(aff4_object.Schema.PATHSPEC) is None:
          # If the attribute was a stat, it has a pathspec nested in it.
          # We should add that pathspec as an attribute.
          if attribute.attribute_type == rdf_client_fs.StatEntry:
            stat_object = attribute.attribute_type.FromTextFormat(
                utils.SmartStr(value))
            if stat_object.pathspec:
              pathspec_attribute = aff4.Attribute(
                  "aff4:pathspec", rdf_paths.PathSpec,
                  "The pathspec used to retrieve "
                  "this object from the client.", "pathspec")
              aff4_object.AddAttribute(pathspec_attribute,
                                       stat_object.pathspec)

        # NOTE(review): both list entries are the same string; possibly a
        # second attribute name was intended - confirm.
        if attribute in ["aff4:content", "aff4:content"]:
          # For AFF4MemoryStreams we need to call Write() instead of
          # directly setting the contents..
          content = rdfvalue_object.AsBytes()
          aff4_object.Write(content)

          if path_info is not None:
            # Mirror the content into the blob store and file store so the
            # path info can carry its hash and size.
            blob_id = rdf_objects.BlobID.FromBlobData(content)
            data_store.BLOBS.WriteBlobs({blob_id: content})
            hash_id = file_store.AddFileWithUnknownHash([blob_id])
            path_info.hash_entry.num_bytes = len(content)
            path_info.hash_entry.sha256 = hash_id.AsBytes()
        else:
          aff4_object.AddAttribute(attribute, rdfvalue_object)

        if (isinstance(rdfvalue_object, rdf_client_fs.StatEntry) and
            rdfvalue_object.pathspec.pathtype != "UNSET"):
          if data_store.RelationalDBWriteEnabled():
            # A stat entry with a set pathspec type replaces the path info
            # derived from the fixture path.
            client_id = self.client_id.Basename()
            path_info = rdf_objects.PathInfo.FromStatEntry(rdfvalue_object)
            data_store.REL_DB.WritePathInfos(client_id, [path_info])

      # Populate the KB from the client attributes.
      if aff4_type == aff4_grr.VFSGRRClient:
        kb = rdf_client.KnowledgeBase()
        artifact.SetCoreGRRKnowledgeBaseValues(kb, aff4_object)
        aff4_object.Set(aff4_object.Schema.KNOWLEDGE_BASE, kb)

      # Make sure we do not actually close the object here - we only want to
      # sync back its attributes, not run any finalization code.
      aff4_object.Flush()
      if aff4_type == aff4_grr.VFSGRRClient:
        index = client_index.CreateClientIndex(token=self.token)
        index.AddClient(aff4_object)

      # Written last so that hash data added while processing the attributes
      # is included.
      if path_info is not None:
        data_store.REL_DB.WritePathInfos(
            client_id=self.client_id.Basename(), path_infos=[path_info])
def WriteBuffer(self, responses):
  """Write the hash received to the blob image.

  Stores the received blob hash in the pending-file tracker; once a hash has
  arrived for every entry of the tracker's hash list, the file is written to
  AFF4 and, if enabled, to the relational database.

  Args:
    responses: Flow responses whose `request_data` carries the pending-file
      `index` and the `blob_index` of the received blob.
  """
  index = responses.request_data["index"]
  if index not in self.state.pending_files:
    return

  # Failed to read the file - ignore it.
  if not responses.success:
    self._FileFetchFailed(index, responses.request.request.name)
    return

  response = responses.First()
  file_tracker = self.state.pending_files.get(index)
  if not file_tracker:
    return

  blob_dict = file_tracker.setdefault("blobs", {})
  blob_index = responses.request_data["blob_index"]
  blob_dict[blob_index] = (response.data, response.length)

  if len(blob_dict) != len(file_tracker["hash_list"]):
    # More hashes are still outstanding.
    return

  # Write the file to the data store.
  stat_entry = file_tracker["stat_entry"]
  urn = stat_entry.pathspec.AFF4Path(self.client_urn)

  # TODO(user): when all the code can read files from REL_DB,
  # protect this with:
  # if not data_store.RelationalDBReadEnabled(category="filestore"):
  with aff4.FACTORY.Create(
      urn, aff4_grr.VFSBlobImage, mode="w", token=self.token) as fd:
    fd.SetChunksize(self.CHUNK_SIZE)
    fd.Set(fd.Schema.STAT(stat_entry))
    fd.Set(fd.Schema.PATHSPEC(stat_entry.pathspec))
    fd.Set(fd.Schema.CONTENT_LAST(rdfvalue.RDFDatetime().Now()))

    # Blobs are attached in the order of their indices.
    for position in sorted(blob_dict):
      digest, length = blob_dict[position]
      fd.AddBlob(digest, length)

  # Publish the new file event to cause the file to be added to the
  # filestore.
  events.Events.PublishEvent(
      "LegacyFileStore.AddFileToStore", urn, token=self.token)

  if data_store.RelationalDBWriteEnabled():
    path_info = rdf_objects.PathInfo.FromStatEntry(stat_entry)

    # Adding files to filestore requires reading data from RELDB,
    # thus protecting this code with a filestore-read-enabled check.
    if data_store.RelationalDBReadEnabled("filestore"):
      blob_ids = []
      for position in sorted(blob_dict):
        digest, _ = blob_dict[position]
        blob_ids.append(rdf_objects.BlobID.FromBytes(digest))

      hash_obj = file_tracker["hash_obj"]

      hash_id = file_store.AddFileWithUnknownHash(blob_ids)
      # If the hash that we've calculated matches what we got from the
      # client, then simply store the full hash entry.
      # Otherwise store just the hash that we've calculated.
      if hash_id.AsBytes() == hash_obj.sha256:
        path_info.hash_entry = hash_obj
      else:
        path_info.hash_entry.sha256 = hash_id.AsBytes()

      # Publish the add file event to cause the file to be added to the
      # filestore.
      add_event = rdf_file_store.FileStoreAddEvent(
          hash_id=hash_id, blob_ids=blob_ids)
      events.Events.PublishEvent("FileStore.Add", add_event, token=self.token)

    data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

  # Save some space.
  del file_tracker["blobs"]
  del file_tracker["hash_list"]

  # File done, remove from the store and close it.
  self._ReceiveFetchedFile(file_tracker)

  self.state.files_fetched += 1

  if self.state.files_fetched % 100 == 0:
    self.Log("Fetched %d of %d files.", self.state.files_fetched,
             self.state.files_to_fetch)
def testRaisesIfOneOfTwoBlobsIsNotFound(self):
  """Adding a file raises BlobNotFound when its second blob is missing."""
  # Blob data is bytes; a text literal cannot be hashed on Python 3.
  blob_id = rdf_objects.BlobID.FromBlobData(b"")
  with self.assertRaises(file_store.BlobNotFound):
    file_store.AddFileWithUnknownHash([self.blob_ids[0], blob_id])
def testAddsFileWithSingleBlob(self):
  """For a single-blob file the hash id bytes equal the blob id bytes."""
  only_ref = self.blob_refs[:1]
  hash_id = file_store.AddFileWithUnknownHash(self.client_path, only_ref)

  expected_bytes = self.blob_refs[0].blob_id.AsBytes()
  self.assertEqual(hash_id.AsBytes(), expected_bytes)
def testRaisesIfOneOfTwoBlobsIsNotFound(self):
  """Adding a file raises BlobNotFoundError when its second blob is missing."""
  # Blob data is bytes; a text literal cannot be hashed on Python 3.
  blob_ref = rdf_objects.BlobReference(
      offset=0, size=0, blob_id=rdf_objects.BlobID.FromBlobData(b""))
  with self.assertRaises(file_store.BlobNotFoundError):
    file_store.AddFileWithUnknownHash(self.client_path,
                                      [self.blob_refs[0], blob_ref])
def WriteBuffer(self, responses):
  """Write the hash received to the blob image.

  Stores the received blob hash in the pending-file tracker; once the
  tracker's expected number of chunks has arrived, the file is added to the
  file store and its path info is written.

  Args:
    responses: Flow responses whose `request_data` carries the pending-file
      `index` and the `blob_index` of the received blob.
  """
  index = responses.request_data["index"]
  if index not in self.state.pending_files:
    return

  # Failed to read the file - ignore it.
  if not responses.success:
    self._FileFetchFailed(index)
    return

  response = responses.First()
  file_tracker = self.state.pending_files.get(index)
  if not file_tracker:
    return

  blob_dict = file_tracker.setdefault("blobs", {})
  blob_index = responses.request_data["blob_index"]
  blob_dict[blob_index] = (response.data, response.length)

  if len(blob_dict) != file_tracker["expected_chunks"]:
    # We need more data before we can write the file.
    return

  # Write the file to the data store.
  path_info = rdf_objects.PathInfo.FromStatEntry(file_tracker["stat_entry"])

  # Blobs are ordered by index; each offset is the running total of the
  # sizes of the preceding blobs.
  blob_refs = []
  total_size = 0
  for position in sorted(blob_dict):
    digest, size = blob_dict[position]
    reference = rdf_objects.BlobReference(
        offset=total_size,
        size=size,
        blob_id=rdf_objects.BlobID.FromSerializedBytes(digest))
    blob_refs.append(reference)
    total_size += size

  hash_obj = file_tracker["hash_obj"]

  client_path = db.ClientPath.FromPathInfo(self.client_id, path_info)
  hash_id = file_store.AddFileWithUnknownHash(
      client_path,
      blob_refs,
      use_external_stores=self.state.use_external_stores)

  # If the hash that we've calculated matches what we got from the
  # client, then simply store the full hash entry.
  # Otherwise store just the hash that we've calculated.
  if hash_id.AsBytes() == hash_obj.sha256:
    path_info.hash_entry = hash_obj
  else:
    path_info.hash_entry.sha256 = hash_id.AsBytes()
  path_info.hash_entry.num_bytes = total_size

  data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

  # Save some space.
  del file_tracker["blobs"]
  del file_tracker["hash_list"]

  # File done, remove from the store and close it.
  self._ReceiveFetchedFile(file_tracker)

  self.state.files_fetched += 1

  if self.state.files_fetched % 100 == 0:
    self.Log("Fetched %d of %d files.", self.state.files_fetched,
             self.state.files_to_fetch)
def StoreBlobsAsTmpFile(self, responses):
  """Stores bytes retrieved from client in the VFS tmp folder.

  Builds blob references from the responses, stores them in the file store
  under a generated temporary name, writes the matching path info and replies
  with the temporary file name.

  Args:
    responses: Flow responses; each one carries a `blob` (with `offset`,
      `length` and `data`) and an `accumulated_hash`.

  Raises:
    flow_base.FlowError: If the client call failed, if no blobs were
      received, or if more bytes than requested were read.
  """
  if not responses.success:
    raise flow_base.FlowError(responses.status)

  file_size = 0
  file_hash_from_client = None  # Hash on the last buffer reference.
  blob_refs = []

  smallest_offset = None
  biggest_offset = 0
  for response in responses:
    file_size += response.blob.length
    if smallest_offset is None or response.blob.offset < smallest_offset:
      smallest_offset = response.blob.offset
    # The accumulated hash of the blob with the largest offset is taken as
    # the hash of the whole read.
    if response.blob.offset >= biggest_offset:
      biggest_offset = response.blob.offset
      file_hash_from_client = response.accumulated_hash

    blob_refs.append(
        rdf_objects.BlobReference(
            offset=response.blob.offset,
            size=response.blob.length,
            blob_id=rdf_objects.BlobID.FromSerializedBytes(
                response.blob.data)))

  if not blob_refs:
    # Without any blob there is neither a client hash nor a source offset;
    # fail explicitly instead of hitting an AttributeError on None below.
    raise flow_base.FlowError("No data was received from the client.")

  if file_size < self.args.length:
    self.Log(f"Read less bytes than requested ({file_size} < "
             f"{self.args.length}). The file is probably smaller than "
             "requested read length.")
  elif file_size > self.args.length:
    raise flow_base.FlowError(
        f"Read more bytes than requested ({file_size} >"
        f" {self.args.length}).")

  # This raw data is not necessarily a file, but any data from the device.
  # We artificially create a filename to refer to it on our file store.
  alphanumeric_only = "".join(c for c in self.args.path if c.isalnum())
  # TODO: Remove client_id from `tmp_filename` when bug is fixed.
  tmp_filename = f"{self.client_id}_{self.rdf_flow.flow_id}_{alphanumeric_only}"
  tmp_filepath = db.ClientPath.Temp(self.client_id, [tmp_filename])

  # Store blobs under this name in file_store.
  file_hash_from_store = file_store.AddFileWithUnknownHash(
      tmp_filepath, blob_refs, use_external_stores=False)

  # Check if the file hashes match, and log in case they don't.
  file_hash_id_from_client = rdf_objects.SHA256HashID.FromSerializedBytes(
      file_hash_from_client.AsBytes())
  if file_hash_id_from_client != file_hash_from_store:
    logging.warning(
        "Flow %s (%s): mismatch in file hash id in the storage (%s) and in the client (%s)",
        self.rdf_flow.protobuf.flow_id, self.client_id, file_hash_from_store,
        file_hash_from_client)

  path_info = rdf_objects.PathInfo.Temp(components=[tmp_filename])
  path_info.hash_entry.sha256 = file_hash_from_store.AsBytes()
  path_info.hash_entry.num_bytes = file_size
  path_info.hash_entry.source_offset = smallest_offset

  # Store file reference for this client in data_store.
  data_store.REL_DB.WritePathInfos(self.client_id, [path_info])

  result = rdf_read_low_level.ReadLowLevelFlowResult(path=tmp_filename)
  self.SendReply(result)