def test_dataset_del() -> None:
    id = UID()
    data = UID()
    description = "This is a dummy id"
    tags = ["dummy", "test"]
    obj1 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = th.Tensor([1, 2, 3, 4])
    description = "This is a dummy tensor n1"
    tags = ["dummy", "test"]
    obj2 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = th.Tensor([10, 20, 30, 40])
    description = "This is a dummy tensor n2"
    tags = ["dummy", "test"]
    obj3 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = [obj1, obj2, obj3]
    description = "This is a dataset"
    tags = ["dummy", "dataset"]
    dataset_obj = Dataset(id=id, data=data, description=description, tags=tags)

    dataset_obj.__delitem__(obj2.id)
    assert dataset_obj.data == [obj1, obj3]
def test_dataset_get_element() -> None:
    id = UID()
    data = UID()
    description = "This is a dummy id"
    tags = ["dummy", "test"]
    obj1 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = th.Tensor([1, 2, 3, 4])
    description = "This is a dummy tensor n1"
    tags = ["dummy", "test"]
    obj2 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = th.Tensor([10, 20, 30, 40])
    description = "This is a dummy tensor n2"
    tags = ["dummy", "test"]
    obj3 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = [obj1, obj2, obj3]
    description = "This is a dataset"
    tags = ["dummy", "dataset"]
    dataset_obj = Dataset(id=id, data=data, description=description, tags=tags)

    result = dataset_obj.__getitem__(_id=obj1.id)
    assert len(result) == 1
    assert result[0] == obj1
def test_dataset_search_id_fail() -> None:
    id = UID()
    data = UID()
    description = "This is a dummy id"
    tags = ["dummy", "test"]
    obj1 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = th.Tensor([1, 2, 3, 4])
    description = "This is a dummy tensor n1"
    tags = ["dummy", "test"]
    obj2 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = th.Tensor([10, 20, 30, 40])
    description = "This is a dummy tensor n2"
    tags = ["dummy", "test"]
    obj3 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = [obj1, obj2, obj3]
    description = "This is a dataset"
    tags = ["dummy", "dataset"]
    dataset_obj = Dataset(id=id, data=data, description=description, tags=tags)

    # A freshly generated UID cannot be contained in the dataset.
    assert not dataset_obj.__contains__(_id=UID())
def test_serde_storable_obj() -> None:
    id = UID()
    data = th.Tensor([-1, -2, -3, -4])
    description = "This is a dummy id"
    tags = ["dummy", "test"]
    obj1 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = th.Tensor([1, 2, 3, 4])
    description = "This is a dummy tensor n1"
    tags = ["dummy", "test"]
    obj2 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = th.Tensor([10, 20, 30, 40])
    description = "This is a dummy tensor n2"
    tags = ["dummy", "test"]
    obj3 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = [obj1, obj2, obj3]
    description = "This is a dataset"
    tags = ["dummy", "dataset"]
    dataset_obj = Dataset(id=id, data=data, description=description, tags=tags)

    blob = sy.serialize(obj=dataset_obj)
    sy.deserialize(blob=blob)
def test_keys(client, database, cleanup):
    disk_store = DiskObjectStore(database)
    id1 = UID()
    id2 = UID()

    storable1 = StorableObject(id=id1, data=tensor1)
    disk_store.__setitem__(id1, storable1)
    storable2 = StorableObject(id=id2, data=tensor2)
    disk_store.__setitem__(id2, storable2)

    keys = disk_store.keys()
    assert any(id1 == k for k in keys)
    assert any(id2 == k for k in keys)
    assert len(keys) == 2
def test_delete(client, database, cleanup):
    disk_store = DiskObjectStore(database)
    _id = UID()
    storable = StorableObject(id=_id, data=tensor1)
    disk_store.__setitem__(_id, storable)

    bin_obj = database.session.query(BinObject).get(str(_id.value))
    metadata = (
        database.session.query(ObjectMetadata).filter_by(obj=str(_id.value)).one()
    )
    assert bin_obj is not None
    assert th.all(th.eq(bin_obj.object, tensor1))
    assert metadata is not None
    assert metadata.tags == []
    assert metadata.description == ""
    assert metadata.read_permissions == {}
    assert metadata.search_permissions == {}

    disk_store.delete(_id)
    assert database.session.query(BinObject).get(str(_id.value)) is None
    with pytest.raises(NoResultFound):
        database.session.query(ObjectMetadata).filter_by(obj=str(_id.value)).one()
def test_clear(client, database, cleanup):
    disk_store = DiskObjectStore(database)
    _id = UID()
    storable = StorableObject(id=_id, data=tensor1)
    disk_store.__setitem__(_id, storable)

    bin_obj = database.session.query(BinObject).get(str(_id.value))
    metadata = (
        database.session.query(ObjectMetadata).filter_by(obj=str(_id.value)).one()
    )
    assert bin_obj is not None
    assert th.all(th.eq(bin_obj.object, tensor1))
    assert metadata is not None
    assert metadata.tags == []
    assert metadata.description == ""
    assert metadata.read_permissions == {}
    assert metadata.search_permissions == {}

    retrieved = disk_store.get_object(_id)
    assert th.all(th.eq(retrieved.data, tensor1))
    assert retrieved.id == _id

    disk_store.clear()
    assert database.session.query(BinObject).count() == 0
    assert database.session.query(ObjectMetadata).count() == 0
def create_dataset(df_json: dict) -> dict:
    _json = deepcopy(df_json)
    storage = DiskObjectStore(db)
    mapping = []

    # Separate CSV content from metadata
    for el in _json["tensors"].copy():
        _id = UID()
        _json["tensors"][el]["id"] = str(_id.value)
        mapping.append((el, _id, _json["tensors"][el].pop("content", None)))

    # Ensure we have the same ID in metadata and dataset
    df_id = UID()
    _json["id"] = str(df_id.value)

    # Create storables from UID/CSV and update metadata
    for name, _id, raw_file in mapping:
        _tensor = pd.read_csv(StringIO(raw_file))
        _tensor = th.tensor(_tensor.values.astype(np.float32))
        _json["tensors"][name]["shape"] = [int(x) for x in _tensor.size()]
        _json["tensors"][name]["dtype"] = "{}".format(_tensor.dtype)
        storage.__setitem__(_id, StorableObject(id=_id, data=_tensor))
        # Link each stored binary object to its parent dataset
        db.session.add(
            DatasetGroup(bin_object=str(_id.value), dataset=str(df_id.value))
        )

    json_obj = JsonObject(id=_json["id"], binary=_json)
    metadata = get_metadata(db)
    metadata.length += 1

    db.session.add(json_obj)
    db.session.commit()
    return _json
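# Hedged example of the df_json payload create_dataset consumes, inferred from
# the accesses above: each entry under "tensors" carries its raw CSV in a
# "content" field, which is popped off and parsed into a float32 tensor. The
# names and values below are illustrative only.
example_df_json = {
    "name": "demo dataset",
    "tensors": {
        "train": {"content": "a,b\n1,2\n3,4"},
    },
}
# create_dataset(example_df_json) would store one 2x2 float32 tensor and
# return the metadata JSON with "id", "shape", and "dtype" filled in.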
def generate_id_obj(
    data: th.Tensor, description: str, tags: List[str]
) -> Tuple[UID, StorableObject]:
    id = UID()
    obj = StorableObject(id=id, data=data, description=description, tags=tags)
    return id, obj
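# A minimal usage sketch (hypothetical, not part of the original suite):
# generate_id_obj collapses the repeated UID/StorableObject boilerplate used
# throughout the tests above into a single call.
def test_generate_id_obj_example() -> None:
    id, obj = generate_id_obj(
        data=th.Tensor([1, 2, 3, 4]),
        description="This is a dummy tensor",
        tags=["dummy", "test"],
    )
    assert obj.id == id
    assert obj.description == "This is a dummy tensor"
    assert obj.tags == ["dummy", "test"]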
def test_get_objects_of_type(client, database, cleanup):
    disk_store = DiskObjectStore(database)
    id1 = UID()
    id2 = UID()

    storable1 = StorableObject(id=id1, data=tensor1)
    disk_store.__setitem__(id1, storable1)
    storable2 = StorableObject(id=id2, data=tensor2)
    disk_store.__setitem__(id2, storable2)

    selected = disk_store.get_objects_of_type(th.Tensor)
    selected_data = [x.data for x in selected]
    assert any(th.all(th.eq(tensor1, d_)) for d_ in selected_data)
    assert any(th.all(th.eq(tensor2, d_)) for d_ in selected_data)
    assert len(selected_data) == 2
def test_values(client, database, cleanup):
    disk_store = DiskObjectStore(database)
    id1 = UID()
    id2 = UID()

    storable1 = StorableObject(id=id1, data=tensor1)
    disk_store.__setitem__(id1, storable1)
    storable2 = StorableObject(id=id2, data=tensor2)
    disk_store.__setitem__(id2, storable2)

    values = disk_store.values()
    values_data = [v.data for v in values]
    assert any(th.all(th.eq(tensor1, v)) for v in values_data)
    assert any(th.all(th.eq(tensor2, v)) for v in values_data)
    assert len(values_data) == 2
def test_create_dataset_with_store_obj() -> None:
    id = UID()
    data = UID()
    description = "This is a dummy id"
    tags = ["dummy", "test"]
    obj1 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = th.Tensor([1, 2, 3, 4])
    description = "This is a dummy tensor"
    tags = ["dummy", "test"]
    obj2 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = [obj1, obj2]
    description = "This is a dataset"
    tags = ["dummy", "dataset"]
    Dataset(id=id, data=data, description=description, tags=tags)
def test_serde_storable_obj() -> None:
    id = UID()
    data = th.Tensor([1, 2, 3, 4])
    description = "This is a dummy test"
    tags = ["dummy", "test"]
    obj = StorableObject(id=id, data=data, description=description, tags=tags)

    blob = sy.serialize(obj=obj)
    sy.deserialize(blob=blob)
def test_serde_storable_obj_2() -> None:
    id = UID()
    data = th.Tensor([1, 2, 3, 4])
    description = "This is a dummy test"
    tags = ["dummy", "test"]
    obj = StorableObject(id=id, data=data, description=description, tags=tags)

    blob = serialize(obj)
    ds_obj = sy.deserialize(blob=blob)

    assert obj.id == ds_obj.id
    assert (obj.data == ds_obj.data).all()
    assert obj.description == ds_obj.description
    assert obj.tags == ds_obj.tags
def test_serde_storable_obj_2() -> None:
    id = UID()
    data = th.Tensor([-1, -2, -3, -4])
    description = "This is a dummy id"
    tags = ["dummy", "test"]
    obj1 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = th.Tensor([1, 2, 3, 4])
    description = "This is a dummy tensor n1"
    tags = ["dummy", "test"]
    obj2 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = th.Tensor([10, 20, 30, 40])
    description = "This is a dummy tensor n2"
    tags = ["dummy", "test"]
    obj3 = StorableObject(id=id, data=data, description=description, tags=tags)

    id = UID()
    data = [obj1, obj2, obj3]
    description = "This is a dataset"
    tags = ["dummy", "dataset"]
    obj = Dataset(id=id, data=data, description=description, tags=tags)

    blob = serialize(obj)
    ds_obj = sy.deserialize(blob=blob)

    assert obj.id == ds_obj.id
    assert obj.description == ds_obj.description
    assert obj.tags == ds_obj.tags
    assert len(obj.data) == len(ds_obj.data)

    # Every contained storable must round-trip intact as well.
    for i in range(len(obj.data)):
        assert obj.data[i].id == ds_obj.data[i].id
        assert th.all(th.eq(obj.data[i].data, ds_obj.data[i].data))
        assert obj.data[i].description == ds_obj.data[i].description
        assert obj.data[i].tags == ds_obj.data[i].tags
def create_tensor_msg(
    msg: CreateTensorMessage,
    node: AbstractNode,
) -> CreateTensorResponse:
    try:
        payload = msg.content

        new_tensor = th.tensor(payload["tensor"])
        new_tensor.tag(*payload.get("tags", []))
        new_tensor.describe(payload.get("description", ""))

        id_at_location = UID()

        # Step 2: create message which contains object to send
        storable = StorableObject(
            id=id_at_location,
            data=new_tensor,
            tags=new_tensor.tags,
            description=new_tensor.description,
            search_permissions={VerifyAll(): None}
            if payload.get("searchable", False)
            else {},
        )

        obj_msg = SaveObjectAction(obj=storable, address=node.address)
        signed_message = obj_msg.sign(
            signing_key=SigningKey(
                payload["internal_key"].encode("utf-8"), encoder=HexEncoder
            )
        )
        node.recv_immediate_msg_without_reply(msg=signed_message)

        return CreateTensorResponse(
            address=msg.reply_to,
            status_code=200,
            content={
                "msg": "Tensor created successfully!",
                "tensor_id": str(id_at_location.value),
            },
        )
    except Exception as e:
        return CreateTensorResponse(
            address=msg.reply_to,
            status_code=200,
            content={"error": str(e)},
        )
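# Hedged sketch of the message payload create_tensor_msg expects, inferred
# from the reads above; the values are illustrative only, and "internal_key"
# must be the hex-encoded signing key of the requesting user.
example_payload = {
    "tensor": [1, 2, 3, 4],
    "tags": ["#example"],
    "description": "an example tensor",
    "searchable": True,
    "internal_key": "<hex-encoded signing key>",
}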
def test_save_object_action_serde() -> None:
    obj = th.tensor([1, 2, 3])
    addr = Address(network=SpecificLocation(), device=SpecificLocation())
    storable = StorableObject(id=UID(), data=obj)
    msg = SaveObjectAction(obj=storable, address=addr)

    blob = serialize(msg)
    msg2 = sy.deserialize(blob=blob)

    assert (msg2.obj.data == msg.obj.data).all()
    # Tensors do not automatically get IDs anymore
    # assert msg2.obj.id == msg.obj.id
    assert msg2.address == msg.address
def update_dataset(key: str, df_json: dict) -> dict:
    _json = deepcopy(df_json)
    storage = DiskObjectStore(db)
    json_obj = db.session.query(JsonObject).get(key)
    past_json = json_obj.binary
    past_ids = [x["id"] for x in past_json["tensors"].values()]
    mapping = []

    # Separate CSV content from metadata, reusing IDs that already exist
    for el in _json["tensors"].copy():
        tensor_id = _json["tensors"][el].get("id", None)
        if tensor_id is not None and tensor_id in past_ids:
            _json["tensors"][el]["id"] = past_json["tensors"][el]["id"]
        else:
            _id = UID()
            _json["tensors"][el]["id"] = str(_id.value)
            mapping.append((el, _id, _json["tensors"][el].pop("content", None)))

    # Ensure we have the same ID in metadata and dataset
    df_id = past_json["id"]
    _json["id"] = df_id

    # Clean existing storables in storage
    db.session.query(DatasetGroup).filter_by(dataset=df_id).delete(
        synchronize_session=False
    )
    for past_id in past_ids:
        storage.delete(past_id)

    # Create storables from UID/CSV and update metadata
    for name, _id, raw_file in mapping:
        _tensor = pd.read_csv(StringIO(raw_file))
        _tensor = th.tensor(_tensor.values.astype(np.float32))
        _json["tensors"][name]["shape"] = [int(x) for x in _tensor.size()]
        _json["tensors"][name]["dtype"] = "{}".format(_tensor.dtype)
        storage.__setitem__(_id, StorableObject(id=_id, data=_tensor))
        # Ensure we have the same ID in metadata and dataset
        db.session.add(DatasetGroup(bin_object=str(_id.value), dataset=df_id))

    json_obj.binary = _json
    db.session.commit()
    return _json
def __getitem__(self, key: UID) -> StorableObject:
    bin_obj = self.db.session.query(BinObject).filter_by(id=str(key.value)).first()
    obj_metadata = (
        self.db.session.query(ObjectMetadata).filter_by(obj=str(key.value)).first()
    )

    if not bin_obj or not obj_metadata:
        raise Exception("Object not found!")

    # Rebuild VerifyKey objects from their hex-encoded string form
    read_permissions = {
        VerifyKey(raw_key.encode("utf-8"), encoder=HexEncoder): value
        for raw_key, value in obj_metadata.read_permissions.items()
    }

    obj = StorableObject(
        id=UID.from_string(bin_obj.id),
        data=bin_obj.object,
        description=obj_metadata.description,
        tags=obj_metadata.tags,
        read_permissions=read_permissions,
        search_permissions=syft.lib.python.Dict({VERIFYALL: None}),
    )
    return obj
def update_dataset(self, key: str, df_json: dict) -> dict:
    _json = deepcopy(df_json)
    json_obj = self.db.session.query(JsonObject).get(key)
    bin_obj = self.db.session.query(BinaryObject).get(key)
    mapping = []

    # Separate CSV content from metadata
    for el in _json["tensors"].copy():
        _id = UID()
        _json["tensors"][el]["id"] = _id.value.hex
        mapping.append((el, _id, _json["tensors"][el].pop("content", None)))

    # Create storables from UID/CSV and update metadata
    storables = []
    for name, _id, raw_file in mapping:
        _tensor = pd.read_csv(StringIO(raw_file))
        _tensor = th.tensor(_tensor.values.astype(np.float32))
        _json["tensors"][name]["shape"] = [int(x) for x in _tensor.size()]
        _json["tensors"][name]["dtype"] = "{}".format(_tensor.dtype)
        storables.append(StorableObject(id=_id, data=_tensor))

    # Ensure we have the same ID in metadata and dataset
    _id = UID.from_string(json_obj.id)
    df = Dataset(id=_id, data=storables)
    _json["id"] = _id.value.hex

    metadata = get_metadata(self.db)
    metadata.length += 1

    bin_obj.binary = serialize(df, to_bytes=True)
    json_obj.binary = _json
    self.db.session.commit()
    return _json
def process_items(node, tar_obj, user_key):
    # Optional fields
    tags, manifest, description, skip_files = extract_metadata_info(tar_obj)

    dataset_db = Dataset(
        id=str(UID().value),
        manifest=manifest,
        description=description,
        tags=tags,
    )
    db.session.add(dataset_db)

    data = list()
    for item in tar_obj.members:
        if not item.isdir() and item.name not in skip_files:
            reader = csv.reader(
                tar_obj.extractfile(item.name).read().decode().split("\n"),
                delimiter=",",
            )

            dataset = []
            for row in reader:
                if len(row) != 0:
                    dataset.append(row)

            # use the builtin float: np.float is a deprecated (removed) alias
            dataset = np.array(dataset, dtype=float)
            df = th.tensor(dataset, dtype=th.float32)
            id_at_location = UID()

            # Step 2: create message which contains object to send
            storable = StorableObject(
                id=id_at_location,
                data=df,
                tags=tags + ["#" + item.name.split("/")[-1]],
                search_permissions={VERIFYALL: None},
            )
            obj_msg = SaveObjectAction(obj=storable, address=node.address)
            signed_message = obj_msg.sign(
                signing_key=SigningKey(user_key.encode("utf-8"), encoder=HexEncoder)
            )
            node.recv_immediate_msg_without_reply(msg=signed_message)

            obj_dataset_relation = BinObjDataset(
                name=item.name,
                dataset=dataset_db.id,
                obj=str(id_at_location.value),
                dtype=df.__class__.__name__,
                shape=str(tuple(df.shape)),
            )
            db.session.add(obj_dataset_relation)

            data.append(
                {
                    "name": obj_dataset_relation.name,
                    "id": str(id_at_location.value),
                    "tags": tags + ["#" + item.name.split("/")[-1]],
                    "dtype": obj_dataset_relation.dtype,
                    "shape": obj_dataset_relation.shape,
                }
            )

    db.session.commit()
    ds = model_to_json(dataset_db)
    ds["data"] = data
    return ds
def create_storable(
    _id: UID, data: Tensor, description: str, tags: Iterable[str]
) -> StorableObject:
    obj = StorableObject(id=_id, data=data, description=description, tags=tags)
    return obj
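# A minimal usage sketch (hypothetical, not part of the original suite):
# create_storable is a thin wrapper around the StorableObject constructor,
# so the caller supplies the UID and gets it back on the resulting object.
def test_create_storable_example() -> None:
    _id = UID()
    obj = create_storable(
        _id=_id,
        data=th.Tensor([1, 2, 3, 4]),
        description="This is a dummy tensor",
        tags=["dummy", "test"],
    )
    assert obj.id == _id
    assert obj.tags == ["dummy", "test"]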
def test_plan_serialization(client: sy.VirtualMachineClient) -> None:
    # cumbersome way to get a pointer as input for our actions,
    # there is probably a better/shorter way
    t = th.tensor([1, 2, 3])
    tensor_pointer = t.send(client)

    # define actions
    a1 = GetObjectAction(
        id_at_location=UID(), address=Address(), reply_to=Address(), msg_id=UID()
    )
    a2 = RunFunctionOrConstructorAction(
        path="torch.Tensor.add",
        args=tuple(),
        kwargs={},
        id_at_location=UID(),
        address=Address(),
        msg_id=UID(),
    )
    a3 = RunClassMethodAction(
        path="torch.Tensor.add",
        _self=tensor_pointer,
        args=[],
        kwargs={},
        id_at_location=UID(),
        address=Address(),
        msg_id=UID(),
    )
    a4 = GarbageCollectObjectAction(id_at_location=UID(), address=Address())
    a5 = EnumAttributeAction(path="", id_at_location=UID(), address=Address())
    a6 = GetOrSetPropertyAction(
        path="",
        _self=tensor_pointer,
        id_at_location=UID(),
        address=Address(),
        args=[],
        kwargs={},
        action=PropertyActions.GET,
    )
    a7 = GetSetStaticAttributeAction(
        path="",
        id_at_location=UID(),
        address=Address(),
        action=StaticAttributeAction.GET,
    )
    a8 = SaveObjectAction(obj=StorableObject(id=UID(), data=t), address=Address())

    # define plan
    plan = Plan([a1, a2, a3, a4, a5, a6, a7, a8])

    # serialize / deserialize
    blob = serialize(plan)
    plan_reconstructed = sy.deserialize(blob=blob)

    # test
    assert isinstance(plan_reconstructed, Plan)
    assert all(isinstance(a, Action) for a in plan_reconstructed.actions)
def test_create_storable_obj() -> None:
    id = UID()
    data = UID()
    description = "This is a dummy test"
    tags = ["dummy", "test"]
    StorableObject(id=id, data=data, description=description, tags=tags)