def create_dataset(df_json: dict) -> dict:
    """Persist the tensors described by ``df_json`` and return its metadata.

    For every entry under ``df_json["tensors"]`` a fresh ``UID`` is generated,
    the entry's ``"content"`` value is removed from the metadata, parsed as
    CSV, converted to a float32 tensor and written to a ``DiskObjectStore``
    built on the module-level ``db``. A ``DatasetGroup`` row linking each
    tensor id to the dataset id is added to the session, together with a
    ``JsonObject`` holding the metadata, and the session is committed.

    Args:
        df_json: Dataset description containing a ``"tensors"`` mapping. It
            is deep-copied; the caller's dict is not modified.

    Returns:
        The metadata copy, with ``"content"`` removed from each tensor entry
        and ``"id"``, ``"shape"`` and ``"dtype"`` filled in, plus a top-level
        ``"id"`` for the dataset.
    """
    _json = deepcopy(df_json)
    storage = DiskObjectStore(db)
    tensors = _json["tensors"]

    # Separate CSV from metadata. Iterate a snapshot of the keys because the
    # entries are modified while looping.
    mapping = []
    for name in list(tensors):
        _id = UID()
        tensors[name]["id"] = str(_id.value)
        mapping.append((name, _id, tensors[name].pop("content", None)))

    # Ensure we have same ID in metadata and dataset
    df_id = UID()
    _json["id"] = str(df_id.value)

    # Create storables from UID/CSV. Update metadata
    for name, _id, raw_file in mapping:
        _tensor = pd.read_csv(StringIO(raw_file))
        _tensor = th.tensor(_tensor.values.astype(np.float32))
        tensors[name]["shape"] = [int(dim) for dim in _tensor.size()]
        tensors[name]["dtype"] = str(_tensor.dtype)
        storage[_id] = StorableObject(id=_id, data=_tensor)
        # Link the stored tensor to the dataset it belongs to
        db.session.add(
            DatasetGroup(bin_object=str(_id.value), dataset=str(df_id.value))
        )

    json_obj = JsonObject(id=_json["id"], binary=_json)
    metadata = get_metadata(db)
    metadata.length += 1

    db.session.add(json_obj)
    db.session.commit()
    return _json
def test_run_function_or_constructor_action_serde() -> None:
    """Round-trip a RunFunctionOrConstructorAction through serialize/deserialize."""
    vm = sy.VirtualMachine(name="alice")
    vm_client = vm.get_client()
    first_ptr = th.tensor([1, 2, 3]).send(vm_client)
    second_ptr = th.tensor([4, 5, 5]).send(vm_client)

    original = RunFunctionOrConstructorAction(
        path="torch.Tensor.add",
        args=(first_ptr, second_ptr),
        kwargs={},
        id_at_location=UID(),
        address=vm_client.address,
        msg_id=UID(),
    )
    restored = sy.deserialize(blob=original.serialize())

    assert restored.path == original.path
    # FIXME this cannot be checked before we fix the Pointer serde problem (see _proto2object in Pointer)
    # assert restored.args == original.args
    assert restored.kwargs == original.kwargs
    assert restored.address == original.address
    assert restored.id == original.id
    assert restored.id_at_location == original.id_at_location
def _construct_address() -> Address:
    """Build an Address whose four levels each get a freshly generated UID."""
    levels = ("network", "domain", "device", "vm")
    locations = {level: SpecificLocation(id=UID()) for level in levels}
    return Address(**locations)
def test_get_object_action_serde() -> None:
    """Round-trip a GetObjectAction and compare its fields before and after."""
    original = GetObjectAction(
        id_at_location=UID(),
        address=Address(),
        reply_to=Address(),
        msg_id=UID(),
    )
    restored = sy.deserialize(blob=serialize(original))

    for field in ("id", "id_at_location", "address", "reply_to"):
        assert getattr(original, field) == getattr(restored, field)
def test_vm_and_vm_id_property_methods() -> None:
    """Unit test for Address.vm and Address.vm_id methods"""

    def _new_location():
        return SpecificLocation(id=UID())

    initial_vm = _new_location()
    address_with_vm = Address(
        network=_new_location(),
        domain=_new_location(),
        device=_new_location(),
        vm=initial_vm,
    )

    # Test vm getter
    assert address_with_vm.vm == initial_vm

    # Test vm setter
    known_id = UID(value=uuid.UUID(int=333779996850170035686993356951732753684))
    replacement_vm = SpecificLocation(id=known_id)
    address_with_vm.vm = replacement_vm
    assert address_with_vm.vm == replacement_vm

    # Test vm_id getter, with and without a vm set
    address_without_vm = Address(
        network=_new_location(),
        domain=_new_location(),
        device=_new_location(),
    )
    assert address_with_vm.vm_id == known_id
    assert address_without_vm.vm_id is None
def test_domain_and_domain_id_property_methods() -> None:
    """Unit test for Address.domain and Address.domain_id methods"""

    def _new_location():
        return SpecificLocation(id=UID())

    initial_domain = _new_location()
    address_with_domain = Address(
        network=_new_location(),
        domain=initial_domain,
        device=_new_location(),
        vm=_new_location(),
    )

    # Test domain getter
    assert address_with_domain.domain == initial_domain

    # Test domain setter
    known_id = UID(value=uuid.UUID(int=333779996850170035686993356951732753684))
    replacement_domain = SpecificLocation(id=known_id)
    address_with_domain.domain = replacement_domain
    assert address_with_domain.domain == replacement_domain

    # Test domain_id getter, with and without a domain set
    address_without_domain = Address(
        network=_new_location(),
        device=_new_location(),
        vm=_new_location(),
    )
    assert address_with_domain.domain_id == known_id
    assert address_without_domain.domain_id is None
def test_keys(client, database, cleanup):
    """Check that keys() returns exactly the ids of the two stored objects."""
    disk_store = DiskObjectStore(database)
    id1 = UID()
    id2 = UID()

    storable1 = StorableObject(id=id1, data=tensor1)
    disk_store.__setitem__(id1, storable1)
    storable2 = StorableObject(id=id2, data=tensor2)
    disk_store.__setitem__(id2, storable2)

    keys = disk_store.keys()
    assert any(id1 == k for k in keys)
    assert any(id2 == k for k in keys)
    assert len(keys) == 2
def test_address_property_method() -> None:
    """Unit tests for Address.address property method"""
    levels = ("network", "domain", "device", "vm")
    source = Address(**{level: SpecificLocation(id=UID()) for level in levels})

    result = source.address

    assert isinstance(result, Address)
    # Every level of the returned address must equal the source's
    for level in levels:
        assert getattr(result, level) == getattr(source, level)
def test_delete(client, database, cleanup):
    """Store one object, check its rows exist, delete it, check they are gone."""
    disk_store = DiskObjectStore(database)
    _id = UID()
    storable = StorableObject(id=_id, data=tensor1)
    disk_store.__setitem__(_id, storable)

    bin_obj = database.session.query(BinObject).get(str(_id.value))
    metadata = (
        database.session.query(ObjectMetadata).filter_by(obj=str(_id.value)).one()
    )

    assert bin_obj is not None
    assert th.all(th.eq(bin_obj.object, tensor1))

    assert metadata is not None
    assert metadata.tags == []
    assert metadata.description == ""
    assert metadata.read_permissions == {}
    assert metadata.search_permissions == {}

    disk_store.delete(_id)

    assert database.session.query(BinObject).get(str(_id.value)) is None
    # The metadata lookup is expected to raise now that the object is deleted
    with pytest.raises(NoResultFound):
        database.session.query(ObjectMetadata).filter_by(obj=str(_id.value)).one()
def test_clear(client, database, cleanup):
    """Store one object, verify it is retrievable, then clear() and expect empty tables."""
    store = DiskObjectStore(database)
    object_id = UID()
    row_key = str(object_id.value)
    store.__setitem__(object_id, StorableObject(id=object_id, data=tensor1))

    session = database.session
    stored_row = session.query(BinObject).get(row_key)
    meta_row = session.query(ObjectMetadata).filter_by(obj=row_key).one()

    # Binary row holds the tensor that was stored
    assert stored_row is not None
    assert th.all(th.eq(stored_row.object, tensor1))

    # Metadata row is expected to carry empty values
    assert meta_row is not None
    assert meta_row.tags == []
    assert meta_row.description == ""
    assert meta_row.read_permissions == {}
    assert meta_row.search_permissions == {}

    fetched = store.get_object(object_id)
    assert th.all(th.eq(fetched.data, tensor1))
    assert fetched.id == object_id

    store.clear()

    assert database.session.query(BinObject).count() == 0
    assert database.session.query(ObjectMetadata).count() == 0
def test_proto_serialization() -> None:
    """Tests that default Address serialization works as expected - to Protobuf"""
    uid = UID(value=uuid.UUID(int=333779996850170035686993356951732753684))
    loc = SpecificLocation(id=uid, name="Test Location")
    obj = Address(
        name="Test Address",
        network=loc,
        domain=loc,
        device=loc,
        vm=loc,
    )

    # Expected protobuf message, built by hand from the schema
    blob = Address.get_protobuf_schema()(
        name="Test Address",
        has_network=True,
        has_domain=True,
        has_device=True,
        has_vm=True,
        network=sy.serialize(loc),
        domain=sy.serialize(loc),
        device=sy.serialize(loc),
        vm=sy.serialize(loc),
    )

    assert sy.serialize(obj, to_proto=True) == blob
def test_proto_deserialization() -> None:
    """Tests that default Address deserialization works as expected - from Protobuf"""
    levels = ("network", "domain", "device", "vm")
    location = SpecificLocation(
        id=UID(value=uuid.UUID(int=333779996850170035686993356951732753684)),
        name="Test Location",
    )
    expected = Address(**{level: location for level in levels})

    # Hand-built protobuf message: a presence flag and a payload per level
    schema = Address.get_protobuf_schema()
    presence_flags = {f"has_{level}": True for level in levels}
    payloads = {level: location.serialize() for level in levels}
    proto = schema(**presence_flags, **payloads)

    restored = sy.deserialize(blob=proto, from_proto=True)
    assert expected == restored
def create_request(
    self,
    user_id,
    user_name,
    object_id,
    reason,
    request_type,
    verify_key=None,
    tags=None,
    object_type="",
):
    """Register a new request stamped with a fresh id and the current time.

    Args:
        user_id: Forwarded unchanged to ``self.register``.
        user_name: Forwarded unchanged to ``self.register``.
        object_id: Forwarded unchanged to ``self.register``.
        reason: Forwarded unchanged to ``self.register``.
        request_type: Forwarded unchanged to ``self.register``.
        verify_key: Forwarded unchanged to ``self.register``.
        tags: Tags for the request. ``None`` (the default) means an empty
            list; a new list is created per call so that requests never
            share one mutable default object.
        object_type: Forwarded unchanged to ``self.register``.

    Returns:
        Whatever ``self.register`` returns.
    """
    date = datetime.now()
    return self.register(
        id=str(UID().value),
        user_id=user_id,
        user_name=user_name,
        object_id=object_id,
        date=date,
        reason=reason,
        request_type=request_type,
        verify_key=verify_key,
        tags=[] if tags is None else tags,
        object_type=object_type,
    )
def test_dict_creation() -> None:
    """Build Dict from three kinds of source mapping and compare their views."""
    from_string_keys = Dict({String("t1"): 1, String("t2"): 2})
    assert type(getattr(from_string_keys, "id", None)) is UID

    from_builtin = Dict(dict({"t1": 1, "t2": 2}))
    from_builtin._id = UID()
    assert type(getattr(from_builtin, "id", None)) is UID

    user_dict = UserDict({"t1": 1, "t2": 2})
    from_kwargs = Dict(**user_dict)
    assert type(getattr(from_kwargs, "id", None)) is UID

    assert from_string_keys.keys() == from_builtin.keys()
    assert from_string_keys.keys() == from_kwargs.keys()

    # ValuesView uses object.__eq__
    # https://stackoverflow.com/questions/34312674/why-are-the-values-of-an-ordereddict-not-equal
    assert from_string_keys.values() != from_builtin.values()
    assert from_string_keys.values() != from_kwargs.values()

    assert from_string_keys.items() == from_builtin.items()
    assert from_string_keys.items() == from_kwargs.items()

    collected = list(iter(from_builtin.values()))
    assert len(collected) == 2
    assert type(collected) is list
def test_values(client, database, cleanup):
    """Check that values() yields exactly the two stored objects."""
    disk_store = DiskObjectStore(database)
    id1 = UID()
    id2 = UID()

    storable1 = StorableObject(id=id1, data=tensor1)
    disk_store.__setitem__(id1, storable1)
    storable2 = StorableObject(id=id2, data=tensor2)
    disk_store.__setitem__(id2, storable2)

    values = disk_store.values()
    values_data = [v.data for v in values]

    # Order is not assumed: each tensor only has to appear somewhere
    assert any(th.all(th.eq(tensor1, v)) for v in values_data)
    assert any(th.all(th.eq(tensor2, v)) for v in values_data)
    assert len(values_data) == 2
def test_get_objects_of_type(client, database, cleanup):
    """Check that get_objects_of_type(th.Tensor) returns both stored tensors."""
    disk_store = DiskObjectStore(database)
    id1 = UID()
    id2 = UID()

    storable1 = StorableObject(id=id1, data=tensor1)
    disk_store.__setitem__(id1, storable1)
    storable2 = StorableObject(id=id2, data=tensor2)
    disk_store.__setitem__(id2, storable2)

    selected = disk_store.get_objects_of_type(th.Tensor)
    selected_data = [x.data for x in selected]

    # Order is not assumed: each tensor only has to appear somewhere
    assert any(th.all(th.eq(tensor1, d_)) for d_ in selected_data)
    assert any(th.all(th.eq(tensor2, d_)) for d_ in selected_data)
    assert len(selected_data) == 2
def test_to_string() -> None:
    """Tests that SpecificLocation generates an intuitive string."""
    expected = "<SpecificLocation: fb1bb0675bb74c49becee700ab0a1514>"
    location = SpecificLocation(
        id=UID(value=uuid.UUID(int=333779996850170035686993356951732753684))
    )

    # str() and __repr__() are expected to give the same text
    assert str(location) == expected
    assert location.__repr__() == expected
def test_specific_location_init_with_specific_id() -> None:
    """Test that SpecificLocation will use the ID you pass into the constructor"""
    expected_id = UID(value=uuid.UUID(int=333779996850170035686993356951732753684))
    location = SpecificLocation(id=expected_id)
    assert location.id == expected_id
def test_default_serialization() -> None:
    """Tests that default SpecificLocation serialization works as expected - to Protobuf"""
    location = SpecificLocation(
        id=UID(value=uuid.UUID(int=333779996850170035686993356951732753684)),
        name="Test",
    )
    expected = location.to_proto()

    # serialize() with no arguments is expected to match to_proto()
    assert location.serialize() == expected
def test_target_id_property_method_with_a_return() -> None:
    """Unit test for Address.target_id method"""
    network, domain, device, vm = (SpecificLocation(id=UID()) for _ in range(4))
    address = Address(network=network, domain=domain, device=device, vm=vm)

    assert address.target_id == vm

    # Clear one level at a time, from vm upward; after each step target_id
    # is expected to equal the next remaining level.
    fallbacks = (("vm", device), ("device", domain), ("domain", network))
    for cleared_level, expected_target in fallbacks:
        setattr(address, cleared_level, None)
        assert address.target_id == expected_target
def test_default_deserialization() -> None:
    """Tests that default SpecificLocation deserialization works as expected - from Protobuf"""
    known_id = UID(value=uuid.UUID(int=333779996850170035686993356951732753684))
    expected = SpecificLocation(id=known_id, name="Test")

    # The protobuf message is built with the id only (no name)
    schema = SpecificLocation.get_protobuf_schema()
    proto = schema(id=known_id.serialize())

    restored = sy.deserialize(blob=proto)
    assert expected == restored
def test_plan_execution(client: sy.VirtualMachineClient) -> None:
    """Send a two-step Plan of tensor additions and check both results."""
    operand_a, operand_b, operand_c = (
        th.tensor(values).send(client)
        for values in ([1, 2, 3], [4, 5, 6], [7, 8, 9])
    )

    # Placeholder tensors; their remote ids are reused as the action outputs
    first_result = th.tensor([0, 0, 0]).send(client)
    second_result = th.tensor([0, 0, 0]).send(client)

    add_a_b = RunClassMethodAction(
        path="torch.Tensor.add",
        _self=operand_a,
        args=[operand_b],
        kwargs={},
        id_at_location=first_result.id_at_location,
        address=Address(),
        msg_id=UID(),
    )
    # The second step takes the first step's output as its input
    add_result_c = RunClassMethodAction(
        path="torch.Tensor.add",
        _self=first_result,
        args=[operand_c],
        kwargs={},
        id_at_location=second_result.id_at_location,
        address=Address(),
        msg_id=UID(),
    )

    remote_plan = Plan([add_a_b, add_result_c]).send(client)
    remote_plan()

    expected_first = th.tensor([5, 7, 9])
    expected_second = th.tensor([12, 15, 18])
    assert all(expected_first == first_result.get())
    assert all(expected_second == second_result.get())
def _gen_address_kwargs() -> list:
    """
    Helper method to generate pre-ordered arguments for initializing an Address instance.
    There are at least 3 arguments, all taken from 'vm', 'device', 'domain', 'network'.
    """
    # the order matches the _gen_icons below
    arg_sets = [list(triple) for triple in combinations(ARGUMENTS, 3)]
    arg_sets.append(ARGUMENTS)

    # One kwargs dict per argument set, each key getting a fresh location
    return [
        {key: SpecificLocation(id=UID()) for key in arg_set}
        for arg_set in arg_sets
    ]
def test_target_emoji_method() -> None:
    """Unit test for Address.target_emoji method"""
    shared_id = UID(value=uuid.UUID(int=333779996850170035686993356951732753684))
    levels = ("network", "domain", "device", "vm")

    # Every level gets its own SpecificLocation built from the same id
    address = Address(**{level: SpecificLocation(id=shared_id) for level in levels})

    assert address.target_emoji() == "@<UID:🙍🛖>"
def test_proto_serialization() -> None:
    """Tests that default SpecificLocation serialization works as expected - to Protobuf"""
    uid = UID(value=uuid.UUID(int=333779996850170035686993356951732753684))
    obj = SpecificLocation(id=uid, name="Test")

    # Expected protobuf message, built by hand from the schema
    blob = SpecificLocation.get_protobuf_schema()(id=sy.serialize(uid), name="Test")

    assert sy.serialize(obj, to_proto=True) == blob
def test_garbage_collection_object_action_serde() -> None:
    """Round-trip a GarbageCollectObjectAction and compare id and address."""
    target_id = UID()
    destination = Address(network=SpecificLocation(), device=SpecificLocation())
    original = GarbageCollectObjectAction(
        id_at_location=target_id,
        address=destination,
    )

    restored = sy.deserialize(blob=serialize(original))

    assert restored.id_at_location == original.id_at_location
    assert restored.address == original.address
def test_binary_deserialization() -> None:
    """Test that binary SpecificLocation deserialization works as expected"""
    expected = SpecificLocation(
        id=UID(value=uuid.UUID(int=333779996850170035686993356951732753684)),
        name="Test",
    )
    # Fixed byte payload, split over two literals for line length only
    payload = (
        b"\n/syft.core.io.location.specific.SpecificLocation\x12\x1a\n\x12\n\x10"
        b"\xfb\x1b\xb0g[\xb7LI\xbe\xce\xe7\x00\xab\n\x15\x14\x12\x04Test"
    )

    restored = sy.deserialize(blob=payload, from_bytes=True)
    assert restored == expected
def test_binary_serialization() -> None:
    """Tests that binary SpecificLocation serializes as expected"""
    # Fixed byte payload, split over two literals for line length only
    expected = (
        b"\n/syft.core.io.location.specific.SpecificLocation\x12\x1a\n\x12\n\x10"
        b"\xfb\x1b\xb0g[\xb7LI\xbe\xce\xe7\x00\xab\n\x15\x14\x12\x04Test"
    )
    location = SpecificLocation(
        id=UID(value=uuid.UUID(int=333779996850170035686993356951732753684)),
        name="Test",
    )

    # All three byte-producing entry points must give the same payload
    assert location.binary() == expected
    assert location.to_bytes() == expected
    assert location.serialize(to_bytes=True) == expected
def store_json(self, df_json: dict) -> dict:
    """Build a Dataset from a JSON description, persist it, return its metadata.

    Each entry under ``df_json["tensors"]`` gets a new ``UID``; its
    ``"content"`` value is removed from the metadata, parsed as CSV and
    turned into a float32 tensor wrapped in a ``StorableObject``. The
    storables are bundled into a ``Dataset`` that is serialized to bytes and
    saved as a ``BinaryObject``, next to a ``JsonObject`` holding the
    metadata, via ``self.db.session``.

    Args:
        df_json: Dataset description containing a ``"tensors"`` mapping. It
            is deep-copied; the caller's dict is not modified.

    Returns:
        The metadata copy with ``"id"``, ``"shape"`` and ``"dtype"`` filled
        in per tensor, plus a top-level ``"id"`` for the dataset.
    """
    _json = deepcopy(df_json)
    tensors_meta = _json["tensors"]

    # Separate CSV from metadata (iterate a copy: entries are modified)
    pending = []
    for tensor_name in tensors_meta.copy():
        tensor_uid = UID()
        entry = tensors_meta[tensor_name]
        entry["id"] = tensor_uid.value.hex
        pending.append((tensor_name, tensor_uid, entry.pop("content", None)))

    # Create storables from UID/CSV and update the metadata
    storables = []
    for tensor_name, tensor_uid, raw_csv in pending:
        frame = pd.read_csv(StringIO(raw_csv))
        tensor = th.tensor(frame.values.astype(np.float32))
        entry = tensors_meta[tensor_name]
        entry["shape"] = [int(dim) for dim in tensor.size()]
        entry["dtype"] = f"{tensor.dtype}"
        storables.append(StorableObject(id=tensor_uid, data=tensor))

    # Ensure we have same ID in metadata and dataset
    dataset_uid = UID()
    dataset = Dataset(id=dataset_uid, data=storables)
    _json["id"] = dataset_uid.value.hex

    bin_obj = BinaryObject(
        id=dataset.id.value.hex,
        binary=serialize(dataset, to_bytes=True),
    )
    json_obj = JsonObject(id=_json["id"], binary=_json)

    metadata = get_metadata(self.db)
    metadata.length += 1

    self.db.session.add(bin_obj)
    self.db.session.add(json_obj)
    self.db.session.commit()
    return _json
def store_bytes(self, obj: bytes) -> str:
    """Persist a serialized dataset and return the hex id it was stored under.

    The raw bytes are saved as a ``BinaryObject``; they are also deserialized
    and converted with ``dataset_to_dict`` so that a ``JsonObject`` with the
    same id can be saved alongside.

    Args:
        obj: Serialized dataset bytes.

    Returns:
        Hex string of the newly generated ``UID``.
    """
    hex_id = UID().value.hex

    bin_obj = BinaryObject(id=hex_id, binary=obj)
    dataset = deserialize(blob=obj, from_bytes=True)
    json_obj = JsonObject(id=hex_id, binary=dataset_to_dict(dataset))

    metadata = get_metadata(self.db)
    metadata.length += 1

    self.db.session.add(bin_obj)
    self.db.session.add(json_obj)
    self.db.session.commit()
    return hex_id