def test_get_vector(self):
    """
    Exercise `get_vector` with every supported input flavour and one
    unsupported input.
    """

    expected = [1., 2., 3.]

    # Plain python list: compared for equality with the input.
    self.assertEqual(get_vector(expected), expected)

    # `torch.Tensor` / `numpy.ndarray` look-alike:
    # `.squeeze()` yields an object whose `.tolist()` gives the list.
    torch_like = Mock()
    torch_like.squeeze.return_value.tolist.return_value = expected
    self.assertEqual(get_vector(torch_like), expected)

    # `tf.Tensor` look-alike: `.squeeze()` raises AttributeError, while
    # `.numpy()` yields an object supporting `.squeeze().tolist()`.
    tf_like = Mock()
    tf_like.squeeze = Mock(
        side_effect=AttributeError("TEST TensorFlow Tensor")
    )
    tf_like.numpy.return_value.squeeze.return_value.tolist.return_value = expected
    self.assertEqual(get_vector(tf_like), expected)

    # Unsupported input (a string) must be rejected with a TypeError
    # carrying the exact message below.
    type_error_message = (
        "The type of the 'vector' argument is not supported!\n"
        "Supported types are `list`, 'numpy.ndarray`, `torch.Tensor` "
        "and `tf.Tensor`"
    )
    with self.assertRaises(TypeError) as error:
        get_vector('[1., 2., 3.]')
    check_error_message(self, error, type_error_message)
def __init__(self, content: dict):
    """
    Initialize a NearVector class instance.

    The parent initializer is called with 'content' first; afterwards the
    stored content is validated and its "vector" entry is replaced by the
    result of `get_vector`.

    Parameters
    ----------
    content : dict
        The content of the `nearVector` clause. Must contain the key
        "vector"; may contain the key "certainty".

    Raises
    ------
    KeyError
        If the stored content does not contain "vector".
    TypeError
        If the "vector" value is of a type `get_vector` does not support.

    Notes
    -----
    Any error raised by the parent initializer or by `_check_type`
    (used to require a float "certainty") propagates unchanged.
    """

    super().__init__(content)

    # The vector is mandatory for this clause.
    has_vector = "vector" in self._content
    if not has_vector:
        raise KeyError("No 'vector' key in `content` argument.")

    # Optional field: only validated when the caller supplied it.
    if "certainty" in self._content:
        _check_type(
            var_name='certainty',
            value=self._content["certainty"],
            dtype=float,
        )

    # Normalise the vector last, after all key/type checks passed.
    normalized_vector = get_vector(self._content['vector'])
    self._content['vector'] = normalized_vector
def add(self,
        data_object: dict,
        class_name: str,
        uuid: str = None,
        vector: Sequence = None
    ) -> None:
    """
    Append a single object to this batch.

    The object is NOT validated against the client's schema; only the
    argument types are checked and, when given, the UUID and vector are
    passed through `get_valid_uuid` / `get_vector`.

    Parameters
    ----------
    data_object : dict
        Object to be added as a dict datatype. A deep copy is stored, so
        later changes to the passed dict do not affect the batch.
    class_name : str
        The name of the class this object belongs to.
    uuid : str, optional
        UUID of the object as a string, by default None.
    vector : Sequence, optional
        The embedding of the object that should be created. Used only for
        class objects that do not have a vectorization module.
        Supported types are `list`, `numpy.ndarray`, `torch.Tensor` and
        `tf.Tensor`, by default None.

    Raises
    ------
    TypeError
        If 'data_object' is not a dict or 'class_name' is not a str.

    Notes
    -----
    Errors raised by `get_valid_uuid` or `get_vector` propagate unchanged.
    """

    # Guard clauses: reject wrong argument types before touching state.
    if not isinstance(data_object, dict):
        raise TypeError("Object must be of type dict")
    if not isinstance(class_name, str):
        raise TypeError("Class name must be of type str")

    entry = {}
    entry["class"] = class_name
    # Deep copy so the batch owns its own snapshot of the properties.
    entry["properties"] = copy.deepcopy(data_object)

    if uuid is not None:
        entry["id"] = get_valid_uuid(uuid)
    if vector is not None:
        entry["vector"] = get_vector(vector)

    self._items.append(entry)
def validate(self,
        data_object: Union[dict, str],
        class_name: str,
        uuid: Union[str, uuid_lib.UUID, None]=None,
        vector: Sequence=None
    ) -> dict:
    """
    Ask weaviate whether an object is valid, without storing it.

    Parameters
    ----------
    data_object : dict or str
        Object to be validated.
        If type is str it should be either an URL or a file.
    class_name : str
        Name of the class of the object that should be validated.
    uuid : str, uuid.UUID or None, optional
        The UUID of the object that should be validated against weaviate.
        by default None.
    vector : Sequence, optional
        The embedding of the object that should be validated. Used only
        for class objects that do not have a vectorization module.
        Supported types are `list`, `numpy.ndarray`, `torch.Tensor` and
        `tf.Tensor`, by default None.

    Examples
    --------
    Assume we have a Author class only 'name' property, NO 'age'.

    >>> client1.data_object.validate(
    ...     data_object = {'name': 'H. Lovecraft'},
    ...     class_name = 'Author'
    ... )
    {'error': None, 'valid': True}
    >>> client1.data_object.validate(
    ...     data_object = {'name': 'H. Lovecraft', 'age': 46},
    ...     class_name = 'Author'
    ... )
    {
        "error": [
            {
            "message": "invalid object: no such prop with name 'age' found in class 'Author' in the schema. Check your schema files for which properties in this class are available"
            }
        ],
        "valid": false
    }

    Returns
    -------
    dict
        Validation result. E.g. {"valid": bool, "error": None or list}
        Status 200 gives ``valid=True`` with ``error=None``; status 422
        gives ``valid=False`` with the "error" entry of the response body.

    Raises
    ------
    TypeError
        If 'class_name' is not a str.
    weaviate.UnexpectedStatusCodeException
        If weaviate answers with a status code other than 200 or 422.
    requests.ConnectionError
        If the network connection to weaviate fails.

    Notes
    -----
    Errors raised by `_get_dict_from_object`, `get_valid_uuid` or
    `get_vector` propagate unchanged.
    """

    # The object is loaded before the class name is type-checked.
    properties = _get_dict_from_object(data_object)
    if not isinstance(class_name, str):
        raise TypeError(f"Expected class_name of type `str` but was: {type(class_name)}")

    payload = {
        "class": _capitalize_first_letter(class_name),
        "properties": properties
    }
    if uuid is not None:
        payload['id'] = get_valid_uuid(uuid)
    if vector is not None:
        payload['vector'] = get_vector(vector)

    try:
        response = self._connection.post(
            path="/objects/validate",
            weaviate_object=payload
        )
    except RequestsConnectionError as conn_err:
        raise RequestsConnectionError('Object was not validated against weaviate.')\
            from conn_err

    status = response.status_code
    if status == 200:
        return {"error": None, "valid": True}
    if status == 422:
        # 422 means weaviate processed the request and rejected the object.
        return {"error": response.json()["error"], "valid": False}
    raise UnexpectedStatusCodeException("Validate object", response)
def create(self,
        data_object: Union[dict, str],
        class_name: str,
        uuid: Union[str, uuid_lib.UUID, None]=None,
        vector: Sequence=None
    ) -> str:
    """
    Takes a dict describing the object and adds it to weaviate.

    Parameters
    ----------
    data_object : dict or str
        Object to be added.
        If type is str it should be either an URL or a file.
    class_name : str
        Class name associated with the object given.
    uuid : str, uuid.UUID or None, optional
        Object will be created under this uuid if it is provided.
        Otherwise weaviate will generate a uuid for this object,
        by default None.
    vector : Sequence, optional
        The embedding of the object that should be created. Used only
        for class objects that do not have a vectorization module.
        Supported types are `list`, `numpy.ndarray`, `torch.Tensor` and
        `tf.Tensor`, by default None.

    Examples
    --------
    Schema contains a class Author with only 'name' and 'age' primitive property.

    >>> client.data_object.create(
    ...     data_object = {'name': 'Neil Gaiman', 'age': 60},
    ...     class_name = 'Author',
    ... )
    '46091506-e3a0-41a4-9597-10e3064d8e2d'
    >>> client.data_object.create(
    ...     data_object = {'name': 'Andrzej Sapkowski', 'age': 72},
    ...     class_name = 'Author',
    ...     uuid = 'e067f671-1202-42c6-848b-ff4d1eb804ab'
    ... )
    'e067f671-1202-42c6-848b-ff4d1eb804ab'

    Returns
    -------
    str
        Returns the UUID of the created object if successful.

    Raises
    ------
    TypeError
        If 'class_name' is not a str.
    weaviate.ObjectAlreadyExistsException
        If the error response states that the object already exists.
    weaviate.UnexpectedStatusCodeException
        If creating the object in Weaviate failed for a different reason,
        more information is given in the exception.
    requests.ConnectionError
        If the network connection to weaviate fails.

    Notes
    -----
    Errors raised by `_get_dict_from_object`, `get_valid_uuid` or
    `get_vector` propagate unchanged.
    """

    if not isinstance(class_name, str):
        raise TypeError("Expected class_name of type str but was: "\
                        + str(type(class_name)))
    loaded_data_object = _get_dict_from_object(data_object)

    weaviate_obj = {
        "class": _capitalize_first_letter(class_name),
        "properties": loaded_data_object
    }
    if uuid is not None:
        weaviate_obj["id"] = get_valid_uuid(uuid)
    if vector is not None:
        weaviate_obj["vector"] = get_vector(vector)

    path = "/objects"
    try:
        response = self._connection.post(
            path=path,
            weaviate_object=weaviate_obj
        )
    except RequestsConnectionError as conn_err:
        raise RequestsConnectionError('Object was not added to Weaviate.') from conn_err

    if response.status_code == 200:
        return str(response.json()["id"])

    # Probe the error body for the "already exists" marker. The body of a
    # failed request is not guaranteed to have the expected shape: it may
    # not be JSON at all (ValueError), the "error" list may be empty
    # (IndexError), or a field may be null / of another type (TypeError).
    # None of these must mask the UnexpectedStatusCodeException below.
    try:
        object_does_already_exist = (
            'already exists' in response.json()['error'][0]['message']
        )
    except (KeyError, IndexError, TypeError, ValueError):
        object_does_already_exist = False

    if object_does_already_exist:
        raise ObjectAlreadyExistsException(str(uuid))
    raise UnexpectedStatusCodeException("Creating object", response)
def replace(self,
        data_object: Union[dict, str],
        class_name: str,
        uuid: Union[str, uuid_lib.UUID],
        vector: Sequence=None
    ) -> None:
    """
    Replace an already existing object with the given data object.
    This method replaces the whole object.

    Parameters
    ----------
    data_object : dict or str
        Describes the new values. It may be an URL or path to a json
        or a python dict describing the new values.
    class_name : str
        Name of the class of the object that should be updated.
    uuid : str or uuid.UUID
        The UUID of the object that should be changed.
    vector : Sequence, optional
        The embedding of the object that should be replaced. Used only
        for class objects that do not have a vectorization module.
        Supported types are `list`, `numpy.ndarray`, `torch.Tensor` and
        `tf.Tensor`, by default None.

    Examples
    --------
    >>> author_id = client.data_object.create(
    ...     data_object = {'name': 'H. Lovecraft', 'age': 46},
    ...     class_name = 'Author'
    ... )
    >>> client.data_object.get(author_id)
    {
        "additional": {},
        "class": "Author",
        "creationTimeUnix": 1617112817487,
        "id": "d842a0f4-ad8c-40eb-80b4-bfefc7b1b530",
        "lastUpdateTimeUnix": 1617112817487,
        "properties": {
            "age": 46,
            "name": "H. Lovecraft"
        },
        "vectorWeights": null
    }
    >>> client.data_object.replace(
    ...     data_object = {'name': 'H.P. Lovecraft'},
    ...     class_name = 'Author',
    ...     uuid = author_id
    ... )
    >>> client.data_object.get(author_id)
    {
        "additional": {},
        "class": "Author",
        "id": "d842a0f4-ad8c-40eb-80b4-bfefc7b1b530",
        "lastUpdateTimeUnix": 1617112838668,
        "properties": {
            "name": "H.P. Lovecraft"
        },
        "vectorWeights": null
    }

    Raises
    ------
    TypeError
        If 'class_name' is not a str.
    requests.ConnectionError
        If the network connection to weaviate fails.
    weaviate.UnexpectedStatusCodeException
        If weaviate reports a status code other than 200.

    Notes
    -----
    Errors raised by `_get_dict_from_object`, `get_valid_uuid` or
    `get_vector` propagate unchanged.
    """

    # Same explicit class-name check as the sibling create/validate/update
    # methods, so a wrong type fails with a clear message up front.
    if not isinstance(class_name, str):
        raise TypeError("Expected class_name of type str but was: "\
                        + str(type(class_name)))

    parsed_object = _get_dict_from_object(data_object)
    uuid = get_valid_uuid(uuid)

    weaviate_obj = {
        "id": uuid,
        "class": _capitalize_first_letter(class_name),
        "properties": parsed_object
    }
    if vector is not None:
        weaviate_obj['vector'] = get_vector(vector)

    path = f"/objects/{uuid}"
    try:
        response = self._connection.put(
            path=path,
            weaviate_object=weaviate_obj
        )
    except RequestsConnectionError as conn_err:
        raise RequestsConnectionError('Object was not replaced.') from conn_err

    if response.status_code == 200:
        # Successful update
        return
    raise UnexpectedStatusCodeException("Replace object", response)
def update(self,
        data_object: Union[dict, str],
        class_name: str,
        uuid: Union[str, uuid_lib.UUID],
        vector: Sequence=None
    ) -> None:
    """
    Merge the given fields into an object that already exists in weaviate.
    Overwrites only the specified fields, the unspecified ones remain
    unchanged.

    Parameters
    ----------
    data_object : dict or str
        The object states the fields that should be updated.
        Fields not specified in the 'data_object' remain unchanged.
        Fields that are None will not be changed.
        If type is str it should be either an URL or a file.
    class_name : str
        The class name of the object.
    uuid : str or uuid.UUID
        The ID of the object that should be changed.
    vector : Sequence, optional
        The embedding of the object that should be updated. Used only
        for class objects that do not have a vectorization module.
        Supported types are `list`, `numpy.ndarray`, `torch.Tensor` and
        `tf.Tensor`, by default None.

    Examples
    --------
    >>> author_id = client.data_object.create(
    ...     data_object = {'name': 'Philip Pullman', 'age': 64},
    ...     class_name = 'Author'
    ... )
    >>> client.data_object.get(author_id)
    {
        "additional": {},
        "class": "Author",
        "creationTimeUnix": 1617111215172,
        "id": "bec2bca7-264f-452a-a5bb-427eb4add068",
        "lastUpdateTimeUnix": 1617111215172,
        "properties": {
            "age": 64,
            "name": "Philip Pullman"
        },
        "vectorWeights": null
    }
    >>> client.data_object.update(
    ...     data_object = {'age': 74},
    ...     class_name = 'Author',
    ...     uuid = author_id
    ... )
    >>> client.data_object.get(author_id)
    {
        "additional": {},
        "class": "Author",
        "creationTimeUnix": 1617111215172,
        "id": "bec2bca7-264f-452a-a5bb-427eb4add068",
        "lastUpdateTimeUnix": 1617111215172,
        "properties": {
            "age": 74,
            "name": "Philip Pullman"
        },
        "vectorWeights": null
    }

    Raises
    ------
    TypeError
        If 'class_name' is not a str.
    requests.ConnectionError
        If the network connection to weaviate fails.
    weaviate.UnexpectedStatusCodeException
        If weaviate reports a status code other than 204.

    Notes
    -----
    Errors raised by `get_valid_uuid`, `_get_dict_from_object` or
    `get_vector` propagate unchanged.
    """

    if not isinstance(class_name, str):
        raise TypeError("Class must be type str")

    # The UUID is normalised before the data object is loaded.
    uuid = get_valid_uuid(uuid)
    changed_fields = _get_dict_from_object(data_object)

    payload = {
        "id": uuid,
        "class": _capitalize_first_letter(class_name),
        "properties": changed_fields
    }
    if vector is not None:
        payload['vector'] = get_vector(vector)

    try:
        response = self._connection.patch(
            path=f"/objects/{uuid}",
            weaviate_object=payload
        )
    except RequestsConnectionError as conn_err:
        raise RequestsConnectionError('Object was not updated.') from conn_err

    # 204 is the only status treated as a successful merge.
    if response.status_code != 204:
        raise UnexpectedStatusCodeException("Update of the object not successful", response)