def model(data, prediction):
    dataset_meta = DatasetAnalyzer.analyze(data)
    output_meta = DatasetAnalyzer.analyze(prediction)
    return Model('test model', SklearnModelWrapper(),
                 input_meta=dataset_meta, output_meta=output_meta,
                 requirements=Requirements([]))
def create(cls, model_object, input_data,
           model_name: str = None,
           additional_artifacts: ArtifactCollection = None,
           additional_requirements: AnyRequirements = None,
           custom_wrapper: ModelWrapper = None,
           custom_artifact: ArtifactCollection = None,
           custom_input_meta: DatasetType = None,
           custom_output_meta: DatasetType = None,
           custom_prediction=None,
           custom_requirements: AnyRequirements = None) -> 'Model':
    """
    Creates a Model instance from an arbitrary model object and a sample of input data

    :param model_object: The model object to analyze.
    :param input_data: Input data sample used to infer input and output dataset types.
    :param model_name: The model name.
    :param additional_artifacts: Additional artifacts to include.
    :param additional_requirements: Additional requirements to include.
    :param custom_wrapper: Custom model wrapper.
    :param custom_artifact: Custom artifact collection to replace all others.
    :param custom_input_meta: Custom input DatasetType.
    :param custom_output_meta: Custom output DatasetType.
    :param custom_prediction: Custom prediction output.
    :param custom_requirements: Custom requirements to replace all others.
    :returns: :py:class:`Model`
    """
    wrapper: ModelWrapper = custom_wrapper or ModelAnalyzer.analyze(model_object)
    name = model_name or _generate_model_name(wrapper)

    artifact = custom_artifact or WrapperArtifactCollection(wrapper)
    if additional_artifacts is not None:
        artifact += additional_artifacts

    input_meta = custom_input_meta or DatasetAnalyzer.analyze(input_data)
    prediction = custom_prediction or wrapper.predict(input_data)
    output_meta = custom_output_meta or DatasetAnalyzer.analyze(prediction)

    if custom_requirements is not None:
        requirements = resolve_requirements(custom_requirements)
    else:
        requirements = get_object_requirements(model_object)
        requirements += get_object_requirements(input_data)
        requirements += get_object_requirements(prediction)

    if additional_requirements is not None:
        requirements += additional_requirements

    model = Model(name, wrapper, None, input_meta, output_meta, requirements)
    model._unpersisted_artifacts = artifact
    return model
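# Hedged usage sketch (not from the source): shows how the `create` classmethod above
# might be called on the `Model` class it belongs to. The scikit-learn estimator and
# the toy data below are illustrative assumptions; `Model` refers to the class whose
# `create` method is shown above and is assumed to be importable from ebonite.
from sklearn.linear_model import LogisticRegression
import numpy as np

X = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]])
y = np.array([0, 1, 1, 0])
clf = LogisticRegression().fit(X, y)

# Wrapper, input/output dataset types and requirements are inferred from the
# model object and the data sample by the analyzers used inside `create`.
model = Model.create(clf, X, model_name='logreg-example')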
def _prepare_methods_and_requirements(self, input_data):
    requirements = Requirements()
    requirements += self._model_requirements()

    arg_type = DatasetAnalyzer.analyze(input_data)
    requirements += arg_type.requirements

    methods = {}
    for exposed, wrapped in self._exposed_methods_mapping().items():
        output_data = self._call_method(wrapped, input_data)
        out_type = DatasetAnalyzer.analyze(output_data)
        methods[exposed] = (wrapped, arg_type, out_type)
        requirements += out_type.requirements

    return methods, requirements
def test_number():
    ndt = DatasetAnalyzer.analyze(np.float32(.5))
    assert issubclass(ndt, NumpyNumberDatasetType)
    assert ndt.requirements.modules == ['numpy']

    payload = dumps(ndt)
    ndt2 = loads(payload, DatasetType)

    assert ndt == ndt2
def test_ndarray():
    nat = DatasetAnalyzer.analyze(np.array([1, 2, 3]))
    assert issubclass(nat, NumpyNdarrayDatasetType)

    payload = dumps(nat)
    nat2 = loads(payload, DatasetType)

    assert nat == nat2
def test_bytes_type():
    b = b'hello'
    bdt = DatasetAnalyzer.analyze(b)

    assert bdt.serialize(b) == b
    assert bdt.deserialize(b) == b
    assert bdt.requirements.modules == []
def test_feed_dict_type__serialization(tensor):
    obj = {tensor: np.array([[1]])}
    fdt = DatasetAnalyzer.analyze(obj)

    payload = dumps(obj, fdt)
    obj2 = loads(payload, fdt)

    assert obj[tensor] == obj2[tensor.name]
def fdt(tensor):
    # force loading of dataset hooks
    import ebonite.ext.tensorflow  # noqa

    return DatasetAnalyzer.analyze({
        tensor: np.array([[1]]),
        'a': np.array([[1]])
    })
def test_feed_dict_type__serialization():
    tensor = tf.placeholder('float', (1, 1), name="weight")
    obj = {tensor: np.array([[1]])}
    fdt = DatasetAnalyzer.analyze(obj)

    payload = dumps(obj, fdt)
    obj2 = loads(payload, fdt)

    assert obj[tensor] == obj2[tensor.name]
def test_feed_dict_type__self_serialization():
    tensor = tf.placeholder('float', (1, 1), name="weight")
    fdt = DatasetAnalyzer.analyze({
        tensor: np.array([[1]]),
        'a': np.array([[1]])
    })
    assert issubclass(fdt, FeedDictDatasetType)

    payload = dumps(fdt)
    fdt2 = loads(payload, DatasetType)

    assert fdt == fdt2
def test_all(df):
    df_type = DatasetAnalyzer.analyze(df)

    obj = serialize(df, df_type)
    payload = json.dumps(obj)
    loaded = json.loads(payload)
    data = deserialize(loaded, df_type)

    assert df is not data
    pandas_assert(data, df)
def test_all(data2):
    df_type = DatasetAnalyzer.analyze(data2)

    obj = serialize(data2, df_type)
    payload = json.dumps(obj)
    loaded = json.loads(payload)
    data = deserialize(loaded, df_type)

    assert data2.equals(data)
    assert data2 is not data
def from_feed_dict(cls, feed_dict):
    """
    Factory method to create :class:`FeedDictDatasetType` from a feed dict

    :param feed_dict: feed dict
    :return: :class:`FeedDictDatasetType` instance
    """
    types = {}
    for k, v in feed_dict.items():
        types[cls.get_key(k)] = DatasetAnalyzer.analyze(v)
    return FeedDictDatasetType(types)
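# Hedged usage sketch (not from the source): assumes `from_feed_dict` is a classmethod
# on FeedDictDatasetType, as its docstring suggests, and uses the TF 1.x-style
# placeholder seen in the tests above. Each value in the feed dict is analyzed into
# its own DatasetType keyed by the placeholder name (or the plain string key).
import numpy as np
import tensorflow as tf

tensor = tf.placeholder('float', (1, 1), name="weight")
fdt = FeedDictDatasetType.from_feed_dict({
    tensor: np.array([[1]]),
    'a': np.array([[1]])
})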
def test_torch__tensors_list(first_tensor, second_tensor):
    # this import ensures that this dataset type is registered in `DatasetAnalyzer`
    from ebonite.ext.torch.dataset import TorchTensorDatasetType  # noqa

    tensor_list = [first_tensor, second_tensor]
    tdt = DatasetAnalyzer.analyze(tensor_list)

    assert len(tdt.items) == 2
    assert tdt.items[0].shape == (5, 5)
    assert tdt.items[0].dtype == 'int32'
    assert tdt.items[0].list_size() == 5
    assert tdt.items[1].shape == (5, 10)
    assert tdt.items[1].dtype == 'float32'
    assert tdt.items[1].list_size() == 5

    assert type_to_schema(tdt) == {
        'properties': {
            0: {
                'items': {
                    'items': {'type': 'integer'},
                    'maxItems': 5,
                    'minItems': 5,
                    'type': 'array'
                },
                'maxItems': 5,
                'minItems': 5,
                'type': 'array'
            },
            1: {
                'items': {
                    'items': {'type': 'number'},
                    'maxItems': 10,
                    'minItems': 10,
                    'type': 'array'
                },
                'maxItems': 5,
                'minItems': 5,
                'type': 'array'
            }
        },
        'required': [0, 1],
        'type': 'object'
    }

    tensor_list_deser = tdt.deserialize(tdt.serialize(tensor_list))
    assert len(tensor_list) == len(tensor_list_deser)
    assert all(torch.equal(tensor, tensor_deser) and tensor.dtype == tensor_deser.dtype
               for tensor, tensor_deser in zip(tensor_list, tensor_list_deser))
def test_dict_with_list_dataset_type():
    data = {'a': ['b']}
    dt = DatasetAnalyzer.analyze(data)

    assert dt == DictDatasetType(
        {'a': TupleLikeListDatasetType([PrimitiveDatasetType('str')])})
    assert serialize(data, dt) == data
    assert deserialize(data, dt) == data

    with pytest.raises(DeserializationError):
        deserialize('', dt)

    with pytest.raises(SerializationError):
        serialize('', dt)

    payload = serialize(dt)
    assert payload == {
        'type': 'dict',
        'item_types': {
            'a': {
                'type': 'tuple_like_list',
                'items': [{
                    'type': 'primitive',
                    'ptype': 'str'
                }]
            }
        }
    }

    payload = serialize(DTHolder(dt))
    assert payload == {
        'dt': {
            'type': 'dict',
            'item_types': {
                'a': {
                    'type': 'tuple_like_list',
                    'items': [{
                        'type': 'primitive',
                        'ptype': 'str'
                    }]
                }
            }
        }
    }
def test_datetime():
    data = pd.DataFrame([{
        'a': 1,
        'b': datetime.now()
    }, {
        'a': 2,
        'b': datetime.now()
    }])
    df_type = DatasetAnalyzer.analyze(data)
    assert issubclass(df_type, DataFrameType)

    obj = serialize(data, df_type)
    payload = json.dumps(obj)
    loaded = json.loads(payload)
    data2 = deserialize(loaded, df_type)

    assert data.equals(data2)
    assert data2 is not data
def test_torch__single_tensor(first_tensor):
    # this import ensures that this dataset type is registered in `DatasetAnalyzer`
    from ebonite.ext.torch.dataset import TorchTensorDatasetType  # noqa

    tdt = DatasetAnalyzer.analyze(first_tensor)

    assert tdt.requirements.modules == ['torch']
    assert tdt.shape == (None, 5)
    assert tdt.dtype == 'int32'
    assert type_to_schema(tdt) == {
        'items': {
            'items': {
                'type': 'integer'
            },
            'maxItems': 5,
            'minItems': 5,
            'type': 'array'
        },
        'type': 'array'
    }

    tensor_deser = tdt.deserialize(tdt.serialize(first_tensor))
    assert torch.equal(first_tensor, tensor_deser)
    assert first_tensor.dtype == tensor_deser.dtype
def tlldt():
    return DatasetAnalyzer.analyze(['a', 1])
def test_dict_type_requirements():
    tlldt = DatasetAnalyzer.analyze({'a': 10, 'b': np.float32(4.2)})
    assert tlldt.requirements.modules == ['numpy']
def ddt():
    return DatasetAnalyzer.analyze({'a': 1})
def test_list_type_requirements():
    tlldt = DatasetAnalyzer.analyze([np.float32(7.3), np.float32(4.2)])
    assert tlldt.requirements.modules == ['numpy']
def tdt_list(first_tensor, second_tensor):
    # this import ensures that this dataset type is registered in `DatasetAnalyzer`
    from ebonite.ext.torch.dataset import TorchTensorDatasetType  # noqa

    tensor_list = [first_tensor, second_tensor]
    return DatasetAnalyzer.analyze(tensor_list)
def test_tuple_type_requirements():
    tlldt = DatasetAnalyzer.analyze(('a', 1, np.float32(4.2)))
    assert tlldt.requirements.modules == ['numpy']
def tdt():
    return DatasetAnalyzer.analyze(('a', 1))
def test_number():
    ndt = DatasetAnalyzer.analyze(np.float32(.5))
    assert issubclass(ndt, NumpyNumberDatasetType)

    payload = dumps(ndt)
    ndt2 = loads(payload, DatasetType)

    assert ndt == ndt2
def test_feed_dict_type__key_error():
    tensor = tf.placeholder('float', (1, 1), name="weight")
    with pytest.raises(ValueError):
        DatasetAnalyzer.analyze({tensor: np.array([[1]]), 1: 1})
def dtype_df(dmatrix_df):
    return DatasetAnalyzer.analyze(dmatrix_df)
def ldt():
    return DatasetAnalyzer.analyze([1, 1])
def dtype_np(dmatrix_np):
    return DatasetAnalyzer.analyze(dmatrix_np)
def test_feed_dict_type__key_error(tensor):
    with pytest.raises(ValueError):
        DatasetAnalyzer.analyze({tensor: np.array([[1]]), 1: 1})