Example #1
def model(data, prediction):
    dataset_meta = DatasetAnalyzer.analyze(data)
    output_meta = DatasetAnalyzer.analyze(prediction)
    return Model('test model',
                 SklearnModelWrapper(),
                 input_meta=dataset_meta,
                 output_meta=output_meta,
                 requirements=Requirements([]))
Example #2
    def create(cls,
               model_object,
               input_data,
               model_name: str = None,
               additional_artifacts: ArtifactCollection = None,
               additional_requirements: AnyRequirements = None,
               custom_wrapper: ModelWrapper = None,
               custom_artifact: ArtifactCollection = None,
               custom_input_meta: DatasetType = None,
               custom_output_meta: DatasetType = None,
               custom_prediction=None,
               custom_requirements: AnyRequirements = None) -> 'Model':
        """
        Creates a Model instance from an arbitrary model object and a sample of input data

        :param model_object: The model object to analyze.
        :param input_data: Sample input data used to determine the structure of the model's inputs and outputs.
        :param model_name: The model name.
        :param additional_artifacts: Additional artifacts.
        :param additional_requirements: Additional requirements.
        :param custom_wrapper: Custom model wrapper.
        :param custom_artifact: Custom artifact collection to replace all others.
        :param custom_input_meta: Custom input DatasetType.
        :param custom_output_meta: Custom output DatasetType.
        :param custom_prediction: Custom prediction output.
        :param custom_requirements: Custom requirements to replace all others.
        :returns: :py:class:`Model`
        """
        wrapper: ModelWrapper = custom_wrapper or ModelAnalyzer.analyze(
            model_object)
        name = model_name or _generate_model_name(wrapper)

        artifact = custom_artifact or WrapperArtifactCollection(wrapper)
        if additional_artifacts is not None:
            artifact += additional_artifacts

        input_meta = custom_input_meta or DatasetAnalyzer.analyze(input_data)
        prediction = custom_prediction or wrapper.predict(input_data)
        output_meta = custom_output_meta or DatasetAnalyzer.analyze(prediction)

        if custom_requirements is not None:
            requirements = resolve_requirements(custom_requirements)
        else:
            requirements = get_object_requirements(model_object)
            requirements += get_object_requirements(input_data)
            requirements += get_object_requirements(prediction)

        if additional_requirements is not None:
            requirements += additional_requirements
        model = Model(name, wrapper, None, input_meta, output_meta,
                      requirements)
        model._unpersisted_artifacts = artifact
        return model
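The factory above needs only a model object and a representative input sample; the wrapper, dataset types, artifacts and requirements are inferred unless the corresponding custom_* argument overrides them. A minimal usage sketch, assuming Model here is ebonite's core model class and the scikit-learn extension is available (the classifier and the 'logreg-demo' name are purely illustrative):

import numpy as np
from sklearn.linear_model import LogisticRegression

# Train a tiny classifier to serve as the model object to analyze.
X = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]])
y = np.array([0, 1, 1, 0])
clf = LogisticRegression().fit(X, y)

# input_data is only a sample: it drives the analysis of input/output
# dataset types and requirements, it does not retrain the model.
model = Model.create(clf, X, model_name='logreg-demo')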
Example #3
    def _prepare_methods_and_requirements(self, input_data):
        requirements = Requirements()
        requirements += self._model_requirements()

        arg_type = DatasetAnalyzer.analyze(input_data)
        requirements += arg_type.requirements

        methods = {}
        for exposed, wrapped in self._exposed_methods_mapping().items():
            output_data = self._call_method(wrapped, input_data)
            out_type = DatasetAnalyzer.analyze(output_data)

            methods[exposed] = (wrapped, arg_type, out_type)
            requirements += out_type.requirements
        return methods, requirements
Example #4
def test_number():
    ndt = DatasetAnalyzer.analyze(np.float32(.5))
    assert issubclass(ndt, NumpyNumberDatasetType)
    assert ndt.requirements.modules == ['numpy']
    payload = dumps(ndt)
    ndt2 = loads(payload, DatasetType)
    assert ndt == ndt2
Example #5
def test_ndarray():
    nat = DatasetAnalyzer.analyze(np.array([1, 2, 3]))
    assert issubclass(nat, NumpyNdarrayDatasetType)
    payload = dumps(nat)
    nat2 = loads(payload, DatasetType)

    assert nat == nat2
Example #6
def test_bytes_type():
    b = b'hello'
    bdt = DatasetAnalyzer.analyze(b)

    assert bdt.serialize(b) == b
    assert bdt.deserialize(b) == b
    assert bdt.requirements.modules == []
Example #7
def test_feed_dict_type__serialization(tensor):
    obj = {tensor: np.array([[1]])}
    fdt = DatasetAnalyzer.analyze(obj)

    payload = dumps(obj, fdt)
    obj2 = loads(payload, fdt)

    assert obj[tensor] == obj2[tensor.name]
Example #8
def fdt(tensor):
    # force loading of dataset hooks
    import ebonite.ext.tensorflow  # noqa

    return DatasetAnalyzer.analyze({
        tensor: np.array([[1]]),
        'a': np.array([[1]])
    })
Example #9
def test_feed_dict_type__serialization():
    tensor = tf.placeholder('float', (1, 1), name="weight")
    obj = {tensor: np.array([[1]])}
    fdt = DatasetAnalyzer.analyze(obj)

    payload = dumps(obj, fdt)
    obj2 = loads(payload, fdt)

    assert obj[tensor] == obj2[tensor.name]
Example #10
def test_feed_dict_type__self_serialization():
    tensor = tf.placeholder('float', (1, 1), name="weight")
    fdt = DatasetAnalyzer.analyze({
        tensor: np.array([[1]]),
        'a': np.array([[1]])
    })
    assert issubclass(fdt, FeedDictDatasetType)
    payload = dumps(fdt)
    fdt2 = loads(payload, DatasetType)
    assert fdt == fdt2
Example #11
def test_all(df):
    df_type = DatasetAnalyzer.analyze(df)

    obj = serialize(df, df_type)
    payload = json.dumps(obj)
    loaded = json.loads(payload)
    data = deserialize(loaded, df_type)

    assert df is not data
    pandas_assert(data, df)
Example #12
def test_all(data2):
    df_type = DatasetAnalyzer.analyze(data2)

    obj = serialize(data2, df_type)
    payload = json.dumps(obj)
    loaded = json.loads(payload)
    data = deserialize(loaded, df_type)

    assert data2.equals(data)
    assert data2 is not data
Example #13
    def from_feed_dict(cls, feed_dict):
        """
        Factory method to create :class:`FeedDictDatasetType` from feed dict

        :param feed_dict: feed dict
        :return: :class:`FeedDictDatasetType` instance
        """
        types = {}
        for k, v in feed_dict.items():
            types[cls.get_key(k)] = DatasetAnalyzer.analyze(v)
        return FeedDictDatasetType(types)
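Since from_feed_dict is a plain factory classmethod, it can also be called directly when a feed dict is already at hand. A minimal sketch, assuming TensorFlow 1.x (where tf.placeholder exists) and that importing ebonite.ext.tensorflow registers the required dataset hooks, as in the fixtures above; the placeholder name and sample value are illustrative:

import numpy as np
import tensorflow as tf
import ebonite.ext.tensorflow  # noqa -- force loading of dataset hooks

weight = tf.placeholder('float', (1, 1), name='weight')
fdt = FeedDictDatasetType.from_feed_dict({weight: np.array([[1.0]])})
# Keys are normalized via get_key, so values can later be fed by tensor name.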
Example #14
def test_torch__tensors_list(first_tensor, second_tensor):
    # this import ensures that this dataset type is registered in `DatasetAnalyzer`
    from ebonite.ext.torch.dataset import TorchTensorDatasetType  # noqa

    tensor_list = [first_tensor, second_tensor]
    tdt = DatasetAnalyzer.analyze(tensor_list)

    assert len(tdt.items) == 2
    assert tdt.items[0].shape == (5, 5)
    assert tdt.items[0].dtype == 'int32'
    assert tdt.items[0].list_size() == 5
    assert tdt.items[1].shape == (5, 10)
    assert tdt.items[1].dtype == 'float32'
    assert tdt.items[1].list_size() == 5
    assert type_to_schema(tdt) == {
        'properties': {
            0: {
                'items': {
                    'items': {'type': 'integer'},
                    'maxItems': 5,
                    'minItems': 5,
                    'type': 'array'
                },
                'maxItems': 5,
                'minItems': 5,
                'type': 'array'
            },
            1: {
                'items': {
                    'items': {'type': 'number'},
                    'maxItems': 10,
                    'minItems': 10,
                    'type': 'array'
                },
                'maxItems': 5,
                'minItems': 5,
                'type': 'array'
            }
        },
        'required': [0, 1],
        'type': 'object'
    }

    tensor_list_deser = tdt.deserialize(tdt.serialize(tensor_list))
    assert len(tensor_list) == len(tensor_list_deser)
    assert all(torch.equal(tensor, tensor_deser) and tensor.dtype == tensor_deser.dtype
               for tensor, tensor_deser in zip(tensor_list, tensor_list_deser))
Example #15
def test_dict_with_list_dataset_type():
    data = {'a': ['b']}
    dt = DatasetAnalyzer.analyze(data)

    assert dt == DictDatasetType(
        {'a': TupleLikeListDatasetType([PrimitiveDatasetType('str')])})

    assert serialize(data, dt) == data
    assert deserialize(data, dt) == data

    with pytest.raises(DeserializationError):
        deserialize('', dt)

    with pytest.raises(SerializationError):
        serialize('', dt)

    payload = serialize(dt)
    assert payload == {
        'type': 'dict',
        'item_types': {
            'a': {
                'type': 'tuple_like_list',
                'items': [{
                    'type': 'primitive',
                    'ptype': 'str'
                }]
            }
        }
    }

    payload = serialize(DTHolder(dt))
    assert payload == {
        'dt': {
            'type': 'dict',
            'item_types': {
                'a': {
                    'type': 'tuple_like_list',
                    'items': [{
                        'type': 'primitive',
                        'ptype': 'str'
                    }]
                }
            }
        }
    }
Example #16
def test_datetime():
    data = pd.DataFrame([{
        'a': 1,
        'b': datetime.now()
    }, {
        'a': 2,
        'b': datetime.now()
    }])
    df_type = DatasetAnalyzer.analyze(data)
    assert issubclass(df_type, DataFrameType)

    obj = serialize(data, df_type)
    payload = json.dumps(obj)
    loaded = json.loads(payload)
    data2 = deserialize(loaded, df_type)

    assert data.equals(data2)
    assert data2 is not data
Example #17
def test_torch__single_tensor(first_tensor):
    # this import ensures that this dataset type is registered in `DatasetAnalyzer`
    from ebonite.ext.torch.dataset import TorchTensorDatasetType  # noqa

    tdt = DatasetAnalyzer.analyze(first_tensor)

    assert tdt.requirements.modules == ['torch']
    assert tdt.shape == (None, 5)
    assert tdt.dtype == 'int32'
    assert type_to_schema(tdt) == {
        'items': {
            'items': {
                'type': 'integer'
            },
            'maxItems': 5,
            'minItems': 5,
            'type': 'array'
        },
        'type': 'array'
    }

    tensor_deser = tdt.deserialize(tdt.serialize(first_tensor))
    assert torch.equal(first_tensor, tensor_deser)
    assert first_tensor.dtype == tensor_deser.dtype
Example #18
def tlldt():
    return DatasetAnalyzer.analyze(['a', 1])
Example #19
def test_dict_type_requirements():
    tlldt = DatasetAnalyzer.analyze({'a': 10, 'b': np.float32(4.2)})
    assert tlldt.requirements.modules == ['numpy']
Example #20
def ddt():
    return DatasetAnalyzer.analyze({'a': 1})
Example #21
def test_list_type_requirements():
    tlldt = DatasetAnalyzer.analyze([np.float32(7.3), np.float32(4.2)])
    assert tlldt.requirements.modules == ['numpy']
Example #22
def tdt_list(first_tensor, second_tensor):
    # this import ensures that this dataset type is registered in `DatasetAnalyzer`
    from ebonite.ext.torch.dataset import TorchTensorDatasetType  # noqa

    tensor_list = [first_tensor, second_tensor]
    return DatasetAnalyzer.analyze(tensor_list)
Example #23
def test_tuple_type_requirements():
    tlldt = DatasetAnalyzer.analyze(('a', 1, np.float32(4.2)))
    assert tlldt.requirements.modules == ['numpy']
Example #24
def tdt():
    return DatasetAnalyzer.analyze(('a', 1))
Example #25
def test_number():
    ndt = DatasetAnalyzer.analyze(np.float32(.5))
    assert issubclass(ndt, NumpyNumberDatasetType)
    payload = dumps(ndt)
    ndt2 = loads(payload, DatasetType)
    assert ndt == ndt2
Example #26
def test_feed_dict_type__key_error():
    tensor = tf.placeholder('float', (1, 1), name="weight")
    with pytest.raises(ValueError):
        DatasetAnalyzer.analyze({tensor: np.array([[1]]), 1: 1})
Example #27
def dtype_df(dmatrix_df):
    return DatasetAnalyzer.analyze(dmatrix_df)
Example #28
def ldt():
    return DatasetAnalyzer.analyze([1, 1])
Example #29
def dtype_np(dmatrix_np):
    return DatasetAnalyzer.analyze(dmatrix_np)
Example #30
def test_feed_dict_type__key_error(tensor):
    with pytest.raises(ValueError):
        DatasetAnalyzer.analyze({tensor: np.array([[1]]), 1: 1})