def test_generic_serde():
    """A generic sized type round-trips through serialize/deserialize and keeps its size."""
    original = SizedTestType(10)
    payload = serialize(original)
    assert payload == {'size': 10}
    restored = deserialize(payload, SizedTestType)
    assert issubclass(restored, SizedTestType)
    assert restored.size == 10
    # both the original and the restored type must serialize an object identically
    obj = AClass([1] * 10)
    assert serialize(obj, original) == serialize(obj, restored)
def test_hierarchy_serde():
    """Deserializing through the abstract base resolves to the concrete child type."""
    child = ChildGenericType(10)
    payload = serialize(child)
    assert payload == {'size': 10, 'type': 'child'}
    restored = deserialize(payload, AbstractType)
    assert issubclass(restored, ChildGenericType)
    assert restored.size == 10
    # the resolved child type must behave exactly like the original
    obj = AClass([1] * 10)
    assert serialize(obj, child) == serialize(obj, restored)
def __call__(self, *args, **kwargs):
    """Serialize the call arguments, invoke the remote method and deserialize its result.

    Arguments must be passed either all positionally or all by keyword, never mixed.

    :raises ValueError: on mixed positional/keyword usage, on too many arguments,
        or when a declared parameter was not supplied
    :return: the deserialized return value of ``self.call_method``
    """
    if args and kwargs:
        raise ValueError(
            'Parameters should be passed either in positional or in keyword fashion, not both'
        )
    if len(args) > len(self.method.args) or len(kwargs) > len(
            self.method.args):
        raise ValueError(
            f'Too much parameters given, expected: {len(self.method.args)}'
        )
    data = {}
    # resolve each declared parameter: keyword value wins over positional
    for i, arg in enumerate(self.method.args):
        obj = None
        if len(args) > i:
            obj = args[i]
        if arg.name in kwargs:
            obj = kwargs[arg.name]
        if obj is None:
            # NOTE(review): an explicit None argument is indistinguishable from a
            # missing one here — presumably None is never a valid value; confirm.
            raise ValueError(
                f'Parameter with name "{arg.name}" (position {i}) should be passed'
            )
        data[arg.name] = serialize(obj, arg.type)
    logger.debug('Calling server method "%s", args: %s ...', self.method.name, data)
    out = self.call_method(self.method.name, data)
    logger.debug('Server call returned %s', out)
    return deserialize(out, self.method.out_type)
def serde_and_compare(obj, obj_type=None, true_payload=None, check_payload=True):
    """Serialize ``obj``, optionally verify the payload, deserialize it back and compare.

    :param obj: object to round-trip
    :param obj_type: explicit type to (de)serialize as; defaults to ``type(obj)``
    :param true_payload: expected serialized form; when given, it is also used
        as the input for deserialization
    :param check_payload: whether to assert the payload equals ``true_payload``
    """
    if obj_type is None:
        obj_type = type(obj)
        check_subtype, check_instance = False, True
    else:
        check_subtype, check_instance = not issubclass(obj_type, Serializer), False

    payload = pyjackson.serialize(obj, obj_type)
    if true_payload is not None:
        if check_payload:
            assert true_payload == payload
        payload = true_payload

    new_obj = pyjackson.deserialize(payload, obj_type)
    if check_subtype:
        msg = '{} type must be subtype of {}'.format(new_obj, obj_type)
        assert issubclass(type(new_obj), obj_type), msg
    elif check_instance:
        assert isinstance(new_obj, obj_type)
    assert obj == new_obj
def main():
    """End-to-end ebonite demo: build a task with metrics and an evaluation set,
    push two models and evaluate them, then persist and print the task."""
    ebnt = ebonite.Ebonite.local(clear=True)
    data, target = get_data()
    # we want easy way to transform anything to datasets, so its either this or ebonite.create_dataset (same for metrics)
    # for now there is no difference, however if we want manage datasets with meta and art repos, we use client
    # or create with ebonite.create_... and then push with ebnt.push_... like for models
    # dataset = ebnt.create_dataset(data, target)
    # here we postpone setting task input and output types for easy task creation
    task = ebnt.get_or_create_task('my_project', 'regression_is_my_profession')
    task.add_metric('auc', roc_auc_score)
    task.add_metric('custom', my_custom_metric)
    task.add_evaluation('train', data, target, ['auc', 'custom'])
    pprint(task.evaluation_sets)
    pprint(task.datasets)
    pprint(task.metrics)
    # omit providing dataset as we already have it in task
    mc = task.create_and_push_model(constant, data, model_name='constant')
    mt = task.create_and_push_model(truth, data, model_name='truth')
    pprint(mc.wrapper.methods)
    pprint(mt.wrapper.methods)
    # maybe save result to models? also need different ways to evaluate "not all"
    result = task.evaluate_all()
    print(result)
    # NOTE(review): relies on the private _bind API — confirm there is no public equivalent
    ebnt._bind(task)
    task.save()
    pprint(serialize(task))
def test_unordered_columns(df_type, data):
    """A frame with reversed column order still round-trips back to the original."""
    reversed_cols = list(reversed(data.columns))
    data_rev = data[reversed_cols]
    payload = serialize(data_rev, df_type)
    data2 = deserialize(payload, df_type)
    assert data.equals(data2), f'{data} \n!=\n{data2}'
    # a fresh object must be produced, not the original returned
    assert data2 is not data
def test_dataframe_type(df_type):
    """DataFrameType declares its pandas requirement and round-trips a frame."""
    assert df_type.requirements.modules == ['pandas']
    frame = pd.DataFrame([{'a': 1, 'b': 1}, {'a': 2, 'b': 2}])
    payload = serialize(frame, df_type)
    restored = deserialize(payload, df_type)
    assert frame.equals(restored)
def test_df_type(df_type_fx, request):
    """Any DataFrameType fixture serializes and deserializes to an equal type."""
    df_type = request.getfixturevalue(df_type_fx)
    assert issubclass(df_type, DataFrameType)
    restored = deserialize(serialize(df_type), DatasetType)
    assert df_type == restored
def test_dict_with_list_dataset_type():
    """A dict holding a heterogeneous list is analyzed, round-tripped and rejects bad input."""
    data = {'a': ['b']}
    dt = DatasetAnalyzer.analyze(data)
    assert dt == DictDatasetType(
        {'a': TupleLikeListDatasetType([PrimitiveDatasetType('str')])})

    # primitive-only payloads serialize to themselves
    assert serialize(data, dt) == data
    assert deserialize(data, dt) == data

    # a non-dict input must be rejected in both directions
    with pytest.raises(DeserializationError):
        deserialize('', dt)
    with pytest.raises(SerializationError):
        serialize('', dt)

    expected = {
        'type': 'dict',
        'item_types': {
            'a': {
                'type': 'tuple_like_list',
                'items': [{
                    'type': 'primitive',
                    'ptype': 'str'
                }]
            }
        }
    }
    assert serialize(dt) == expected
    # the same representation appears nested when the type is held by another object
    assert serialize(DTHolder(dt)) == {'dt': expected}
def test_set_hint():
    """A Set[str] hint serializes to a list in either element order."""
    @make_string
    class CClass(Comparable):
        def __init__(self, value: Set[str]):
            self.value = value

    instance = CClass({'a', 'b'})
    serde_and_compare(instance)
    # set iteration order is unspecified, so accept both permutations
    assert serialize(instance) in ({'value': ['a', 'b']}, {'value': ['b', 'a']})
def test_unsized(times):
    """Unsized multidimensional arrays round-trip for any outer length."""
    raw = [[1, 2] for _ in range(times)]
    container = MultidimUnsizedArrayContainer(MockNumpyNdarray(raw))
    payload = serialize(container)
    assert payload['arr'] == raw
    restored = deserialize(payload, MultidimUnsizedArrayContainer)
    assert restored == container
def test_dataframe_type():
    """An explicitly constructed DataFrameType round-trips a matching frame."""
    frame = pd.DataFrame([{'a': 1, 'b': 1}, {'a': 2, 'b': 2}])
    df_type = DataFrameType(['a', 'b'])
    restored = deserialize(serialize(frame, df_type), df_type)
    assert frame.equals(restored)
def test_dataframe_type(df_type, data):
    """The serialized frame survives a full JSON round-trip."""
    assert df_type.requirements.modules == ['pandas']
    payload = serialize(data, df_type)
    # push the payload through actual JSON text to prove it is JSON-safe
    rehydrated = json.loads(json.dumps(payload))
    data2 = deserialize(rehydrated, df_type)
    assert data.equals(data2)
def test_inner_hierarchy_serde():
    """A holder of a hierarchy member serializes the child with its type tag."""
    holder = GenericTypeHolder(ChildGenericType(15))
    payload = serialize(holder)
    assert payload == {'gen_type': {'type': 'child', 'size': 15}}
    assert deserialize(payload, GenericTypeHolder) == holder
def test_all(data2):
    """An analyzed dataset type survives a JSON round-trip and yields a new object."""
    df_type = DatasetAnalyzer.analyze(data2)
    payload = serialize(data2, df_type)
    # round-trip through real JSON text to prove the payload is JSON-safe
    rehydrated = json.loads(json.dumps(payload))
    data = deserialize(rehydrated, df_type)
    assert data2.equals(data)
    assert data2 is not data
def wrapper_meta(self) -> dict:
    """
    :return: pyjackson representation of :class:`~ebonite.core.objects.wrapper.ModelWrapper`
      for this model: e.g., this provides possibility to move a model between repositories
      without its dependencies being installed
    """
    # return the cached representation when it already exists
    if self._wrapper_meta is not None:
        return self._wrapper_meta
    if self._wrapper is None:
        raise ValueError("Either 'wrapper' or 'wrapper_meta' should be provided")
    # lazily build and cache the serialized wrapper
    self._wrapper_meta = serialize(self._wrapper)
    return self._wrapper_meta
def test_all(df):
    """An analyzed frame type survives a JSON round-trip, producing a fresh equal frame."""
    df_type = DatasetAnalyzer.analyze(df)
    payload = serialize(df, df_type)
    rehydrated = json.loads(json.dumps(payload))
    data = deserialize(rehydrated, df_type)
    assert df is not data
    pandas_assert(data, df)
def test_multidim():
    """A 2-D mock ndarray container round-trips via serde."""
    raw = [[1, 2, 3], [4, 5, 6]]
    container = MultidimArrayContainer(MockNumpyNdarray(raw))
    payload = serialize(container)
    assert payload['arr'] == raw
    assert deserialize(payload, MultidimArrayContainer) == container
def test_sized():
    """A fixed-size mock ndarray container round-trips via serde."""
    raw = [1, 2, 3]
    container = SizedArrayContainer(MockNumpyNdarray(raw))
    payload = serialize(container)
    assert payload['arr'] == raw
    assert deserialize(payload, SizedArrayContainer) == container
def save(self, path: str = None):
    """Persist this deploy configuration under the ``ssci.deploy`` key of a YAML config file.

    Existing unrelated content of the file is preserved; only ``ssci.deploy`` is replaced.

    :param path: config file path; defaults to ``SSCIConf.CONFIG_PATH``
    """
    path = path or SSCIConf.CONFIG_PATH
    try:
        with open(path, "r") as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        data = None
    # Bug fix: an empty or non-mapping file makes safe_load return None (or a
    # scalar), and a file without an "ssci" key raised KeyError before —
    # normalize so the nested assignment below always succeeds.
    if not isinstance(data, dict):
        data = {}
    data.setdefault("ssci", {})["deploy"] = serialize(self)
    with open(path, "w") as f:
        yaml.safe_dump(data, f)
def test_set():
    """A set of project objects serializes to a list in either element order."""
    value = {AClass('a'), AClass('b')}
    serde_and_compare(value, Set[AClass])
    # set iteration order is unspecified, so accept both permutations
    permutations = (
        [{'value': 'a'}, {'value': 'b'}],
        [{'value': 'b'}, {'value': 'a'}],
    )
    assert serialize(value, Set[AClass]) in permutations
def test_type_hierarchy__type_import():
    """Deserialization imports a subtype by dotted path even if not imported directly."""
    payload = {
        'type': 'tests.not_imported_directly.ChildClass',
        'field': 'aaa'
    }
    obj = deserialize(payload, RootClass)
    assert isinstance(obj, RootClass)
    assert obj.__class__.__name__ == 'ChildClass'
    assert obj.field == 'aaa'
    # serializing back must reproduce the exact original payload
    assert serialize(obj) == payload
def test_list_dataset_type(ldt):
    """A sized list dataset type serializes with its element type and size."""
    assert ldt == ListDatasetType(PrimitiveDatasetType('int'), 2)
    assert serialize(ldt) == {
        'type': 'list',
        'dtype': {
            'type': 'primitive',
            'ptype': 'int'
        },
        'size': 2
    }
def test_pipeline__load(meta, model, task_saved_art):
    """A pipeline round-trips through serde and loads its models from the meta repo."""
    task_saved_art.push_model(model)
    pipeline = model.as_pipeline('predict')
    task_saved_art.add_pipeline(pipeline)

    restored = deserialize(serialize(meta.get_pipeline_by_id(pipeline.id)), Pipeline)
    assert restored is not None
    # models are not carried through serde; they arrive only after load()
    assert len(restored.models) == 0
    restored.bind_meta_repo(meta)
    restored.load()
    assert len(restored.models) == 1
    assert model.name in restored.models
    assert restored.models[model.name] == model
def test_dict_dataset_type(ddt):
    """A dict dataset type serializes with its per-key item types."""
    assert ddt == DictDatasetType({'a': PrimitiveDatasetType('int')})
    assert serialize(ddt) == {
        'type': 'dict',
        'item_types': {
            "a": {
                'type': 'primitive',
                'ptype': 'int'
            }
        }
    }
def test_inner_list_dataset_type(ldt):
    """A list dataset type nested in a holder serializes under its field name."""
    payload = serialize(DTHolder(ldt))
    assert payload == {
        'dt': {
            'type': 'list',
            'dtype': {
                'type': 'primitive',
                'ptype': 'int'
            },
            'size': 2
        }
    }
def test_inner_dict_dataset_type(ddt):
    """A dict dataset type nested in a holder serializes under its field name."""
    payload = serialize(DTHolder(ddt))
    assert payload == {
        'dt': {
            'type': 'dict',
            'item_types': {
                "a": {
                    'type': 'primitive',
                    'ptype': 'int'
                }
            }
        }
    }
def test_inner_tuple_like_list_dataset_type(tlldt):
    """A tuple-like list type nested in a holder serializes under its field name."""
    payload = serialize(DTHolder(tlldt))
    assert payload == {
        'dt': {
            'type': 'tuple_like_list',
            'items': [{
                'type': 'primitive',
                'ptype': 'str'
            }, {
                'type': 'primitive',
                'ptype': 'int'
            }]
        }
    }
def test_tuple_like_list_dataset_type(tlldt):
    """A tuple-like list type serializes with one entry per positional item."""
    assert tlldt == TupleLikeListDatasetType(
        [PrimitiveDatasetType('str'), PrimitiveDatasetType('int')])
    assert serialize(tlldt) == {
        'type': 'tuple_like_list',
        'items': [{
            'type': 'primitive',
            'ptype': 'str'
        }, {
            'type': 'primitive',
            'ptype': 'int'
        }]
    }
def test_datetime():
    """A frame with datetime columns survives analysis and a JSON round-trip."""
    rows = [{'a': i, 'b': datetime.now()} for i in (1, 2)]
    data = pd.DataFrame(rows)
    df_type = DatasetAnalyzer.analyze(data)
    assert issubclass(df_type, DataFrameType)
    payload = serialize(data, df_type)
    # push through real JSON text to prove datetimes are JSON-safe
    rehydrated = json.loads(json.dumps(payload))
    data2 = deserialize(rehydrated, df_type)
    assert data.equals(data2)
    assert data2 is not data