def test_custom_layers(self):
    """Test adding custom layers to the definition"""

    # Make a simple model
    model = _make_simple_model()
    tmpdir = mkdtemp()
    try:
        # Save it
        model_path = os.path.join(tmpdir, 'model.hd5')
        model.save(model_path)

        # Create the metadata
        metadata = KerasModel.create_model(model_path, ['y'],
                                           custom_objects={'Dense': keras.layers.Dense})
        metadata.set_title('test').set_name('test')

        # Make sure it has the custom object definitions
        self.assertIn('Dense', metadata['servable']['options']['custom_objects'])

        # Validate it against the DLHub schema
        validate_against_dlhub_schema(metadata.to_dict(), 'servable')
    finally:
        shutil.rmtree(tmpdir)

    # Test the errors
    with self.assertRaises(ValueError) as exc:
        metadata.add_custom_object('BadLayer', float)
    self.assertIn('subclass', str(exc.exception))
def test_multinetwork(self):
    model = MultiNetwork()

    with TemporaryDirectory() as tp:
        model_path = os.path.join(tp, 'model.pth')
        torch.save(model, model_path)

        metadata = TorchModel.create_model(model_path, [(None, 4)] * 2, [(None, 1)] * 2,
                                           input_type='float',
                                           output_type=['float', 'float'])
        metadata.set_name('t').set_title('t')

        # Test the input and output shapes
        self.assertEqual(metadata['servable']['methods']['run']['input'],
                         {'type': 'tuple', 'description': 'Tuple of tensors',
                          'element_types': [
                              {'type': 'ndarray', 'description': 'Tensor',
                               'shape': [None, 4], 'item_type': {'type': 'float'}},
                              {'type': 'ndarray', 'description': 'Tensor',
                               'shape': [None, 4], 'item_type': {'type': 'float'}}
                          ]})
        self.assertEqual(metadata['servable']['methods']['run']['output'],
                         {'type': 'tuple', 'description': 'Tuple of tensors',
                          'element_types': [
                              {'type': 'ndarray', 'description': 'Tensor',
                               'shape': [None, 1], 'item_type': {'type': 'float'}},
                              {'type': 'ndarray', 'description': 'Tensor',
                               'shape': [None, 1], 'item_type': {'type': 'float'}}
                          ]})

        validate_against_dlhub_schema(metadata.to_dict(), 'servable')
def test_pickle(self):
    pickle_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'model.pkl'))

    # Make the model
    model = PythonClassMethodModel.create_model(pickle_path, 'predict_proba',
                                                {'fake': 'kwarg'})
    model.set_title('Python example').set_name("class_method")

    # Make sure it throws value errors if inputs are not set
    with self.assertRaises(ValueError):
        model.to_dict()

    # Define the input and output types
    model.set_inputs('ndarray', 'Features for each entry', shape=[None, 4])
    model.set_outputs('ndarray', 'Predicted probabilities of being each iris species',
                      shape=[None, 3])

    # Make sure attempting to set "unpack" fails
    with self.assertRaises(ValueError):
        model.set_unpack_inputs(True)

    # Add some requirements
    model.add_requirement('scikit-learn', 'detect')
    model.add_requirement('numpy', 'detect')
    model.add_requirement('sklearn', 'latest')  # Dummy project; version shouldn't change

    # Check the model output
    output = model.to_dict()
    assert output['dlhub']['files'] == {'pickle': pickle_path}
    assert output['dlhub']['dependencies']['python'] == {
        'scikit-learn': skl_version,
        'numpy': numpy_version,
        'sklearn': '0.0'
    }
    assert output['servable']['shim'] == 'python.PythonClassMethodServable'
    assert 'run' in output['servable']['methods']
    assert output['servable']['methods']['run']['input'] == {
        'type': 'ndarray',
        'description': 'Features for each entry',
        'shape': [None, 4]
    }
    assert output['servable']['methods']['run']['output'] == {
        'type': 'ndarray',
        'description': 'Predicted probabilities of being each iris species',
        'shape': [None, 3]
    }
    assert (output['servable']['methods']['run']['method_details']
            ['class_name'].endswith('.SVC'))
    assert (output['servable']['methods']['run']['method_details']
            ['method_name'] == 'predict_proba')
    self.assertEqual([pickle_path], model.list_files())

    validate_against_dlhub_schema(output, 'servable')
def test_multinetwork(tmpdir):
    model = MultiNetwork()
    model_path = os.path.join(tmpdir, 'model.pth')
    torch.save(model, model_path)

    metadata = TorchModel.create_model(model_path, [(None, 4)] * 2, [(None, 1)] * 2,
                                       input_type='float',
                                       output_type=['float', 'float'])
    metadata.set_name('t').set_title('t')

    # Test the input and output shapes
    assert metadata.servable.methods['run'].input.dict(exclude_none=True) == {
        'type': 'tuple',
        'description': 'Tuple of tensors',
        'element_types': [{
            'type': 'ndarray',
            'description': 'Tensor',
            'shape': [None, 4],
            'item_type': {'type': 'float'}
        }, {
            'type': 'ndarray',
            'description': 'Tensor',
            'shape': [None, 4],
            'item_type': {'type': 'float'}
        }]
    }
    assert metadata.servable.methods['run'].output.dict(exclude_none=True) == {
        'type': 'tuple',
        'description': 'Tuple of tensors',
        'element_types': [{
            'type': 'ndarray',
            'description': 'Tensor',
            'shape': [None, 1],
            'item_type': {'type': 'float'}
        }, {
            'type': 'ndarray',
            'description': 'Tensor',
            'shape': [None, 1],
            'item_type': {'type': 'float'}
        }]
    }

    validate_against_dlhub_schema(metadata.to_dict(), 'servable')
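# The MultiNetwork fixture used by the two tests above is defined elsewhere in
# the test module. A minimal sketch consistent with the shapes the tests assert
# (two [None, 4] inputs, two [None, 1] outputs); the layer names and the exact
# forward wiring are assumptions:
class MultiNetwork(torch.nn.Module):
    """Toy module with two independent inputs and two outputs"""

    def __init__(self):
        super().__init__()
        self.head_a = torch.nn.Linear(4, 1)  # Maps a (batch, 4) tensor to (batch, 1)
        self.head_b = torch.nn.Linear(4, 1)

    def forward(self, x_a, x_b):
        # Return a tuple of tensors, matching the 'Tuple of tensors' metadata
        return self.head_a(x_a), self.head_b(x_b)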
def test_torch_single_input(self):
    # Make a simple Torch model
    model = _make_simple_model()

    # Save it to disk
    tempdir = mkdtemp()
    try:
        model_path = os.path.join(tempdir, 'model.pt')
        torch.save(model, model_path)

        # Create the metadata
        metadata = TorchModel.create_model(model_path, (2, 4), (3, 5))
        metadata.set_title('Torch Test')
        metadata.set_name('mlp')

        output = metadata.to_dict()
        self.assertEqual(output, {
            "datacite": {"creators": [], "titles": [{"title": "Torch Test"}],
                         "publisher": "DLHub",
                         "publicationYear": _year,
                         "identifier": {"identifier": "10.YET/UNASSIGNED",
                                        "identifierType": "DOI"},
                         "resourceType": {"resourceTypeGeneral": "InteractiveResource"},
                         "descriptions": [],
                         "fundingReferences": [],
                         "relatedIdentifiers": [],
                         "alternateIdentifiers": [],
                         "rightsList": []},
            "dlhub": {"version": __version__, "domains": [],
                      "visible_to": ["public"],
                      "type": "servable",
                      "name": "mlp",
                      "files": {"model": model_path},
                      "dependencies": {"python": {"torch": torch.__version__}}},
            "servable": {
                "methods": {"run": {
                    "input": {"type": "ndarray", "description": "Tensor",
                              "shape": [2, 4], "item_type": {"type": "float"}},
                    "output": {"type": "ndarray", "description": "Tensor",
                               "shape": [3, 5], "item_type": {"type": "float"}},
                    "parameters": {},
                    "method_details": {"method_name": "__call__"}
                }},
                "type": "Torch Model",
                "shim": "torch.TorchServable",
                "model_type": "Deep NN",
                "model_summary": """Net(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)"""}})

        # Validate against schema
        validate_against_dlhub_schema(output, 'servable')
    finally:
        shutil.rmtree(tempdir)
def publish_servable(self, model):
    """Submit a servable to DLHub

    If this servable has not been published before, it will be assigned a unique
    identifier. If it has been published before (DLHub detects if it has an
    identifier), then DLHub will update the servable to the new version.

    Args:
        model (BaseMetadataModel): Servable to be submitted
    Returns:
        (string): Task ID of this submission, used for checking for success
    """

    # Get the metadata
    metadata = model.to_dict(simplify_paths=True)

    # Mark the method used to submit the model
    metadata['dlhub']['transfer_method'] = {'POST': 'file'}

    # Validate against the servable schema
    validate_against_dlhub_schema(metadata, 'servable')

    # Wipe the funcX cache so we don't keep reusing an old servable
    self.clear_funcx_cache()

    # Get the data to be submitted as a ZIP file
    fp, zip_filename = mkstemp('.zip')
    os.close(fp)
    os.unlink(zip_filename)
    try:
        model.get_zip_file(zip_filename)

        # Get the authorization headers
        headers = {}
        self.authorizer.set_authorization_header(headers)

        # Submit data to the DLHub service
        with open(zip_filename, 'rb') as zf:
            reply = requests.post(
                slash_join(self.base_url, 'publish'),
                headers=headers,
                files={
                    'json': ('dlhub.json', json.dumps(metadata), 'application/json'),
                    'file': ('servable.zip', zf, 'application/octet-stream')
                })

        # Return the task id
        if reply.status_code != 200:
            raise Exception(reply.text)
        return reply.json()['task_id']
    finally:
        os.unlink(zip_filename)
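# A minimal usage sketch for publish_servable, assuming the SDK's DLHubClient
# entry point (auth/login setup omitted; 'model' stands in for any described
# BaseMetadataModel, such as the ones built in the tests here):
from dlhub_sdk import DLHubClient

client = DLHubClient()
task_id = client.publish_servable(model)
print('Submitted servable; task id:', task_id)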
def test_tf(self):
    # Make a model and save it to disk
    self.make_model()

    # Create the description
    model = TensorFlowModel.create_model(tf_export_path).set_title('TF Test') \
        .set_name('tf-test')

    # Generate the metadata for the test
    metadata = model.to_dict(simplify_paths=True)

    # Check the files, dependencies, and servable description
    self.assertEqual({'other': ['saved_model.pb',
                                os.path.join('variables', 'variables.data-00000-of-00001'),
                                os.path.join('variables', 'variables.index')]},
                     metadata['dlhub']['files'])
    self.assertEqual(metadata['dlhub']['dependencies'],
                     {'python': {'tensorflow': tf.__version__}})
    self.assertEqual(metadata['servable'], {
        'methods': {
            'run': {
                'input': {'type': 'ndarray', 'description': 'x',
                          'shape': [None, 3], 'item_type': {'type': 'float'}},
                'output': {'type': 'ndarray', 'description': 'y',
                           'shape': [None, 3], 'item_type': {'type': 'float'}},
                'parameters': {},
                'method_details': {'input_nodes': ['Input:0'],
                                   'output_nodes': ['add:0']}
            },
            'length': {
                'input': {'type': 'ndarray', 'description': 'x',
                          'shape': [None, 3], 'item_type': {'type': 'float'}},
                'output': {'type': 'float', 'description': 'len'},
                'parameters': {},
                'method_details': {'input_nodes': ['Input:0'],
                                   'output_nodes': ['Sum:0']}
            },
            'scalar_multiply': {
                'input': {'type': 'tuple', 'description': 'Arguments',
                          'element_types': [
                              {'type': 'ndarray', 'description': 'x',
                               'shape': [None, 3], 'item_type': {'type': 'float'}},
                              {'type': 'float', 'description': 'z'}
                          ]},
                'output': {'type': 'ndarray', 'description': 'scale_mult',
                           'shape': [None, 3], 'item_type': {'type': 'float'}},
                'parameters': {},
                'method_details': {'input_nodes': ['Input:0', 'ScalarMultiple:0'],
                                   'output_nodes': ['scale_mult:0']}
            }
        },
        'shim': 'tensorflow.TensorFlowServable',
        'type': 'TensorFlow Model'})

    validate_against_dlhub_schema(metadata, 'servable')
def test_codemeta(self):
    m = TabularDataset()

    # Read in the codemeta file
    odir = os.getcwd()
    try:
        os.chdir(os.path.dirname(__file__))
        m.read_codemeta_file()
    finally:
        os.chdir(odir)

    # Check that it produces valid datacite
    validate_against_dlhub_schema(m['datacite'], 'datacite-v4.1')
def test_visibility():
    model = PythonStaticMethodModel.create_model('numpy.linalg', 'norm')
    model.set_name('1d_norm')
    model.set_title('Norm of a 1D Array')
    model.set_inputs('ndarray', 'Array to be normed', shape=[None])
    model.set_outputs('number', 'Norm of the array')

    # Setting visibility to a user
    model.set_visibility(users=['bec215bc-9169-4be9-af49-4872b5e11ef8'])
    validate_against_dlhub_schema(model.to_dict(), 'servable')
    assert model.dlhub.visible_to[0].startswith('urn:globus:auth:identity:')

    # Setting visibility to a group
    model.set_visibility(groups=['fdb38a24-03c1-11e3-86f7-12313809f035'])
    validate_against_dlhub_schema(model.to_dict(), 'servable')
    assert len(model.dlhub.visible_to) == 1  # Ensure it was replaced, not appended
    assert model.dlhub.visible_to[0].startswith('urn:globus:groups:id:')

    # Test using a non-UUID for the user
    model.set_visibility(users=['foo'])
    with raises(ValidationError):
        validate_against_dlhub_schema(model.to_dict(), 'servable')

    # Default visibility is "public"
    model.set_visibility()
    validate_against_dlhub_schema(model.to_dict(), 'servable')
    assert model.dlhub.visible_to == ['public']
def test_multiarg(self):
    """Test making descriptions with more than one argument"""

    # Initialize the model
    model = PythonStaticMethodModel.from_function_pointer(max)
    model.set_name('test').set_title('test')

    # Define the inputs and outputs
    model.set_inputs('tuple', 'Two numbers', element_types=[
        compose_argument_block('float', 'A number'),
        compose_argument_block('float', 'A second number')
    ])
    model.set_outputs('float', 'Maximum of the two numbers')

    # Mark that the inputs should be unpacked
    model.set_unpack_inputs(True)

    # Check the description
    self.assertEqual(model['servable']['methods']['run'], {
        'input': {
            'type': 'tuple',
            'description': 'Two numbers',
            'element_types': [
                {'type': 'float', 'description': 'A number'},
                {'type': 'float', 'description': 'A second number'}
            ]
        },
        'output': {
            'type': 'float',
            'description': 'Maximum of the two numbers'
        },
        'method_details': {
            'module': 'builtins',
            'method_name': 'max',
            'unpack': True,
            'autobatch': False
        },
        'parameters': {}
    })

    validate_against_dlhub_schema(model.to_dict(), 'servable')
def test_load_model(self):
    model_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'model.pkl'))

    # Load the model
    model_info = ScikitLearnModel.create_model(model_path, n_input_columns=4, classes=3)
    model_info.set_title('Sklearn example').set_name('sklearn')

    # Generate the metadata
    metadata = model_info.to_dict()

    # Test key components
    assert metadata['dlhub']['dependencies']['python'] == {
        'scikit-learn': '0.19.1'  # The version used to save the model
    }
    assert metadata['servable']['shim'] == 'sklearn.ScikitLearnServable'
    assert metadata['servable']['model_type'] == 'SVC'
    assert metadata['servable']['methods']['run'] == {
        "input": {
            "type": "ndarray",
            "shape": [None, 4],
            "description": ("List of records to evaluate with model. "
                            "Each record is a list of 4 variables."),
            "item_type": {"type": "float"}
        },
        "output": {
            "type": "ndarray",
            "shape": [None, 3],
            "description": "Probabilities for membership in each of 3 classes",
            "item_type": {"type": "float"}
        },
        "parameters": {},
        "method_details": {"method_name": "_predict_proba"}
    }
    assert metadata['servable']['model_summary'].startswith('SVC(')
    assert metadata['servable']['options'] == {
        'is_classifier': True,
        'serialization_method': 'pickle',
        'classes': ['Class 1', 'Class 2', 'Class 3']
    }

    # Check the schema validation
    validate_against_dlhub_schema(model_info.to_dict(), 'servable')
def test_keras_multioutput(self):
    # Make a multi-output Keras model
    input_layer = keras.layers.Input(shape=(4,))
    dense = keras.layers.Dense(16, activation='relu')(input_layer)
    output_1 = keras.layers.Dense(1, activation='relu')(dense)
    output_2 = keras.layers.Dense(2, activation='softmax')(dense)
    model = keras.models.Model([input_layer], [output_1, output_2])
    model.compile(optimizer='rmsprop', loss='mse')

    # Save it to disk
    tempdir = mkdtemp()
    try:
        model_path = os.path.join(tempdir, 'model.hd5')
        model.save(model_path)

        # Create the metadata
        metadata = KerasModel.create_model(model_path, [['y'], ['yes', 'no']])
        metadata.set_title('Keras Test')
        metadata.set_name('mlp')

        # Check that the outputs are described as a tuple of tensors
        self.assertEqual(metadata['servable']['methods']['run']['output'], {
            'type': 'tuple',
            'description': 'Tuple of tensors',
            'element_types': [
                {'type': 'ndarray', 'description': 'Tensor', 'shape': [None, 1]},
                {'type': 'ndarray', 'description': 'Tensor', 'shape': [None, 2]}
            ]
        })

        # Validate against schema
        output = metadata.to_dict()
        validate_against_dlhub_schema(output, 'servable')
    finally:
        shutil.rmtree(tempdir)
def test_keras_single_input(self):
    # Make a Keras model
    model = _make_simple_model()

    # Save it to disk
    tempdir = mkdtemp()
    try:
        model_path = os.path.join(tempdir, 'model.hd5')
        model.save(model_path)

        # Create the metadata
        metadata = KerasModel.create_model(model_path, ["y"])
        metadata.set_title('Keras Test')
        metadata.set_name('mlp')

        # Validate against schema
        output = metadata.to_dict()
        validate_against_dlhub_schema(output, 'servable')
    finally:
        shutil.rmtree(tempdir)
def test_tf(self):
    # Make a model and save it to disk
    if tf.__version__ < '2':
        _make_model_v1()
    else:
        _make_model_v2()

    # Create the description
    model = TensorFlowModel.create_model(tf_export_path).set_title('TF Test') \
        .set_name('tf-test')

    # Generate the metadata for the test
    metadata = model.to_dict(simplify_paths=True)

    # Make sure the files are there
    my_files = metadata['dlhub']['files']['other']
    assert 'saved_model.pb' in my_files
    assert os.path.join('variables', 'variables.data-00000-of-00001') in my_files
    assert os.path.join('variables', 'variables.index') in my_files

    # Check the tensorflow version
    self.assertEqual(metadata['dlhub']['dependencies'],
                     {'python': {'tensorflow': tf.__version__}})

    # Check the methods and their input/output signatures
    my_methods = metadata['servable']['methods']
    assert my_methods['run']['input']['type'] == 'ndarray'
    assert my_methods['run']['input']['shape'] == [None, 3]
    assert my_methods['run']['input']['item_type'] == {'type': 'float'}
    assert my_methods['scalar_multiply']['input']['type'] == 'tuple'
    assert my_methods['scalar_multiply']['input']['element_types'][0]['shape'] == [None, 3]
    assert my_methods['scalar_multiply']['input']['element_types'][1]['shape'] == []
    assert 'length' in my_methods
    assert 'scalar_multiply' in my_methods

    # Check the shim
    assert metadata['servable']['shim'] == 'tensorflow.TensorFlowServable'

    validate_against_dlhub_schema(metadata, 'servable')
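# _make_model_v1/_make_model_v2 are fixtures defined elsewhere in this test
# module. As a rough, assumption-laden sketch, a TF-2 fixture consistent with
# the asserted methods (run, length, and scalar_multiply over a [None, 3] float
# input) could export named signatures like the following; the exact ops,
# output names, and tf_export_path value are guesses, not the repository's code.
import tensorflow as tf

tf_export_path = './tf-model'  # Assumed export location


class _ExampleModule(tf.Module):

    @tf.function(input_signature=[tf.TensorSpec([None, 3], tf.float32, name='x')])
    def run(self, x):
        return {'y': x + 1}  # Some elementwise op producing a [None, 3] output

    @tf.function(input_signature=[tf.TensorSpec([None, 3], tf.float32, name='x')])
    def length(self, x):
        return {'len': tf.reduce_sum(x)}  # Scalar output

    @tf.function(input_signature=[tf.TensorSpec([None, 3], tf.float32, name='x'),
                                  tf.TensorSpec([], tf.float32, name='z')])
    def scalar_multiply(self, x, z):
        return {'scale_mult': x * z}


def _make_model_v2():
    module = _ExampleModule()
    tf.saved_model.save(module, tf_export_path, signatures={
        'serving_default': module.run,
        'length': module.length,
        'scalar_multiply': module.scalar_multiply
    })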
def test_function():
    f = math.sqrt

    # Make the model
    model = PythonStaticMethodModel.from_function_pointer(f, autobatch=True)
    model.set_name("static_method").set_title('Python example')

    # Describe the inputs/outputs
    model.set_inputs('list', 'List of numbers', item_type='float')
    model.set_outputs('float', 'Square root of the number')

    # Generate the output
    output = model.to_dict()
    assert output['servable'] == {
        'type': 'Python static method',
        'shim': 'python.PythonStaticMethodServable',
        'options': {},
        'methods': {
            'run': {
                'input': {
                    'type': 'list',
                    'description': 'List of numbers',
                    'item_type': {'type': 'float'}
                },
                'output': {
                    'type': 'float',
                    'description': 'Square root of the number'
                },
                'parameters': {},
                'method_details': {
                    'module': 'math',
                    'method_name': 'sqrt',
                    'autobatch': True
                }
            }
        }
    }
    validate_against_dlhub_schema(output, 'servable')
def test_multiargs_pickle(tmpdir):
    # Make an example class
    x = ExampleClass(2)

    # Save a pickle
    filename = str(tmpdir / 'test.pkl')
    with open(filename, 'wb') as fp:
        pkl.dump(x, fp)

    # Make the metadata file
    model = PythonClassMethodModel.create_model(filename, 'f')
    model.set_title('Example function')
    model.set_name('function')
    model.set_inputs('tuple', 'inputs',
                     element_types=[compose_argument_block('float', 'Number')] * 2)
    model.set_outputs('float', 'Output')
    model.set_unpack_inputs(True)

    # Make the servable
    validate_against_dlhub_schema(model.to_dict(), 'servable')
    servable = PythonClassMethodServable(**model.to_dict())

    # Test the servable
    assert 4 == servable.run([1, 2])[0]
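# ExampleClass is defined elsewhere in the test module. One hypothetical
# definition consistent with the assertion above (the inputs [1, 2] are
# unpacked into f(1, 2), which returns 4 when the instance is constructed
# with 2); the attribute name and the arithmetic are assumptions:
class ExampleClass:

    def __init__(self, factor):
        self.factor = factor  # Stored constructor argument (assumed name)

    def f(self, a, b):
        # f(1, 2) -> 2 * 1 * 2 == 4, matching the test's expected output
        return self.factor * a * b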
def test_torch_single_input(tmpdir):
    # Make a simple Torch model
    model = _make_simple_model()

    # Save it to disk
    model_path = os.path.join(tmpdir, 'model.pt')
    torch.save(model, model_path)

    # Create the metadata
    metadata = TorchModel.create_model(model_path, (2, 4), (3, 5))
    metadata.set_title('Torch Test')
    metadata.set_name('mlp')

    output = metadata.to_dict()
    assert output["dlhub"] == {
        "version": __version__,
        "domains": [],
        "visible_to": ["public"],
        "type": "servable",
        "name": "mlp",
        "files": {"model": model_path},
        "dependencies": {"python": {"torch": torch.__version__}}
    }
    print("\n" + output["servable"]["model_summary"])
    assert output["servable"] == {
        "methods": {
            "run": {
                "input": {
                    "type": "ndarray",
                    "description": "Tensor",
                    "shape": [2, 4],
                    "item_type": {"type": "float"}
                },
                "output": {
                    "type": "ndarray",
                    "description": "Tensor",
                    "shape": [3, 5],
                    "item_type": {"type": "float"}
                },
                "parameters": {},
                "method_details": {"method_name": "__call__"}
            }
        },
        "type": "Torch Model",
        "shim": "torch.TorchServable",
        "model_type": "Deep NN",
        "model_summary": """Net(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)""",
        "options": {}
    }

    # Validate against schema
    validate_against_dlhub_schema(output, 'servable')
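# _make_simple_model for the Torch tests is defined elsewhere in the module.
# The asserted model_summary pins down the layer shapes exactly, so a matching
# sketch follows; the forward wiring is an assumption, based on the classic
# MNIST convnet this architecture resembles:
import torch.nn.functional as F


class Net(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 20, 5, 1)
        self.conv2 = torch.nn.Conv2d(20, 50, 5, 1)
        self.fc1 = torch.nn.Linear(800, 500)
        self.fc2 = torch.nn.Linear(500, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2, 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2, 2)
        x = x.view(-1, 800)  # 4 * 4 * 50 = 800 features after two conv/pool stages
        return self.fc2(F.relu(self.fc1(x)))


def _make_simple_model():
    return Net()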
def test_pipeline(self):
    """Make a pipeline composed of two numpy steps"""

    # Generate the two steps
    step1 = PythonStaticMethodModel.create_model('numpy', 'max',
                                                 function_kwargs={'axis': 1}).set_name('step1')
    step2 = PythonStaticMethodModel.create_model('numpy', 'mean').set_name('step2')

    # Make the pipeline
    pipeline = PipelineModel().set_title('Average of Column Maximums').set_name('numpy_test')
    pipeline.add_step('username', step1.name, 'Maximum of each column', {'axis': 0})
    pipeline.add_step('username', step2.name, 'Average of the maximums')

    # Generate the pipeline metadata
    metadata = pipeline.to_dict()
    correct_metadata = {
        'datacite': {
            'creators': [],
            'titles': [{'title': 'Average of Column Maximums'}],
            'publisher': 'DLHub',
            'publicationYear': _year,
            'identifier': {
                'identifier': '10.YET/UNASSIGNED',
                'identifierType': 'DOI'
            },
            'resourceType': {'resourceTypeGeneral': 'InteractiveResource'},
            'descriptions': [],
            'fundingReferences': [],
            'relatedIdentifiers': [],
            'alternateIdentifiers': [],
            'rightsList': []
        },
        'dlhub': {
            'version': __version__,
            'domains': [],
            'visible_to': ['public'],
            'name': 'numpy_test',
            'type': 'pipeline',
            'files': {}
        },
        'pipeline': {
            'steps': [{
                'author': 'username',
                'name': step1.name,
                'description': 'Maximum of each column',
                'parameters': {'axis': 0}
            }, {
                'author': 'username',
                'name': step2.name,
                'description': 'Average of the maximums'
            }]
        }
    }
    self.assertEqual(metadata, correct_metadata)
    validate_against_dlhub_schema(metadata, 'pipeline')
def test_pickle(self):
    pickle_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'model.pkl'))

    # Make the model
    model = PythonClassMethodModel.create_model(pickle_path, 'predict_proba',
                                                {'fake': 'kwarg'})
    model.set_title('Python example').set_name("class_method")

    # Make sure it throws value errors if inputs are not set
    with self.assertRaises(ValueError):
        model.to_dict()

    # Define the input and output types
    model.set_inputs('ndarray', 'Features for each entry', shape=[None, 4])
    model.set_outputs('ndarray', 'Predicted probabilities of being each iris species',
                      shape=[None, 3])

    # Make sure attempting to set "unpack" fails
    with self.assertRaises(ValueError):
        model.set_unpack_inputs(True)

    # Add some requirements
    model.add_requirement('scikit-learn', 'detect')
    model.add_requirement('numpy', 'detect')
    model.add_requirement('sklearn', 'latest')  # Dummy project; version shouldn't change

    # Check the model output
    output = model.to_dict()
    correct_output = {
        'datacite': {
            'creators': [],
            'titles': [{'title': 'Python example'}],
            'publisher': 'DLHub',
            'resourceType': {'resourceTypeGeneral': 'InteractiveResource'},
            'identifier': {
                'identifier': '10.YET/UNASSIGNED',
                'identifierType': 'DOI'
            },
            'publicationYear': _year,
            'descriptions': [],
            'fundingReferences': [],
            'relatedIdentifiers': [],
            'alternateIdentifiers': [],
            'rightsList': []
        },
        'dlhub': {
            'version': __version__,
            'domains': [],
            'visible_to': ['public'],
            'name': 'class_method',
            'type': 'servable',
            'files': {'pickle': pickle_path},
            'dependencies': {
                'python': {
                    'scikit-learn': skl_version,
                    'numpy': numpy_version,
                    'sklearn': '0.0'
                }
            }
        },
        'servable': {
            'type': 'Python class method',
            'shim': 'python.PythonClassMethodServable',
            'methods': {
                'run': {
                    'input': {
                        'type': 'ndarray',
                        'description': 'Features for each entry',
                        'shape': [None, 4]
                    },
                    'output': {
                        'type': 'ndarray',
                        'description': 'Predicted probabilities of being each iris species',
                        'shape': [None, 3]
                    },
                    'parameters': {'fake': 'kwarg'},
                    'method_details': {
                        'class_name': 'sklearn.svm.classes.SVC',
                        'method_name': 'predict_proba'
                    }
                }
            }
        }
    }
    self.assertEqual(output, correct_output)
    self.assertEqual([pickle_path], model.list_files())
    validate_against_dlhub_schema(output, 'servable')
def test_keras_single_input(self):
    # Make a Keras model
    model = _make_simple_model()

    # Save it to disk
    tempdir = mkdtemp()
    try:
        model_path = os.path.join(tempdir, 'model.hd5')
        model.save(model_path)

        # Create the metadata
        metadata = KerasModel.create_model(model_path, ["y"])
        metadata.set_title('Keras Test')
        metadata.set_name('mlp')

        output = metadata.to_dict()
        self.assertEqual(output, {
            "datacite": {
                "creators": [],
                "titles": [{"title": "Keras Test"}],
                "publisher": "DLHub",
                "publicationYear": _year,
                "identifier": {
                    "identifier": "10.YET/UNASSIGNED",
                    "identifierType": "DOI"
                },
                "resourceType": {"resourceTypeGeneral": "InteractiveResource"},
                "descriptions": [],
                "fundingReferences": [],
                "relatedIdentifiers": [],
                "alternateIdentifiers": [],
                "rightsList": []
            },
            "dlhub": {
                "version": __version__,
                "domains": [],
                "visible_to": ["public"],
                "type": "servable",
                "name": "mlp",
                "files": {"model": model_path},
                "dependencies": {
                    "python": {
                        "keras": keras_version,
                        "h5py": h5py_version,
                        "tensorflow": tf_version
                    }
                }
            },
            "servable": {
                "methods": {
                    "run": {
                        "input": {
                            "type": "ndarray",
                            "description": "Tensor",
                            "shape": [None, 1]
                        },
                        "output": {
                            "type": "ndarray",
                            "description": "Tensor",
                            "shape": [None, 1]
                        },
                        "parameters": {},
                        "method_details": {
                            "method_name": "predict",
                            "classes": ["y"]
                        }
                    }
                },
                "type": "Keras Model",
                "shim": "keras.KerasServable",
                "model_type": "Deep NN",
                "model_summary": """_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
hidden (Dense)               (None, 16)                32        
_________________________________________________________________
output (Dense)               (None, 1)                 17        
=================================================================
Total params: 49
Trainable params: 49
Non-trainable params: 0
_________________________________________________________________
"""  # noqa: W291 (trailing whitespace needed for text match)
            }
        })

        # Validate against schema
        validate_against_dlhub_schema(output, 'servable')
    finally:
        shutil.rmtree(tempdir)
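# _make_simple_model for the Keras tests is defined elsewhere. The asserted
# summary (a 'hidden' Dense(16) layer with 32 parameters and an 'output'
# Dense(1) layer with 17) implies a single scalar input, since 1 * 16 + 16 = 32
# and 16 * 1 + 1 = 17. A consistent sketch, where the activation and compile
# arguments are assumptions:
def _make_simple_model():
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(16, input_shape=(1,), activation='relu', name='hidden'))
    model.add(keras.layers.Dense(1, name='output'))
    model.compile(optimizer='rmsprop', loss='mse')
    return model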
def test_tabular_dataset(self):
    data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'test.csv'))
    m = TabularDataset.create_model(data_path)

    # Add some descriptive metadata
    m.set_title('Example dataset')
    m.set_name('example_dataset')
    with self.assertRaises(ValueError):
        m.set_name('has whitespace')
    m.mark_inputs(['x'])
    m.mark_labels(['y'])
    m.annotate_column('x', description='Input variable', units='cm')
    m.annotate_column('y', data_type='scalar')

    self.assertEqual(m.to_dict(), {
        "datacite": {
            "titles": [{"title": "Example dataset"}],
            "creators": [],
            "publisher": "DLHub",
            "resourceType": {"resourceTypeGeneral": "Dataset"},
            "publicationYear": _year,
            "identifier": {
                "identifier": "10.YET/UNASSIGNED",
                "identifierType": "DOI"
            },
            "descriptions": [],
            "fundingReferences": [],
            "relatedIdentifiers": [],
            "alternateIdentifiers": [],
            "rightsList": []
        },
        "dlhub": {
            "version": __version__,
            "visible_to": ["public"],
            "domains": [],
            "name": "example_dataset",
            "type": "dataset",
            "files": {"data": data_path}
        },
        "dataset": {
            "columns": [{
                "name": "x",
                "description": "Input variable",
                "type": "integer",
                "units": "cm"
            }, {
                "name": "y",
                "type": "scalar"
            }],
            "inputs": ["x"],
            "labels": ["y"],
            "format": "csv",
            "read_options": {}
        }
    })
    validate_against_dlhub_schema(m.to_dict(), "dataset")

    # Test the simplification of files
    metadata = m.to_dict(simplify_paths=True)
    self.assertEqual({'data': 'test.csv'}, metadata['dlhub']['files'])

    # Make sure the paths saved in the object have not changed
    self.assertEqual({'data': data_path}, m['dlhub']['files'])
def test_load_model(self):
    model_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'model.pkl'))

    # Load the model
    model_info = ScikitLearnModel.create_model(model_path, n_input_columns=4, classes=3)
    model_info.set_title('Sklearn example').set_name('sklearn')

    expected = {
        'datacite': {
            'creators': [],
            'publisher': 'DLHub',
            'titles': [{'title': 'Sklearn example'}],
            'resourceType': {'resourceTypeGeneral': 'InteractiveResource'},
            'identifier': {
                'identifier': '10.YET/UNASSIGNED',
                'identifierType': 'DOI'
            },
            'publicationYear': _year,
            'descriptions': [],
            'fundingReferences': [],
            'relatedIdentifiers': [],
            'alternateIdentifiers': [],
            'rightsList': []
        },
        'dlhub': {
            'version': __version__,
            'visible_to': ['public'],
            'name': 'sklearn',
            'domains': [],
            'type': 'servable',
            'files': {'model': model_path},
            'dependencies': {
                'python': {'scikit-learn': '0.19.1'}
            }
        },
        'servable': {
            'type': 'Scikit-learn estimator',
            'model_type': 'SVC',
            'shim': 'sklearn.ScikitLearnServable',
            'methods': {
                'run': {
                    'input': {
                        'type': 'ndarray',
                        'shape': [None, 4],
                        'description': ('List of records to evaluate with model. '
                                        'Each record is a list of 4 variables.'),
                        'item_type': {'type': 'float'}
                    },
                    'output': {
                        'type': 'ndarray',
                        'shape': [None, 3],
                        'description': 'Probabilities for membership in each of 3 classes',
                        'item_type': {'type': 'float'}
                    },
                    'parameters': {},
                    'method_details': {'method_name': '_predict_proba'}
                }
            },
            'model_summary': ("SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,\n"
                              "  decision_function_shape='ovr', degree=3, gamma='auto', "
                              "kernel='linear',\n"
                              "  max_iter=-1, probability=True, random_state=None,"
                              " shrinking=True, tol=0.001,\n"
                              "  verbose=False)"),
            'options': {
                'is_classifier': True,
                'serialization_method': 'pickle',
                'classes': ['Class 1', 'Class 2', 'Class 3']
            }
        }
    }
    self.assertEqual(model_info.to_dict(), expected)
    validate_against_dlhub_schema(model_info.to_dict(), 'servable')
model.set_title("Classification Model for AMDET Properties") model.set_name("deep-smiles_model") model.set_abstract( "A deep learning model that predicts AMDET properties given a SMILES string of a molecule." ) model.add_alternate_identifier("https://github.com/MengyuanZhu/Deep-SMILES", "URL") # Add requirements model.add_requirement('tensorflow', 'detect') model.add_requirement('keras', 'detect') # Sanity Check: Make sure it fits the schema metadata = model.to_dict() print(json.dumps(metadata, indent=2)) validate_against_dlhub_schema(metadata, 'servable') with open('model_metadata.json', 'w') as fp: json.dump(metadata, fp, indent=2) # Describe the encoding step # The first step is to turn a string into a list of integers string_length = model.input['shape'][-1] model = PythonStaticMethodModel('app', 'encode_string', function_kwargs={'length': string_length}, autobatch=True) # Describe the inputs and outputs model.set_inputs('list', 'List of SMILES strings', item_type='string') model.set_outputs('list', 'List of encoded strings.',
def test_dataset(self):
    m = Dataset().set_authors(["Ward, Logan"], ["University of Chicago"]) \
        .set_title("Example dataset").add_alternate_identifier("10.11", "DOI") \
        .add_related_identifier("10.11", "DOI", 'IsDescribedBy') \
        .add_funding_reference("ANL LDRD", '1', 'ISNI', '201801', 'DLHub', 'http://funding.uri') \
        .set_version(1) \
        .add_rights("https://www.gnu.org/licenses/gpl-3.0.en.html", "GPL v3.0") \
        .set_abstract("Abstract").set_methods("Methods") \
        .set_visibility(['public']).set_domains(["materials science"]).set_name("example_data")

    correct_entry = {
        "datacite": {
            "creators": [{
                "givenName": "Logan",
                "familyName": "Ward",
                "affiliations": "University of Chicago"
            }],
            "titles": [{"title": "Example dataset"}],
            "publisher": "DLHub",
            "publicationYear": _year,
            "version": "1",
            "resourceType": {"resourceTypeGeneral": "Dataset"},
            "descriptions": [{
                "description": "Abstract",
                "descriptionType": "Abstract"
            }, {
                "description": "Methods",
                "descriptionType": "Methods"
            }],
            "fundingReferences": [{
                "awardNumber": {
                    "awardNumber": "201801",
                    "awardURI": "http://funding.uri"
                },
                "awardTitle": "DLHub",
                "funderIdentifier": {
                    "funderIdentifier": "1",
                    "funderIdentifierType": "ISNI"
                },
                "funderName": "ANL LDRD"
            }],
            "relatedIdentifiers": [{
                "relatedIdentifier": "10.11",
                "relatedIdentifierType": "DOI",
                "relationType": "IsDescribedBy"
            }],
            "alternateIdentifiers": [{
                "alternateIdentifier": "10.11",
                "alternateIdentifierType": "DOI"
            }],
            "rightsList": [{
                "rightsURI": "https://www.gnu.org/licenses/gpl-3.0.en.html",
                "rights": "GPL v3.0"
            }],
            "identifier": {
                "identifier": "10.YET/UNASSIGNED",
                "identifierType": "DOI"
            }
        },
        "dlhub": {
            "version": __version__,
            "visible_to": ["public"],
            "domains": ["materials science"],
            "name": "example_data",
            "type": "dataset",
            "files": {}
        },
        "dataset": {}
    }
    self.assertEqual(m.to_dict(), correct_entry)
    validate_against_dlhub_schema(m.to_dict(), "dataset")
def test_function(self):
    f = math.sqrt

    # Make the model
    model = PythonStaticMethodModel.from_function_pointer(f, autobatch=True)
    model.set_name("static_method").set_title('Python example')

    # Describe the inputs/outputs
    model.set_inputs('list', 'List of numbers', item_type='float')
    model.set_outputs('float', 'Square root of the number')

    # Generate the output
    output = model.to_dict()
    correct_output = {
        'datacite': {
            'creators': [],
            'titles': [{'title': 'Python example'}],
            'publisher': 'DLHub',
            'resourceType': {'resourceTypeGeneral': 'InteractiveResource'},
            'identifier': {
                'identifier': '10.YET/UNASSIGNED',
                'identifierType': 'DOI'
            },
            'publicationYear': _year,
            'descriptions': [],
            'fundingReferences': [],
            'relatedIdentifiers': [],
            'alternateIdentifiers': [],
            'rightsList': []
        },
        'dlhub': {
            'version': __version__,
            'domains': [],
            'visible_to': ['public'],
            'name': 'static_method',
            'type': 'servable',
            'files': {}
        },
        'servable': {
            'type': 'Python static method',
            'shim': 'python.PythonStaticMethodServable',
            'methods': {
                'run': {
                    'input': {
                        'type': 'list',
                        'description': 'List of numbers',
                        'item_type': {'type': 'float'}
                    },
                    'output': {
                        'type': 'float',
                        'description': 'Square root of the number'
                    },
                    'parameters': {},
                    'method_details': {
                        'module': 'math',
                        'method_name': 'sqrt',
                        'autobatch': True
                    }
                }
            }
        }
    }
    self.assertEqual(output, correct_output)
    validate_against_dlhub_schema(output, 'servable')
def dump_metadata(model, path):
    """Validate a model's metadata against the servable schema, then save it as JSON"""
    metadata = model.to_dict()
    print(json.dumps(metadata, indent=2))
    validate_against_dlhub_schema(metadata, 'servable')
    with open(path, 'w') as fp:
        json.dump(metadata, fp, indent=2)
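# Example use of dump_metadata, mirroring the publication script above
# ('model' is any described servable):
#
#     dump_metadata(model, 'model_metadata.json')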