def test_repo2docker():
    """The model should register the repo2docker files of the test repository."""
    expected_files = {
        os.path.join(repotestdir, name) for name in ('Dockerfile', 'postBuild')
    }
    start_dir = os.getcwd()
    try:
        # Run from inside the test repository so its files can be found
        os.chdir(repotestdir)

        model = PythonStaticMethodModel()
        model.parse_repo2docker_configuration()
        found_files = set(model.dlhub.files['other'])
        assert expected_files == found_files
    finally:
        # Always restore the working directory for the tests that follow
        os.chdir(start_dir)
def test_static():
    """Run ``numpy.max`` through the static-method shim, with and without autobatch."""
    # Describe numpy.max with a default reduction over axis 0
    model = PythonStaticMethodModel.create_model(
        'numpy', 'max', autobatch=False, function_kwargs={'axis': 0}
    )
    model.set_title('Example function')
    model.set_name('function')
    model.set_inputs('ndarray', 'Matrix', shape=[None, None])
    model.set_outputs('ndarray', 'Max of a certain axis', shape=[None])

    # Build the servable from the metadata and check the default keyword arguments
    servable = PythonStaticMethodServable(**model.to_dict())
    result, _ = servable.run([[1, 2], [3, 4]])
    assert np.isclose([3, 4], result).all()

    # Parameters supplied at call time change the axis being reduced
    result = servable.run([[1, 2], [3, 4]], parameters=dict(axis=1))[0]
    assert np.isclose([2, 4], result).all()

    # Switch autobatch on in the metadata and rebuild the servable
    model.servable.methods['run'].method_details['autobatch'] = True
    batched = PythonStaticMethodServable(**model.to_dict())
    result, _ = batched.run([[1, 2], [3, 4]])
    assert np.isclose([2, 4], result).all()
def shim():
    """Build a static-method servable around the externally defined ``run`` callable."""
    description = PythonStaticMethodModel.from_function_pointer(run)
    description.set_name('test')
    description.set_title('Test')
    description.set_inputs('bool', 'Whether to run successfully')
    description.set_outputs('bool', 'Should always be True')

    # The servable is constructed directly from the metadata dictionary
    servable_kwargs = description.to_dict()
    return PythonStaticMethodServable(**servable_kwargs)
def test_repo2docker(self):
    """The model should register the repo2docker files of the test repository."""
    odir = os.getcwd()
    try:
        # Run from inside the test repository so its files can be found
        os.chdir(repotestdir)

        model = PythonStaticMethodModel()
        model.parse_repo2docker_configuration()

        # assertEqual, not assertEquals: the latter is a deprecated alias
        # that was removed from unittest in Python 3.12
        self.assertEqual(
            {
                os.path.join(repotestdir, 'Dockerfile'),
                os.path.join(repotestdir, 'postBuild'),
            },
            set(model['dlhub']['files']['other']))
    finally:
        # Always restore the working directory for the tests that follow
        os.chdir(odir)
def test_visibility():
    """Exercise user, group, invalid and default visibility settings on a model."""
    user_id = 'bec215bc-9169-4be9-af49-4872b5e11ef8'
    group_id = 'fdb38a24-03c1-11e3-86f7-12313809f035'

    model = PythonStaticMethodModel.create_model('numpy.linalg', 'norm')
    model.set_name('1d_norm')
    model.set_title('Norm of a 1D Array')
    model.set_inputs('ndarray', 'Array to be normed', shape=[None])
    model.set_outputs('number', 'Norm of the array')

    # Visibility restricted to a single user
    model.set_visibility(users=[user_id])
    validate_against_dlhub_schema(model.to_dict(), 'servable')
    assert model.dlhub.visible_to[0].startswith('urn:globus:auth:identity:')

    # Visibility restricted to a group: the user entry must be replaced, not kept
    model.set_visibility(groups=[group_id])
    validate_against_dlhub_schema(model.to_dict(), 'servable')
    assert len(model.dlhub.visible_to) == 1
    assert model.dlhub.visible_to[0].startswith('urn:globus:groups:id:')

    # A user that is not a UUID must be rejected by the schema
    model.set_visibility(users=['foo'])
    with raises(ValidationError):
        validate_against_dlhub_schema(model.to_dict(), 'servable')

    # With no arguments the model becomes public
    model.set_visibility()
    validate_against_dlhub_schema(model.to_dict(), 'servable')
    assert model.dlhub.visible_to == ['public']
def test_submit(self):
    """Publish a description of ``numpy.linalg.norm`` through the test client."""
    description = PythonStaticMethodModel.create_model('numpy.linalg', 'norm')
    description.set_name('1d_norm')
    description.set_title('Norm of a 1D Array')
    description.set_inputs('ndarray', 'Array to be normed', shape=[None])
    description.set_outputs('number', 'Norm of the array')

    # Hand the finished description to the client held by the test case
    self.dl.publish_servable(description)
def test_loader(self):
    """``create_servable`` should build a static-method servable from the metadata."""
    model = PythonStaticMethodModel.create_model(
        'numpy', 'max', autobatch=False, function_kwargs={'axis': 0}
    )
    model.set_title('Example function')
    model.set_name('function')
    model.set_inputs('ndarray', 'Matrix', shape=[None, None])
    model.set_outputs('ndarray', 'Max of a certain axis', shape=[None])

    # Load the servable from its metadata and check both its type and its output
    metadata = model.to_dict()
    servable = create_servable(metadata)
    self.assertIsInstance(servable, PythonStaticMethodServable)

    result = servable.run([0, -1])[0]
    self.assertEqual([0], result)
def test_multiarg(self):
    """Check the description generated for a function with several arguments."""
    model = PythonStaticMethodModel.from_function_pointer(max)
    model.set_name('test').set_title('test')

    # Inputs are a tuple of two floats that gets unpacked into positional arguments
    number_blocks = [
        compose_argument_block('float', 'A number'),
        compose_argument_block('float', 'A second number'),
    ]
    model.set_inputs('tuple', 'Two numbers', element_types=number_blocks)
    model.set_outputs('float', 'Maximum of the two numbers')
    model.set_unpack_inputs(True)

    # Expected metadata for the "run" method
    expected_input = {
        'type': 'tuple',
        'description': 'Two numbers',
        'element_types': [
            {'type': 'float', 'description': 'A number'},
            {'type': 'float', 'description': 'A second number'},
        ],
    }
    expected_output = {
        'type': 'float',
        'description': 'Maximum of the two numbers',
    }
    expected_details = {
        'module': 'builtins',
        'method_name': 'max',
        'unpack': True,
        'autobatch': False,
    }
    expected_run = {
        'input': expected_input,
        'output': expected_output,
        'method_details': expected_details,
        'parameters': {},
    }

    self.assertEqual(model['servable']['methods']['run'], expected_run)
    validate_against_dlhub_schema(model.to_dict(), 'servable')
def test_multiargs():
    """A servable with unpacked inputs should pass tuple elements as separate arguments."""
    # Describe the builtin max function
    model = (
        PythonStaticMethodModel.from_function_pointer(max)
        .set_name('test')
        .set_title('test')
    )

    # The input is a pair of floats, unpacked when the function is called
    pair_blocks = [
        compose_argument_block('float', 'A number'),
        compose_argument_block('float', 'A second number'),
    ]
    model.set_inputs('tuple', 'Two numbers', element_types=pair_blocks)
    model.set_outputs('float', 'Maximum of the two numbers')
    model.set_unpack_inputs(True)

    # Run the shim on one pair
    servable = PythonStaticMethodServable(**model.to_dict())
    result = servable.run((1, 2))[0]
    assert result == 2
def test_single_file_input(tmpdir):
    """A 'file' input given as a URL dictionary should reach the function as a usable path."""
    # Describe os.path.isfile as a function taking a single file
    model = PythonStaticMethodModel.from_function_pointer(os.path.isfile).set_name('test')
    model.set_title('test')
    model.set_inputs('file', 'A file')
    model.set_outputs('boolean', 'Whether it exists')

    servable = PythonStaticMethodServable(**model.to_dict())

    def file_exists(url):
        """Run the servable on one URL and return its output."""
        return servable.run({'url': url})[0]

    # Local file, as a bare path and (outside Windows) as a file:// URL
    assert file_exists(__file__)
    if system() != 'Windows':
        assert file_exists('file:///' + __file__)

    # Remote file
    assert file_exists(
        'https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png'
    )
def test_multiargs_autobatch():
    """Autobatch and input unpacking should work together on a list of pairs."""
    # Describe the builtin max function, applied to every entry of a list
    model = (
        PythonStaticMethodModel.from_function_pointer(max, autobatch=True)
        .set_name('test')
        .set_title('test')
    )

    # Each list entry is a pair of floats
    pair_block = compose_argument_block(
        'tuple', 'Two numbers',
        element_types=[
            compose_argument_block('float', 'A number'),
            compose_argument_block('float', 'A second number'),
        ],
    )
    model.set_inputs('list', 'List of pairs of numbers', item_type=pair_block)
    model.set_outputs('list', 'Maximum of each pair', item_type='float')
    model.set_unpack_inputs(True)

    # Run the shim on a single-pair batch
    servable = PythonStaticMethodServable(**model.to_dict())
    result, _ = servable.run([(1, 2)])
    assert result == [2]
def test_single_file_list_input():
    """A list of 'file' inputs should be handled when autobatch is enabled."""
    # Describe os.path.isfile, applied to every entry of a list of files
    model = PythonStaticMethodModel.from_function_pointer(os.path.isfile, autobatch=True)
    model.set_name('test')
    model.set_title('test')
    model.set_inputs('list', 'List of files', item_type='file')
    model.set_outputs('list', 'Whether each file exists', item_type='boolean')

    servable = PythonStaticMethodServable(**model.to_dict())

    def files_exist(url):
        """Run the servable on a one-entry list and return its output."""
        return servable.run([{'url': url}])[0]

    # Local file; the file:// form is skipped on Windows, where it fails
    assert files_exist(__file__)
    if system() != 'Windows':
        assert files_exist('file:///' + __file__)

    # Remote file
    assert files_exist(
        'https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png'
    )
def test_file_multiinput():
    """File inputs nested inside a tuple (singly and in a list) should be handled."""
    model = PythonStaticMethodModel.from_function_pointer(multifile_input)
    model.set_name('test')
    model.set_title('test')

    # Three unpacked arguments: one file, a list of files, and a plain boolean
    argument_blocks = [
        compose_argument_block('file', 'Single file'),
        compose_argument_block('list', 'Multiple files', item_type='file'),
        compose_argument_block('boolean', 'Something random'),
    ]
    model.set_inputs('tuple', 'Several things', element_types=argument_blocks)
    model.set_outputs('bool', 'Should be True')
    model.set_unpack_inputs(True)

    # Build the servable and run it on this very file
    servable = PythonStaticMethodServable(**model.to_dict())
    inputs = [{'url': __file__}, [{'url': __file__}], True]
    assert servable.run(inputs)[0]
def test_function():
    """Check the 'servable' metadata generated from a function pointer."""
    # Describe math.sqrt, applied to every entry of a list
    model = PythonStaticMethodModel.from_function_pointer(math.sqrt, autobatch=True)
    model.set_name("static_method").set_title('Python example')
    model.set_inputs('list', 'List of numbers', item_type='float')
    model.set_outputs('float', 'Square root of the number')

    output = model.to_dict()

    # Expected description of the single "run" method
    expected_run = {
        'input': {
            'type': 'list',
            'description': 'List of numbers',
            'item_type': {'type': 'float'},
        },
        'output': {
            'type': 'float',
            'description': 'Square root of the number',
        },
        'parameters': {},
        'method_details': {
            'module': 'math',
            'method_name': 'sqrt',
            'autobatch': True,
        },
    }
    expected_servable = {
        'type': 'Python static method',
        'shim': 'python.PythonStaticMethodServable',
        'options': {},
        'methods': {'run': expected_run},
    }

    assert output['servable'] == expected_servable
    validate_against_dlhub_schema(output, 'servable')
def test_function(self):
    """Check the complete metadata generated from a function pointer."""
    # Describe math.sqrt, applied to every entry of a list
    model = PythonStaticMethodModel.from_function_pointer(math.sqrt, autobatch=True)
    model.set_name("static_method").set_title('Python example')
    model.set_inputs('list', 'List of numbers', item_type='float')
    model.set_outputs('float', 'Square root of the number')

    output = model.to_dict()

    # Expected metadata, assembled one top-level section at a time
    expected_datacite = {
        'creators': [],
        'titles': [{'title': 'Python example'}],
        'publisher': 'DLHub',
        'resourceType': {'resourceTypeGeneral': 'InteractiveResource'},
        'identifier': {
            'identifier': '10.YET/UNASSIGNED',
            'identifierType': 'DOI',
        },
        'publicationYear': _year,
        "descriptions": [],
        "fundingReferences": [],
        "relatedIdentifiers": [],
        "alternateIdentifiers": [],
        "rightsList": [],
    }
    expected_dlhub = {
        'version': __version__,
        'domains': [],
        'visible_to': ['public'],
        "name": "static_method",
        'type': 'servable',
        'files': {},
    }
    expected_run = {
        'input': {
            'type': 'list',
            'description': 'List of numbers',
            'item_type': {'type': 'float'},
        },
        'output': {
            'type': 'float',
            'description': 'Square root of the number',
        },
        'parameters': {},
        'method_details': {
            'module': 'math',
            'method_name': 'sqrt',
            'autobatch': True,
        },
    }
    expected_servable = {
        'type': 'Python static method',
        'shim': 'python.PythonStaticMethodServable',
        'methods': {'run': expected_run},
    }
    correct_output = {
        'datacite': expected_datacite,
        'dlhub': expected_dlhub,
        'servable': expected_servable,
    }

    self.assertEqual(output, correct_output)
    validate_against_dlhub_schema(output, 'servable')
from dlhub_sdk.models.servables.python import PythonStaticMethodModel
from dlhub_sdk.utils.schemas import validate_against_dlhub_schema
from PIL import Image
import json

# Describe PIL's Image.open as a servable that reads an image file
model = PythonStaticMethodModel.from_function_pointer(Image.open)
model.set_title("Image reading function")
model.set_name('read_image')
model.set_abstract("Reads an image file into an aray")
model.set_inputs('file', 'Image file', file_type='image/*')
model.set_outputs('ndarray', 'Image contents', shape=[None, None, None])

# Add the imaging library as a dependency. The installable distribution is
# named "Pillow"; "PIL" is only the name of the package it provides on import.
model.add_requirement('Pillow')

# Sanity Check: Make sure it fits the schema
metadata = model.to_dict()
validate_against_dlhub_schema(metadata, 'servable')

# Show the metadata and save it next to the script
print(json.dumps(metadata, indent=2))
with open('dlhub.json', 'w') as fp:
    json.dump(metadata, fp, indent=2)
def host_model(model_path, preprocessor_path, training_data_path, model_title,
               model_name, model_type="scikit-learn"):
    """Publish a trained model to DLHub as a Python static-method servable.

    The model, preprocessor and training data files are copied into the
    current working directory under the fixed names ``model.pkl``,
    ``preprocessor.pkl`` and ``selected.csv``. Those copies are attached to
    the servable together with the installed ``mastml`` package directory.

    Args:
        model_path: Path to the saved model file.
        preprocessor_path: Path to the saved preprocessor file.
        training_data_path: Path to the training data file.
        model_title: Title given to the servable.
        model_name: Name given to the servable.
        model_type: Kind of model; only ``"scikit-learn"`` is accepted.

    Returns:
        A tuple ``(dl, res)``: the ``DLHubClient`` used for publication and
        the value returned by its ``publish_servable`` call.

    Raises:
        ValueError: If ``model_type`` is not ``"scikit-learn"``.
    """
    # Fail fast: reject unsupported model types before a client is created
    # or any file is copied
    if model_type != 'scikit-learn':
        raise ValueError("Only scikit-learn models supported at this time")

    dl = DLHubClient()

    # The servable wraps the prediction function rather than the pickled model
    model = PythonStaticMethodModel.from_function_pointer(run_dlhub_prediction)

    # Some model descriptive info
    model.set_name(model_name).set_title(model_title)

    # Describe the inputs/outputs
    model.set_inputs('list', 'list of material compositions to predict', item_type='string')
    model.set_outputs(data_type='float', description='Predicted value from trained sklearn model')

    # Report everything that is about to be submitted
    mastml_dir = os.path.abspath(mastml.__path__[0])
    submissions = (
        ('Submitting preprocessor file to DLHub:', os.path.abspath(preprocessor_path)),
        ('Submitting model file to DLHub:', os.path.abspath(model_path)),
        ('Submitting training data file to DLHub:', os.path.abspath(training_data_path)),
        ('Submitting mastml directory to DLHub:', mastml_dir),
    )
    for message, path in submissions:
        log.info(message)
        log.info(path)

    # The files are attached under fixed names, so stage copies with those
    # names in the working directory
    staged_files = (
        ('model.pkl', model_path),
        ('preprocessor.pkl', preprocessor_path),
        ('selected.csv', training_data_path),
    )
    for staged_name, source_path in staged_files:
        try:
            shutil.copy(source_path, os.path.join(os.getcwd(), staged_name))
        except shutil.SameFileError:
            # The input already is the staged file; nothing to copy
            pass

    # Attach the mastml package and the staged files
    model.add_directory(mastml_dir, recursive=True)
    for staged_name, _ in staged_files:
        model.add_file(staged_name)

    # Add pip installable dependency for MAST-ML
    model.add_requirement('mastml', 'latest')

    res = dl.publish_servable(model)
    return dl, res
"""Create a pipeline that predicts formation energies of a list of strings""" from dlhub_sdk.models.servables.python import PythonStaticMethodModel from dlhub_sdk.models.pipeline import PipelineModel from pymatgen import Composition import pickle as pkl import json # Load in the model and featurizer steps with open('featurize_info.pkl', 'rb') as fp: feat_info = pkl.load(fp) with open('model_info.pkl', 'rb') as fp: model_info = pkl.load(fp) # Make a new step that takes a list of strings, and returns a list of Python objects convert_info = PythonStaticMethodModel.from_function_pointer(Composition, autobatch=True) convert_info.set_title( "Convert List of Strings to Pymatgen Composition Objects") convert_info.set_inputs("list", "List of strings", item_type="string") convert_info.set_outputs("list", "List of pymatgen composition objects", item_type={ 'type': 'python object', 'python_type': 'pymatgen.core.Composition' }) convert_info.add_requirement('pymatgen', 'latest') # Compile them into a Pipeline pipeline_info = PipelineModel().set_name('delta_e-predictor') pipeline_info.set_title("Predict Formation Enthalpy from Composition") pipeline_info.add_step("username", convert_info.name,
def test_pipeline(self):
    """Check the metadata of a pipeline built from two numpy steps."""
    # The two steps: a maximum followed by a mean
    step1 = PythonStaticMethodModel.create_model(
        'numpy', 'max', function_kwargs={'axis': 1}
    ).set_name('step1')
    step2 = PythonStaticMethodModel.create_model('numpy', 'mean').set_name('step2')

    # Assemble the pipeline; the first step is given its own parameters
    pipeline = PipelineModel().set_title('Average of Column Maximums').set_name('numpy_test')
    pipeline.add_step('username', step1.name, 'Maximum of each column', {'axis': 0})
    pipeline.add_step('username', step2.name, 'Average of the maximums')

    metadata = pipeline.to_dict()

    # Expected metadata, assembled one top-level section at a time
    expected_datacite = {
        'creators': [],
        'titles': [{'title': 'Average of Column Maximums'}],
        'publisher': 'DLHub',
        'publicationYear': _year,
        'identifier': {
            'identifier': '10.YET/UNASSIGNED',
            'identifierType': 'DOI',
        },
        'resourceType': {'resourceTypeGeneral': 'InteractiveResource'},
        "descriptions": [],
        "fundingReferences": [],
        "relatedIdentifiers": [],
        "alternateIdentifiers": [],
        "rightsList": [],
    }
    expected_dlhub = {
        'version': __version__,
        'domains': [],
        'visible_to': ['public'],
        'name': 'numpy_test',
        'type': 'pipeline',
        'files': {},
    }
    expected_steps = [
        {
            'author': 'username',
            'name': step1.name,
            'description': 'Maximum of each column',
            'parameters': {'axis': 0},
        },
        {
            'author': 'username',
            'name': step2.name,
            'description': 'Average of the maximums',
        },
    ]
    correct_metadata = {
        'datacite': expected_datacite,
        'dlhub': expected_dlhub,
        'pipeline': {'steps': expected_steps},
    }

    self.assertEqual(metadata, correct_metadata)
    validate_against_dlhub_schema(metadata, 'pipeline')
from dlhub_sdk.models.servables.python import PythonStaticMethodModel from dlhub_sdk.models.pipeline import PipelineModel from dlhub_sdk.utils.types import compose_argument_block from skimage.transform import resize from skimage.io import imread import pickle as pkl import json # Load in the model and featurizer steps with open('model_info.pkl', 'rb') as fp: model_info = pkl.load(fp) # Make a new step that reads files in from disk read_info = PythonStaticMethodModel.from_function_pointer( imread, autobatch=True, function_kwargs={'as_gray': True}) read_info.set_title("Read in a list of pictures to a grayscale file") read_info.set_name("read_grayscale_image") read_info.set_inputs("list", "List of paths to files", item_type="string") read_info.set_outputs("list", "List of images as ndarrays", item_type=compose_argument_block('ndarray', 'Image', shape=[None, None])) read_info.add_requirement('scikit-image', 'detect') # Make a step to reshape the to 28x28x1 resize_info = PythonStaticMethodModel.from_function_pointer( resize, autobatch=True, function_kwargs={
from dlhub_sdk.models.servables.python import PythonStaticMethodModel
from dlhub_sdk.utils.schemas import validate_against_dlhub_schema
import json
import os

# Describe the "run" function of the "application" module
model = PythonStaticMethodModel.create_model('application', 'run')

# Inputs and outputs of the function
model.set_inputs(
    'list',
    'Paths to all images in a dataset',
    item_type='string',
)
model.set_outputs(
    'ndarray',
    'Accumulated result of decoding all the images',
    shape=[208, 208],
)

# Provenance: title, name, authorship and the source repository
model.set_title("Deep-Learning Super-resolution Image Reconstruction (DSIR)")
model.set_name('dsir')
model.set_authors(['Duarte, Alex'], ['The Institute of Photonic Sciences'])
model.add_alternate_identifier(
    "https://github.com/leaxp/Deep-Learning-Super-Resolution-Image-Reconstruction-DSIR",
    'URL',
)

# Libraries needed to run the function
model.add_requirement('torch', 'detect')
model.add_requirement('torchvision', 'detect')

# Attach app.py to the files to be submitted
# NOTE(review): the function above is looked up in a module named "application",
# while the attached file is "app.py" - confirm the two are meant to differ
model.add_file("app.py")
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s') # Define test input data mols = ['C', 'CC', 'CCC'] authors = ["Ward, Logan", "Dandu, Naveen", "Blaiszik, Ben", "Narayanan, Badri", "Assary, Rajeev S.", "Redfern, Paul C.", "Foster, Ian", "Curtiss, Larry A."] affil = ["Argonne National Laboratory"] * 3 + ["University of Louisville"] + ["Argonne National Laboratory"] * 4 # Parse the user input parser = ArgumentParser(description='''Post a MPNN-based solvation energy model to DLHub.''') parser.add_argument('--test', help='Just test out the submission and print the metadata', action='store_true') args = parser.parse_args() # Write out the generic components of the model model = PythonStaticMethodModel.from_function_pointer(evaluate_molecules) # Descriptions of the model interface model.set_outputs( 'dict', 'Solvation energy predictions', properties={ 'smiles': compose_argument_block('list', 'List of molecules run with the model', item_type='string'), 'solvation-energies': compose_argument_block( 'ndarray', 'Predicted solvation energy for each molecule in each solvent', shape=[None, None] ), 'dielectric-constants': compose_argument_block('list', 'Dielectric constants for solvents', item_type='float'), 'training-set-distance': compose_argument_block('list', 'Distance to nearest molecules in training set.' ' Normalized based on the distances in the test set', item_type='float'), 'expected-error': compose_argument_block('list', 'Estimated uncertainty in the prediction based on distance' ' from training set',
from dlhub_sdk.models.servables.python import PythonStaticMethodModel from dlhub_sdk.utils.schemas import validate_against_dlhub_schema from dlhub_sdk.utils.types import compose_argument_block import json import os # Create a model that invokes the "run" function from the model = PythonStaticMethodModel.create_model('app', 'run', function_kwargs={'relax': False}) # Describe the inputs and outputs model.set_inputs('string', 'Molecule in XYZ format') model.set_outputs( 'dict', 'Forces and energies of the molecule', properties={ 'energy': compose_argument_block('number', 'Energy of the whole system'), 'forces': compose_argument_block('ndarray', 'Forces acting on each atom in each direction', shape=[None, 3]) }) # Add provenance information model.set_title("SchNet C20 Force and Energy Predictor") model.set_name('schnet_c20') model.set_domains(['physics']) model.set_abstract( "A model based on the SchNet architecture that predicts the energy and forces of a C20 molecule. Useful for molecular dynmaics simulations."
# Add requirements model.add_requirement('tensorflow', 'detect') model.add_requirement('keras', 'detect') # Sanity Check: Make sure it fits the schema metadata = model.to_dict() print(json.dumps(metadata, indent=2)) validate_against_dlhub_schema(metadata, 'servable') with open('model_metadata.json', 'w') as fp: json.dump(metadata, fp, indent=2) # Describe the encoding step # The first step is to turn a string into a list of integers string_length = model.input['shape'][-1] model = PythonStaticMethodModel('app', 'encode_string', function_kwargs={'length': string_length}, autobatch=True) # Describe the inputs and outputs model.set_inputs('list', 'List of SMILES strings', item_type='string') model.set_outputs('list', 'List of encoded strings.', item_type=compose_argument_block( 'list', 'Encoded string. List of integers where each ' 'value is the index of the character in the ' 'library, or 0 if it is padded', item_type='integer')) # Add provenance information model.set_authors(["Zhu, Mengyuan"], ["Georgia State University"]) model.set_title("String Encoder for Classification Model for AMDET Properties") model.set_name("deep-smiles_enocoder")
import os

# Make a logger
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# Parse the user input
parser = ArgumentParser(description='''Publish model to DLHub''')
parser.add_argument('--test', help='Just test out the submission and print the metadata', action='store_true')
args = parser.parse_args()
# Plain string: the message has no placeholders, so no f-string is needed
logging.info('Starting publication')

# Load in the model; its input shape is reused in the description below
tf_model = load_model('model_19-0.00.h5')

# Write out the model description
model = PythonStaticMethodModel.from_function_pointer(inference)

# Descriptions of the model interface
model.set_inputs('ndarray', 'Gravity waveform measurement', shape=tf_model.input_shape)
model.set_outputs(
    'dict', 'Estimated of properties of merging black holes',
    properties={
        'q': compose_argument_block('list', 'Mass ratio', item_type='float'),
        's1': compose_argument_block('list', 'Spin of primary', item_type='float'),
        # Was a copy of the 's1' description; 's2' is taken to be the other body
        # NOTE(review): confirm against the paper referenced below
        's2': compose_argument_block('list', 'Spin of secondary', item_type='float'),
        'S_eff': compose_argument_block('list', 'Effective spin', item_type='float'),
        'chi': compose_argument_block('list', 'Effective spin parameter', item_type='float'),
    }
)

# Provenance information for the model
model.add_related_identifier("2004.09524", "arXiv", "IsDescribedBy")