def test_model_info():
    """Verify the info object returned by ``Model.log`` against the logged model.

    Logs a ``TestFlavor`` model with a signature and an input example,
    downloads the logged artifacts, and checks that the returned info agrees
    with the run and with the ``MLmodel`` file loaded from disk.
    """
    example = {"x": 1, "y": 2}
    expected_flavors = {
        "flavor1": {"a": 1, "b": 2},
        "flavor2": {"x": 1, "y": 2},
    }
    with TempDir(chdr=True) as tmp:
        signature = ModelSignature(
            inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        exp_id = mlflow.create_experiment("test")
        with mlflow.start_run(experiment_id=exp_id) as run:
            model_info = Model.log(
                "some/path", TestFlavor, signature=signature, input_example=example
            )

        # The same URI is used both to fetch the artifacts and as the expected
        # ``model_uri`` of the returned info.
        logged_uri = "runs:/{}/some/path".format(run.info.run_id)
        download_dir = _download_artifact_from_uri(logged_uri, output_path=tmp.path(""))

        assert model_info.run_id == run.info.run_id
        assert model_info.artifact_path == "some/path"
        assert model_info.model_uri == logged_uri

        # Fields that must match what was written to the MLmodel file.
        from_disk = Model.load(os.path.join(download_dir, "MLmodel"))
        assert model_info.utc_time_created == from_disk.utc_time_created
        assert model_info.model_uuid == from_disk.model_uuid

        assert model_info.flavors == expected_flavors

        # The saved input example must round-trip to the original record.
        example_file = os.path.join(
            download_dir, model_info.saved_input_example_info["artifact_path"]
        )
        example_frame = _dataframe_from_json(example_file)
        assert example_frame.to_dict(orient="records")[0] == example

        assert model_info.signature_dict == signature.to_dict()
def test_model_signature():
    """Check equality and dict/JSON round-tripping of column-based signatures."""

    def make_inputs():
        # Fresh input schema for every signature so no objects are shared.
        return Schema([ColSpec(DataType.boolean), ColSpec(DataType.binary)])

    def make_outputs(first_type):
        # Two unnamed output columns; only the first column's type varies.
        return Schema(
            [
                ColSpec(name=None, type=first_type),
                ColSpec(name=None, type=DataType.double),
            ]
        )

    # Identically constructed signatures compare equal.
    signature1 = ModelSignature(inputs=make_inputs(), outputs=make_outputs(DataType.double))
    signature2 = ModelSignature(inputs=make_inputs(), outputs=make_outputs(DataType.double))
    assert signature1 == signature2

    # Changing one output column type breaks equality.
    signature3 = ModelSignature(inputs=make_inputs(), outputs=make_outputs(DataType.float))
    assert signature3 != signature1

    # Round trip through JSON with both inputs and outputs present.
    serialized = json.dumps(signature1.to_dict())
    signature4 = ModelSignature.from_dict(json.loads(serialized))
    assert signature1 == signature4

    # Round trip through JSON when the signature has no outputs.
    signature5 = ModelSignature(inputs=make_inputs(), outputs=None)
    serialized = json.dumps(signature5.to_dict())
    signature6 = ModelSignature.from_dict(json.loads(serialized))
    assert signature5 == signature6
def test_model_save_load():
    """Check schema accessors, equality, and save/load round-tripping of ``Model``."""

    def build_model():
        # Build a fully populated model; every call creates fresh nested objects.
        return Model(
            artifact_path="some/path",
            run_id="123",
            flavors={
                "flavor1": {"a": 1, "b": 2},
                "flavor2": {"x": 1, "y": 2},
            },
            signature=ModelSignature(
                inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]),
                outputs=Schema([ColSpec(name=None, type="double")]),
            ),
            saved_input_example_info={"x": 1, "y": 2},
        )

    m = build_model()
    assert m.get_input_schema() == m.signature.inputs
    assert m.get_output_schema() == m.signature.outputs

    # A model created without a signature exposes no schemas.
    bare = Model(artifact_path="some/other/path", run_id="1234")
    assert bare.get_input_schema() is None
    assert bare.get_output_schema() is None

    # Align the creation timestamps before comparing the two models.
    n = build_model()
    n.utc_time_created = m.utc_time_created
    assert m == n

    # Dropping the signature must break equality.
    n.signature = None
    assert m != n

    with TempDir() as tmp:
        target = tmp.path("model")
        m.save(target)
        reloaded = Model.load(target)
        assert m == reloaded
        assert m.to_json() == reloaded.to_json()
        assert m.to_yaml() == reloaded.to_yaml()
def test_model_signature_with_colspec_and_tensorspec():
    """Check that column-based and tensor-based signatures never compare equal.

    Inequality is asserted in both operand orders.
    """
    # Inputs only: column spec versus tensor spec.
    col_inputs_only = ModelSignature(inputs=Schema([ColSpec(DataType.double)]))
    tensor_inputs_only = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))])
    )
    assert col_inputs_only != tensor_inputs_only
    assert tensor_inputs_only != col_inputs_only

    # Same column inputs; outputs differ in spec kind.
    tensor_outputs = ModelSignature(
        inputs=Schema([ColSpec(DataType.double)]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
    )
    col_outputs = ModelSignature(
        inputs=Schema([ColSpec(DataType.double)]),
        outputs=Schema([ColSpec(DataType.double)]),
    )
    assert tensor_outputs != col_outputs
    assert col_outputs != tensor_outputs
def test_model_log_with_input_example_succeeds():
    """Check that a DataFrame input example survives logging and reloading."""
    with TempDir(chdr=True) as tmp:
        input_columns = [
            ColSpec("integer", "a"),
            ColSpec("string", "b"),
            ColSpec("boolean", "c"),
            ColSpec("string", "d"),
            ColSpec("datetime", "e"),
        ]
        sig = ModelSignature(
            inputs=Schema(input_columns),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        row = {
            "a": np.int32(1),
            "b": "test string",
            "c": True,
            "d": date.today(),
            "e": np.datetime64("2020-01-01T00:00:00"),
        }
        input_example = pd.DataFrame(row, index=[0])

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        example_file = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"]
        )
        restored = _dataframe_from_json(example_file, schema=sig.inputs)

        # The date column comes back as a string, so convert the expected
        # values to ISO strings before comparing.
        input_example["d"] = input_example["d"].apply(lambda day: day.isoformat())
        assert restored.equals(input_example)
def test_model_log_with_databricks_runtime():
    """Check that a logged model records the (patched) Databricks runtime version."""
    dbr = "8.3.x-snapshot-gpu-ml-scala2.12"
    runtime_patch = mock.patch("mlflow.models.model.get_databricks_runtime", return_value=dbr)
    with TempDir(chdr=True) as tmp, runtime_patch:
        sig = ModelSignature(
            inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        input_example = {"x": 1, "y": 2}
        local_path, run = _log_model_with_signature_and_example(tmp, sig, input_example)

        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        assert loaded_model.run_id == run.info.run_id
        assert loaded_model.artifact_path == "some/path"
        expected_flavors = {
            "flavor1": {"a": 1, "b": 2},
            "flavor2": {"x": 1, "y": 2},
        }
        assert loaded_model.flavors == expected_flavors
        assert loaded_model.signature == sig

        # The stored input example must round-trip to the original record.
        example_file = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"]
        )
        example_frame = _dataframe_from_json(example_file)
        assert example_frame.to_dict(orient="records")[0] == input_example

        # The runtime string returned by the patched helper must be persisted.
        assert loaded_model.databricks_runtime == dbr
def test_model_log():
    """Check the metadata, signature, and input example of a logged model."""
    with TempDir(chdr=True) as tmp:
        sig = ModelSignature(
            inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        input_example = {"x": 1, "y": 2}
        local_path, run = _log_model_with_signature_and_example(tmp, sig, input_example)

        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        assert loaded_model.run_id == run.info.run_id
        assert loaded_model.artifact_path == "some/path"
        expected_flavors = {
            "flavor1": {"a": 1, "b": 2},
            "flavor2": {"x": 1, "y": 2},
        }
        assert loaded_model.flavors == expected_flavors
        assert loaded_model.signature == sig

        # The stored input example must round-trip to the original record.
        example_file = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"]
        )
        example_frame = _dataframe_from_json(example_file)
        assert example_frame.to_dict(orient="records")[0] == input_example

        # No runtime helper is patched here, so the attribute must be absent.
        assert not hasattr(loaded_model, "databricks_runtime")
def train(
    proj_name: str,
    Model: str,
    dataset_cls: str,
    net_fn: str,
    net_args: Dict,
    dataset_args: Dict,
):
    """Train a model inside a W&B run and save its network with MLflow.

    The dataset class, network function, and model class are looked up by
    name under ``manythings.data.dta_<dataset_cls>``,
    ``manythings.networks.<net_fn>``, and ``manythings.models.<Model>``.

    Args:
        proj_name: W&B project name passed to ``wandb.init``.
        Model: Name of both the model module and the class inside it.
        dataset_cls: Name of the dataset class; its module is ``dta_<name>``.
        net_fn: Name of both the network module and the function inside it.
        net_args: Arguments forwarded to the model constructor for the network.
        dataset_args: Arguments forwarded to the model constructor for the dataset.
    """

    def _resolve(module_path, attr_name):
        # Import ``module_path`` and return the attribute called ``attr_name``.
        return getattr(importlib.import_module(module_path), attr_name)

    dataset_cls_ = _resolve(f"manythings.data.dta_{dataset_cls}", dataset_cls)
    network_fn_ = _resolve(f"manythings.networks.{net_fn}", net_fn)
    model_cls_ = _resolve(f"manythings.models.{Model}", Model)

    # Run configuration recorded by W&B.
    config = {
        "model": Model,
        "dataset_cls": dataset_cls,
        "net_fn": net_fn,
        "net_args": net_args,
        "dataset_args": dataset_args,
    }

    # MLflow signature: two named uint8 inputs and one unnamed float32 output.
    encoder_spec = TensorSpec(np.dtype(np.uint8), (-1, 71), "encoder_input")
    decoder_spec = TensorSpec(np.dtype(np.uint8), (-1, 93), "decoder_input")
    input_schema = Schema([encoder_spec, decoder_spec])
    output_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 93))])
    signature = ModelSignature(inputs=input_schema, outputs=output_schema)

    # The dataset instance is prepared here; the model below receives the
    # dataset class, not this instance.
    data = dataset_cls_()
    data.load_or_generate()
    data.preprocess()

    with wandb.init(project=proj_name, config=config):
        # Rebound to the W&B config object; not read again in this function.
        config = wandb.config

        model = model_cls_(dataset_cls_, network_fn_, net_args, dataset_args)

        # The callback is created with no arguments.
        callbacks = [WandbCallback()]
        model.fit(callbacks=callbacks)

        mlflow.keras.save_model(
            model.network,
            "saved_models/seq2seq",
            signature=signature,
        )
def test_model_load_input_example_failures():
    """Check that ``load_input_example`` raises when the example file is unreachable.

    Covers a base path that does not exist and an example file that was
    deleted after logging.
    """
    missing_file_message = "No such file or directory"
    with TempDir(chdr=True) as tmp:
        input_example = np.array([[3, 4, 5]], dtype=np.int32)
        tensor_input = TensorSpec(type=input_example.dtype, shape=input_example.shape)
        sig = ModelSignature(
            inputs=Schema([tensor_input]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))

        # Sanity check: the example loads while the file is still in place.
        assert loaded_model.load_input_example(local_path) is not None

        # A base directory that does not exist.
        bogus_dir = os.path.join(local_path, "folder_which_does_not_exist")
        with pytest.raises(FileNotFoundError, match=missing_file_message):
            loaded_model.load_input_example(bogus_dir)

        # A valid base directory whose example file has been removed.
        example_file = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"]
        )
        os.remove(example_file)
        with pytest.raises(FileNotFoundError, match=missing_file_message):
            loaded_model.load_input_example(local_path)
def from_dict(cls, model_dict):
    """Build an instance of ``cls`` from a dictionary of constructor arguments.

    If ``model_dict`` has a ``"signature"`` entry that is itself a dict, that
    entry is converted with ``ModelSignature.from_dict`` first. The conversion
    is done on a shallow copy, so the caller's dictionary is not modified.

    Args:
        cls: Class to instantiate; called as ``cls(**kwargs)``.
        model_dict: Mapping of keyword arguments for ``cls``.

    Returns:
        The object returned by ``cls(**kwargs)``.
    """
    raw_signature = model_dict.get("signature")
    if not isinstance(raw_signature, dict):
        # Nothing to convert: pass the arguments through untouched.
        return cls(**model_dict)

    kwargs = model_dict.copy()
    kwargs["signature"] = ModelSignature.from_dict(raw_signature)
    return cls(**kwargs)
def test_model_load_input_example_no_signature():
    """Check that ``load_input_example`` returns ``None`` when no example was logged.

    The model is logged with a signature but with ``input_example=None``.
    """
    with TempDir(chdr=True) as tmp:
        # This array only supplies the dtype and shape for the signature; it
        # is not logged as an example.
        reference = np.array([[3, 4, 5]], dtype=np.int32)
        tensor_input = TensorSpec(type=reference.dtype, shape=reference.shape)
        sig = ModelSignature(
            inputs=Schema([tensor_input]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example=None)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))

        assert loaded_model.load_input_example(local_path) is None
def test_model_load_input_example_scipy():
    """Check that a SciPy CSC-matrix input example survives logging and reloading."""
    with TempDir(chdr=True) as tmp:
        dense_values = np.arange(0, 12, 0.5).reshape(3, 8)
        input_example = csc_matrix(dense_values)
        tensor_input = TensorSpec(type=input_example.data.dtype, shape=input_example.shape)
        sig = ModelSignature(
            inputs=Schema([tensor_input]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        restored = loaded_model.load_input_example(local_path)

        # Both the sparse format and the stored values must be preserved.
        assert isinstance(restored, csc_matrix)
        assert np.array_equal(input_example.data, restored.data)
def test_model_log_with_input_example_succeeds():
    """Check that a DataFrame input example logged via ``Model.log`` round-trips.

    The model is logged inside an explicit run and its artifacts are then
    downloaded through the ``runs:/`` URI.
    """
    with TempDir(chdr=True) as tmp:
        experiment_id = mlflow.create_experiment("test")
        input_columns = [
            ColSpec("integer", "a"),
            ColSpec("string", "b"),
            ColSpec("boolean", "c"),
            ColSpec("string", "d"),
            ColSpec("datetime", "e"),
        ]
        sig = ModelSignature(
            inputs=Schema(input_columns),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        row = {
            "a": np.int32(1),
            "b": "test string",
            "c": True,
            "d": date.today(),
            "e": np.datetime64("2020-01-01T00:00:00"),
        }
        input_example = pd.DataFrame(row, index=[0])

        with mlflow.start_run(experiment_id=experiment_id) as run:
            Model.log("some/path", TestFlavor, signature=sig, input_example=input_example)

        artifact_uri = "runs:/{}/some/path".format(run.info.run_id)
        local_path = _download_artifact_from_uri(artifact_uri, output_path=tmp.path(""))

        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        example_file = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"]
        )
        restored = _dataframe_from_json(example_file, schema=sig.inputs)

        # The date column comes back as a string, so convert the expected
        # values to ISO strings before comparing.
        input_example["d"] = input_example["d"].apply(lambda day: day.isoformat())
        assert restored.equals(input_example)
# One-hot encode the class labels to match the categorical cross-entropy loss
# used below.
trainY = tf.keras.utils.to_categorical(train_Y)
testY = tf.keras.utils.to_categorical(test_Y)

# Small CNN: one conv + max-pool stage, then a dense hidden layer and a
# 10-way softmax output.
model = tf.keras.models.Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform',
                 input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(10, activation='softmax'))

# ``learning_rate`` is the supported argument name; the ``lr`` alias is
# deprecated and is rejected by newer Keras releases.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(trainX, trainY, epochs=1, batch_size=32, validation_data=(testX, testY))

# Signature: batches of 28x28x1 uint8 inputs mapped to float32 outputs of width 10.
input_schema = Schema([
    TensorSpec(np.dtype(np.uint8), (-1, 28, 28, 1)),
])
output_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 10))])
signature = ModelSignature(inputs=input_schema, outputs=output_schema)

# NOTE(review): this example array has shape (2, 4, 4), which does not match
# the (-1, 28, 28, 1) input declared in the signature above -- confirm which
# of the two is intended.
input_example = np.array([
    [[0, 0, 0, 0],
     [0, 134, 25, 56],
     [253, 242, 195, 6],
     [0, 93, 82, 82]],
    [[0, 23, 46, 0],
     [33, 13, 36, 166],
     [76, 75, 0, 255],
     [33, 44, 11, 82]],
], dtype=np.uint8)

mlflow.keras.log_model(model, "mnist_cnn", signature=signature, input_example=input_example)
def test_model_signature_with_tensorspec():
    """Check equality and dict/JSON round-tripping of tensor-based signatures.

    Covers identical signatures, a dtype mismatch, a name mismatch, named
    multi-input signatures, and signatures without outputs.
    """
    # Identically constructed signatures compare equal.
    signature1 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10))]),
    )
    signature2 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10))]),
    )
    assert signature1 == signature2

    # Single type mismatch: only the output dtype differs from signature1.
    signature3 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("int"), (-1, 10))]),
    )
    assert signature3 != signature1

    # Name mismatch: signature4 differs from signature1 only by the output name.
    signature4 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10), "misMatch")]),
    )
    assert signature3 != signature4
    # signature3 and signature4 also differ in dtype, so the assertion above
    # would pass even if names were ignored. Comparing against signature1
    # isolates the name.
    assert signature1 != signature4

    # Round trip through JSON.
    as_json = json.dumps(signature1.to_dict())
    signature5 = ModelSignature.from_dict(json.loads(as_json))
    assert signature1 == signature5

    # Test with name
    signature6 = ModelSignature(
        inputs=Schema(
            [
                TensorSpec(np.dtype("float"), (-1, 28, 28), name="image"),
                TensorSpec(np.dtype("int"), (-1, 10), name="metadata"),
            ]
        ),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10), name="outputs")]),
    )
    signature7 = ModelSignature(
        inputs=Schema(
            [
                TensorSpec(np.dtype("float"), (-1, 28, 28), name="image"),
                TensorSpec(np.dtype("int"), (-1, 10), name="metadata"),
            ]
        ),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10), name="outputs")]),
    )
    assert signature6 == signature7
    assert signature1 != signature6

    # Test w/o output
    signature8 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]), outputs=None
    )
    as_json = json.dumps(signature8.to_dict())
    signature9 = ModelSignature.from_dict(json.loads(as_json))
    assert signature8 == signature9
def _infer_signature(onnx_model):
    """Build a ``ModelSignature`` from an ONNX model's declared inputs and outputs.

    The model is serialized and opened in an ``onnxruntime.InferenceSession``;
    the session's input and output descriptions are each passed through
    ``_infer_schema``.

    Args:
        onnx_model: Object exposing ``SerializeToString()``.

    Returns:
        A ``ModelSignature`` built from the inferred input and output schemas.
    """
    session = onnxruntime.InferenceSession(onnx_model.SerializeToString())
    input_schema = _infer_schema(session.get_inputs())
    output_schema = _infer_schema(session.get_outputs())
    return ModelSignature(input_schema, output_schema)