Beispiel #1
0
def test_model_info():
    """Verify the info object returned by ``Model.log`` against the logged artifacts."""
    with TempDir(chdr=True) as tmp:
        input_schema = Schema([ColSpec("integer", "x"), ColSpec("integer", "y")])
        output_schema = Schema([ColSpec(name=None, type="double")])
        sig = ModelSignature(inputs=input_schema, outputs=output_schema)
        input_example = {"x": 1, "y": 2}

        experiment_id = mlflow.create_experiment("test")
        with mlflow.start_run(experiment_id=experiment_id) as run:
            model_info = Model.log(
                "some/path", TestFlavor, signature=sig, input_example=input_example
            )
        run_id = run.info.run_id
        local_path = _download_artifact_from_uri(
            "runs:/{}/some/path".format(run_id), output_path=tmp.path("")
        )

        # Identity of the logged model.
        assert model_info.run_id == run_id
        assert model_info.artifact_path == "some/path"
        assert model_info.model_uri == "runs:/{}/some/path".format(run_id)

        # Metadata must agree with the MLmodel file that was downloaded.
        mlmodel_file = os.path.join(local_path, "MLmodel")
        loaded_model = Model.load(mlmodel_file)
        assert model_info.utc_time_created == loaded_model.utc_time_created
        assert model_info.model_uuid == loaded_model.model_uuid

        expected_flavors = {
            "flavor1": {"a": 1, "b": 2},
            "flavor2": {"x": 1, "y": 2},
        }
        assert model_info.flavors == expected_flavors

        # The stored input example should round-trip to the original dict.
        example_file = os.path.join(
            local_path, model_info.saved_input_example_info["artifact_path"]
        )
        example_frame = _dataframe_from_json(example_file)
        assert example_frame.to_dict(orient="records")[0] == input_example

        assert model_info.signature_dict == sig.to_dict()
def test_model_signature():
    """Exercise equality and dict/JSON round-tripping of column-based signatures."""

    def make_inputs():
        # Fresh schema per call so no two signatures share objects.
        return Schema([ColSpec(DataType.boolean), ColSpec(DataType.binary)])

    def make_outputs(first_type):
        return Schema([
            ColSpec(name=None, type=first_type),
            ColSpec(name=None, type=DataType.double),
        ])

    signature1 = ModelSignature(inputs=make_inputs(), outputs=make_outputs(DataType.double))
    signature2 = ModelSignature(inputs=make_inputs(), outputs=make_outputs(DataType.double))
    assert signature1 == signature2

    # A single differing output type makes the signatures unequal.
    signature3 = ModelSignature(inputs=make_inputs(), outputs=make_outputs(DataType.float))
    assert signature3 != signature1

    # Round trip through JSON.
    serialized = json.dumps(signature1.to_dict())
    signature4 = ModelSignature.from_dict(json.loads(serialized))
    assert signature1 == signature4

    # Round trip for a signature without outputs.
    signature5 = ModelSignature(inputs=make_inputs(), outputs=None)
    serialized = json.dumps(signature5.to_dict())
    signature6 = ModelSignature.from_dict(json.loads(serialized))
    assert signature5 == signature6
def test_model_save_load():
    """Check schema accessors, equality, and save/load round-tripping of ``Model``."""

    def build_model():
        # Both compared models are built from exactly the same arguments.
        signature = ModelSignature(
            inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        return Model(
            artifact_path="some/path",
            run_id="123",
            flavors={
                "flavor1": {"a": 1, "b": 2},
                "flavor2": {"x": 1, "y": 2},
            },
            signature=signature,
            saved_input_example_info={"x": 1, "y": 2},
        )

    first = build_model()
    assert first.get_input_schema() == first.signature.inputs
    assert first.get_output_schema() == first.signature.outputs

    # A model without a signature exposes no schemas.
    bare = Model(artifact_path="some/other/path", run_id="1234")
    assert bare.get_input_schema() is None
    assert bare.get_output_schema() is None

    second = build_model()
    # Align the creation timestamps before comparing the two models.
    second.utc_time_created = first.utc_time_created
    assert first == second
    second.signature = None
    assert first != second

    with TempDir() as tmp:
        first.save(tmp.path("model"))
        reloaded = Model.load(tmp.path("model"))
    assert first == reloaded
    assert first.to_json() == reloaded.to_json()
    assert first.to_yaml() == reloaded.to_yaml()
Beispiel #4
0
def test_model_signature_with_colspec_and_tensorspec():
    """Column-based and tensor-based signatures must compare unequal in both directions."""
    tensor_dtype = np.dtype("float")
    tensor_shape = (-1, 28, 28)

    # Inputs only: column spec versus tensor spec.
    signature1 = ModelSignature(inputs=Schema([ColSpec(DataType.double)]))
    signature2 = ModelSignature(inputs=Schema([TensorSpec(tensor_dtype, tensor_shape)]))
    assert signature1 != signature2
    assert signature2 != signature1

    # Same column inputs, but tensor outputs versus column outputs.
    signature3 = ModelSignature(
        inputs=Schema([ColSpec(DataType.double)]),
        outputs=Schema([TensorSpec(tensor_dtype, tensor_shape)]),
    )
    signature4 = ModelSignature(
        inputs=Schema([ColSpec(DataType.double)]),
        outputs=Schema([ColSpec(DataType.double)]),
    )
    assert signature3 != signature4
    assert signature4 != signature3
Beispiel #5
0
def test_model_log_with_input_example_succeeds():
    """Log a model with a DataFrame input example and check the example round-trips."""
    with TempDir(chdr=True) as tmp:
        input_columns = [
            ColSpec("integer", "a"),
            ColSpec("string", "b"),
            ColSpec("boolean", "c"),
            ColSpec("string", "d"),
            ColSpec("datetime", "e"),
        ]
        sig = ModelSignature(
            inputs=Schema(input_columns),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        example_row = {
            "a": np.int32(1),
            "b": "test string",
            "c": True,
            "d": date.today(),
            "e": np.datetime64("2020-01-01T00:00:00"),
        }
        input_example = pd.DataFrame(example_row, index=[0])

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        example_file = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"]
        )
        restored = _dataframe_from_json(example_file, schema=sig.inputs)

        # The date column comes back as a string, so convert the expectation to match.
        input_example["d"] = input_example["d"].apply(lambda day: day.isoformat())
        assert restored.equals(input_example)
Beispiel #6
0
def test_model_log_with_databricks_runtime():
    """With a patched Databricks runtime lookup, the logged model records that runtime."""
    dbr = "8.3.x-snapshot-gpu-ml-scala2.12"
    runtime_patch = mock.patch("mlflow.models.model.get_databricks_runtime", return_value=dbr)
    with TempDir(chdr=True) as tmp, runtime_patch:
        input_schema = Schema([ColSpec("integer", "x"), ColSpec("integer", "y")])
        output_schema = Schema([ColSpec(name=None, type="double")])
        sig = ModelSignature(inputs=input_schema, outputs=output_schema)
        input_example = {"x": 1, "y": 2}
        local_path, run = _log_model_with_signature_and_example(tmp, sig, input_example)

        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        assert loaded_model.run_id == run.info.run_id
        assert loaded_model.artifact_path == "some/path"
        expected_flavors = {
            "flavor1": {"a": 1, "b": 2},
            "flavor2": {"x": 1, "y": 2},
        }
        assert loaded_model.flavors == expected_flavors
        assert loaded_model.signature == sig

        # The saved input example must round-trip to the original dict.
        example_file = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"]
        )
        example_frame = _dataframe_from_json(example_file)
        assert example_frame.to_dict(orient="records")[0] == input_example

        assert loaded_model.databricks_runtime == dbr
Beispiel #7
0
def test_model_log():
    """Log a model and verify the MLmodel metadata, signature, and input example."""
    with TempDir(chdr=True) as tmp:
        input_schema = Schema([ColSpec("integer", "x"), ColSpec("integer", "y")])
        output_schema = Schema([ColSpec(name=None, type="double")])
        sig = ModelSignature(inputs=input_schema, outputs=output_schema)
        input_example = {"x": 1, "y": 2}
        local_path, run = _log_model_with_signature_and_example(tmp, sig, input_example)

        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        assert loaded_model.run_id == run.info.run_id
        assert loaded_model.artifact_path == "some/path"
        expected_flavors = {
            "flavor1": {"a": 1, "b": 2},
            "flavor2": {"x": 1, "y": 2},
        }
        assert loaded_model.flavors == expected_flavors
        assert loaded_model.signature == sig

        # The saved input example must round-trip to the original dict.
        example_file = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"]
        )
        example_frame = _dataframe_from_json(example_file)
        assert example_frame.to_dict(orient="records")[0] == input_example

        # No runtime is patched in here, so the attribute must be absent.
        assert not hasattr(loaded_model, "databricks_runtime")
def train(
    proj_name: str,
    Model: str,
    dataset_cls: str,
    net_fn: str,
    net_args: Dict,
    dataset_args: Dict,
):
    """Train a model inside a W&B run and save its network with an MLflow signature.

    The dataset class, network function and model class are looked up
    dynamically; each string argument is used both as the module-name suffix
    and as the attribute name inside that module.

    Args:
        proj_name: W&B project name passed to ``wandb.init``.
        Model: Name of the model class, imported from ``manythings.models.<Model>``.
        dataset_cls: Name of the dataset class, imported from
            ``manythings.data.dta_<dataset_cls>``.
        net_fn: Name of the network function, imported from
            ``manythings.networks.<net_fn>``.
        net_args: Arguments forwarded to the model constructor.
        dataset_args: Arguments forwarded to the model constructor.

    The trained network is written to ``saved_models/seq2seq`` together with
    the tensor signature defined below.
    """
    dataset_module = importlib.import_module(
        f"manythings.data.dta_{dataset_cls}")
    dataset_cls_ = getattr(dataset_module, dataset_cls)

    network_module = importlib.import_module(f"manythings.networks.{net_fn}")
    network_fn_ = getattr(network_module, net_fn)

    model_module = importlib.import_module(f"manythings.models.{Model}")
    model_cls_ = getattr(model_module, Model)

    # Run configuration recorded by W&B.
    config = {
        "model": Model,
        "dataset_cls": dataset_cls,
        "net_fn": net_fn,
        "net_args": net_args,
        "dataset_args": dataset_args,
    }

    # Signature stored with the saved model: two named uint8 tensor inputs
    # and one unnamed float32 tensor output.
    input_schema = Schema([
        TensorSpec(np.dtype(np.uint8), (-1, 71), "encoder_input"),
        TensorSpec(np.dtype(np.uint8), (-1, 93), "decoder_input"),
    ])
    output_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 93))])
    signature = ModelSignature(inputs=input_schema, outputs=output_schema)

    # The dataset instance is not passed to the model (which receives the class
    # itself); these calls are kept for whatever side effects they have.
    # NOTE(review): presumably this generates/caches the data up front - confirm.
    data = dataset_cls_()
    data.load_or_generate()
    data.preprocess()

    with wandb.init(project=proj_name, config=config):
        model = model_cls_(dataset_cls_, network_fn_, net_args, dataset_args)

        # WandbCallback is used with its default settings.
        callbacks = [WandbCallback()]

        model.fit(callbacks=callbacks)
        mlflow.keras.save_model(model.network,
                                "saved_models/seq2seq",
                                signature=signature)
Beispiel #9
0
def test_model_load_input_example_failures():
    """``load_input_example`` raises ``FileNotFoundError`` for missing paths or files."""
    with TempDir(chdr=True) as tmp:
        input_example = np.array([[3, 4, 5]], dtype=np.int32)
        tensor_input = TensorSpec(type=input_example.dtype, shape=input_example.shape)
        sig = ModelSignature(
            inputs=Schema([tensor_input]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))

        # Sanity check: the example loads while everything is in place.
        loaded_example = loaded_model.load_input_example(local_path)
        assert loaded_example is not None

        # Case 1: the model directory does not exist.
        missing_dir = os.path.join(local_path, "folder_which_does_not_exist")
        with pytest.raises(FileNotFoundError, match="No such file or directory"):
            loaded_model.load_input_example(missing_dir)

        # Case 2: the directory exists but the example file has been deleted.
        example_file = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"]
        )
        os.remove(example_file)
        with pytest.raises(FileNotFoundError, match="No such file or directory"):
            loaded_model.load_input_example(local_path)
Beispiel #10
0
    def from_dict(cls, model_dict):
        """Build an instance of ``cls`` from a dictionary of constructor arguments.

        If the ``"signature"`` entry is itself a dict it is first converted with
        ``ModelSignature.from_dict``. The conversion is applied to a shallow
        copy, so the caller's dictionary is left unmodified.
        """
        raw_signature = model_dict.get("signature")
        if isinstance(raw_signature, dict):
            model_dict = model_dict.copy()
            model_dict["signature"] = ModelSignature.from_dict(raw_signature)

        return cls(**model_dict)
Beispiel #11
0
def test_model_load_input_example_no_signature():
    """A model logged without an input example yields ``None`` from ``load_input_example``."""
    with TempDir(chdr=True) as tmp:
        # The array is only used to derive the dtype/shape of the signature;
        # it is deliberately not logged as the input example.
        input_example = np.array([[3, 4, 5]], dtype=np.int32)
        tensor_input = TensorSpec(type=input_example.dtype, shape=input_example.shape)
        sig = ModelSignature(
            inputs=Schema([tensor_input]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example=None)
        mlmodel_file = os.path.join(local_path, "MLmodel")
        loaded_model = Model.load(mlmodel_file)
        assert loaded_model.load_input_example(local_path) is None
Beispiel #12
0
def test_model_load_input_example_scipy():
    """A ``csc_matrix`` input example is loaded back as a ``csc_matrix`` with equal data."""
    with TempDir(chdr=True) as tmp:
        dense_values = np.arange(0, 12, 0.5).reshape(3, 8)
        input_example = csc_matrix(dense_values)
        tensor_input = TensorSpec(type=input_example.data.dtype, shape=input_example.shape)
        sig = ModelSignature(
            inputs=Schema([tensor_input]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
        mlmodel_file = os.path.join(local_path, "MLmodel")
        loaded_model = Model.load(mlmodel_file)
        loaded_example = loaded_model.load_input_example(local_path)

        # Both the sparse type and the stored values must survive the round trip.
        assert isinstance(loaded_example, csc_matrix)
        assert np.array_equal(input_example.data, loaded_example.data)
Beispiel #13
0
def test_model_log_with_input_example_succeeds():
    """Log a model with a DataFrame input example in a run and check it round-trips."""
    with TempDir(chdr=True) as tmp:
        experiment_id = mlflow.create_experiment("test")
        input_columns = [
            ColSpec("integer", "a"),
            ColSpec("string", "b"),
            ColSpec("boolean", "c"),
            ColSpec("string", "d"),
            ColSpec("datetime", "e"),
        ]
        sig = ModelSignature(
            inputs=Schema(input_columns),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        example_row = {
            "a": np.int32(1),
            "b": "test string",
            "c": True,
            "d": date.today(),
            "e": np.datetime64("2020-01-01T00:00:00"),
        }
        input_example = pd.DataFrame(example_row, index=[0])

        with mlflow.start_run(experiment_id=experiment_id) as r:
            Model.log(
                "some/path", TestFlavor, signature=sig, input_example=input_example
            )

        # Fetch the logged artifacts back into the temporary directory.
        artifact_uri = "runs:/{}/some/path".format(r.info.run_id)
        local_path = _download_artifact_from_uri(artifact_uri, output_path=tmp.path(""))
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        example_file = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"]
        )
        restored = _dataframe_from_json(example_file, schema=sig.inputs)

        # The date column comes back as a string, so convert the expectation to match.
        input_example["d"] = input_example["d"].apply(lambda day: day.isoformat())
        assert restored.equals(input_example)
# One-hot encode the class labels for the categorical cross-entropy loss.
trainY = tf.keras.utils.to_categorical(train_Y)
testY = tf.keras.utils.to_categorical(test_Y)

# Small CNN: one conv layer, max pooling, then two dense layers.
model = tf.keras.models.Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(10, activation='softmax'))
# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(trainX, trainY, epochs=1, batch_size=32, validation_data=(testX, testY))

# Tensor-based signature: uint8 inputs of shape (-1, 28, 28, 1) and
# float32 outputs of shape (-1, 10).
input_schema = Schema([
  TensorSpec(np.dtype(np.uint8), (-1, 28, 28, 1)),
])
output_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 10))])
signature = ModelSignature(inputs=input_schema, outputs=output_schema)
# NOTE(review): this example array has shape (2, 4, 4), which does not match
# the (-1, 28, 28, 1) input declared in the signature above - confirm whether
# a real slice of the input data should be logged instead.
input_example = np.array([
   [[  0,   0,   0,   0],
    [  0, 134,  25,  56],
    [253, 242, 195,   6],
    [  0,  93,  82,  82]],
   [[  0,  23,  46,   0],
    [ 33,  13,  36, 166],
    [ 76,  75,   0, 255],
    [ 33,  44,  11,  82]]
], dtype=np.uint8)

mlflow.keras.log_model(model, "mnist_cnn", signature=signature, input_example=input_example)
def test_model_signature_with_tensorspec():
    """Exercise equality and dict/JSON round-tripping of tensor-based signatures."""
    # Identical signatures compare equal.
    signature1 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10))]),
    )
    signature2 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10))]),
    )
    assert signature1 == signature2

    # Single type mismatch
    signature3 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("int"), (-1, 10))]),
    )
    assert signature3 != signature1

    # Name mismatch
    signature4 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10), "misMatch")]),
    )
    assert signature3 != signature4
    # signature3 also differs from signature4 in output dtype, so compare with
    # signature1 as well: there the output name is the only difference.
    assert signature1 != signature4

    # Round trip through JSON.
    as_json = json.dumps(signature1.to_dict())
    signature5 = ModelSignature.from_dict(json.loads(as_json))
    assert signature1 == signature5

    # Test with name
    signature6 = ModelSignature(
        inputs=Schema([
            TensorSpec(np.dtype("float"), (-1, 28, 28), name="image"),
            TensorSpec(np.dtype("int"), (-1, 10), name="metadata"),
        ]),
        outputs=Schema(
            [TensorSpec(np.dtype("float"), (-1, 10), name="outputs")]),
    )
    signature7 = ModelSignature(
        inputs=Schema([
            TensorSpec(np.dtype("float"), (-1, 28, 28), name="image"),
            TensorSpec(np.dtype("int"), (-1, 10), name="metadata"),
        ]),
        outputs=Schema(
            [TensorSpec(np.dtype("float"), (-1, 10), name="outputs")]),
    )
    assert signature6 == signature7
    assert signature1 != signature6

    # Test w/o output
    signature8 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=None,
    )
    as_json = json.dumps(signature8.to_dict())
    signature9 = ModelSignature.from_dict(json.loads(as_json))
    assert signature8 == signature9
def _infer_signature(onnx_model):
    """Build a ``ModelSignature`` from the inputs and outputs of an ONNX model.

    The model is serialized and opened in an ``onnxruntime.InferenceSession``;
    the session's input and output descriptions are each converted with
    ``_infer_schema``.
    """
    session = onnxruntime.InferenceSession(onnx_model.SerializeToString())
    input_schema = _infer_schema(session.get_inputs())
    output_schema = _infer_schema(session.get_outputs())
    return ModelSignature(input_schema, output_schema)