예제 #1
0
    def test_preview(self, tmpdir, api, loss, optimizer, metrics):
        """Check ``preview()`` for a built model, an unbuilt model and no model.

        A built model must preview as its Keras summary text, an unbuilt model
        is expected to raise ``ValueError``, and a wrapper created with
        ``model=None`` must preview as the empty string.
        """
        if api != ConfigSubclassModel:
            model = api(num_hidden=1, num_classes=2, input_dim=3)
        else:
            model = api(hidden_units=[16, 16, 10])

        tiledb_uri = os.path.join(tmpdir, "model_array")

        # Compilation is only attempted when the parametrization has an
        # optimizer.
        if optimizer:
            model.compile(loss=loss, optimizer=optimizer, metrics=[metrics])

        if not model.built:
            # Previewing requires a built model; construction and preview are
            # both inside the context, so either one may raise.
            with pytest.raises(ValueError):
                unbuilt_wrapper = TensorflowKerasTileDBModel(uri=tiledb_uri,
                                                             model=model)
                unbuilt_wrapper.preview()
        else:
            built_wrapper = TensorflowKerasTileDBModel(uri=tiledb_uri,
                                                       model=model)
            # Capture the Keras summary line by line into a string.
            buffer = io.StringIO()
            model.summary(print_fn=lambda line: buffer.write(line + "\n"))
            expected_preview = buffer.getvalue()
            assert built_wrapper.preview() == expected_preview

        # Without a model there is nothing to summarise.
        empty_wrapper = TensorflowKerasTileDBModel(uri=tiledb_uri, model=None)
        assert empty_wrapper.preview() == ""
예제 #2
0
    def test_sequential_model_save_load_without_input_shape(
            self, tmpdir, api, loss, optimizer, metrics):
        """Round-trip a Sequential model that was never given an input shape.

        Only the parametrization with an optimizer and the MSE loss runs;
        all other combinations are skipped.
        """
        if optimizer is None or loss != keras.losses.MSE:
            pytest.skip()

        # No layer declares an input shape; the model gets its shapes from the
        # first training batch below.
        model = keras.models.Sequential([
            keras.layers.Dense(2),
            keras.layers.RepeatVector(3),
            keras.layers.TimeDistributed(keras.layers.Dense(3)),
        ])
        compile_kwargs = {
            "loss": loss,
            "optimizer": optimizer,
            "metrics": metrics,
            "weighted_metrics": metrics,
            "sample_weight_mode": "temporal",
        }
        model.compile(**compile_kwargs)

        data_x = np.random.random((1, 3))
        data_y = np.random.random((1, 3, 3))
        model.train_on_batch(data_x, data_y)

        tiledb_uri = os.path.join(tmpdir, "model_array")
        tiledb_model_obj = TensorflowKerasTileDBModel(uri=tiledb_uri,
                                                      model=model)
        tiledb_model_obj.save(include_optimizer=True)
        loaded_model = tiledb_model_obj.load(compile_model=True)

        # The reloaded model must predict exactly what the original predicts.
        loaded_prediction = loaded_model.predict(data_x)
        original_prediction = model.predict(data_x)
        np.testing.assert_array_equal(loaded_prediction, original_prediction)
예제 #3
0
    def test_save_load_for_rnn_layers(self, tmpdir, api, loss, optimizer,
                                      metrics):
        """Round-trip an uncompiled functional model built on stacked LSTM cells.

        The model has two outputs (softmax and the dense features feeding it);
        both must be reproduced exactly by the reloaded model.
        """
        inputs = keras.Input([10, 10], name="train_input")

        # One LSTM cell per entry, stacked inside a single RNN layer.
        rnn_cells = []
        for i, size in enumerate([32, 32]):
            cell = keras.layers.LSTMCell(size,
                                         recurrent_dropout=0,
                                         name="rnn_cell%d" % i)
            rnn_cells.append(cell)

        rnn_layer = keras.layers.RNN(rnn_cells,
                                     return_sequences=True,
                                     name="rnn_layer")
        rnn_output = rnn_layer(inputs)
        feature_layer = keras.layers.Dense(10, name="prediction_features")
        pred_feat = feature_layer(rnn_output)
        pred = keras.layers.Softmax()(pred_feat)
        model = keras.Model(inputs=[inputs], outputs=[pred, pred_feat])

        tiledb_uri = os.path.join(tmpdir, "model_array")
        tiledb_model_obj = TensorflowKerasTileDBModel(uri=tiledb_uri,
                                                      model=model)
        # The model is never compiled, so no optimizer state is stored.
        tiledb_model_obj.save(include_optimizer=False)
        loaded_model = tiledb_model_obj.load(compile_model=False)

        data = np.random.rand(50, 10, 10)

        # The reloaded model must predict exactly what the original predicts.
        loaded_prediction = loaded_model.predict(data)
        original_prediction = model.predict(data)
        np.testing.assert_array_equal(loaded_prediction, original_prediction)
예제 #4
0
    def test_save_load_with_dense_features(self, tmpdir, api, loss, optimizer,
                                           metrics):
        """Round-trip a compiled model whose inputs pass through DenseFeatures.

        Skipped when the parametrization has no optimizer. After the
        round-trip, optimizer weights and predictions of the loaded model are
        compared with those of the original.
        """
        if optimizer is None:
            pytest.skip()

        # One numeric column and one vocabulary-backed indicator column.
        vocabulary_column = (
            feature_column_lib.categorical_column_with_vocabulary_list(
                "b", ["one", "two"]))
        cols = [
            feature_column_lib.numeric_column("a"),
            feature_column_lib.indicator_column(vocabulary_column),
        ]
        input_layers = {
            "a": keras.layers.Input(shape=(1, ), name="a"),
            "b": keras.layers.Input(shape=(1, ), name="b", dtype="string"),
        }

        fc_layer = dense_features.DenseFeatures(cols)(input_layers)
        output = keras.layers.Dense(10)(fc_layer)
        model = keras.models.Model(input_layers, output)
        model.compile(loss=loss, optimizer=optimizer, metrics=[metrics])

        tiledb_uri = os.path.join(tmpdir, "model_array")
        tiledb_model_obj = TensorflowKerasTileDBModel(uri=tiledb_uri,
                                                      model=model)
        tiledb_model_obj.save(include_optimizer=True)
        loaded_model = tiledb_model_obj.load(compile_model=True)

        model_opt_weights = batch_get_value(model.optimizer.weights)
        loaded_opt_weights = batch_get_value(loaded_model.optimizer.weights)

        # Optimizer weights must match pairwise.
        for weight_pair in zip(model_opt_weights, loaded_opt_weights):
            np.testing.assert_array_equal(*weight_pair)

        inputs_a = np.arange(10).reshape(10, 1)
        inputs_b = np.arange(10).reshape(10, 1).astype("str")

        # The reloaded model must predict exactly what the original predicts.
        loaded_prediction = loaded_model.predict({
            "a": inputs_a,
            "b": inputs_b
        })
        original_prediction = model.predict({"a": inputs_a, "b": inputs_b})
        np.testing.assert_array_equal(loaded_prediction, original_prediction)
예제 #5
0
    def test_exception_raise_file_property_in_meta_error(self, tmpdir):
        """Saving with a metadata key named like a file property must fail.

        ``save`` is expected to raise ``ValueError`` with a message telling
        the caller not to use file property key names as metadata keys.
        """
        model = keras.models.Sequential(
            [keras.layers.Flatten(input_shape=(10, 10))])
        tiledb_array = os.path.join(tmpdir, "model_array")
        tiledb_obj = TensorflowKerasTileDBModel(uri=tiledb_array, model=model)

        # The same string serves as the clashing key and as its value.
        reserved_key = "TILEDB_ML_MODEL_ML_FRAMEWORK"
        with pytest.raises(ValueError) as ex:
            tiledb_obj.save(meta={reserved_key: reserved_key})

        expected_message = (
            "Please avoid using file property key names as metadata keys!")
        assert expected_message in str(ex.value)
예제 #6
0
    def test_save_model_to_tiledb_array(self, tmpdir, api, loss, optimizer,
                                        metrics):
        """Save a model and check that a TileDB array exists at the target URI.

        The model is compiled only when the parametrization has an optimizer,
        and is built with a random two-element input shape if the API under
        test did not build it already.
        """
        if api != ConfigSubclassModel:
            model = api(num_hidden=1, num_classes=2, input_dim=3)
        else:
            model = api(hidden_units=[16, 16, 10])

        tiledb_uri = os.path.join(tmpdir, "model_array")

        # Compiles the model if optimizer is present
        if optimizer:
            model.compile(loss=loss, optimizer=optimizer, metrics=[metrics])

        if not model.built:
            # Draw from [1, 20) so the random shape never contains a
            # zero-sized dimension.
            model.build(tuple(np.random.randint(1, 20, size=2)))

        tiledb_model_obj = TensorflowKerasTileDBModel(uri=tiledb_uri,
                                                      model=model)
        # Optimizer state is only saved when an optimizer was supplied.
        tiledb_model_obj.save(include_optimizer=bool(optimizer))
        assert tiledb.array_exists(tiledb_uri)
예제 #7
0
    def test_functional_model_save_load_with_custom_loss_and_metric(
            self, tmpdir, api, loss, optimizer, metrics):
        """Round-trip a functional model carrying an added loss and metric.

        Runs only for the parametrization that supplies an optimizer together
        with a ``SparseCategoricalCrossentropy`` loss object; every other
        combination is skipped. After one training batch the model is saved
        and reloaded, and evaluation results and predictions are compared.
        """
        # The previous guard compared ``loss`` with ``!=`` against a freshly
        # constructed loss object. NOTE(review): Keras loss classes do not
        # appear to define ``__eq__``, so that comparison falls back to
        # identity and skipped every parametrization; a type check expresses
        # the intended condition.
        if optimizer is None or not isinstance(
                loss, keras.losses.SparseCategoricalCrossentropy):
            pytest.skip()

        inputs = keras.Input(shape=(4, ))
        x = keras.layers.Dense(8, activation="relu")(inputs)
        outputs = keras.layers.Dense(3, activation="softmax")(x)
        model = keras.Model(inputs=inputs, outputs=outputs)

        # Sum of squared hidden activations, registered both as an extra loss
        # term and as a metric.
        custom_loss = keras.layers.Lambda(lambda x: keras.backend.sum(x * x))(
            x)
        model.add_loss(custom_loss)
        model.add_metric(custom_loss, aggregation="mean", name="custom_loss")

        model.compile(
            loss=loss,
            optimizer=optimizer,
            metrics=[metrics],
        )

        data_x = np.random.normal(size=(32, 4))
        data_y = np.random.randint(0, 3, size=32)
        model.train_on_batch(data_x, data_y)

        tiledb_uri = os.path.join(tmpdir, "model_array")
        tiledb_model_obj = TensorflowKerasTileDBModel(uri=tiledb_uri,
                                                      model=model)
        tiledb_model_obj.save(include_optimizer=True)
        loaded_model = tiledb_model_obj.load(compile_model=True)

        # Assert all evaluation results are the same. Comparing the whole
        # sequences (instead of zipping them) also fails when the two models
        # report a different number of values.
        original_results = model.evaluate(data_x, data_y)
        loaded_results = loaded_model.evaluate(data_x, data_y)
        assert original_results == pytest.approx(loaded_results, rel=1e-9)

        # Assert model predictions are equal
        np.testing.assert_array_equal(loaded_model.predict(data_x),
                                      model.predict(data_x))
예제 #8
0
    def test_save_model_to_tiledb_array_predictions(self, tmpdir, api, loss,
                                                    optimizer, metrics):
        """Save and reload a model, then compare predictions with the original.

        ``ConfigSubclassModel`` is reloaded with ``custom_objects`` and the
        input shape it was built with; every other API is reloaded with the
        default arguments.
        """
        is_config_subclass = api == ConfigSubclassModel
        if is_config_subclass:
            model = api(hidden_units=[16, 16, 10])
        else:
            model = api(num_hidden=1, num_classes=2, input_dim=3)

        tiledb_uri = os.path.join(tmpdir, "model_array")

        # Compiles the model if optimizer is present
        if optimizer:
            model.compile(loss=loss, optimizer=optimizer, metrics=[metrics])

        # Draw from [1, 20) so the random shape never contains a zero-sized
        # dimension.
        input_shape = tuple(np.random.randint(1, 20, size=2))
        if not model.built:
            model.build(input_shape)

        tiledb_model_obj = TensorflowKerasTileDBModel(uri=tiledb_uri,
                                                      model=model)
        # Optimizer state is only saved when an optimizer was supplied.
        tiledb_model_obj.save(include_optimizer=bool(optimizer))

        if is_config_subclass:
            loaded_model = tiledb_model_obj.load(
                compile_model=False,
                custom_objects={"ConfigSubclassModel": ConfigSubclassModel},
                input_shape=input_shape,
            )
            # Feature width must match the shape the model was built with.
            num_features = input_shape[-1]
        else:
            loaded_model = tiledb_model_obj.load(compile_model=False)
            # Matches the input_dim=3 used to construct the model above.
            num_features = 3

        data = np.random.rand(100, num_features)

        # Assert model predictions are equal
        np.testing.assert_array_equal(loaded_model.predict(data),
                                      model.predict(data))
예제 #9
0
    def test_get_cloud_uri_call_for_models_on_tiledb_cloud(
            self, tmpdir, mocker):
        """Constructing with a namespace must call ``get_cloud_uri`` once.

        The patched helper has to receive the original URI and the namespace.
        """
        model = keras.models.Sequential(
            [keras.layers.Flatten(input_shape=(10, 10))])
        uri = os.path.join(tmpdir, "model_array")

        # Replace the URI resolver with a mock that echoes the local URI back.
        cloud_uri_mock = mocker.patch("tiledb.ml.models.base.get_cloud_uri",
                                      return_value=uri)

        # Only construction matters here; the instance itself is not used.
        _ = TensorflowKerasTileDBModel(uri=uri,
                                       namespace="test_namespace",
                                       model=model)

        cloud_uri_mock.assert_called_once_with(uri, "test_namespace")
예제 #10
0
    def test_update_file_properties_call(self, tmpdir, mocker):
        """``save`` must forward the expected file properties exactly once.

        ``get_cloud_uri``, ``update_file_properties`` and ``_write_array`` are
        all patched, so only the arguments handed to
        ``update_file_properties`` are checked.
        """
        model = keras.models.Sequential(
            [keras.layers.Flatten(input_shape=(10, 10))])

        # Capture the Keras summary as text; it is the expected preview value.
        buffer = io.StringIO()
        model.summary(print_fn=lambda line: buffer.write(line + "\n"))
        model_summary = buffer.getvalue()

        uri = os.path.join(tmpdir, "model_array")

        mocker.patch("tiledb.ml.models.base.get_cloud_uri", return_value=uri)

        tiledb_obj = TensorflowKerasTileDBModel(uri=uri,
                                                namespace="test_namespace",
                                                model=model)

        properties_mock = mocker.patch(
            "tiledb.ml.models.tensorflow_keras.update_file_properties",
            return_value=None,
        )
        # Stub out the array write as well.
        mocker.patch(
            "tiledb.ml.models.tensorflow_keras.TensorflowKerasTileDBModel._write_array"
        )

        tiledb_obj.save()

        expected_properties = {
            "TILEDB_ML_MODEL_ML_FRAMEWORK": "TENSORFLOW KERAS",
            "TILEDB_ML_MODEL_ML_FRAMEWORK_VERSION": tf.__version__,
            "TILEDB_ML_MODEL_STAGE": "STAGING",
            "TILEDB_ML_MODEL_PYTHON_VERSION": platform.python_version(),
            "TILEDB_ML_MODEL_PREVIEW": model_summary,
        }

        properties_mock.assert_called_once_with(uri, expected_properties)
예제 #11
0
    def test_file_properties(self, tmpdir):
        """Check each entry of ``_file_properties`` on a freshly built wrapper.

        Framework name, stage, Python version, framework version and the
        model preview are compared with their expected values.
        """
        model = keras.models.Sequential(
            [keras.layers.Flatten(input_shape=(10, 10))])

        # Capture the Keras summary as text; it is the expected preview value.
        buffer = io.StringIO()
        model.summary(print_fn=lambda line: buffer.write(line + "\n"))
        model_summary = buffer.getvalue()

        uri = os.path.join(tmpdir, "model_array")
        tiledb_obj = TensorflowKerasTileDBModel(uri=uri, model=model)

        expected_properties = {
            "TILEDB_ML_MODEL_ML_FRAMEWORK": "TENSORFLOW KERAS",
            "TILEDB_ML_MODEL_STAGE": "STAGING",
            "TILEDB_ML_MODEL_PYTHON_VERSION": platform.python_version(),
            "TILEDB_ML_MODEL_ML_FRAMEWORK_VERSION": tf.__version__,
            "TILEDB_ML_MODEL_PREVIEW": model_summary,
        }
        for key, expected_value in expected_properties.items():
            assert tiledb_obj._file_properties[key] == expected_value
예제 #12
0
def test_load_tiledb_error_with_wrong_uri():
    """Loading from a URI this test never saved to must raise TileDBError."""
    unsaved_model = TensorflowKerasTileDBModel(uri="dummy_uri")
    with pytest.raises(tiledb.TileDBError):
        unsaved_model.load(compile_model=False)
예제 #13
0
    def test_save_load_with_sequence_features(self, tmpdir, api, loss,
                                              optimizer, metrics):
        """Round-trip a compiled model whose inputs pass through SequenceFeatures.

        Skipped when the parametrization has no optimizer. After the
        round-trip, optimizer weights and predictions on sparse inputs are
        compared between the original and the loaded model.
        """
        if optimizer is None:
            pytest.skip()

        # One numeric sequence column and one vocabulary-backed indicator
        # sequence column.
        cols = [
            feature_column_lib.sequence_numeric_column("a"),
            feature_column_lib.indicator_column(
                feature_column_lib.
                sequence_categorical_column_with_vocabulary_list(
                    "b", ["one", "two"])),
        ]
        input_layers = {
            "a":
            keras.layers.Input(shape=(None, 1), sparse=True, name="a"),
            "b":
            keras.layers.Input(shape=(None, 1),
                               sparse=True,
                               name="b",
                               dtype="string"),
        }

        # SequenceFeatures returns a pair; only the first element is used.
        fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
        x = keras.layers.GRU(32)(fc_layer)
        output = keras.layers.Dense(10)(x)

        model = keras.models.Model(input_layers, output)

        model.compile(
            loss=loss,
            optimizer=optimizer,
            metrics=[metrics],
        )

        tiledb_uri = os.path.join(tmpdir, "model_array")
        tiledb_model_obj = TensorflowKerasTileDBModel(uri=tiledb_uri,
                                                      model=model)
        tiledb_model_obj.save(include_optimizer=True)
        loaded_model = tiledb_model_obj.load(compile_model=True)

        model_opt_weights = batch_get_value(model.optimizer.weights)
        loaded_opt_weights = batch_get_value(loaded_model.optimizer.weights)

        # Assert optimizer weights are equal
        for weight_model, weight_loaded_model in zip(model_opt_weights,
                                                     loaded_opt_weights):
            np.testing.assert_array_equal(weight_model, weight_loaded_model)

        batch_size = 10
        timesteps = 1

        # Sparse input "a": one float value per batch row, at index
        # (row, 0, 0).
        values_a = np.arange(10, dtype=np.float32)
        indices_a = np.zeros((10, 3), dtype=np.int64)
        indices_a[:, 0] = np.arange(10)
        inputs_a = sparse_tensor.SparseTensor(indices_a, values_a,
                                              (batch_size, timesteps, 1))

        # Sparse input "b": one empty string per batch row. The builtin
        # ``str`` replaces ``np.str``, a deprecated alias that newer NumPy
        # releases no longer provide.
        values_b = np.zeros(10, dtype=str)
        indices_b = np.zeros((10, 3), dtype=np.int64)
        indices_b[:, 0] = np.arange(10)
        inputs_b = sparse_tensor.SparseTensor(indices_b, values_b,
                                              (batch_size, timesteps, 1))

        # Assert model predictions are equal
        np.testing.assert_array_equal(
            loaded_model.predict({
                "a": inputs_a,
                "b": inputs_b
            }, steps=1),
            model.predict({
                "a": inputs_a,
                "b": inputs_b
            }, steps=1),
        )