def test_convert_reproduces_classification_model(
      self,
      sklearn_tree,
  ):
    """Checks that a converted classifier matches scikit-learn's predictions.

    The estimator is fit on a synthetic 4-class dataset, then converted. Two
    sub-tests follow: the converted model must agree with `predict_proba`
    (within rtol=1e-5), and a SavedModel round trip must leave the converted
    model's outputs unchanged.

    Args:
      sklearn_tree: An estimator exposing `fit` and `predict_proba`. It is
        presumably injected by a parameterized decorator outside this view;
        confirm against the full file.
    """
    inputs, targets = datasets.make_classification(
        n_samples=100,
        n_features=10,
        n_classes=4,
        n_clusters_per_class=1,
        random_state=42,
    )
    sklearn_tree.fit(inputs, targets)
    # The converted model is fed float32 tensors built from the same inputs.
    tf_inputs = tf.constant(inputs, dtype=tf.float32)

    with self.subTest(msg="inference_is_reproduced_before_save"):
      converted = scikit_learn_model_converter.convert(sklearn_tree)
      actual_probabilities = converted(tf_inputs).numpy()
      expected_probabilities = sklearn_tree.predict_proba(inputs).astype(
          np.float32
      )
      self.assertAllClose(
          expected_probabilities, actual_probabilities, rtol=1e-5
      )

    with self.subTest(msg="inference_is_reproduced_after_save"):
      export_dir = pathlib.Path(self.get_temp_dir())
      intermediate_dir = export_dir / "intermediate_path"
      converted = scikit_learn_model_converter.convert(
          sklearn_tree,
          intermediate_write_path=intermediate_dir,
      )
      tf.saved_model.save(obj=converted, export_dir=export_dir)
      restored = tf.saved_model.load(export_dir)
      # Outputs before and after the save/load round trip must be identical.
      outputs_before_save = converted(tf_inputs)
      outputs_after_load = restored(tf_inputs)
      self.assertAllEqual(outputs_before_save, outputs_after_load)
 def test_convert_raises_when_unrecognised_model_provided(self):
   """Checks that converting a linear regression raises NotImplementedError."""
   inputs, targets = datasets.make_regression(
       n_samples=100,
       n_features=10,
       random_state=42,
   )
   unsupported_model = linear_model.LinearRegression()
   unsupported_model.fit(inputs, targets)
   with self.assertRaises(NotImplementedError):
     scikit_learn_model_converter.convert(unsupported_model)
 def test_convert_raises_when_gbt_initial_estimator_is_not_tree_or_constant(
     self):
   """Checks that a GBT with a linear `init` estimator raises ValueError."""
   inputs, targets = datasets.make_regression(
       n_samples=100,
       n_features=10,
       random_state=42,
   )
   # Use a linear model as the boosting initial estimator, then fit.
   boosted_model = ensemble.GradientBoostingRegressor(
       init=linear_model.LinearRegression()
   )
   boosted_model.fit(inputs, targets)
   with self.assertRaises(ValueError):
     _ = scikit_learn_model_converter.convert(boosted_model)
 def test_convert_raises_when_classification_target_is_multilabel(self):
   """Checks that a tree fit on multi-label targets is rejected on convert."""
   inputs, targets = datasets.make_multilabel_classification(
       n_samples=100,
       n_features=10,
       # n_labels=2 makes the generated targets multi-label.
       n_labels=2,
       random_state=42,
   )
   multilabel_tree = tree.DecisionTreeClassifier()
   multilabel_tree.fit(inputs, targets)
   expected_error = (
       "Only scalar regression and single-label classification are supported."
   )
   with self.assertRaisesRegex(ValueError, expected_error):
     _ = scikit_learn_model_converter.convert(multilabel_tree)
 def test_convert_raises_when_regression_target_is_multivariate(self):
   """Checks that a tree fit on a two-target regression is rejected."""
   inputs, targets = datasets.make_regression(
       n_samples=100,
       n_features=10,
       # n_targets=2 yields a two-dimensional regression target.
       n_targets=2,
       random_state=42,
   )
   multivariate_tree = tree.DecisionTreeRegressor()
   multivariate_tree.fit(inputs, targets)
   expected_error = (
       "Only scalar regression and single-label classification are supported."
   )
   with self.assertRaisesRegex(ValueError, expected_error):
     _ = scikit_learn_model_converter.convert(multivariate_tree)
 def test_convert_uses_intermediate_model_path_if_provided(self):
   """Checks that a Keras model can be loaded from the intermediate path."""
   inputs, targets = datasets.make_classification(
       n_samples=100,
       n_features=10,
       n_classes=4,
       n_clusters_per_class=1,
       random_state=42,
   )
   fitted_tree = tree.DecisionTreeClassifier()
   fitted_tree.fit(inputs, targets)
   intermediate_dir = self.create_tempdir()
   # Only the files written to `intermediate_dir` matter here, so the
   # converted model itself is discarded.
   _ = scikit_learn_model_converter.convert(
       fitted_tree,
       intermediate_write_path=intermediate_dir,
   )
   # The intermediate TFDF model should be loadable from the supplied path.
   reloaded_model = tf.keras.models.load_model(intermediate_dir)
   self.assertIsInstance(reloaded_model, tf.keras.Model)
 def test_convert_raises_when_sklearn_model_is_not_fit(self):
   """Checks that converting an unfit estimator raises a descriptive error.

   The expected text is matched with `assertRaisesRegex`. In the context
   manager form of `assertRaises`, the `msg` keyword is only the failure
   message shown when no exception is raised; it never inspects the raised
   exception's text, so it could not verify the message.
   """
   unfit_model = tree.DecisionTreeRegressor()
   with self.assertRaisesRegex(
       ValueError,
       "Scikit-learn model must be fit to data before converting to TF.",
   ):
     _ = scikit_learn_model_converter.convert(unfit_model)