Example no. 1
    def test_compare_module_version(self):
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")

        self.assertEqual(compare_module_version('1.7.19', '1.7.20'), -1)
        self.assertEqual(compare_module_version('1.7.19', '1.7.19'), 0)
        self.assertEqual(compare_module_version('1.7.19.20', '1.7.19'), 1)
Example no. 2
 def test_numeric_module_version(self):
     self.assertEqual(numeric_module_version((4, 5)), (4, 5))
     self.assertEqual(numeric_module_version("4.5.e"), (4, 5, 'e'))
     self.assertEqual(compare_module_version(("4.5.e"), (4, 5, 'e')), 0)
     self.assertEqual(compare_module_version(("4.5.e"), None), -1)
     self.assertEqual(compare_module_version(None, ("4.5.e")), 1)
     self.assertEqual(compare_module_version(None, None), 0)
     self.assertEqual(compare_module_version(
         ("4.5.e"), (4, 5, 'e', 'b')), -1)
Example no. 3
    def test_compare_module_version(self):
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        self.assertEqual(compare_module_version('1.7.19', '1.7.20'), -1)
        self.assertEqual(compare_module_version('1.7.19', '1.7.19'), 0)
        self.assertEqual(compare_module_version('1.7.19.20', '1.7.19'), 1)
Example no. 4
 def test_numeric_module_version(self):
     self.assertEqual(numeric_module_version((4, 5)), (4, 5))
     self.assertEqual(numeric_module_version("4.5.e"), (4, 5, 'e'))
     self.assertEqual(compare_module_version(("4.5.e"), (4, 5, 'e')), 0)
     self.assertEqual(compare_module_version(("4.5.e"), None), -1)
     self.assertEqual(compare_module_version(None, ("4.5.e")), 1)
     self.assertEqual(compare_module_version(None, None), 0)
     self.assertEqual(compare_module_version(("4.5.e"), (4, 5, 'e', 'b')),
                      -1)
Example no. 5
    def test_target_classifier_permute_iris(self):

        data = load_iris()
        X, y = data.data, data.target
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            random_state=12)

        log = LogisticRegression(n_jobs=1)
        log.fit(X_train, y_train)
        sc = log.score(X_test, y_test)
        r2 = r2_score(y_test, log.predict(X_test))

        for _ in range(10):
            TransformedTargetClassifier2(classifier=None,
                                         transformer='permute')
            tt = TransformedTargetClassifier2(
                classifier=LogisticRegression(n_jobs=1), transformer='permute')
            try:
                tt.fit(X_train, y_train)
            except AttributeError as e:
                if compare_module_version(sklver, "0.24") < 0:
                    return
                raise e
            sc2 = tt.score(X_test, y_test)
            self.assertEqual(sc, sc2)
            r22 = r2_score(y_test, tt.predict(X_test))
            self.assertEqual(r2, r22)
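
Example no. 5 and most of the later ones wrap fit() in the same guard: an AttributeError raised under scikit-learn older than 0.24 ends the test early, anything else is re-raised. A hedged sketch of how that boilerplate could be factored out; the helper name skip_on_old_sklearn is hypothetical and not part of the packages under test.

import contextlib
import unittest

@contextlib.contextmanager
def skip_on_old_sklearn(sklver, minimum="0.24"):
    "Hypothetical helper turning an old-sklearn AttributeError into a skip."
    try:
        yield
    except AttributeError as e:
        if compare_module_version(sklver, minimum) < 0:
            raise unittest.SkipTest(
                "scikit-learn < %s: %r" % (minimum, e)) from e
        raise

# usage inside a test method:
#     with skip_on_old_sklearn(sklver):
#         tt.fit(X_train, y_train)

Raising unittest.SkipTest reports the test as skipped rather than silently passing, which makes the version dependency visible in the test output.
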
Example no. 6
 def test_categories_to_integers_grid_search(self):
     data = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data",
                         "adult_set.txt")
     df = pandas.read_csv(data, sep="\t")
     X = df.drop('income', axis=1)
     y = df['income']
     pipe = make_pipeline(CategoriesToIntegers(), LogisticRegression())
     self.assertRaise(lambda: test_sklearn_grid_search_cv(lambda: pipe, df),
                      ValueError)
     self.assertRaise(
         lambda: test_sklearn_grid_search_cv(
             lambda: pipe, X, y, categoriestointegers__single=[True, False]
         ), ValueError, "Unable to find category value")
     pipe = make_pipeline(CategoriesToIntegers(),
                          Imputer(strategy='most_frequent'),
                          LogisticRegression(n_jobs=1))
     try:
         res = test_sklearn_grid_search_cv(
             lambda: pipe,
             X,
             y,
             categoriestointegers__single=[True, False],
             categoriestointegers__skip_errors=[True])
     except AttributeError as e:
         if compare_module_version(sklver, "0.24") < 0:
             return
         raise e
     self.assertIn('model', res)
     self.assertIn('score', res)
     self.assertGreater(res['score'], 0)
     self.assertLesser(res['score'], 1)
Example no. 7
 def test_log1p_float64(self):
     # run the onnxruntime comparison only when onnxruntime >= 1.7.0
     ort_enabled = compare_module_version(ort_version, "1.7.0") >= 0
     x = numpy.array([[6.1, 5], [3.5, 7.8]], dtype=numpy.float64)
     self.common_test1(x, numpy.log1p, nxnpy.log1p, numpy.float64,
                       ort=ort_enabled)
Example no. 8
    def fit(self,
            X,
            y,
            sample_weight=None,
            check_input=True,
            X_idx_sorted=None):
        """
        Replaces the string stored in criterion by an instance of a class.
        """
        replace = None
        if isinstance(self.criterion, str):
            if self.criterion == 'mselin':
                if compare_module_version(sklearn.__version__, '0.21') >= 0:
                    from .piecewise_tree_regression_criterion_linear import LinearRegressorCriterion  # pylint: disable=E0611,C0415
                    replace = self.criterion
                    self.criterion = LinearRegressorCriterion(X)
                else:
                    raise ImportError(
                        "LinearRegressorCriterion only exists for scikit-learn >= 0.21."
                    )
            elif self.criterion == "simple":
                if compare_module_version(sklearn.__version__, '0.21') >= 0:
                    from .piecewise_tree_regression_criterion_fast import SimpleRegressorCriterionFast  # pylint: disable=E0611,C0415
                    replace = self.criterion
                    self.criterion = SimpleRegressorCriterionFast(X)
                else:
                    raise ImportError(
                        "SimpleRegressorCriterion only exists for scikit-learn >= 0.21."
                    )

        DecisionTreeRegressor.fit(self,
                                  X,
                                  y,
                                  sample_weight=sample_weight,
                                  check_input=check_input,
                                  X_idx_sorted=X_idx_sorted)

        if replace:
            self.criterion = replace

        if self.criterion == "mselin":
            self._fit_reglin(X, y, sample_weight)
        return self
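
The fit() above swaps the string 'mselin' or 'simple' for a compiled Criterion instance, delegates to DecisionTreeRegressor.fit, then restores the string so the estimator stays clonable. A short usage sketch of the estimator this method belongs to, assuming the PiecewiseTreeRegressor shown in the later examples and scikit-learn >= 0.21:

import numpy
from mlinsights.mlmodel.piecewise_tree_regression import PiecewiseTreeRegressor

X = numpy.array([[1.], [2.], [10.], [11.]])
y = numpy.array([0.9, 1.1, 1.9, 2.1])
model = PiecewiseTreeRegressor(criterion='mselin')  # one linear model per leaf
model.fit(X, y)
print(model.predict(X))
print(model.betas_)  # per-leaf linear coefficients, see example no. 23
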
Example no. 9
 def test_classification_kmeans_pickle(self):
     iris = datasets.load_iris()
     X, y = iris.data, iris.target
     try:
         test_sklearn_pickle(lambda: ClassifierAfterKMeans(), X, y)
     except AttributeError as e:
         if compare_module_version(sklver, "0.24") < 0:
             return
         raise e
Example no. 10
 def test_classification_kmeans_intercept_weights(self):
     iris = datasets.load_iris()
     X, y = iris.data, iris.target
     clr = ClassifierAfterKMeans()
     try:
         clr.fit(X, y, sample_weight=numpy.ones((X.shape[0], )))
     except AttributeError as e:
         if compare_module_version(sklver, "0.24") < 0:
             return
         raise e
     acc = clr.score(X, y)
     self.assertGreater(acc, 0)
Example no. 11
 def test_classification_kmeans(self):
     iris = datasets.load_iris()
     X, y = iris.data, iris.target
     clr = ClassifierAfterKMeans()
     try:
         clr.fit(X, y)
     except AttributeError as e:
         if compare_module_version(sklver, "0.24") < 0:
             return
         raise e
     acc = clr.score(X, y)
     self.assertGreater(acc, 0)
     prob = clr.predict_proba(X)
     self.assertEqual(prob.shape[1], 3)
     dec = clr.decision_function(X)
     self.assertEqual(prob.shape, dec.shape)
Example no. 12
 def test_classification_kmeans_grid_search(self):
     iris = datasets.load_iris()
     X, y = iris.data, iris.target
     self.assertRaise(lambda: test_sklearn_grid_search_cv(
         lambda: ClassifierAfterKMeans(), X, y), ValueError)
     try:
         res = test_sklearn_grid_search_cv(
             lambda: ClassifierAfterKMeans(),
             X, y, c_n_clusters=[2, 3])
     except AttributeError as e:
         if compare_module_version(sklver, "0.24") < 0:
             return
         raise e
     self.assertIn('model', res)
     self.assertIn('score', res)
     self.assertGreater(res['score'], 0)
     self.assertLesser(res['score'], 1)
Example no. 13
    def test_notebook_logregclus(self):
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")

        self.assertTrue(mlinsights is not None)
        folder = os.path.join(os.path.dirname(__file__), "..", "..", "_doc",
                              "notebooks", "sklearn")
        try:
            test_notebook_execution_coverage(__file__,
                                             "logistic_regression_clustering",
                                             folder,
                                             'mlinsights',
                                             fLOG=fLOG)
        except Exception as e:
            if compare_module_version(sklver, "0.24") < 0:
                return
            raise e
Example no. 14
 def test_pipeline_transform(self):
     data = load_iris()
     X, y = data.data, data.target
     X_train, X_test, y_train, y_test = train_test_split(X, y)
     conv = SkBaseTransformLearner(PCA())
     pipe = make_pipeline(conv, DecisionTreeClassifier())
     try:
         pipe.fit(X_train, y_train)
     except AttributeError as e:
         if compare_module_version(sklver, "0.24") < 0:
             return
         raise e
     pred = pipe.predict(X_test)
     score = accuracy_score(y_test, pred)
     self.assertGreater(score, 0.75)
     score2 = pipe.score(X_test, y_test)
     self.assertEqual(score, score2)
     rp = repr(conv)
     self.assertStartsWith('SkBaseTransformLearner(model=PCA(', rp)
Example no. 15
 def test_kmeans_constraint_pipeline(self):
     data = load_iris()
     X, y = data.data, data.target
     X_train, X_test, y_train, y_test = train_test_split(X, y)
     km = ConstraintKMeans(strategy='distance')
     pipe = make_pipeline(km, LogisticRegression())
     try:
         pipe.fit(X_train, y_train)
     except AttributeError as e:
         if compare_module_version(sklver, "0.24") < 0:
             return
         raise e
     pred = pipe.predict(X_test)
     score = accuracy_score(y_test, pred)
     self.assertGreater(score, 0.8)
     score2 = pipe.score(X_test, y_test)
     self.assertEqual(score, score2)
     rp = repr(km)
     self.assertStartsWith("ConstraintKMeans(", rp)
Example no. 16
class TestNotebookPiecewise(unittest.TestCase):
    def setUp(self):
        add_missing_development_version(["jyquickhelper"], __file__, hide=True)

    @unittest.skipIf(compare_module_version(sklearn.__version__, "0.21") < 0,
                     reason="This notebook uses Criterion API changed in 0.21")
    def test_notebook_piecewise(self):
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")

        self.assertTrue(mlinsights is not None)
        folder = os.path.join(os.path.dirname(__file__), "..", "..", "_doc",
                              "notebooks", "sklearn_c")
        test_notebook_execution_coverage(__file__,
                                         "piecewise",
                                         folder,
                                         'mlinsights',
                                         fLOG=fLOG)
Example no. 17
    def test_pipeline_wines(self):
        df = load_wines_dataset(shuffle=True)
        X = df.drop(['quality', 'color'], axis=1)
        y = df['quality']
        X_train, X_test, y_train, y_test = train_test_split(X, y)
        model = make_pipeline(
            SkBaseTransformStacking([LogisticRegression(n_jobs=1)],
                                    'decision_function'),
            RandomForestClassifier())
        try:
            model.fit(X_train, y_train)
        except AttributeError as e:
            if compare_module_version(sklver, "0.24") < 0:
                return
            raise e
        auc_pipe = roc_auc_score(y_test == model.predict(X_test),
                                 model.predict_proba(X_test).max(axis=1))
        acc = model.score(X_test, y_test)
        accu = accuracy_score(y_test, model.predict(X_test))
        self.assertGreater(auc_pipe, 0.6)
        self.assertGreater(acc, 0.5)
        self.assertGreater(accu, 0.5)
        grid = GridSearchCV(estimator=model,
                            param_grid={},
                            cv=3,
                            refit='acc',
                            scoring=dict(acc=make_scorer(accuracy_score)))
        grid.fit(X, y)
        best = grid.best_estimator_
        step = grid.best_estimator_.steps[0][1]
        meth = step.method
        self.assertEqual(meth, 'decision_function')

        res = cross_val_score(model, X, y, cv=5)
        acc1 = best.score(X_test, y_test)
        accu1 = accuracy_score(y_test, best.predict(X_test))

        best.fit(X_train, y_train)
        acc2 = best.score(X_test, y_test)
        accu2 = accuracy_score(y_test, best.predict(X_test))
        self.assertGreater(res.min(), 0.5)
        self.assertGreater(min([acc2, accu2, acc1, accu1]), 0.5)
Example no. 18
 def test_pipeline_with_two_classifiers(self):
     data = load_iris()
     X, y = data.data, data.target
     X_train, X_test, y_train, y_test = train_test_split(X, y)
     conv = SkBaseTransformStacking(
         [LogisticRegression(n_jobs=1), DecisionTreeClassifier()])
     pipe = make_pipeline(conv, DecisionTreeClassifier())
     try:
         pipe.fit(X_train, y_train)
     except AttributeError as e:
         if compare_module_version(sklver, "0.24") < 0:
             return
         raise e
     pred = pipe.predict(X_test)
     score = accuracy_score(y_test, pred)
     self.assertGreater(score, 0.8)
     score2 = pipe.score(X_test, y_test)
     self.assertEqual(score, score2)
     rp = repr(conv)
     self.assertStartsWith(
         'SkBaseTransformStacking([LogisticRegression(', rp)
Example no. 19
 def test_pipeline_with_callable(self):
     data = load_iris()
     X, y = data.data, data.target
     X_train, X_test, y_train, y_test = train_test_split(X, y)
     tmod = LogisticRegression(n_jobs=1)
     conv = SkBaseTransformLearner(tmod, method=tmod.decision_function)
     pipe = make_pipeline(conv, DecisionTreeClassifier())
     try:
         pipe.fit(X_train, y_train)
     except AttributeError as e:
         if compare_module_version(sklver, "0.24") < 0:
             return
         raise e
     pred = pipe.predict(X_test)
     score = accuracy_score(y_test, pred)
     self.assertGreater(score, 0.8)
     score2 = pipe.score(X_test, y_test)
     self.assertEqualFloat(score, score2, precision=1e-5)
     rp = repr(conv)
     self.assertStartsWith(
         'SkBaseTransformLearner(model=LogisticRegression(', rp)
Example no. 20
 def test_classification_kmeans_relevance(self):
     state = RandomState(seed=0)
     Xs = []
     Ys = []
     n = 20
     for i in range(0, 5):
         for j in range(0, 4):
             x1 = state.rand(n) + i * 1.1
             x2 = state.rand(n) + j * 1.1
             Xs.append(numpy.vstack([x1, x2]).T)
             cl = state.randint(0, 4)
              Ys.extend([cl for _ in range(n)])
     X = numpy.vstack(Xs)
     Y = numpy.array(Ys)
     clk = ClassifierAfterKMeans(c_n_clusters=6, c_random_state=state)
     try:
         clk.fit(X, Y)
     except AttributeError as e:
         if compare_module_version(sklver, "0.24") < 0:
             return
         raise e
     score = clk.score(X, Y)
     self.assertGreater(score, 0.95)
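
Examples no. 9 through 12 and no. 20 all exercise ClassifierAfterKMeans. A hedged sketch of the idea behind such an estimator, not of the mlinsights implementation itself: cluster the inputs, append the distance to every centroid as extra features, then train an ordinary classifier on the widened matrix.

import numpy
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
km = KMeans(n_clusters=6, n_init=10, random_state=0).fit(X)
X_ext = numpy.hstack([X, km.transform(X)])  # transform() = distances to centroids
clf = LogisticRegression(max_iter=1000).fit(X_ext, y)
print(clf.score(X_ext, y))
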
Example no. 21
class TestQuantileRegression(ExtTestCase):
    def test_sklver(self):
        self.assertTrue(compare_module_version(sklver, "0.22") >= 0)

    def test_quantile_regression_no_intercept(self):
        X = numpy.array([[0.1, 0.2], [0.2, 0.3]])
        Y = numpy.array([1., 1.1])
        clr = LinearRegression(fit_intercept=False)
        clr.fit(X, Y)
        clq = QuantileLinearRegression(fit_intercept=False)
        clq.fit(X, Y)
        self.assertEqual(clr.intercept_, 0)
        self.assertEqualArray(clr.coef_, clq.coef_)
        self.assertEqual(clq.intercept_, 0)
        self.assertEqualArray(clr.intercept_, clq.intercept_)

    @unittest.skipIf(compare_module_version(sklver, "0.24") == -1,
                     reason="positive was introduced in 0.24")
    def test_quantile_regression_no_intercept_positive(self):
        X = numpy.array([[0.1, 0.2], [0.2, 0.3]])
        Y = numpy.array([1., 1.1])
        clr = LinearRegression(fit_intercept=False, positive=True)
        clr.fit(X, Y)
        clq = QuantileLinearRegression(fit_intercept=False, positive=True)
        clq.fit(X, Y)
        self.assertEqual(clr.intercept_, 0)
        self.assertEqual(clq.intercept_, 0)
        self.assertGreater(clr.coef_.min(), 0)
        self.assertGreater(clq.coef_.min(), 0)
        self.assertEqualArray(clr.intercept_, clq.intercept_)
        self.assertEqualArray(clr.coef_[0], clq.coef_[0])
        self.assertGreater(clr.coef_[1:].min(), 3)
        self.assertGreater(clq.coef_[1:].min(), 3)

    def test_quantile_regression_intercept(self):
        X = numpy.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.3]])
        Y = numpy.array([1., 1.1, 1.2])
        clr = LinearRegression(fit_intercept=True)
        clr.fit(X, Y)
        clq = QuantileLinearRegression(verbose=False, fit_intercept=True)
        clq.fit(X, Y)
        self.assertNotEqual(clr.intercept_, 0)
        self.assertNotEqual(clq.intercept_, 0)
        self.assertEqualArray(clr.intercept_, clq.intercept_)
        self.assertEqualArray(clr.coef_, clq.coef_)

    @unittest.skipIf(compare_module_version(sklver, "0.24") == -1,
                     reason="positive was introduced in 0.24")
    def test_quantile_regression_intercept_positive(self):
        X = numpy.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.3]])
        Y = numpy.array([1., 1.1, 1.2])
        clr = LinearRegression(fit_intercept=True, positive=True)
        clr.fit(X, Y)
        clq = QuantileLinearRegression(verbose=False,
                                       fit_intercept=True,
                                       positive=True)
        clq.fit(X, Y)
        self.assertNotEqual(clr.intercept_, 0)
        self.assertNotEqual(clq.intercept_, 0)
        self.assertEqualArray(clr.intercept_, clq.intercept_)
        self.assertEqualArray(clr.coef_, clq.coef_)
        self.assertGreater(clr.coef_.min(), 0)
        self.assertGreater(clq.coef_.min(), 0)

    def test_quantile_regression_intercept_weights(self):
        X = numpy.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.3]])
        Y = numpy.array([1., 1.1, 1.2])
        W = numpy.array([1., 1., 1.])
        clr = LinearRegression(fit_intercept=True)
        clr.fit(X, Y, W)
        clq = QuantileLinearRegression(verbose=False, fit_intercept=True)
        clq.fit(X, Y, W)
        self.assertNotEqual(clr.intercept_, 0)
        self.assertNotEqual(clq.intercept_, 0)
        self.assertEqualArray(clr.intercept_, clq.intercept_)
        self.assertEqualArray(clr.coef_, clq.coef_)

    def test_quantile_regression_diff(self):
        X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]])
        Y = numpy.array([1., 1.1, 1.2, 10, 1.4])
        clr = LinearRegression(fit_intercept=True)
        clr.fit(X, Y)
        clq = QuantileLinearRegression(verbose=False, fit_intercept=True)
        clq.fit(X, Y)
        self.assertNotEqual(clr.intercept_, 0)
        self.assertNotEqual(clq.intercept_, 0)
        self.assertNotEqualArray(clr.coef_, clq.coef_)
        self.assertNotEqualArray(clr.intercept_, clq.intercept_)
        self.assertLesser(clq.n_iter_, 10)

    def test_quantile_regression_pandas(self):
        X = pandas.DataFrame(numpy.array([[0.1, 0.2], [0.2, 0.3]]))
        Y = numpy.array([1., 1.1])
        clr = LinearRegression(fit_intercept=False)
        clr.fit(X, Y)
        clq = QuantileLinearRegression(fit_intercept=False)
        clq.fit(X, Y)
        self.assertEqual(clr.intercept_, 0)
        self.assertEqualArray(clr.coef_, clq.coef_)
        self.assertEqual(clq.intercept_, 0)
        self.assertEqualArray(clr.intercept_, clq.intercept_)

    def test_quantile_regression_list(self):
        X = [[0.1, 0.2], [0.2, 0.3]]
        Y = numpy.array([1., 1.1])
        clq = QuantileLinearRegression(fit_intercept=False)
        self.assertRaise(lambda: clq.fit(X, Y), TypeError)

    def test_quantile_regression_list2(self):
        X = random(1000)
        eps1 = (random(900) - 0.5) * 0.1
        eps2 = random(100) * 2
        eps = numpy.hstack([eps1, eps2])
        X = X.reshape((1000, 1))  # pylint: disable=E1101
        Y = X * 3.4 + 5.6 + eps

        clq = QuantileLinearRegression(verbose=False, fit_intercept=True)
        self.assertRaise(lambda: clq.fit(X, Y), ValueError)

        Y = X.ravel() * 3.4 + 5.6 + eps

        clq = QuantileLinearRegression(verbose=False, fit_intercept=True)
        clq.fit(X, Y)

        clr = LinearRegression(fit_intercept=True)
        clr.fit(X, Y)

        self.assertNotEqual(clr.intercept_, 0)
        self.assertNotEqual(clq.intercept_, 0)
        self.assertNotEqualArray(clr.coef_, clq.coef_)
        self.assertNotEqualArray(clr.intercept_, clq.intercept_)
        self.assertLesser(clq.n_iter_, 10)

        pr = clr.predict(X)
        pq = clq.predict(X)
        self.assertEqual(pr.shape, pq.shape)

    def test_quantile_regression_pickle(self):
        X = random(100)
        eps1 = (random(90) - 0.5) * 0.1
        eps2 = random(10) * 2
        eps = numpy.hstack([eps1, eps2])
        X = X.reshape((100, 1))  # pylint: disable=E1101
        Y = X.ravel() * 3.4 + 5.6 + eps
        test_sklearn_pickle(lambda: LinearRegression(), X, Y)
        test_sklearn_pickle(lambda: QuantileLinearRegression(), X, Y)

    def test_quantile_regression_clone(self):
        test_sklearn_clone(lambda: QuantileLinearRegression(delta=0.001))

    def test_quantile_regression_grid_search(self):
        X = random(100)
        eps1 = (random(90) - 0.5) * 0.1
        eps2 = random(10) * 2
        eps = numpy.hstack([eps1, eps2])
        X = X.reshape((100, 1))  # pylint: disable=E1101
        Y = X.ravel() * 3.4 + 5.6 + eps
        self.assertRaise(
            lambda: test_sklearn_grid_search_cv(
                lambda: QuantileLinearRegression(), X, Y),
            (ValueError, TypeError))
        res = test_sklearn_grid_search_cv(lambda: QuantileLinearRegression(),
                                          X,
                                          Y,
                                          delta=[0.1, 0.001])
        self.assertIn('model', res)
        self.assertIn('score', res)
        self.assertGreater(res['score'], 0)
        self.assertLesser(res['score'], 1)

    def test_quantile_regression_diff_quantile(self):
        X = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5], [0.6]])
        Y = numpy.array([1., 1.11, 1.21, 10, 1.29, 1.39])
        clqs = []
        scores = []
        for q in [0.25, 0.4999, 0.5, 0.5001, 0.75]:
            clq = QuantileLinearRegression(verbose=False,
                                           fit_intercept=True,
                                           quantile=q)
            clq.fit(X, Y)
            clqs.append(clq)
            sc = clq.score(X, Y)
            scores.append(sc)
            self.assertGreater(sc, 0)

        self.assertLesser(abs(clqs[1].intercept_ - clqs[2].intercept_), 0.01)
        self.assertLesser(abs(clqs[2].intercept_ - clqs[3].intercept_), 0.01)
        self.assertLesser(abs(clqs[1].coef_[0] - clqs[2].coef_[0]), 0.01)
        self.assertLesser(abs(clqs[2].coef_[0] - clqs[3].coef_[0]), 0.01)

        self.assertGreater(abs(clqs[0].intercept_ - clqs[1].intercept_), 0.01)
        # self.assertGreater(abs(clqs[3].intercept_ - clqs[4].intercept_), 0.01)
        self.assertGreater(abs(clqs[0].coef_[0] - clqs[1].coef_[0]), 0.05)
        # self.assertGreater(abs(clqs[3].coef_[0] - clqs[4].coef_[0]), 0.05)

        self.assertLesser(abs(scores[1] - scores[2]), 0.01)
        self.assertLesser(abs(scores[2] - scores[3]), 0.01)

    def test_quantile_regression_quantile_check(self):
        n = 100
        X = (numpy.arange(n) / n)
        Y = X + X * X / n
        X = X.reshape((n, 1))
        for q in [0.1, 0.5, 0.9]:
            clq = QuantileLinearRegression(verbose=False,
                                           fit_intercept=True,
                                           quantile=q,
                                           max_iter=10)
            clq.fit(X, Y)
            y = clq.predict(X)
            diff = y - Y
            sign = numpy.sign(diff)  # pylint: disable=E1111
            pos = (sign > 0).sum()  # pylint: disable=W0143
            neg = (sign < 0).sum()  # pylint: disable=W0143
            if q < 0.5:
                self.assertGreater(neg, pos * 4)
            if q > 0.5:
                self.assertLesser(neg * 7, pos)

    def test_float_sign(self):
        self.assertEqual(float_sign(-1), -1)
        self.assertEqual(float_sign(1), 1)
        self.assertEqual(float_sign(1e-16), 0)

    def test_quantile_regression_intercept_D2(self):
        X = numpy.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.3]])
        Y = numpy.array([[1., 0.], [1.1, 0.1], [1.2, 0.19]])
        clr = LinearRegression(fit_intercept=True)
        clr.fit(X, Y)
        clq = QuantileLinearRegression(verbose=False, fit_intercept=True)
        self.assertRaise(lambda: clq.fit(X, Y), ValueError)
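
The sign counts asserted in test_quantile_regression_quantile_check follow from the loss QuantileLinearRegression minimizes: the pinball (quantile) loss, which penalizes under- and over-prediction asymmetrically. A small reference sketch, not taken from mlinsights itself:

import numpy

def pinball_loss(y_true, y_pred, quantile=0.5):
    "quantile=0.5 reduces to half the mean absolute error (median regression)."
    diff = y_true - y_pred
    return numpy.mean(numpy.maximum(quantile * diff, (quantile - 1) * diff))

Minimizing this loss pushes the prediction towards the requested quantile of the target: with quantile=0.1 the fitted line sits below most of the data, so most residuals y_pred - y_true are negative, which is exactly what the neg/pos counts in the test verify.
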
Example no. 22
class TestDocumentationExampleTrainingTorch(ExtTestCase):
    @unittest.skipIf(compare_module_version(mlp_version, "0.7.1642") <= 0,
                     reason="plot_onnx was updated.")
    @unittest.skipIf(torch is None, reason="torch is missing")
    @unittest.skipIf(ortt is None,
                     reason="onnxruntime-training not installed.")
    @skipif_circleci("stuck")
    @skipif_appveyor("too long")
    def test_documentation_examples_training_torch(self):

        this = os.path.abspath(os.path.dirname(__file__))
        onxc = os.path.normpath(os.path.join(this, '..', '..'))
        pypath = os.environ.get('PYTHONPATH', None)
        sep = ";" if sys.platform == 'win32' else ':'
        pypath = "" if pypath in (None, "") else (pypath + sep)
        pypath += onxc
        os.environ['PYTHONPATH'] = pypath
        fold = os.path.normpath(
            os.path.join(this, '..', '..', '_doc', 'examples'))
        found = os.listdir(fold)
        tested = 0
        for name in sorted(found):
            if 'training' not in name:
                continue
            if "torch" not in name:
                continue
            if not name.startswith("plot_") or not name.endswith(".py"):
                continue

            with self.subTest(name=name):
                if __name__ == "__main__" or "-v" in sys.argv:
                    print("%s: run %r" %
                          (datetime.now().strftime("%d-%m-%y %H:%M:%S"), name))
                sys.path.insert(0, fold)
                try:
                    mod = import_source(fold, os.path.splitext(name)[0])
                    assert mod is not None
                except FileNotFoundError:
                    # try another way
                    cmds = [sys.executable, "-u", os.path.join(fold, name)]
                    p = subprocess.Popen(cmds,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
                    res = p.communicate()
                    _, err = res
                    st = err.decode('ascii', errors='ignore')
                    if "Non-zero status code returned while " in st:
                        raise RuntimeError(  # pylint: disable=W0707
                            "Example '{}' (cmd: {} - exec_prefix="
                            "'{}') failed due to C error\n{}"
                            "".format(name, cmds, sys.exec_prefix, st))
                    if len(st) > 0 and 'Traceback' in st:
                        if "No such file or directory: 'dot': 'dot'" in st:
                            # dot not installed, this part
                            # is tested in onnx framework
                            pass
                        elif '"dot" not found in path.' in st:
                            # dot not installed, this part
                            # is tested in onnx framework
                            pass
                        elif ('Please fix either the inputs or '
                              'the model.') in st:
                            # onnxruntime datasets changed in master
                            # branch, still the same in released
                            # version on pypi
                            pass
                        elif 'dot: graph is too large' in st:
                            # graph is too big
                            pass
                        else:
                            raise RuntimeError(  # pylint: disable=W0707
                                "Example '{}' (cmd: {} - exec_prefix="
                                "'{}') failed due to\n{}"
                                "".format(name, cmds, sys.exec_prefix, st))
                except (Exception, SystemExit) as e:
                    # cmds and st are only defined in the FileNotFoundError
                    # branch above, so report the exception itself here
                    raise RuntimeError(
                        "Example* '{}' (exec_prefix="
                        "'{}') failed due to\n{}"
                        "".format(name, sys.exec_prefix, e)) from e
                finally:
                    if sys.path[0] == fold:
                        del sys.path[0]
                with open(os.path.join(os.path.dirname(__file__),
                                       "_test_example.txt"),
                          "a",
                          encoding='utf-8') as f:
                    f.write(name + "\n")
                tested += 1
        if tested == 0:
            raise RuntimeError("No example was tested.")
Example no. 23
class TestDecisionTreeExperimentLinear(ExtTestCase):

    @unittest.skipIf(compare_module_version(sklearn.__version__, "0.21") < 0,
                     reason="Only implemented for Criterion API from sklearn >= 0.21")
    def test_criterions(self):
        X = numpy.array([[10., 12., 13.]]).T
        y = numpy.array([20., 22., 23.])
        c1 = MSE(1, X.shape[0])
        c2 = LinearRegressorCriterion(X)
        self.assertNotEmpty(c1)
        self.assertNotEmpty(c2)
        w = numpy.ones((y.shape[0],))
        self.assertEqual(w.sum(), X.shape[0])
        ind = numpy.arange(y.shape[0]).astype(numpy.int64)
        ys = y.astype(float).reshape((y.shape[0], 1))
        _test_criterion_init(c1, ys, w, 1., ind, 0, y.shape[0])
        _test_criterion_init(c2, ys, w, 1., ind, 0, y.shape[0])
        # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/tree/_criterion.pyx#L886
        v1 = _test_criterion_node_value(c1)
        v2 = _test_criterion_node_value(c2)
        self.assertEqual(v1, v2)
        i1 = _test_criterion_node_impurity(c1)
        i2 = _test_criterion_node_impurity(c2)
        self.assertGreater(i1, i2)
        self.assertGreater(i2, 0)
        p1 = _test_criterion_proxy_impurity_improvement(c1)
        p2 = _test_criterion_proxy_impurity_improvement(c2)
        self.assertTrue(numpy.isnan(p1))
        self.assertTrue(numpy.isnan(p2))

        X = numpy.array([[1., 2., 3.]]).T
        y = numpy.array([1., 2., 3.])
        c1 = MSE(1, X.shape[0])
        c2 = LinearRegressorCriterion(X)
        w = numpy.ones((y.shape[0],))
        ind = numpy.arange(y.shape[0]).astype(numpy.int64)
        ys = y.astype(float).reshape((y.shape[0], 1))
        _test_criterion_init(c1, ys, w, 1., ind, 0, y.shape[0])
        _test_criterion_init(c2, ys, w, 1., ind, 0, y.shape[0])
        i1 = _test_criterion_node_impurity(c1)
        i2 = _test_criterion_node_impurity(c2)
        self.assertGreater(i1, i2)
        v1 = _test_criterion_node_value(c1)
        v2 = _test_criterion_node_value(c2)
        self.assertEqual(v1, v2)
        p1 = _test_criterion_proxy_impurity_improvement(c1)
        p2 = _test_criterion_proxy_impurity_improvement(c2)
        self.assertTrue(numpy.isnan(p1))
        self.assertTrue(numpy.isnan(p2))

        X = numpy.array([[1., 2., 10., 11.]]).T
        y = numpy.array([0.9, 1.1, 1.9, 2.1])
        c1 = MSE(1, X.shape[0])
        c2 = LinearRegressorCriterion(X)
        w = numpy.ones((y.shape[0],))
        ind = numpy.arange(y.shape[0]).astype(numpy.int64)
        ys = y.astype(float).reshape((y.shape[0], 1))
        _test_criterion_init(c1, ys, w, 1., ind, 0, y.shape[0])
        _test_criterion_init(c2, ys, w, 1., ind, 0, y.shape[0])
        i1 = _test_criterion_node_impurity(c1)
        i2 = _test_criterion_node_impurity(c2)
        self.assertGreater(i1, i2)
        v1 = _test_criterion_node_value(c1)
        v2 = _test_criterion_node_value(c2)
        self.assertEqual(v1, v2)
        p1 = _test_criterion_proxy_impurity_improvement(c1)
        p2 = _test_criterion_proxy_impurity_improvement(c2)
        self.assertTrue(numpy.isnan(p1))
        self.assertTrue(numpy.isnan(p2))

        X = numpy.array([[1., 2., 10., 11.]]).T
        y = numpy.array([0.9, 1.1, 1.9, 2.1])
        c1 = MSE(1, X.shape[0])
        c2 = LinearRegressorCriterion(X)
        w = numpy.ones((y.shape[0],))
        ind = numpy.array([0, 3, 2, 1], dtype=ind.dtype)
        ys = y.astype(float).reshape((y.shape[0], 1))
        _test_criterion_init(c1, ys, w, 1., ind, 1, y.shape[0])
        _test_criterion_init(c2, ys, w, 1., ind, 1, y.shape[0])
        i1 = _test_criterion_node_impurity(c1)
        i2 = _test_criterion_node_impurity(c2)
        self.assertGreater(i1, i2)
        v1 = _test_criterion_node_value(c1)
        v2 = _test_criterion_node_value(c2)
        self.assertEqual(v1, v2)
        p1 = _test_criterion_proxy_impurity_improvement(c1)
        p2 = _test_criterion_proxy_impurity_improvement(c2)
        self.assertTrue(numpy.isnan(p1))
        self.assertTrue(numpy.isnan(p2))

        for i in range(2, 4):
            _test_criterion_update(c1, i)
            _test_criterion_update(c2, i)
            left1, right1 = _test_criterion_node_impurity_children(c1)
            left2, right2 = _test_criterion_node_impurity_children(c2)
            self.assertGreater(left1, left2)
            self.assertGreater(right1, right2)
            v1 = _test_criterion_node_value(c1)
            v2 = _test_criterion_node_value(c2)
            self.assertEqual(v1, v2)
            p1 = _test_criterion_impurity_improvement(c1, 0.)
            p2 = _test_criterion_impurity_improvement(c2, 0.)
            self.assertGreater(p1, p2 - 1.)

            dest = numpy.empty((2, ))
            c2.node_beta(dest)
            self.assertGreater(dest[0], 0)
            self.assertGreater(dest[1], 0)

    @unittest.skipIf(compare_module_version(sklearn.__version__, "0.21") < 0,
                     reason="Only implemented for Criterion API from sklearn >= 0.21")
    def test_criterions_check_value(self):
        X = numpy.array([[10., 12., 13.]]).T
        y = numpy.array([[20., 22., 23.]]).T
        c2 = LinearRegressorCriterion.create(X, y)
        coef = numpy.empty((3, ))
        c2.node_beta(coef)
        self.assertEqual(coef[:2], numpy.array([1, 10]))

    @unittest.skipIf(compare_module_version(sklearn.__version__, "0.21") < 0,
                     reason="Only implemented for Criterion API from sklearn >= 0.21")
    def test_decision_tree_criterion(self):
        X = numpy.array([[1., 2., 10., 11.]]).T
        y = numpy.array([0.9, 1.1, 1.9, 2.1])
        clr1 = DecisionTreeRegressor(max_depth=1)
        clr1.fit(X, y)
        p1 = clr1.predict(X)

        crit = LinearRegressorCriterion(X)
        clr2 = DecisionTreeRegressor(criterion=crit, max_depth=1)
        clr2.fit(X, y)
        p2 = clr2.predict(X)
        self.assertEqual(p1, p2)
        self.assertEqual(clr1.tree_.node_count, clr2.tree_.node_count)

    @unittest.skipIf(compare_module_version(sklearn.__version__, "0.21") < 0,
                     reason="Only implemented for Criterion API from sklearn >= 0.21")
    def test_decision_tree_criterion_iris(self):
        iris = datasets.load_iris()
        X, y = iris.data, iris.target
        clr1 = DecisionTreeRegressor()
        clr1.fit(X, y)
        p1 = clr1.predict(X)
        clr2 = DecisionTreeRegressor(criterion=LinearRegressorCriterion(X))
        clr2.fit(X, y)
        p2 = clr2.predict(X)
        self.assertEqual(p1.shape, p2.shape)

    @unittest.skipIf(compare_module_version(sklearn.__version__, "0.21") < 0,
                     reason="Only implemented for Criterion API from sklearn >= 0.21")
    def test_decision_tree_criterion_iris_dtc(self):
        iris = datasets.load_iris()
        X, y = iris.data, iris.target
        clr1 = DecisionTreeRegressor()
        clr1.fit(X, y)
        p1 = clr1.predict(X)
        clr2 = PiecewiseTreeRegressor(criterion='mselin')
        clr2.fit(X, y)
        p2 = clr2.predict(X)
        self.assertEqual(p1.shape, p2.shape)
        self.assertTrue(hasattr(clr2, 'betas_'))
        self.assertTrue(hasattr(clr2, 'leaves_mapping_'))
        self.assertEqual(len(clr2.leaves_index_), clr2.tree_.n_leaves)
        self.assertEqual(len(clr2.leaves_mapping_), clr2.tree_.n_leaves)
        self.assertEqual(clr2.betas_.shape[1], X.shape[1] + 1)
        self.assertEqual(clr2.betas_.shape[0], clr2.tree_.n_leaves)
        sc1 = clr1.score(X, y)
        sc2 = clr2.score(X, y)
        self.assertGreater(sc1, sc2)

    @unittest.skipIf(compare_module_version(sklearn.__version__, "0.21") < 0,
                     reason="Only implemented for Criterion API from sklearn >= 0.21")
    def test_decision_tree_criterion_iris_dtc_traintest(self):
        iris = datasets.load_iris()
        X, y = iris.data, iris.target
        X_train, X_test, y_train, y_test = train_test_split(X, y)
        clr1 = DecisionTreeRegressor()
        clr1.fit(X_train, y_train)
        p1 = clr1.predict(X_train)
        clr2 = PiecewiseTreeRegressor(criterion='mselin')
        clr2.fit(X_train, y_train)
        p2 = clr2.predict(X_train)
        self.assertEqual(p1.shape, p2.shape)
        self.assertTrue(hasattr(clr2, 'betas_'))
        self.assertTrue(hasattr(clr2, 'leaves_mapping_'))
        self.assertEqual(len(clr2.leaves_index_), clr2.tree_.n_leaves)
        self.assertEqual(len(clr2.leaves_mapping_), clr2.tree_.n_leaves)
        self.assertEqual(clr2.betas_.shape[1], X.shape[1] + 1)
        self.assertEqual(clr2.betas_.shape[0], clr2.tree_.n_leaves)
        sc1 = clr1.score(X_test, y_test)
        sc2 = clr2.score(X_test, y_test)
        # always true: it only verifies that both scores could be computed
        self.assertGreater(abs(sc1 - sc2), -0.1)
Example no. 24
# -*- coding: utf-8 -*-
"""
@brief      test log(time=10s)
"""
import unittest
import numpy
import sklearn
from sklearn.tree._criterion import MSE  # pylint: disable=E0611
from sklearn.tree import DecisionTreeRegressor
from sklearn import datasets
from sklearn.model_selection import train_test_split
from pyquickhelper.pycode import ExtTestCase
from pyquickhelper.texthelper import compare_module_version
from mlinsights.mlmodel.piecewise_tree_regression import PiecewiseTreeRegressor

if compare_module_version(sklearn.__version__, "0.21") >= 0:  # noqa
    from mlinsights.mlmodel._piecewise_tree_regression_common import _test_criterion_init  # pylint: disable=E0611, E0401
    from mlinsights.mlmodel._piecewise_tree_regression_common import _test_criterion_node_impurity  # pylint: disable=E0611, E0401
    from mlinsights.mlmodel._piecewise_tree_regression_common import _test_criterion_node_impurity_children  # pylint: disable=E0611, E0401
    from mlinsights.mlmodel._piecewise_tree_regression_common import _test_criterion_update  # pylint: disable=E0611, E0401
    from mlinsights.mlmodel._piecewise_tree_regression_common import _test_criterion_node_value  # pylint: disable=E0611, E0401
    from mlinsights.mlmodel._piecewise_tree_regression_common import _test_criterion_proxy_impurity_improvement  # pylint: disable=E0611, E0401
    from mlinsights.mlmodel._piecewise_tree_regression_common import _test_criterion_impurity_improvement  # pylint: disable=E0611, E0401
    from mlinsights.mlmodel.piecewise_tree_regression_criterion_linear import LinearRegressorCriterion  # pylint: disable=E0611, E0401


class TestDecisionTreeExperimentLinear(ExtTestCase):

    @unittest.skipIf(compare_module_version(sklearn.__version__, "0.21") < 0,
                     reason="Only implemented for Criterion API from sklearn >= 0.21")
    def test_criterions(self):
Example no. 25
 def test_sklver(self):
     self.assertTrue(compare_module_version(sklver, "0.22") >= 0)
Example no. 26
class TestPiecewiseDecisionTreeExperimentFast(ExtTestCase):

    def test_criterions(self):
        X = numpy.array([[1., 2.]]).T
        y = numpy.array([1., 2.])
        c1 = MSE(1, X.shape[0])
        c2 = SimpleRegressorCriterionFast(X)
        self.assertNotEmpty(c1)
        self.assertNotEmpty(c2)
        w = numpy.ones((y.shape[0],))
        self.assertEqual(w.sum(), X.shape[0])
        ind = numpy.arange(y.shape[0]).astype(numpy.int64)
        ys = y.astype(float).reshape((y.shape[0], 1))
        _test_criterion_init(c1, ys, w, 1., ind, 0, y.shape[0])
        _test_criterion_init(c2, ys, w, 1., ind, 0, y.shape[0])
        assert_criterion_equal(c1, c2)
        # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/tree/_criterion.pyx#L886
        v1 = _test_criterion_node_value(c1)
        v2 = _test_criterion_node_value(c2)
        self.assertEqual(v1, v2)
        i1 = _test_criterion_node_impurity(c1)
        i2 = _test_criterion_node_impurity(c2)
        self.assertEqual(i1, i2)
        p1 = _test_criterion_proxy_impurity_improvement(c1)
        p2 = _test_criterion_proxy_impurity_improvement(c2)
        self.assertTrue(numpy.isnan(p1))
        self.assertTrue(numpy.isnan(p2))

        X = numpy.array([[1., 2., 3.]]).T
        y = numpy.array([1., 2., 3.])
        c1 = MSE(1, X.shape[0])
        c2 = SimpleRegressorCriterionFast(X)
        w = numpy.ones((y.shape[0],))
        ind = numpy.arange(y.shape[0]).astype(numpy.int64)
        ys = y.astype(float).reshape((y.shape[0], 1))
        _test_criterion_init(c1, ys, w, 1., ind, 0, y.shape[0])
        _test_criterion_init(c2, ys, w, 1., ind, 0, y.shape[0])
        assert_criterion_equal(c1, c2)
        i1 = _test_criterion_node_impurity(c1)
        i2 = _test_criterion_node_impurity(c2)
        self.assertAlmostEqual(i1, i2)
        v1 = _test_criterion_node_value(c1)
        v2 = _test_criterion_node_value(c2)
        self.assertEqual(v1, v2)
        p1 = _test_criterion_proxy_impurity_improvement(c1)
        p2 = _test_criterion_proxy_impurity_improvement(c2)
        self.assertTrue(numpy.isnan(p1))
        self.assertTrue(numpy.isnan(p2))

        X = numpy.array([[1., 2., 10., 11.]]).T
        y = numpy.array([0.9, 1.1, 1.9, 2.1])
        c1 = MSE(1, X.shape[0])
        c2 = SimpleRegressorCriterionFast(X)
        w = numpy.ones((y.shape[0],))
        ind = numpy.arange(y.shape[0]).astype(numpy.int64)
        ys = y.astype(float).reshape((y.shape[0], 1))
        _test_criterion_init(c1, ys, w, 1., ind, 0, y.shape[0])
        _test_criterion_init(c2, ys, w, 1., ind, 0, y.shape[0])
        i1 = _test_criterion_node_impurity(c1)
        i2 = _test_criterion_node_impurity(c2)
        self.assertAlmostEqual(i1, i2)
        v1 = _test_criterion_node_value(c1)
        v2 = _test_criterion_node_value(c2)
        self.assertEqual(v1, v2)
        p1 = _test_criterion_proxy_impurity_improvement(c1)
        p2 = _test_criterion_proxy_impurity_improvement(c2)
        self.assertTrue(numpy.isnan(p1))
        self.assertTrue(numpy.isnan(p2))

        for i in range(1, 4):
            _test_criterion_update(c1, i)
            _test_criterion_update(c2, i)
            assert_criterion_equal(c1, c2)
            left1, right1 = _test_criterion_node_impurity_children(c1)
            left2, right2 = _test_criterion_node_impurity_children(c2)
            self.assertAlmostEqual(left1, left2)
            self.assertAlmostEqual(right1, right2)
            v1 = _test_criterion_node_value(c1)
            v2 = _test_criterion_node_value(c2)
            self.assertEqual(v1, v2)
            assert_criterion_equal(c1, c2)
            try:
                # scikit-learn >= 0.24
                p1 = _test_criterion_impurity_improvement(
                    c1, 0., left1, right1)
                p2 = _test_criterion_impurity_improvement(
                    c2, 0., left2, right2)
            except TypeError:
                # scikit-learn < 0.24
                p1 = _test_criterion_impurity_improvement(c1, 0.)
                p2 = _test_criterion_impurity_improvement(c2, 0.)

            self.assertAlmostEqual(p1, p2)

        X = numpy.array([[1., 2., 10., 11.]]).T
        y = numpy.array([0.9, 1.1, 1.9, 2.1])
        c1 = MSE(1, X.shape[0])
        c2 = SimpleRegressorCriterionFast(X)
        w = numpy.ones((y.shape[0],))
        ind = numpy.array([0, 3, 2, 1], dtype=ind.dtype)
        ys = y.astype(float).reshape((y.shape[0], 1))
        _test_criterion_init(c1, ys, w, 1., ind, 1, y.shape[0])
        _test_criterion_init(c2, ys, w, 1., ind, 1, y.shape[0])
        i1 = _test_criterion_node_impurity(c1)
        i2 = _test_criterion_node_impurity(c2)
        self.assertAlmostEqual(i1, i2)
        v1 = _test_criterion_node_value(c1)
        v2 = _test_criterion_node_value(c2)
        self.assertEqual(v1, v2)
        p1 = _test_criterion_proxy_impurity_improvement(c1)
        p2 = _test_criterion_proxy_impurity_improvement(c2)
        self.assertTrue(numpy.isnan(p1))
        self.assertTrue(numpy.isnan(p2))

        for i in range(2, 4):
            _test_criterion_update(c1, i)
            _test_criterion_update(c2, i)
            left1, right1 = _test_criterion_node_impurity_children(c1)
            left2, right2 = _test_criterion_node_impurity_children(c2)
            self.assertAlmostEqual(left1, left2)
            self.assertAlmostEqual(right1, right2)
            v1 = _test_criterion_node_value(c1)
            v2 = _test_criterion_node_value(c2)
            self.assertEqual(v1, v2)
            try:
                # scikit-learn >= 0.24
                p1 = _test_criterion_impurity_improvement(
                    c1, 0., left1, right1)
                p2 = _test_criterion_impurity_improvement(
                    c2, 0., left2, right2)
            except TypeError:
                # scikit-learn < 0.24
                p1 = _test_criterion_impurity_improvement(c1, 0.)
                p2 = _test_criterion_impurity_improvement(c2, 0.)
            self.assertAlmostEqual(p1, p2)

    @unittest.skipIf(compare_module_version(sklearn.__version__, "0.21") < 0,
                     reason="Only implemented for Criterion API from sklearn >= 0.21")
    def test_decision_tree_criterion(self):
        X = numpy.array([[1., 2., 10., 11.]]).T
        y = numpy.array([0.9, 1.1, 1.9, 2.1])
        clr1 = DecisionTreeRegressor(max_depth=1)
        clr1.fit(X, y)
        p1 = clr1.predict(X)

        crit = SimpleRegressorCriterionFast(X)
        clr2 = DecisionTreeRegressor(criterion=crit, max_depth=1)
        clr2.fit(X, y)
        p2 = clr2.predict(X)
        self.assertEqual(p1, p2)
        self.assertEqual(clr1.tree_.node_count, clr2.tree_.node_count)

    def test_decision_tree_criterion_iris(self):
        iris = datasets.load_iris()
        X, y = iris.data, iris.target
        clr1 = DecisionTreeRegressor()
        clr1.fit(X, y)
        p1 = clr1.predict(X)
        clr2 = DecisionTreeRegressor(criterion=SimpleRegressorCriterionFast(X))
        clr2.fit(X, y)
        p2 = clr2.predict(X)
        self.assertEqual(p1[:10], p2[:10])

    def test_decision_tree_criterion_iris_dtc(self):
        iris = datasets.load_iris()
        X, y = iris.data, iris.target
        clr1 = DecisionTreeRegressor()
        clr1.fit(X, y)
        p1 = clr1.predict(X)
        clr2 = PiecewiseTreeRegressor(criterion='simple')
        clr2.fit(X, y)
        p2 = clr2.predict(X)
        self.assertEqual(p1[:10], p2[:10])
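
Examples no. 23 and no. 26 drive the criteria through scikit-learn's private _test_criterion_* hooks. For ordinary use the same objects plug directly into DecisionTreeRegressor, as test_decision_tree_criterion already shows; condensed here for reference, under the same scikit-learn >= 0.21 requirement:

import numpy
from sklearn.tree import DecisionTreeRegressor
from mlinsights.mlmodel.piecewise_tree_regression_criterion_fast import (
    SimpleRegressorCriterionFast)  # pylint: disable=E0611

X = numpy.array([[1., 2., 10., 11.]]).T
y = numpy.array([0.9, 1.1, 1.9, 2.1])
tree = DecisionTreeRegressor(criterion=SimpleRegressorCriterionFast(X),
                             max_depth=1)
tree.fit(X, y)
print(tree.predict(X))  # matches the default criterion's tree on this data
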
Example no. 27
class TestOnnxrtPythonRuntimeCustom(ExtTestCase):
    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        if __name__ == "__main__":
            import pprint
            print('\n-----------')
            pprint.pprint(list(sorted({_.__name__ for _ in python_tested})))
            print('-----------')

    def setUp(self):
        logger = getLogger('skl2onnx')
        logger.disabled = True

    @ignore_warnings(DeprecationWarning)
    def test_onnxt_runtime_cdist(self):
        for metric in ['sqeuclidean', 'euclidean']:
            with self.subTest(metric=metric):
                X = numpy.array([[2, 1], [0, 1]], dtype=float)
                Y = numpy.array([[2, 1, 5], [0, 1, 3]], dtype=float).T
                Z = cdist(X, Y, metric=metric)

                onx = OnnxCDist('X',
                                'Y',
                                output_names=['Z'],
                                metric=metric,
                                op_version=TARGET_OPSET)
                model_def = onx.to_onnx(
                    {
                        'X': X.astype(numpy.float32),
                        'Y': Y.astype(numpy.float32)
                    },
                    outputs={'Z': Z.astype(numpy.float32)},
                    target_opset=TARGET_OPSET)
                self.assertIn('s: "%s"' % metric, str(model_def))
                oinf = OnnxInference(model_def)
                got = oinf.run({'X': X, 'Y': Y})
                self.assertEqual(list(sorted(got)), ['Z'])
                self.assertEqualArray(Z, got['Z'], decimal=6)

                oinfpy = OnnxInference(model_def,
                                       runtime="python",
                                       inplace=True)
                validate_python_inference(
                    oinfpy,
                    {'X': X.astype(numpy.float32),
                     'Y': Y.astype(numpy.float32)},
                    tolerance=1e-6)
        python_tested.append(OnnxCDist)

    @unittest.skipIf(
        compare_module_version(skl2onnx.__version__, "1.9.1") <= 0,
        reason="Missing complex support.")
    @ignore_warnings(DeprecationWarning)
    def test_onnxt_runtime_complex_abs(self):
        for dtype in [numpy.complex64, numpy.complex128]:
            with self.subTest(dtype=dtype):
                X = numpy.array([[2, 1j], [0, 1j]], dtype=dtype)
                Z = numpy.absolute(X)

                onx = OnnxComplexAbs('X',
                                     output_names=['Z'],
                                     op_version=TARGET_OPSET)
                model_def = onx.to_onnx({'X': X},
                                        outputs={'Z': Z},
                                        target_opset=TARGET_OPSET)
                oinf = OnnxInference(model_def)
                got = oinf.run({'X': X})
                self.assertEqual(list(sorted(got)), ['Z'])
                self.assertEqualArray(Z, got['Z'], decimal=6)

                oinfpy = OnnxInference(model_def,
                                       runtime="python",
                                       inplace=True)
                validate_python_inference(oinfpy, {'X': X}, tolerance=1e-6)
                python_tested.append(OnnxComplexAbs)

    @unittest.skipIf(
        compare_module_version(skl2onnx.__version__, "1.9.1") <= 0,
        reason="Missing complex support.")
    @ignore_warnings(DeprecationWarning)
    def test_onnxt_runtime_fft(self):
        for dim in [1, 2]:
            for axis in [-1, 0, 1]:
                if axis >= dim:
                    continue
                with self.subTest(dim=dim, axis=axis):
                    if dim == 1:
                        X = numpy.arange(16).astype(numpy.float32)
                    elif dim == 2:
                        X = numpy.arange(48).astype(numpy.float32).reshape(
                            (3, -1))
                    Y = numpy.fft.fft(X.astype(numpy.float32), axis=axis)

                    onx = OnnxFFT('X',
                                  output_names=['Y'],
                                  axis=axis,
                                  op_version=TARGET_OPSET)
                    model_def = onx.to_onnx({'X': X.astype(numpy.float32)},
                                            outputs={'Y': Y},
                                            target_opset=TARGET_OPSET)
                    oinf = OnnxInference(model_def)
                    got = oinf.run({'X': X})
                    self.assertEqual(list(sorted(got)), ['Y'])
                    self.assertEqualArray(Y, got['Y'], decimal=6)

                    oinfpy = OnnxInference(model_def,
                                           runtime="python",
                                           inplace=True)
                    validate_python_inference(oinfpy,
                                              {'X': X.astype(numpy.float32)},
                                              tolerance=1e-6)

        for dim in [1, 2]:
            for axis in [-1, 0, 1]:
                if axis >= dim:
                    continue
                with self.subTest(dim=dim, axis=axis, length=8):
                    if dim == 1:
                        X = numpy.arange(16).astype(numpy.float32)
                    elif dim == 2:
                        X = numpy.arange(48).astype(numpy.float32).reshape(
                            (3, -1))
                    Y = numpy.fft.fft(X.astype(numpy.float32), 8, axis=axis)

                    onx = OnnxFFT('X',
                                  numpy.array([8], dtype=numpy.int64),
                                  output_names=['Y'],
                                  axis=axis,
                                  op_version=TARGET_OPSET)
                    model_def = onx.to_onnx({'X': X.astype(numpy.float32)},
                                            outputs={'Y': Y},
                                            target_opset=TARGET_OPSET)
                    oinf = OnnxInference(model_def)
                    got = oinf.run({'X': X})
                    self.assertEqual(list(sorted(got)), ['Y'])
                    self.assertEqualArray(Y, got['Y'], decimal=5)

                    oinfpy = OnnxInference(model_def,
                                           runtime="python",
                                           inplace=True)
                    validate_python_inference(oinfpy,
                                              {'X': X.astype(numpy.float32)},
                                              tolerance=1e-5)
                    python_tested.append(OnnxFFT)

    @unittest.skipIf(
        compare_module_version(skl2onnx.__version__, "1.9.1") <= 0,
        reason="Missing complex support.")
    @ignore_warnings(DeprecationWarning)
    def test_onnxt_runtime_rfft(self):
        for dim in [1, 2]:
            for axis in [-1, 0, 1]:
                if axis >= dim:
                    continue
                with self.subTest(dim=dim, axis=axis):
                    if dim == 1:
                        X = numpy.arange(16).astype(numpy.float32)
                    elif dim == 2:
                        X = numpy.arange(48).astype(numpy.float32).reshape(
                            (3, -1))
                    Y = numpy.fft.rfft(X.astype(numpy.float32), axis=axis)

                    onx = OnnxRFFT('X',
                                   output_names=['Y'],
                                   axis=axis,
                                   op_version=TARGET_OPSET)
                    model_def = onx.to_onnx({'X': X.astype(numpy.float32)},
                                            outputs={'Y': Y},
                                            target_opset=TARGET_OPSET)
                    oinf = OnnxInference(model_def)
                    got = oinf.run({'X': X})
                    self.assertEqual(list(sorted(got)), ['Y'])
                    self.assertEqualArray(Y, got['Y'], decimal=6)

                    oinfpy = OnnxInference(model_def,
                                           runtime="python",
                                           inplace=True)
                    validate_python_inference(oinfpy,
                                              {'X': X.astype(numpy.float32)},
                                              tolerance=1e-6)

        for dim in [1, 2]:
            for axis in [-1, 0, 1]:
                if axis >= dim:
                    continue
                with self.subTest(dim=dim, axis=axis, length=8):
                    if dim == 1:
                        X = numpy.arange(16).astype(numpy.float32)
                    elif dim == 2:
                        X = numpy.arange(48).astype(numpy.float32).reshape(
                            (3, -1))
                    Y = numpy.fft.rfft(X.astype(numpy.float32), 8, axis=axis)

                    onx = OnnxRFFT('X',
                                   numpy.array([8], dtype=numpy.int64),
                                   output_names=['Y'],
                                   axis=axis,
                                   op_version=TARGET_OPSET)
                    try:
                        model_def = onx.to_onnx({'X': X.astype(numpy.float32)},
                                                outputs={'Y': Y},
                                                target_opset=TARGET_OPSET)
                    except NotImplementedError as e:
                        raise AssertionError(
                            "Unable to convert due to %r (version=%r)." %
                            (e, skl2onnx.__version__)) from e
                    oinf = OnnxInference(model_def)
                    got = oinf.run({'X': X})
                    self.assertEqual(list(sorted(got)), ['Y'])
                    self.assertEqualArray(Y, got['Y'], decimal=5)

                    oinfpy = OnnxInference(model_def,
                                           runtime="python",
                                           inplace=True)
                    validate_python_inference(oinfpy,
                                              {'X': X.astype(numpy.float32)},
                                              tolerance=1e-5)
                    python_tested.append(OnnxRFFT)
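
    # Added sketch: rfft keeps only the non-negative frequencies of a real
    # signal, so it must match the first n//2 + 1 coefficients of fft.
    def test_rfft_half_spectrum_sketch(self):
        X = numpy.arange(16).astype(numpy.float32)
        full = numpy.fft.fft(X)
        half = numpy.fft.rfft(X)
        self.assertEqualArray(full[:X.size // 2 + 1], half, decimal=5)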

    @unittest.skipIf(
        compare_module_version(skl2onnx.__version__, "1.9.1") <= 0,
        reason="Missing complex support.")
    @ignore_warnings(DeprecationWarning)
    def test_onnxt_runtime_fft2d(self):
        for dim in [2]:
            for axis in [None, (-2, -1)]:
                with self.subTest(dim=dim, axis=axis):
                    if dim == 1:
                        X = numpy.arange(16).astype(numpy.float32)
                    elif dim == 2:
                        X = numpy.arange(48).astype(numpy.float32).reshape(
                            (3, -1))
                    Y = numpy.fft.fft2(X.astype(numpy.float32), axes=axis)

                    if axis is not None:
                        onx = OnnxFFT2D('X',
                                        output_names=['Y'],
                                        axes=axis,
                                        op_version=TARGET_OPSET)
                    else:
                        onx = OnnxFFT2D('X',
                                        output_names=['Y'],
                                        op_version=TARGET_OPSET)
                    model_def = onx.to_onnx({'X': X.astype(numpy.float32)},
                                            outputs={'Y': Y},
                                            target_opset=TARGET_OPSET)
                    oinf = OnnxInference(model_def)
                    got = oinf.run({'X': X})
                    self.assertEqual(list(sorted(got)), ['Y'])
                    self.assertEqualArray(Y, got['Y'], decimal=5)

                    oinfpy = OnnxInference(model_def,
                                           runtime="python",
                                           inplace=True)
                    validate_python_inference(oinfpy,
                                              {'X': X.astype(numpy.float32)},
                                              tolerance=1e-5)

        for dim in [2]:
            for axis in [None, (-2, -1)]:
                with self.subTest(dim=dim, axis=axis, length=(8, 8)):
                    if dim == 1:
                        X = numpy.arange(16).astype(numpy.float32)
                    elif dim == 2:
                        X = numpy.arange(48).astype(numpy.float32).reshape(
                            (3, -1))
                    Y = numpy.fft.fft2(X.astype(numpy.float32), (8, 8),
                                       axes=axis)

                    if axis is not None:
                        onx = OnnxFFT2D('X',
                                        numpy.array([8, 8], dtype=numpy.int64),
                                        output_names=['Y'],
                                        axes=axis,
                                        op_version=TARGET_OPSET)
                    else:
                        onx = OnnxFFT2D('X',
                                        numpy.array([8, 8], dtype=numpy.int64),
                                        output_names=['Y'],
                                        op_version=TARGET_OPSET)
                    model_def = onx.to_onnx({'X': X.astype(numpy.float32)},
                                            outputs={'Y': Y},
                                            target_opset=TARGET_OPSET)
                    oinf = OnnxInference(model_def)
                    got = oinf.run({'X': X})
                    self.assertEqual(list(sorted(got)), ['Y'])
                    self.assertEqualArray(Y, got['Y'], decimal=5)

                    oinfpy = OnnxInference(model_def,
                                           runtime="python",
                                           inplace=True)
                    validate_python_inference(oinfpy,
                                              {'X': X.astype(numpy.float32)},
                                              tolerance=1e-5)
                    python_tested.append(OnnxFFT2D)
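
    # Added sketch: fft2 over the default axes (-2, -1) factorises into two
    # successive 1-D transforms, the property the 2-D operator presumably
    # builds on.
    def test_fft2d_separable_sketch(self):
        X = numpy.arange(48).astype(numpy.float32).reshape((3, -1))
        by_axis = numpy.fft.fft(numpy.fft.fft(X, axis=-1), axis=-2)
        self.assertEqualArray(numpy.fft.fft2(X), by_axis, decimal=5)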

    @ignore_warnings(DeprecationWarning)
    def test_onnxt_runtime_solve(self):
        for transposed in [False, True]:
            with self.subTest(transposed=transposed):
                A = numpy.array([[2, 1], [0, 1]], dtype=float)
                Y = numpy.array([2, 1], dtype=float)
                X = solve(A, Y, transposed=transposed)

                onx = OnnxSolve('A',
                                'Y',
                                output_names=['X'],
                                transposed=transposed,
                                op_version=TARGET_OPSET)
                model_def = onx.to_onnx(
                    {
                        'A': A.astype(numpy.float32),
                        'Y': Y.astype(numpy.float32)
                    },
                    outputs={'X': X.astype(numpy.float32)},
                    target_opset=TARGET_OPSET)
                oinf = OnnxInference(model_def)
                got = oinf.run({'A': A, 'Y': Y})
                self.assertEqual(list(sorted(got)), ['X'])
                self.assertEqualArray(X, got['X'], decimal=6)

                oinfpy = OnnxInference(model_def,
                                       runtime="python",
                                       inplace=True)
                validate_python_inference(oinfpy, {
                    'A': A.astype(numpy.float32),
                    'Y': Y.astype(numpy.float32)
                })
                python_tested.append(OnnxSolve)
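
    # Added sketch, assuming the imported ``solve`` behaves like
    # scipy.linalg.solve: ``transposed=True`` solves A.T @ X = Y
    # instead of A @ X = Y.
    def test_solve_transposed_sketch(self):
        A = numpy.array([[2, 1], [0, 1]], dtype=float)
        Y = numpy.array([2, 1], dtype=float)
        self.assertEqualArray(A @ solve(A, Y), Y, decimal=6)
        self.assertEqualArray(A.T @ solve(A, Y, transposed=True), Y, decimal=6)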

    @ignore_warnings(DeprecationWarning)
    def test_onnxt_runtime_yield_op(self):
        for dtype in [numpy.float32, numpy.float64]:
            with self.subTest(dtype=dtype):
                X = numpy.array([[2, 1], [0, 1]], dtype=dtype)
                # At inference time the operator behaves like Identity,
                # hence the expected output equals the input.
                Z = X

                onx = OnnxYieldOp('X',
                                  output_names=['Z'],
                                  op_version=TARGET_OPSET)
                model_def = onx.to_onnx({'X': X},
                                        outputs={'Z': Z},
                                        target_opset=TARGET_OPSET)
                oinf = OnnxInference(model_def)
                got = oinf.run({'X': X})
                self.assertEqual(list(sorted(got)), ['Z'])
                self.assertEqualArray(Z, got['Z'], decimal=6)

                oinfpy = OnnxInference(model_def,
                                       runtime="python",
                                       inplace=True)
                validate_python_inference(oinfpy, {'X': X}, tolerance=1e-6)
                python_tested.append(OnnxYieldOp)

    @ignore_warnings(DeprecationWarning)
    def test_onnxt_runtime_broadcast_gradient_args(self):
        X = numpy.array([2, 16, 1024, 1024], dtype=numpy.int64)
        Y = numpy.array([1, 1, 1024, 1024], dtype=numpy.int64)
        Z1 = numpy.array([], dtype=numpy.int64)
        Z2 = numpy.array([1, 0], dtype=numpy.int64)
        onx = OnnxBroadcastGradientArgs('X',
                                        'Y',
                                        output_names=['Z1', 'Z2'],
                                        op_version=TARGET_OPSET)
        model_def = onx.to_onnx({
            'X': X,
            'Y': Y
        },
                                outputs={
                                    'Z1': Z1,
                                    'Z2': Z2
                                },
                                target_opset=TARGET_OPSET)

        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X, 'Y': Y})
        self.assertEqualArray(Z1, got['Z1'])
        self.assertEqualArray(Z2, got['Z2'])

        X = numpy.array([2, 3, 4, 5], dtype=numpy.int64)
        Y = numpy.array([2, 3, 4, 5], dtype=numpy.int64)
        Z1 = numpy.array([], dtype=numpy.int64)
        Z2 = numpy.array([], dtype=numpy.int64)
        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X, 'Y': Y})
        self.assertEqualArray(Z1, got['Z1'])
        self.assertEqualArray(Z2, got['Z2'])

        X = numpy.array([2, 3, 4, 5], dtype=numpy.int64)
        Y = numpy.array([], dtype=numpy.int64)
        Z1 = numpy.array([], dtype=numpy.int64)
        Z2 = numpy.array([3, 2, 1, 0], dtype=numpy.int64)
        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X, 'Y': Y})
        self.assertEqualArray(Z1, got['Z1'])
        self.assertEqualArray(Z2, got['Z2'])

        X = numpy.array([2, 3, 4, 5], dtype=numpy.int64)
        Y = numpy.array([5], dtype=numpy.int64)
        Z1 = numpy.array([], dtype=numpy.int64)
        Z2 = numpy.array([2, 1, 0], dtype=numpy.int64)
        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X, 'Y': Y})
        self.assertEqualArray(Z1, got['Z1'])
        self.assertEqualArray(Z2, got['Z2'])

        X = numpy.array([4, 5], dtype=numpy.int64)
        Y = numpy.array([2, 3, 4, 5], dtype=numpy.int64)
        Z1 = numpy.array([1, 0], dtype=numpy.int64)
        Z2 = numpy.array([], dtype=numpy.int64)
        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X, 'Y': Y})
        self.assertEqualArray(Z1, got['Z1'])
        self.assertEqualArray(Z2, got['Z2'])

        X = numpy.array([1, 4, 5], dtype=numpy.int64)
        Y = numpy.array([2, 3, 1, 1], dtype=numpy.int64)
        Z1 = numpy.array([1, 0], dtype=numpy.int64)
        Z2 = numpy.array([3, 2], dtype=numpy.int64)
        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X, 'Y': Y})
        self.assertEqualArray(Z1, got['Z1'])
        self.assertEqualArray(Z2, got['Z2'])

        X = numpy.array([3, 4, 5], dtype=numpy.int64)
        Y = numpy.array([2, 1, 1, 1], dtype=numpy.int64)
        Z1 = numpy.array([0], dtype=numpy.int64)
        Z2 = numpy.array([3, 2, 1], dtype=numpy.int64)
        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X, 'Y': Y})
        self.assertEqualArray(Z1, got['Z1'])
        self.assertEqualArray(Z2, got['Z2'])

        X = numpy.array([2, 16, 1, 1024], dtype=numpy.int64)
        Y = numpy.array([1, 1, 1024, 1024], dtype=numpy.int64)
        Z1 = numpy.array([2], dtype=numpy.int64)
        Z2 = numpy.array([1, 0], dtype=numpy.int64)
        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X, 'Y': Y})
        self.assertEqualArray(Z1, got['Z1'])
        self.assertEqualArray(Z2, got['Z2'])

        X = numpy.array([3, 4, 5], dtype=numpy.int64)
        Y = numpy.array([2, 1, 6, 1], dtype=numpy.int64)
        # Incompatible shapes (4 cannot broadcast with 6): the runtime fails.
        oinf = OnnxInference(model_def)
        self.assertRaise(lambda: oinf.run({'X': X, 'Y': Y}), RuntimeError)
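
    # Added reference sketch, consistent with the cases checked above:
    # for two broadcastable shapes, return for each input the axes of the
    # broadcast output (in decreasing order) along which its gradient has
    # to be reduced.
    @staticmethod
    def _broadcast_gradient_args_reference(a_shape, b_shape):
        ndim = max(len(a_shape), len(b_shape))
        a = [1] * (ndim - len(a_shape)) + list(a_shape)
        b = [1] * (ndim - len(b_shape)) + list(b_shape)
        a_axes, b_axes = [], []
        for axis in range(ndim - 1, -1, -1):
            if a[axis] == b[axis]:
                continue
            if a[axis] == 1:
                a_axes.append(axis)
            elif b[axis] == 1:
                b_axes.append(axis)
            else:
                raise RuntimeError(
                    "Shapes %r and %r cannot be broadcast together." % (
                        a_shape, b_shape))
        return a_axes, b_axes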

    @ignore_warnings(DeprecationWarning)
    def test_onnxt_runtime_fused_matmul(self):
        idi = numpy.array([[1, 0], [1, 1]], dtype=numpy.float32)
        X = numpy.array([[1, 2], [3, 4]], dtype=numpy.float32)
        Y = numpy.dot(X, idi)

        onx = OnnxFusedMatMul('X',
                              idi,
                              output_names=['Y'],
                              op_version=TARGET_OPSET)
        model_def = onx.to_onnx({'X': idi.astype(numpy.float32)},
                                outputs={'Y': Y},
                                target_opset=TARGET_OPSET)
        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X.astype(numpy.float32)})
        self.assertEqual(list(sorted(got)), ['Y'])
        self.assertEqualArray(numpy.dot(X, idi), got['Y'], decimal=5)

        onx = OnnxFusedMatMul('X',
                              idi,
                              transA=1,
                              transB=1,
                              output_names=['Y'],
                              op_version=TARGET_OPSET)
        model_def = onx.to_onnx({'X': idi.astype(numpy.float32)},
                                outputs={'Y': Y},
                                target_opset=TARGET_OPSET)
        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X.astype(numpy.float32)})
        self.assertEqual(list(sorted(got)), ['Y'])
        self.assertEqualArray(numpy.dot(X.T, idi.T), got['Y'], decimal=5)

        onx = OnnxFusedMatMul('X',
                              idi,
                              transA=1,
                              output_names=['Y'],
                              op_version=TARGET_OPSET)
        model_def = onx.to_onnx({'X': idi.astype(numpy.float32)},
                                outputs={'Y': Y},
                                target_opset=TARGET_OPSET)
        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X.astype(numpy.float32)})
        self.assertEqual(list(sorted(got)), ['Y'])
        self.assertEqualArray(numpy.dot(X.T, idi), got['Y'], decimal=5)

        onx = OnnxFusedMatMul('X',
                              idi,
                              transB=1,
                              output_names=['Y'],
                              op_version=TARGET_OPSET)
        model_def = onx.to_onnx({'X': idi.astype(numpy.float32)},
                                outputs={'Y': Y},
                                target_opset=TARGET_OPSET)
        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X.astype(numpy.float32)})
        self.assertEqual(list(sorted(got)), ['Y'])
        self.assertEqualArray(numpy.dot(X, idi.T), got['Y'], decimal=5)

        onx = OnnxFusedMatMul('X',
                              idi,
                              transB=1,
                              output_names=['Y'],
                              alpha=numpy.float32(1.),
                              op_version=TARGET_OPSET)
        model_def = onx.to_onnx({'X': idi.astype(numpy.float32)},
                                outputs={'Y': Y},
                                target_opset=TARGET_OPSET)
        oinf = OnnxInference(model_def)
        got = oinf.run({'X': X.astype(numpy.float32)})
        self.assertEqual(list(sorted(got)), ['Y'])
        self.assertEqualArray(numpy.dot(X, idi.T), got['Y'], decimal=5)
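
    # Added sketch (assumption: for the 2-D inputs used above, FusedMatMul
    # computes alpha * op(A) @ op(B) where transA/transB toggle a
    # transposition).
    @staticmethod
    def _fused_matmul_reference(a, b, transA=0, transB=0, alpha=1.0):
        if transA:
            a = a.T
        if transB:
            b = b.T
        return alpha * (a @ b)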

    @ignore_warnings(DeprecationWarning)
    def test_onnxt_runtime_softmax_grad_13(self):
        G = numpy.array([[-0.1, -0.1, 0.1]], dtype=numpy.float32)
        P = numpy.array([[0.1, 0.3, 0.5]], dtype=numpy.float32)
        Z = numpy.array([[-0.025, -0.015, 0.075]], dtype=numpy.float32)
        onx = OnnxSoftmaxGrad_13('G',
                                 'P',
                                 output_names=['Z'],
                                 op_version=TARGET_OPSET)
        model_def = onx.to_onnx({
            'G': G,
            'P': P
        },
                                outputs={'Z': Z},
                                target_opset=TARGET_OPSET)

        oinf = OnnxInference(model_def)
        # The expected Z above corresponds to G == P, so P is deliberately
        # fed for both inputs.
        got = oinf.run({'G': P, 'P': P})
        self.assertEqualArray(Z, got['Z'])
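
    # Added sketch: SoftmaxGrad backpropagates through a softmax output P,
    # i.e. dX = (G - sum(G * P, axis=-1, keepdims=True)) * P; the expected
    # Z above follows from feeding G = P.
    def test_softmax_grad_reference_sketch(self):
        P = numpy.array([[0.1, 0.3, 0.5]], dtype=numpy.float32)
        dX = (P - (P * P).sum(axis=-1, keepdims=True)) * P
        self.assertEqualArray(
            numpy.array([[-0.025, -0.015, 0.075]], dtype=numpy.float32),
            dX, decimal=6)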
Example n. 28
def has_onnxruntime(version):
    """Tells whether *onnxruntime* is installed in a version at least equal
    to *version*; returns None when it is not installed at all."""
    try:
        import onnxruntime  # pylint: disable=C0415
        return compare_module_version(onnxruntime.__version__, version) >= 0
    except ImportError:
        return None
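
# A hedged usage sketch for the helper above (class and test names are
# illustrative, not from the original module): since the helper returns
# None when onnxruntime is missing, ``not has_onnxruntime(...)`` covers
# both the absent and the too-old case.
import unittest

class TestRequiresOnnxruntime(unittest.TestCase):
    @unittest.skipIf(not has_onnxruntime('1.4.0'),
                     reason="onnxruntime>=1.4.0 required")
    def test_runtime_available(self):
        import onnxruntime  # pylint: disable=C0415
        self.assertGreaterEqual(
            compare_module_version(onnxruntime.__version__, '1.4.0'), 0)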
Example n. 29
class TestSklearnGaussianProcess(ExtTestCase):
    def remove_dim1(self, arr):
        new_shape = tuple(v for v in arr.shape if v != 1)
        if new_shape != arr.shape:
            arr = arr.reshape(new_shape)
        return arr

    def check_outputs(self,
                      model,
                      model_onnx,
                      Xtest,
                      predict_attributes,
                      decimal=5,
                      skip_if_float32=False,
                      disable_optimisation=False):
        if predict_attributes is None:
            predict_attributes = {}
        exp = model.predict(Xtest, **predict_attributes)
        runtime_options = dict(disable_optimisation=disable_optimisation)
        sess = OnnxInference(model_onnx, runtime_options=runtime_options)
        got = sess.run({'X': Xtest})
        got = [got[k] for k in sess.output_names]
        if isinstance(exp, tuple):
            if len(exp) != len(got):
                raise AssertionError("Mismatched number of outputs.")
            for i, (e, g) in enumerate(zip(exp, got)):
                if skip_if_float32 and g.dtype == np.float32:
                    continue
                try:
                    assert_almost_equal(self.remove_dim1(e),
                                        self.remove_dim1(g),
                                        decimal=decimal)
                except AssertionError as e:  # noqa
                    raise AssertionError(
                        "Mismatch for output {} and attributes {}"
                        ".".format(i, predict_attributes)) from e
        else:
            if skip_if_float32 and Xtest.dtype == np.float32:
                return
            assert_almost_equal(np.squeeze(exp),
                                np.squeeze(got),
                                decimal=decimal)

    def test_gpr_rbf_unfitted(self):

        se = (C(1.0, (1e-3, 1e3)) *
              RBF(length_scale=10, length_scale_bounds=(1e-3, 1e3)))
        kernel = (Sum(
            se,
            C(0.1, (1e-3, 1e3)) *
            RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3))))

        gp = GaussianProcessRegressor(alpha=1e-7,
                                      kernel=kernel,
                                      n_restarts_optimizer=15,
                                      normalize_y=True)

        # return_cov=False, return_std=False
        model_onnx = to_onnx(gp, initial_types=[('X', FloatTensorType([]))])
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(Xtest_.astype(np.float32),
                            gp,
                            model_onnx,
                            verbose=False,
                            basename="SklearnGaussianProcessRBFUnfitted")

        # return_cov=True, return_std=True
        options = {
            GaussianProcessRegressor: {
                "return_std": True,
                "return_cov": True
            }
        }
        try:
            to_onnx(gp, Xtrain_.astype(np.float32), options=options)
        except RuntimeError as e:
            assert "Not returning standard deviation" in str(e)

        # return_std=True
        options = {GaussianProcessRegressor: {"return_std": True}}
        model_onnx = to_onnx(gp,
                             options=options,
                             initial_types=[('X', FloatTensorType([None,
                                                                   None]))])
        self.assertTrue(model_onnx is not None)
        self.check_outputs(
            gp,
            model_onnx,
            Xtest_.astype(np.float32),
            predict_attributes=options[GaussianProcessRegressor])

        # return_cov=True
        options = {GaussianProcessRegressor: {"return_cov": True}}
        # model_onnx = to_onnx(gp, Xtrain_.astype(np.float32), options=options)
        model_onnx = to_onnx(gp,
                             options=options,
                             initial_types=[('X', FloatTensorType([None,
                                                                   None]))])
        self.assertTrue(model_onnx is not None)
        self.check_outputs(
            gp,
            model_onnx,
            Xtest_.astype(np.float32),
            predict_attributes=options[GaussianProcessRegressor])

    @unittest.skipIf(compare_module_version(skl2_vers, '1.7.1099') <= 0,
                     reason="shape issue")
    def test_gpr_rbf_fitted_true(self):

        gp = GaussianProcessRegressor(alpha=1e-7,
                                      n_restarts_optimizer=15,
                                      normalize_y=True)
        gp, X = fit_regression_model(gp)
        X = X.astype(np.float64)

        # return_cov=False, return_std=False
        model_onnx = to_onnx(gp,
                             initial_types=[('X',
                                             DoubleTensorType([None, None]))],
                             target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(X,
                            gp,
                            model_onnx,
                            verbose=False,
                            basename="SklearnGaussianProcessRBFT",
                            check_error="misses a kernel")

    def test_gpr_rbf_fitted_false(self):

        gp = GaussianProcessRegressor(alpha=1e-7,
                                      n_restarts_optimizer=15,
                                      normalize_y=False)
        gp.fit(Xtrain_, Ytrain_)

        # return_cov=False, return_std=False
        model_onnx = to_onnx(gp,
                             initial_types=[('X', FloatTensorType([None,
                                                                   None]))])
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(Xtest_.astype(np.float32),
                            gp,
                            model_onnx,
                            verbose=False,
                            basename="SklearnGaussianProcessRBF-Dec4")

    def test_gpr_rbf_fitted_return_std_true(self):
        gp = GaussianProcessRegressor(alpha=1e-7,
                                      n_restarts_optimizer=15,
                                      normalize_y=True)
        gp.fit(Xtrain_, Ytrain_)

        # return_std=True
        options = {GaussianProcessRegressor: {"return_std": True}}
        try:
            to_onnx(gp,
                    initial_types=[('X', FloatTensorType([None, None]))],
                    options=options,
                    target_opset=TARGET_OPSET)
        except RuntimeError as e:
            assert "The method *predict* must be called" in str(e)
        gp.predict(Xtrain_, return_std=True)
        model_onnx = to_onnx(gp,
                             initial_types=[('X', FloatTensorType([None,
                                                                   None]))],
                             options=options,
                             target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        self.check_outputs(
            gp,
            model_onnx,
            Xtest_.astype(np.float32),
            predict_attributes=options[GaussianProcessRegressor],
            decimal=4,
            disable_optimisation=True)
        dump_data_and_model(Xtest_.astype(np.float32),
                            gp,
                            model_onnx,
                            verbose=False,
                            basename="SklearnGaussianProcessRBFStd-Out0",
                            check_error="misses a kernel",
                            disable_optimisation=True)

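    # The following tests repeat the return_std pattern with other kernels:
    # ``predict(..., return_std=True)`` must be called once on the fitted
    # model before conversion (see the RuntimeError checked above), after
    # which the converted model is validated with ``check_outputs``.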
    def test_gpr_rbf_fitted_return_std_exp_sine_squared_true(self):

        gp = GaussianProcessRegressor(kernel=ExpSineSquared(),
                                      alpha=1e-7,
                                      n_restarts_optimizer=15,
                                      normalize_y=True)
        gp.fit(Xtrain_, Ytrain_)

        # return_std=True
        options = {GaussianProcessRegressor: {"return_std": True}}
        gp.predict(Xtrain_, return_std=True)
        model_onnx = to_onnx(gp,
                             initial_types=[('X',
                                             DoubleTensorType([None, None]))],
                             options=options,
                             target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            Xtest_.astype(np.float64),
            gp,
            model_onnx,
            verbose=False,
            basename="SklearnGaussianProcessExpSineSquaredStdT-Out0-Dec3",
            check_error="misses a kernel",
            disable_optimisation=True)
        self.check_outputs(
            gp,
            model_onnx,
            Xtest_.astype(np.float64),
            predict_attributes=options[GaussianProcessRegressor],
            decimal=4,
            disable_optimisation=True)

    def test_gpr_rbf_fitted_return_std_exp_sine_squared_false(self):

        gp = GaussianProcessRegressor(kernel=ExpSineSquared(),
                                      alpha=1e-7,
                                      n_restarts_optimizer=15,
                                      normalize_y=False)
        gp.fit(Xtrain_, Ytrain_)

        # return_std=True
        options = {GaussianProcessRegressor: {"return_std": True}}
        gp.predict(Xtrain_, return_std=True)
        model_onnx = to_onnx(gp,
                             initial_types=[('X',
                                             DoubleTensorType([None, None]))],
                             options=options,
                             target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            Xtest_.astype(np.float64),
            gp,
            model_onnx,
            verbose=False,
            basename="SklearnGaussianProcessExpSineSquaredStdF-Out0-Dec3")
        self.check_outputs(
            gp,
            model_onnx,
            Xtest_.astype(np.float64),
            predict_attributes=options[GaussianProcessRegressor],
            decimal=4)

    def test_gpr_rbf_fitted_return_std_exp_sine_squared_double_true(self):

        gp = GaussianProcessRegressor(kernel=ExpSineSquared(),
                                      alpha=1e-7,
                                      n_restarts_optimizer=15,
                                      normalize_y=True)
        gp.fit(Xtrain_, Ytrain_)

        # return_std=True
        options = {GaussianProcessRegressor: {"return_std": True}}
        gp.predict(Xtrain_, return_std=True)
        model_onnx = to_onnx(gp,
                             initial_types=[('X',
                                             DoubleTensorType([None, None]))],
                             options=options,
                             target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            Xtest_.astype(np.float64),
            gp,
            model_onnx,
            basename="SklearnGaussianProcessExpSineSquaredStdDouble-Out0-Dec4",
            check_error="misses a kernel",
            disable_optimisation=True)
        self.check_outputs(
            gp,
            model_onnx,
            Xtest_.astype(np.float64),
            predict_attributes=options[GaussianProcessRegressor],
            decimal=4,
            disable_optimisation=True)

    def test_gpr_rbf_fitted_return_std_dot_product_true(self):

        gp = GaussianProcessRegressor(kernel=DotProduct(),
                                      alpha=1.,
                                      n_restarts_optimizer=15,
                                      normalize_y=True)
        gp.fit(Xtrain_, Ytrain_)
        gp.predict(Xtrain_, return_std=True)

        # return_std=True
        options = {GaussianProcessRegressor: {"return_std": True}}
        model_onnx = to_onnx(gp,
                             initial_types=[('X',
                                             DoubleTensorType([None, None]))],
                             options=options,
                             target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            Xtest_.astype(np.float64),
            gp,
            model_onnx,
            basename="SklearnGaussianProcessDotProductStdDouble-Out0-Dec3",
            check_error="misses a kernel",
            disable_optimisation=True)
        self.check_outputs(
            gp,
            model_onnx,
            Xtest_.astype(np.float64),
            predict_attributes=options[GaussianProcessRegressor],
            decimal=3,
            disable_optimisation=True)

    def test_gpr_rbf_fitted_return_std_rational_quadratic_true(self):

        gp = GaussianProcessRegressor(kernel=RationalQuadratic(),
                                      alpha=1e-7,
                                      n_restarts_optimizer=15,
                                      normalize_y=True)
        gp.fit(Xtrain_, Ytrain_)
        gp.predict(Xtrain_, return_std=True)

        # return_std=True
        options = {GaussianProcessRegressor: {"return_std": True}}
        model_onnx = to_onnx(gp,
                             initial_types=[('X',
                                             DoubleTensorType([None, None]))],
                             options=options,
                             target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            Xtest_.astype(np.float64),
            gp,
            model_onnx,
            basename="SklearnGaussianProcessRationalQuadraticStdDouble-Out0",
            check_error="misses a kernel",
            disable_optimisation=True)
        self.check_outputs(
            gp,
            model_onnx,
            Xtest_.astype(np.float64),
            predict_attributes=options[GaussianProcessRegressor],
            disable_optimisation=True)

    def test_gpr_fitted_shapes(self):
        data = load_iris()
        X = data.data.astype(np.float32)
        y = data.target.astype(np.float32)
        X_train, X_test, y_train, _ = train_test_split(X, y)
        gp = GaussianProcessRegressor()
        gp.fit(X_train, y_train)

        model_onnx = to_onnx(gp,
                             initial_types=[('X', FloatTensorType([None,
                                                                   None]))],
                             target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        self.check_outputs(gp, model_onnx, X_test, {}, skip_if_float32=True)

    def test_gpr_fitted_partial_float64(self):
        data = load_iris()
        X = data.data
        y = data.target
        X_train, X_test, y_train, _ = train_test_split(X, y)
        gp = GaussianProcessRegressor(kernel=DotProduct(), alpha=10.)
        gp.fit(X_train, y_train)

        model_onnx = to_onnx(gp,
                             initial_types=[('X', FloatTensorType([None,
                                                                   None]))])
        self.assertTrue(model_onnx is not None)
        try:
            self.check_outputs(gp, model_onnx, X_test.astype(np.float32), {})
        except AssertionError as e:
            assert "Max relative difference:" in str(e)

        model_onnx = to_onnx(gp,
                             initial_types=[('X',
                                             DoubleTensorType([None, None]))])
        self.assertTrue(model_onnx is not None)
        self.check_outputs(gp, model_onnx, X_test, {})

    def test_gpr_fitted_partial_float64_operator_cdist_rbf(self):
        data = load_iris()
        X = data.data
        y = data.target
        X_train, X_test, y_train, _ = train_test_split(X, y)
        gp = GaussianProcessRegressor(kernel=RBF(), alpha=10.)
        gp.fit(X_train, y_train)

        try:
            to_onnx(gp,
                    initial_types=[('X', FloatTensorType([None, None]))],
                    options={GaussianProcessRegressor: {
                        'optim': 'CDIST'
                    }})
            raise AssertionError("CDIST is not implemented")
        except ValueError:
            pass

        model_onnx = to_onnx(
            gp,
            initial_types=[('X', FloatTensorType([None, None]))],
            options={GaussianProcessRegressor: {
                'optim': 'cdist'
            }})
        self.assertTrue(model_onnx is not None)
        name_save = inspect.currentframe().f_code.co_name + '.onnx'
        with open(name_save, 'wb') as f:
            f.write(model_onnx.SerializeToString())
        try:
            self.check_outputs(gp, model_onnx, X_test.astype(np.float32), {})
        except RuntimeError as e:
            if "CDist is not a registered" in str(e):
                return
        except AssertionError as e:
            assert "Max relative difference:" in str(e)

        model_onnx = to_onnx(gp,
                             initial_types=[('X',
                                             DoubleTensorType([None, None]))])
        self.assertTrue(model_onnx is not None)
        self.check_outputs(gp, model_onnx, X_test, {})

    @ignore_warnings(ConvergenceWarning)
    def test_gpr_fitted_partial_float64_operator_cdist_sine(self):
        data = load_iris()
        X = data.data
        y = data.target
        X_train, X_test, y_train, _ = train_test_split(X, y)
        gp = GaussianProcessRegressor(kernel=ExpSineSquared(), alpha=100.)
        gp.fit(X_train, y_train)

        try:
            to_onnx(gp,
                    initial_types=[('X', FloatTensorType([None, None]))],
                    options={GaussianProcessRegressor: {
                        'optim': 'CDIST'
                    }},
                    target_opset=TARGET_OPSET)
            raise AssertionError("CDIST is not implemented")
        except ValueError:
            pass

        model_onnx = to_onnx(
            gp,
            initial_types=[('X', FloatTensorType([None, None]))],
            options={GaussianProcessRegressor: {
                'optim': 'cdist'
            }},
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        name_save = inspect.currentframe().f_code.co_name + '.onnx'
        with open(name_save, 'wb') as f:
            f.write(model_onnx.SerializeToString())
        try:
            self.check_outputs(gp, model_onnx, X_test.astype(np.float32), {})
        except RuntimeError as e:
            if "CDist is not a registered" in str(e):
                return
        except AssertionError as e:
            assert "Max relative difference:" in str(e)

        model_onnx = to_onnx(gp,
                             initial_types=[('X',
                                             DoubleTensorType([None, None]))],
                             target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        self.check_outputs(gp, model_onnx, X_test, {})

    def test_gpr_fitted_partial_float64_operator_cdist_quad(self):
        data = load_iris()
        X = data.data
        y = data.target
        X_train, X_test, y_train, _ = train_test_split(X, y)
        gp = GaussianProcessRegressor(kernel=RationalQuadratic(), alpha=100.)
        gp.fit(X_train, y_train)

        try:
            to_onnx(gp,
                    initial_types=[('X', FloatTensorType([None, None]))],
                    options={GaussianProcessRegressor: {
                        'optim': 'CDIST'
                    }},
                    target_opset=TARGET_OPSET)
            raise AssertionError("CDIST is not implemented")
        except ValueError:
            pass

        model_onnx = to_onnx(
            gp,
            initial_types=[('X', FloatTensorType([None, None]))],
            options={GaussianProcessRegressor: {
                'optim': 'cdist'
            }},
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        name_save = inspect.currentframe().f_code.co_name + '.onnx'
        with open(name_save, 'wb') as f:
            f.write(model_onnx.SerializeToString())
        try:
            self.check_outputs(gp, model_onnx, X_test.astype(np.float32), {})
        except RuntimeError as e:
            if "CDist is not a registered" in str(e):
                return
        except AssertionError as e:
            assert "Max relative difference:" in str(e)

        model_onnx = to_onnx(gp,
                             initial_types=[('X',
                                             DoubleTensorType([None, None]))],
                             target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx is not None)
        self.check_outputs(gp, model_onnx, X_test, {})
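

# Added note (hedged): only the lowercase 'cdist' value of the ``optim``
# option is accepted above; it rewrites the kernel's pairwise-distance
# subgraph into a single custom CDist node, which is why the runtime may
# later report that CDist "is not a registered" operator. A small helper
# (an addition, not part of the tests) to check whether the optimisation
# took place:
def contains_cdist(model_onnx):
    return any(node.op_type == 'CDist' for node in model_onnx.graph.node)
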

class TestDownloadlinkExtension(ExtTestCase):
    def test_post_parse_sn(self):
        register_canonical_role("downloadlink", process_downloadlink_role)

    def get_name(self):
        this = os.path.dirname(__file__)
        name = "test_rst_builder.py"
        dest = os.path.join(this, name)
        return dest.replace("\\", "/")

    @unittest.skipIf(compare_module_version(sphinx.__version__, '1.8') < 0,
                     reason="DownloadFiles not available in 1.7")
    def test_downloadlink_rst(self):
        name = self.get_name()
        content = """
                    :downloadlink:`rst::http://f.html`
                    :downloadlink:`rst::{0}`
                    :downloadlink:`{0} <rst::{0}>`
                    """.replace("                    ", "").format(name)

        out = rst2html(content,
                       writer="rst",
                       keep_warnings=True,
                       directives=None)

        out = out.replace("\n", " ")
        self.assertNotIn('Unknown interpreted text role', out)
        self.assertIn(':downloadlink:`test_rst_builder.py', out)
        self.assertNotIn("test_rst_builder.py>`test_rst_builder.py", out)
        temp = get_temp_folder(__file__, "temp_downloadlink_rst")
        with open(os.path.join(temp, "out.rst"), "w", encoding="utf8") as f:
            f.write(out)

    @unittest.skipIf(compare_module_version(sphinx.__version__, '1.8') < 0,
                     reason="DownloadFiles not available in 1.7")
    def test_downloadlink_md(self):
        name = self.get_name()
        content = """
                    :downloadlink:`gggg <md::{0}>`
                    """.replace("                    ", "").format(name)

        out = rst2html(content,
                       writer="md",
                       keep_warnings=True,
                       directives=None)

        self.assertIn("test_rst_builder.py", out)
        self.assertNotIn('Unknown interpreted text role', out)
        temp = get_temp_folder(__file__, "temp_downloadlink_rst")
        with open(os.path.join(temp, "out.rst"), "w", encoding="utf8") as f:
            f.write(out)

    @unittest.skipIf(compare_module_version(sphinx.__version__, '1.8') < 0,
                     reason="DownloadFiles not available in 1.7")
    def test_downloadlink_html(self):
        name = self.get_name()
        content = """
                    :downloadlink:`html::{0}`
                    """.replace("                    ", "").format(name)

        out = rst2html(content,
                       writer="html",
                       keep_warnings=True,
                       directives=None)

        self.assertNotIn("Unable to find 'html:test_rst_builder.py'", out)
        self.assertNotIn('Unknown interpreted text role', out)
        self.assertIn("test_rst_builder.py", out)
        temp = get_temp_folder(__file__, "temp_downloadlink_rst")
        with open(os.path.join(temp, "out.rst"), "w", encoding="utf8") as f:
            f.write(out)
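
# Recap of the downloadlink role syntax exercised above (the prefix before
# '::' names the builder the link is rendered for):
#   :downloadlink:`rst::<path>`            -> link text is the file name
#   :downloadlink:`anchor <html::<path>>`  -> explicit anchor text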