def _train_convert_evaluate_assert(self, **scikit_params):
        """
        Train a scikit-learn model, convert it and then evaluate it with CoreML
        """
        scikit_model = GradientBoostingClassifier(random_state=1, **scikit_params)
        scikit_model.fit(self.X, self.target)

        # Convert the model
        spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

        if hasattr(scikit_model, '_init_decision_function') and scikit_model.n_classes_ > 2:
            import numpy as np
            # fix initial default prediction for multiclass classification
            # https://github.com/scikit-learn/scikit-learn/pull/12983
            if not hasattr(scikit_model, 'init_'):
                raise AssertionError
            if not hasattr(scikit_model.init_, 'priors'):
                raise AssertionError
            scikit_model.init_.priors = np.log(scikit_model.init_.priors)

        if _is_macos() and _macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(self.X, columns=self.feature_names)
            df["prediction"] = scikit_model.predict(self.X)

            # Evaluate it
            metrics = evaluate_classifier(spec, df)
            self._check_metrics(metrics)
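The branch above works around the change to GradientBoostingClassifier's initial estimator in scikit-learn PR #12983: for multiclass models the fitted init_.priors hold raw class priors, and the test moves them onto the log scale before comparing predictions against the converted model. A minimal numeric illustration of that transform (values are illustrative, not tied to the test fixtures):

import numpy as np

# Three balanced classes: raw priors ~[0.333, 0.333, 0.333];
# np.log() maps them to ~[-1.0986, -1.0986, -1.0986].
priors = np.array([1.0 / 3, 1.0 / 3, 1.0 / 3])
log_priors = np.log(priors)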
Example #2
    def test_default_names(self):
        df = pd.DataFrame({'input': self.x})

        # Test with probabilities
        spec = libsvm.convert(self.libsvm_model).get_spec()
        (_, _, probability_lists) = svm_predict(self.y, self.x,
                                                self.libsvm_model, '-b 1 -q')
        probability_dicts = [
            dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
        ]
        df['classProbability'] = probability_dicts
        metrics = evaluate_classifier_with_probabilities(
            spec, df, verbose=False, probabilities='classProbability')
        self.assertLess(metrics['max_probability_error'], 0.00001)

        # Test model without probabilities
        no_probability_model = svmutil.svm_train(self.prob,
                                                 svmutil.svm_parameter())
        spec = libsvm.convert(no_probability_model).get_spec()
        self.assertEqual(len(spec.description.output), 1)
        self.assertEqual(spec.description.output[0].name, u'target')
        (df['prediction'], _, _) = svm_predict(self.y, self.x,
                                               no_probability_model, ' -q')
        metrics = evaluate_classifier(spec, df, verbose=False)
        self.assertEqual(metrics['num_errors'], 0)
Example #3
    def test_default_names(self):
        df = pd.DataFrame({"input": self.x})
        df["input"] = df["input"].apply(np.array)

        # Test with probabilities
        spec = libsvm.convert(self.libsvm_model).get_spec()
        if _is_macos() and _macos_version() >= (10, 13):
            (_, _, probability_lists) = svm_predict(self.y, self.x,
                                                    self.libsvm_model,
                                                    "-b 1 -q")
            probability_dicts = [
                dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
            ]
            df["classProbability"] = probability_dicts
            metrics = evaluate_classifier_with_probabilities(
                spec, df, verbose=False, probabilities="classProbability")
            self.assertLess(metrics["max_probability_error"], 0.00001)

        # Test model without probabilities
        no_probability_model = svmutil.svm_train(self.prob,
                                                 svmutil.svm_parameter())
        spec = libsvm.convert(no_probability_model).get_spec()
        self.assertEqual(len(spec.description.output), 1)
        self.assertEqual(spec.description.output[0].name, u"target")
        if _is_macos() and _macos_version() >= (10, 13):
            (df["prediction"], _, _) = svm_predict(self.y, self.x,
                                                   no_probability_model, " -q")
            metrics = evaluate_classifier(spec, df, verbose=False)
            self.assertEqual(metrics["num_errors"], 0)
Example #4
    def _conversion_and_evaluation_helper_for_linear_svc(self, class_labels):
        ARGS = [
            {},
            {'C': 0.75, 'loss': 'hinge'},
            {'penalty': 'l1', 'dual': False},
            {'tol': 0.001, 'fit_intercept': False},
            {'intercept_scaling': 1.5},
        ]

        x, y = GlmCassifierTest._generate_random_data(class_labels)
        column_names = ['x1', 'x2']
        df = pd.DataFrame(x, columns=column_names)
        
        for cur_args in ARGS:
            print(class_labels, cur_args)
            cur_model = LinearSVC(**cur_args)
            cur_model.fit(x, y)

            spec = convert(cur_model, input_features=column_names,
                           output_feature_names='target')

            if _is_macos() and _macos_version() >= (10, 13):
                df['prediction'] = cur_model.predict(x)

                cur_eval_metrics = evaluate_classifier(spec, df, verbose=False)
                self.assertEqual(cur_eval_metrics['num_errors'], 0)
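In the coremltools test suite this helper is driven by small wrapper tests, one per label configuration. A hedged sketch of such callers (the method names and label sets here are illustrative, not taken from this page):

    def test_binary_int_labels(self):
        # Illustrative caller: two integer class labels.
        self._conversion_and_evaluation_helper_for_linear_svc([1, 2])

    def test_multiclass_string_labels(self):
        # Illustrative caller: three string class labels.
        self._conversion_and_evaluation_helper_for_linear_svc(["a", "b", "c"])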
Example #5
    def test_int_features_in_pipeline(self):

        import numpy.random as rn
        import pandas as pd

        rn.seed(0)

        x_train_dict = [
            dict((rn.randint(100), 1) for i in range(20)) for j in range(100)
        ]
        y_train = [0, 1] * 50

        from sklearn.pipeline import Pipeline
        from sklearn.feature_extraction import DictVectorizer
        from sklearn.linear_model import LogisticRegression

        pl = Pipeline([("dv", DictVectorizer()), ("lm", LogisticRegression())])
        pl.fit(x_train_dict, y_train)

        import coremltools

        model = coremltools.converters.sklearn.convert(
            pl, input_features="features", output_feature_names="target")

        if _is_macos() and _macos_version() >= (10, 13):
            x = pd.DataFrame({
                "features": x_train_dict,
                "prediction": pl.predict(x_train_dict)
            })

            cur_eval_metrics = evaluate_classifier(model, x)
            self.assertEqual(cur_eval_metrics["num_errors"], 0)
Example #6
    def test_multi_class_without_probability(self):
        # Generate some random data.
        # This unit test should not rely on scikit learn for test data.
        x, y = [], []
        for _ in range(50):
            x.append([
                random.gauss(200, 30),
                random.gauss(-100, 22),
                random.gauss(100, 42)
            ])
            y.append(random.choice([1, 2, 10, 12]))
        y[0], y[1], y[2], y[3] = 1, 2, 10, 12
        column_names = ['x1', 'x2', 'x3']
        prob = svmutil.svm_problem(y, x)

        df = pd.DataFrame(x, columns=column_names)

        for param1 in self.non_kernel_parameters:
            for param2 in self.kernel_parameters:
                param_str = ' '.join([self.base_param, param1, param2])
                param = svm_parameter(param_str)

                model = svm_train(prob, param)

                # Get predictions with probabilities as dictionaries
                (df['prediction'], _, _) = svm_predict(y, x, model, ' -q')

                spec = libsvm.convert(model, column_names, 'target')

                metrics = evaluate_classifier(spec, df, verbose=False)
                self.assertEqual(metrics['num_errors'], 0)
Example #7
    def _conversion_and_evaluation_helper_for_linear_svc(self, class_labels):
        ARGS = [
            {},
            {"C": 0.75, "loss": "hinge"},
            {"penalty": "l1", "dual": False},
            {"tol": 0.001, "fit_intercept": False},
            {"intercept_scaling": 1.5},
        ]

        x, y = GlmCassifierTest._generate_random_data(class_labels)
        column_names = ["x1", "x2"]
        df = pd.DataFrame(x, columns=column_names)

        for cur_args in ARGS:
            print(class_labels, cur_args)
            cur_model = LinearSVC(**cur_args)
            cur_model.fit(x, y)

            spec = convert(
                cur_model, input_features=column_names, output_feature_names="target"
            )

            if _is_macos() and _macos_version() >= (10, 13):
                df["prediction"] = cur_model.predict(x)

                cur_eval_metrics = evaluate_classifier(spec, df, verbose=False)
                self.assertEqual(cur_eval_metrics["num_errors"], 0)
Example #8
    def _evaluation_test_helper(self, class_labels, use_probability_estimates,
            allow_slow, allowed_prob_delta=0.00001):
        # Parameters to test
        kernel_parameters = [
            {},
            {'kernel': 'rbf', 'gamma': 1.2},
            {'kernel': 'linear'},
            {'kernel': 'poly'},
            {'kernel': 'poly', 'degree': 2},
            {'kernel': 'poly', 'gamma': 0.75},
            {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0': 2},
            {'kernel': 'sigmoid'},
            {'kernel': 'sigmoid', 'gamma': 1.3},
            {'kernel': 'sigmoid', 'coef0': 0.8},
            {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5},
        ]
        non_kernel_parameters = [
            {},
            {'C': 1},
            {'C': 1.5, 'shrinking': True},
            {'C': 0.5, 'shrinking': False},
        ]

        # Generate some random data
        x, y = [], []
        random.seed(42)
        for _ in range(50):
            x.append([random.gauss(200, 30), random.gauss(-100, 22), random.gauss(100, 42)])
            y.append(random.choice(class_labels))
        column_names = ['x1', 'x2', 'x3']
        # make sure first label is seen first, second is seen second, and so on.
        for i, val in enumerate(class_labels):
            y[i] = val
        df = pd.DataFrame(x, columns=column_names)

        # Test
        for param1 in non_kernel_parameters:
            for param2 in kernel_parameters:
                cur_params = param1.copy()
                cur_params.update(param2)
                cur_params['probability'] = use_probability_estimates
                cur_params['max_iter'] = 10   # Don't want test to take too long
                print("cur_params=" + str(cur_params))

                cur_model = SVC(**cur_params)
                cur_model.fit(x, y)

                spec = scikit_converter.convert(cur_model, column_names, 'target')

                if macos_version() >= (10, 13):
                    if use_probability_estimates:
                        probability_lists = cur_model.predict_proba(x)
                        df['classProbability'] = [dict(zip(cur_model.classes_, cur_vals)) for cur_vals in probability_lists]
                        metrics = evaluate_classifier_with_probabilities(spec, df, probabilities='classProbability', verbose=True)
                        self.assertEqual(metrics['num_key_mismatch'], 0)
                        self.assertLess(metrics['max_probability_error'], allowed_prob_delta)
                    else:
                        df['prediction'] = cur_model.predict(x)
                        metrics = evaluate_classifier(spec, df, verbose=False)
                        self.assertEqual(metrics['num_errors'], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #9
    def _train_convert_evaluate(self, **scikit_params):
        scikit_model = DecisionTreeClassifier(random_state=1, **scikit_params)
        scikit_model.fit(self.X, self.target)

        # Convert the model
        spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)
        
        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df['prediction'] = scikit_model.predict(self.X)
        
        # Evaluate it
        metrics = evaluate_classifier(spec, df)
        return metrics
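Unlike the _train_convert_evaluate_assert variants elsewhere on this page, this helper returns the metrics dict and leaves the assertion to the caller. A hedged sketch of a possible caller (the test name and parameters are illustrative):

    def test_simple_decision_tree(self):
        # Illustrative caller: expect a perfect round trip for a shallow tree.
        metrics = self._train_convert_evaluate(max_depth=2)
        self.assertEqual(metrics["num_errors"], 0)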
Example #10
    def _train_convert_evaluate_assert(self, **scikit_params):
        scikit_model = RandomForestClassifier(random_state=1, **scikit_params)
        scikit_model.fit(self.X, self.target)

        # Convert the model
        spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

        if _is_macos() and _macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(self.X, columns=self.feature_names)
            df["prediction"] = scikit_model.predict(self.X)

            # Evaluate it
            metrics = evaluate_classifier(spec, df, verbose=False)
            self._check_metrics(metrics, scikit_params)
Example #11
    def _train_convert_evaluate(self, **scikit_params):
        """
        Train a scikit-learn model, convert it and then evaluate it with CoreML
        """
        scikit_model = GradientBoostingClassifier(random_state=1, **scikit_params)
        scikit_model.fit(self.X, self.target)

        # Convert the model
        spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df['prediction'] = scikit_model.predict(self.X)

        # Evaluate it
        metrics = evaluate_classifier(spec, df)
        return metrics
Example #12
    def _train_convert_evaluate_assert(self, **xgboost_params):
        """
        Train an XGBoost model, convert it and then evaluate it with CoreML
        """
        xgb_model = xgboost.XGBClassifier(**xgboost_params)
        xgb_model.fit(self.X, self.target)

        # Convert the model
        spec = xgb_converter.convert(xgb_model, self.feature_names, self.output_name, mode="classifier")

        if macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(self.X, columns=self.feature_names)
            df['prediction'] = xgb_model.predict(self.X)

            # Evaluate it
            metrics = evaluate_classifier(spec, df)
            self._check_metrics(metrics)
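The _train_convert_evaluate_assert helpers on this page reference self.X, self.target, self.feature_names, and self.output_name without defining them; those come from the test class's fixture. A minimal sketch of such a fixture, assuming a tiny synthetic binary dataset (the data the real tests use is not shown on this page):

    @classmethod
    def setUpClass(cls):
        # Hypothetical fixture (not from this page): a small synthetic
        # binary classification problem standing in for the real dataset.
        import numpy as np
        np.random.seed(0)
        cls.X = np.random.rand(100, 3)
        cls.target = (cls.X[:, 0] + cls.X[:, 1] > 1.0).astype(int)
        cls.feature_names = ["x1", "x2", "x3"]
        cls.output_name = "target"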
Example #13
    def _evaluation_test_helper_no_probability(self, labels, allow_slow):
        # Generate some random data.
        # This unit test should not rely on scikit learn for test data.
        x, y = [], []
        random.seed(42)
        for _ in range(50):
            x.append([
                random.gauss(200, 30),
                random.gauss(-100, 22),
                random.gauss(100, 42)
            ])
            y.append(random.choice(labels))
        # make sure first label is seen first, second is seen second, and so on.
        for i, val in enumerate(labels):
            y[i] = val
        column_names = ["x1", "x2", "x3"]
        prob = svmutil.svm_problem(y, x)

        df = pd.DataFrame(x, columns=column_names)

        for param1 in self.non_kernel_parameters:
            for param2 in self.kernel_parameters:
                param_str = " ".join([self.base_param, param1, param2])
                print("PARAMS: ", param_str)
                param = svm_parameter(param_str)

                model = svm_train(prob, param)

                # Get predictions with probabilities as dictionaries
                (df["prediction"], _, _) = svm_predict(y, x, model, " -q")

                spec = libsvm.convert(model, column_names, "target")

                if _is_macos() and _macos_version() >= (10, 13):
                    metrics = evaluate_classifier(spec, df, verbose=False)
                    self.assertEquals(metrics["num_errors"], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #14
    def _train_convert_evaluate_assert(self, **scikit_params):
        """
        Train a scikit-learn model, convert it and then evaluate it with CoreML
        """
        scikit_model = GradientBoostingClassifier(random_state=1,
                                                  **scikit_params)
        scikit_model.fit(self.X, self.target)

        # Convert the model
        spec = skl_converter.convert(scikit_model, self.feature_names,
                                     self.output_name)

        if _is_macos() and _macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(self.X, columns=self.feature_names)
            df["prediction"] = scikit_model.predict(self.X)

            # Evaluate it
            metrics = evaluate_classifier(spec, df)
            self._check_metrics(metrics)
Example #15
    def _evaluation_test_helper(
        self,
        class_labels,
        use_probability_estimates,
        allow_slow,
        allowed_prob_delta=0.00001,
    ):
        # Parameters to test
        kernel_parameters = [
            {},
            {"kernel": "rbf", "gamma": 1.2},
            {"kernel": "linear"},
            {"kernel": "poly"},
            {"kernel": "poly", "degree": 2},
            {"kernel": "poly", "gamma": 0.75},
            {"kernel": "poly", "degree": 0, "gamma": 0.9, "coef0": 2},
            {"kernel": "sigmoid"},
            {"kernel": "sigmoid", "gamma": 1.3},
            {"kernel": "sigmoid", "coef0": 0.8},
            {"kernel": "sigmoid", "coef0": 0.8, "gamma": 0.5},
        ]
        non_kernel_parameters = [
            {},
            {"C": 1},
            {"C": 1.5, "shrinking": True},
            {"C": 0.5, "shrinking": False},
        ]

        # Generate some random data
        x, y = [], []
        random.seed(42)
        for _ in range(50):
            x.append([
                random.gauss(200, 30),
                random.gauss(-100, 22),
                random.gauss(100, 42)
            ])
            y.append(random.choice(class_labels))
        column_names = ["x1", "x2", "x3"]
        # make sure first label is seen first, second is seen second, and so on.
        for i, val in enumerate(class_labels):
            y[i] = val
        df = pd.DataFrame(x, columns=column_names)

        # Test
        for param1 in non_kernel_parameters:
            for param2 in kernel_parameters:
                cur_params = param1.copy()
                cur_params.update(param2)
                cur_params["probability"] = use_probability_estimates
                cur_params["max_iter"] = 10  # Don't want test to take too long
                print("cur_params=" + str(cur_params))

                cur_model = SVC(**cur_params)
                cur_model.fit(x, y)

                spec = scikit_converter.convert(cur_model, column_names,
                                                "target")

                if _is_macos() and _macos_version() >= (10, 13):
                    if use_probability_estimates:
                        probability_lists = cur_model.predict_proba(x)
                        df["classProbability"] = [
                            dict(zip(cur_model.classes_, cur_vals))
                            for cur_vals in probability_lists
                        ]
                        metrics = evaluate_classifier_with_probabilities(
                            spec,
                            df,
                            probabilities="classProbability",
                            verbose=True)
                        self.assertEquals(metrics["num_key_mismatch"], 0)
                        self.assertLess(metrics["max_probability_error"],
                                        allowed_prob_delta)
                    else:
                        df["prediction"] = cur_model.predict(x)
                        metrics = evaluate_classifier(spec, df, verbose=False)
                        self.assertEquals(metrics["num_errors"], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
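A hedged sketch of how this helper might be driven (method names and label sets are illustrative, not taken from this page): with use_probability_estimates=True the class-probability dictionaries are checked against allowed_prob_delta, otherwise only the hard predictions are compared.

    def test_binary_with_probabilities(self):
        # Illustrative caller: two integer labels, probability comparison.
        self._evaluation_test_helper([0, 1], True, allow_slow=False)

    def test_multiclass_without_probabilities(self):
        # Illustrative caller: three string labels, prediction comparison only.
        self._evaluation_test_helper(["a", "b", "c"], False, allow_slow=False)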