Example #1
    def test_default_names(self):
        df = pd.DataFrame({'input': self.x})

        # Test with probabilities
        spec = libsvm.convert(self.libsvm_model).get_spec()
        (_, _, probability_lists) = svm_predict(self.y, self.x,
                                                self.libsvm_model, '-b 1 -q')
        probability_dicts = [
            dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
        ]
        df['classProbability'] = probability_dicts
        metrics = evaluate_classifier_with_probabilities(
            spec, df, verbose=False, probabilities='classProbability')
        self.assertLess(metrics['max_probability_error'], 0.00001)

        # Test model without probabilities
        no_probability_model = svmutil.svm_train(self.prob,
                                                 svmutil.svm_parameter())
        spec = libsvm.convert(no_probability_model).get_spec()
        self.assertEqual(len(spec.description.output), 1)
        self.assertEqual(spec.description.output[0].name, u'target')
        (df['prediction'], _, _) = svm_predict(self.y, self.x,
                                               no_probability_model, ' -q')
        metrics = evaluate_classifier(spec, df, verbose=False)
        self.assertEqual(metrics['num_errors'], 0)
Example #2
    def test_default_names(self):
        df = pd.DataFrame({"input": self.x})
        df["input"] = df["input"].apply(np.array)

        # Test with probabilities
        spec = libsvm.convert(self.libsvm_model).get_spec()
        if _is_macos() and _macos_version() >= (10, 13):
            (_, _, probability_lists) = svm_predict(self.y, self.x,
                                                    self.libsvm_model,
                                                    "-b 1 -q")
            probability_dicts = [
                dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
            ]
            df["classProbability"] = probability_dicts
            metrics = evaluate_classifier_with_probabilities(
                spec, df, verbose=False, probabilities="classProbability")
            self.assertLess(metrics["max_probability_error"], 0.00001)

        # Test model without probabilities
        no_probability_model = svmutil.svm_train(self.prob,
                                                 svmutil.svm_parameter())
        spec = libsvm.convert(no_probability_model).get_spec()
        self.assertEqual(len(spec.description.output), 1)
        self.assertEqual(spec.description.output[0].name, u"target")
        if _is_macos() and _macos_version() >= (10, 13):
            (df["prediction"], _, _) = svm_predict(self.y, self.x,
                                                   no_probability_model, " -q")
            metrics = evaluate_classifier(spec, df, verbose=False)
            self.assertEquals(metrics["num_errors"], 0)
Example #3
    def _evaluation_test_helper_with_probability(self, labels, allow_slow):
        import copy
        df = pd.DataFrame(self.x, columns=self.column_names)
        y = copy.copy(self.y)
        for i, val in enumerate(labels):
            y[i] = val
        probability_param = '-b 1'

        for param1 in self.non_kernel_parameters:
            for param2 in self.kernel_parameters:
                param_str = ' '.join([self.base_param, param1, param2, probability_param])
                # print("PARAMS: ", param_str)
                param = svm_parameter(param_str)

                model = svm_train(self.prob, param)

                # Get predictions with probabilities as dictionaries
                (df['prediction'], _, probability_lists) = svm_predict(y, self.x, model, probability_param + ' -q')
                probability_dicts = [dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists]
                df['probabilities'] = probability_dicts

                spec = libsvm.convert(model, self.column_names, 'target', 'probabilities')

                if macos_version() >= (10, 13):
                    metrics = evaluate_classifier_with_probabilities(spec, df, verbose=False)
                    self.assertEqual(metrics['num_key_mismatch'], 0)
                    self.assertLess(metrics['max_probability_error'], 0.00001)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #4
    def _conversion_and_evaluation_helper_for_logistic_regression(self, class_labels):
        options = {
            'C': (0.1, 1., 2.),
            'fit_intercept': (True, False),
            'class_weight': ('balanced', None),
            'solver': ('newton-cg', 'lbfgs', 'liblinear', 'sag'),
        }

        # Generate a list of all combinations of options and the default parameters
        product = itertools.product(*options.values())
        args = [{}] + [dict(zip(options.keys(), p)) for p in product]

        x, y = GlmCassifierTest._generate_random_data(class_labels)
        column_names = ['x1', 'x2']
        df = pd.DataFrame(x, columns=column_names)

        for cur_args in args:
            print(class_labels, cur_args)
            cur_model = LogisticRegression(**cur_args)
            cur_model.fit(x, y)

            spec = convert(cur_model, input_features=column_names,
                           output_feature_names='target')

            if is_macos() and macos_version() >= (10, 13):
                probability_lists = cur_model.predict_proba(x)
                df['classProbability'] = [dict(zip(cur_model.classes_, cur_vals)) for cur_vals in probability_lists]

                metrics = evaluate_classifier_with_probabilities(spec, df, probabilities='classProbability', verbose=False)
                self.assertEqual(metrics['num_key_mismatch'], 0)
                self.assertLess(metrics['max_probability_error'], 0.00001)
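
The argument grid in the helper above is built by pairing itertools.product over the option values with dict(zip(...)) over the option keys, plus an empty dict for the default parameters. A minimal standalone sketch of that expansion, using a smaller, hypothetical options dict:

    import itertools

    # Hypothetical, reduced option grid; the real helper uses the options dict above.
    options = {'C': (0.1, 1.0), 'fit_intercept': (True, False)}
    product = itertools.product(*options.values())
    args = [{}] + [dict(zip(options.keys(), p)) for p in product]
    # args == [{},                                  # default parameters
    #          {'C': 0.1, 'fit_intercept': True},
    #          {'C': 0.1, 'fit_intercept': False},
    #          {'C': 1.0, 'fit_intercept': True},
    #          {'C': 1.0, 'fit_intercept': False}]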
Example #5
    def _test_prob_model(self, param1, param2):
        probability_param = '-b 1'
        df = self.df

        param_str = ' '.join(
            [self.base_param, param1, param2, probability_param])
        param = svm_parameter(param_str)

        model = svm_train(self.prob, param)

        # Get predictions with probabilities as dictionaries
        (df['prediction'], _,
         probability_lists) = svm_predict(self.y, self.x, model,
                                          probability_param + ' -q')
        probability_dicts = [
            dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
        ]
        df['probabilities'] = probability_dicts

        spec = libsvm.convert(model, self.column_names, 'target',
                              'probabilities')

        if macos_version() >= (10, 13):
            metrics = evaluate_classifier_with_probabilities(spec,
                                                             df,
                                                             verbose=False)
            self.assertEqual(metrics['num_key_mismatch'], 0)
            self.assertLess(metrics['max_probability_error'], 0.00001)
Example #6
    def _train_convert_evaluate_assert(self, **xgboost_params):
        """
        Train a scikit-learn model, convert it and then evaluate it with CoreML
        """
        xgb_model = xgboost.XGBClassifier(**xgboost_params)
        xgb_model.fit(self.X, self.target)

        # Convert the model
        spec = xgb_converter.convert(xgb_model,
                                     self.feature_names,
                                     self.output_name,
                                     mode="classifier")

        if _is_macos() and _macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(self.X, columns=self.feature_names)
            probabilities = xgb_model.predict_proba(self.X)
            df["classProbability"] = [
                dict(zip(xgb_model.classes_, cur_vals))
                for cur_vals in probabilities
            ]
            metrics = evaluate_classifier_with_probabilities(
                spec, df, probabilities="classProbability", verbose=False)
            self.assertEqual(metrics["num_key_mismatch"], 0)
            self.assertLess(metrics["max_probability_error"], 1e-3)
Example #7
    def _evaluation_test_helper(self, class_labels, use_probability_estimates,
            allow_slow, allowed_prob_delta=0.00001):
        # Parameters to test
        kernel_parameters = [
            {},
            {'kernel': 'rbf', 'gamma': 1.2},
            {'kernel': 'linear'},
            {'kernel': 'poly'}, {'kernel': 'poly', 'degree': 2}, {'kernel': 'poly', 'gamma': 0.75},
            {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0': 2},
            {'kernel': 'sigmoid'}, {'kernel': 'sigmoid', 'gamma': 1.3}, {'kernel': 'sigmoid', 'coef0': 0.8},
            {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5},
        ]
        non_kernel_parameters = [{}, {'C': 1}, {'C': 1.5, 'shrinking': True}, {'C': 0.5, 'shrinking': False}]

        # Generate some random data
        x, y = [], []
        random.seed(42)
        for _ in range(50):
            x.append([random.gauss(200, 30), random.gauss(-100, 22),
                      random.gauss(100, 42)])
            y.append(random.choice(class_labels))
        column_names = ['x1', 'x2', 'x3']
        # make sure first label is seen first, second is seen second, and so on.
        for i, val in enumerate(class_labels):
            y[i] = val
        df = pd.DataFrame(x, columns=column_names)

        # Test
        for param1 in non_kernel_parameters:
            for param2 in kernel_parameters:
                cur_params = param1.copy()
                cur_params.update(param2)
                cur_params['probability'] = use_probability_estimates
                cur_params['max_iter'] = 10   # Don't want test to take too long
                print("cur_params=" + str(cur_params))

                cur_model = SVC(**cur_params)
                cur_model.fit(x, y)

                spec = scikit_converter.convert(cur_model, column_names, 'target')

                if macos_version() >= (10, 13):
                    if use_probability_estimates:
                        probability_lists = cur_model.predict_proba(x)
                        df['classProbability'] = [dict(zip(cur_model.classes_, cur_vals)) for cur_vals in probability_lists]
                        metrics = evaluate_classifier_with_probabilities(spec, df, probabilities='classProbability', verbose=True)
                        self.assertEqual(metrics['num_key_mismatch'], 0)
                        self.assertLess(metrics['max_probability_error'], allowed_prob_delta)
                    else:
                        df['prediction'] = cur_model.predict(x)
                        metrics = evaluate_classifier(spec, df, verbose=False)
                        self.assertEqual(metrics['num_errors'], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #8
    def _test_prob_model(self, param1, param2):
        probability_param = "-b 1"
        df = self.df

        param_str = " ".join([self.base_param, param1, param2, probability_param])
        param = svmutil.svm_parameter(param_str)
        model = svm_train(self.prob, param)

        # Get predictions with probabilities as dictionaries
        (df["prediction"], _, probability_lists) = svm_predict(
            self.y, self.x, model, probability_param + " -q"
        )
        probability_dicts = [
            dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
        ]
        df["probabilities"] = probability_dicts

        spec = libsvm.convert(model, self.column_names, "target", "probabilities")

        if _is_macos() and _macos_version() >= (10, 13):
            metrics = evaluate_classifier_with_probabilities(spec, df, verbose=False)
            self.assertEqual(metrics["num_key_mismatch"], 0)
            self.assertLess(metrics["max_probability_error"], 0.00001)
Example #9
    def _conversion_and_evaluation_helper_for_logistic_regression(
            self, class_labels):
        options = {
            "C": (0.1, 1.0, 2.0),
            "fit_intercept": (True, False),
            "class_weight": ("balanced", None),
            "solver": ("newton-cg", "lbfgs", "liblinear", "sag"),
        }

        # Generate a list of all combinations of options and the default parameters
        product = itertools.product(*options.values())
        args = [{}] + [dict(zip(options.keys(), p)) for p in product]

        x, y = GlmCassifierTest._generate_random_data(class_labels)
        column_names = ["x1", "x2"]
        df = pd.DataFrame(x, columns=column_names)

        for cur_args in args:
            print(class_labels, cur_args)
            cur_model = LogisticRegression(**cur_args)
            cur_model.fit(x, y)

            spec = convert(cur_model,
                           input_features=column_names,
                           output_feature_names="target")

            if _is_macos() and _macos_version() >= (10, 13):
                probability_lists = cur_model.predict_proba(x)
                df["classProbability"] = [
                    dict(zip(cur_model.classes_, cur_vals))
                    for cur_vals in probability_lists
                ]

                metrics = evaluate_classifier_with_probabilities(
                    spec, df, probabilities="classProbability", verbose=False)
                self.assertEquals(metrics["num_key_mismatch"], 0)
                self.assertLess(metrics["max_probability_error"], 0.00001)
Example #10
    def _evaluation_test_helper(
        self,
        class_labels,
        use_probability_estimates,
        allow_slow,
        allowed_prob_delta=0.00001,
    ):
        # Parameters to test
        kernel_parameters = [
            {},
            {"kernel": "rbf", "gamma": 1.2},
            {"kernel": "linear"},
            {"kernel": "poly"},
            {"kernel": "poly", "degree": 2},
            {"kernel": "poly", "gamma": 0.75},
            {"kernel": "poly", "degree": 0, "gamma": 0.9, "coef0": 2},
            {"kernel": "sigmoid"},
            {"kernel": "sigmoid", "gamma": 1.3},
            {"kernel": "sigmoid", "coef0": 0.8},
            {"kernel": "sigmoid", "coef0": 0.8, "gamma": 0.5},
        ]
        non_kernel_parameters = [
            {},
            {"C": 1},
            {"C": 1.5, "shrinking": True},
            {"C": 0.5, "shrinking": False},
        ]

        # Generate some random data
        x, y = [], []
        random.seed(42)
        for _ in range(50):
            x.append([
                random.gauss(200, 30),
                random.gauss(-100, 22),
                random.gauss(100, 42)
            ])
            y.append(random.choice(class_labels))
        column_names = ["x1", "x2", "x3"]
        # make sure first label is seen first, second is seen second, and so on.
        for i, val in enumerate(class_labels):
            y[i] = val
        df = pd.DataFrame(x, columns=column_names)

        # Test
        for param1 in non_kernel_parameters:
            for param2 in kernel_parameters:
                cur_params = param1.copy()
                cur_params.update(param2)
                cur_params["probability"] = use_probability_estimates
                cur_params["max_iter"] = 10  # Don't want test to take too long
                print("cur_params=" + str(cur_params))

                cur_model = SVC(**cur_params)
                cur_model.fit(x, y)

                spec = scikit_converter.convert(cur_model, column_names,
                                                "target")

                if _is_macos() and _macos_version() >= (10, 13):
                    if use_probability_estimates:
                        probability_lists = cur_model.predict_proba(x)
                        df["classProbability"] = [
                            dict(zip(cur_model.classes_, cur_vals))
                            for cur_vals in probability_lists
                        ]
                        metrics = evaluate_classifier_with_probabilities(
                            spec,
                            df,
                            probabilities="classProbability",
                            verbose=True)
                        self.assertEquals(metrics["num_key_mismatch"], 0)
                        self.assertLess(metrics["max_probability_error"],
                                        allowed_prob_delta)
                    else:
                        df["prediction"] = cur_model.predict(x)
                        metrics = evaluate_classifier(spec, df, verbose=False)
                        self.assertEquals(metrics["num_errors"], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break