Example #1
    def test_default_names(self):
        df = pd.DataFrame({'input': self.x})

        # Test with probabilities
        spec = libsvm.convert(self.libsvm_model).get_spec()
        (_, _, probability_lists) = svm_predict(self.y, self.x,
                                                self.libsvm_model, '-b 1 -q')
        probability_dicts = [
            dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
        ]
        df['classProbability'] = probability_dicts
        metrics = evaluate_classifier_with_probabilities(
            spec, df, verbose=False, probabilities='classProbability')
        self.assertLess(metrics['max_probability_error'], 0.00001)

        # Test model without probabilities
        no_probability_model = svmutil.svm_train(self.prob,
                                                 svmutil.svm_parameter())
        spec = libsvm.convert(no_probability_model).get_spec()
        self.assertEqual(len(spec.description.output), 1)
        self.assertEqual(spec.description.output[0].name, u'target')
        (df['prediction'], _, _) = svm_predict(self.y, self.x,
                                               no_probability_model, ' -q')
        metrics = evaluate_classifier(spec, df, verbose=False)
        self.assertEqual(metrics['num_errors'], 0)
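Note: `dict(zip([1, 2], cur_vals))` hard-codes the two class labels. A minimal label-agnostic sketch, assuming the standard libsvm Python bindings, where `svm_model.get_labels()` returns the labels in the same order as `svm_predict`'s probability columns:

    # Sketch: build the probability dictionaries from the model's own labels
    # (assumption: get_labels() order matches the '-b 1' output columns).
    labels = self.libsvm_model.get_labels()
    probability_dicts = [
        dict(zip(labels, cur_vals)) for cur_vals in probability_lists
    ]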
Example #2
    def test_default_names(self):
        df = pd.DataFrame({"input": self.x})
        df["input"] = df["input"].apply(np.array)

        # Test with probabilities
        spec = libsvm.convert(self.libsvm_model).get_spec()
        if _is_macos() and _macos_version() >= (10, 13):
            (_, _, probability_lists) = svm_predict(self.y, self.x,
                                                    self.libsvm_model,
                                                    "-b 1 -q")
            probability_dicts = [
                dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
            ]
            df["classProbability"] = probability_dicts
            metrics = evaluate_classifier_with_probabilities(
                spec, df, verbose=False, probabilities="classProbability")
            self.assertLess(metrics["max_probability_error"], 0.00001)

        # Test model without probabilities
        no_probability_model = svmutil.svm_train(self.prob,
                                                 svmutil.svm_parameter())
        spec = libsvm.convert(no_probability_model).get_spec()
        self.assertEqual(len(spec.description.output), 1)
        self.assertEqual(spec.description.output[0].name, u"target")
        if _is_macos() and _macos_version() >= (10, 13):
            (df["prediction"], _, _) = svm_predict(self.y, self.x,
                                                   no_probability_model, " -q")
            metrics = evaluate_classifier(spec, df, verbose=False)
            self.assertEqual(metrics["num_errors"], 0)
Example #3
    def _evaluation_test_helper_with_probability(self, labels, allow_slow):
        import copy
        df = pd.DataFrame(self.x, columns=self.column_names)
        y = copy.copy(self.y)
        for i, val in enumerate(labels):
            y[i] = val
        probability_param = '-b 1'

        for param1 in self.non_kernel_parameters:
            for param2 in self.kernel_parameters:
                param_str = ' '.join([self.base_param, param1, param2, probability_param])
                # print("PARAMS: ", param_str)
                param = svm_parameter(param_str)

                model = svm_train(self.prob, param)

                # Get predictions with probabilities as dictionaries
                (df['prediction'], _, probability_lists) = svm_predict(y, self.x, model, probability_param + ' -q')
                probability_dicts = [dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists]
                df['probabilities'] = probability_dicts

                spec = libsvm.convert(model, self.column_names, 'target', 'probabilities')

                if macos_version() >= (10, 13):
                    metrics = evaluate_classifier_with_probabilities(spec, df, verbose=False)
                    self.assertEqual(metrics['num_key_mismatch'], 0)
                    self.assertLess(metrics['max_probability_error'], 0.00001)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #4
    @classmethod
    def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        if not HAS_SKLEARN:
            return
        if not HAS_LIBSVM:
            return

        scikit_data = load_boston()
        prob = svmutil.svm_problem(
            scikit_data['target'] > scikit_data['target'].mean(),
            scikit_data['data'].tolist())
        param = svmutil.svm_parameter()
        param.svm_type = svmutil.C_SVC
        param.kernel_type = svmutil.LINEAR
        param.eps = 1

        libsvm_model = svmutil.svm_train(prob, param)
        libsvm_spec = libsvm_converter.convert(libsvm_model,
                                               scikit_data.feature_names,
                                               'target').get_spec()

        # Save the data and the model
        self.scikit_data = scikit_data
        self.libsvm_spec = libsvm_spec
Example #5
    def test_multi_class_without_probability(self):
        # Generate some random data.
        # This unit test should not rely on scikit learn for test data.
        x, y = [], []
        for _ in range(50):
            x.append([
                random.gauss(200, 30),
                random.gauss(-100, 22),
                random.gauss(100, 42)
            ])
            y.append(random.choice([1, 2, 10, 12]))
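        # Make sure each label is seen at least once, in order.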
        y[0], y[1], y[2], y[3] = 1, 2, 10, 12
        column_names = ['x1', 'x2', 'x3']
        prob = svmutil.svm_problem(y, x)

        df = pd.DataFrame(x, columns=column_names)

        for param1 in self.non_kernel_parameters:
            for param2 in self.kernel_parameters:
                param_str = ' '.join([self.base_param, param1, param2])
                param = svm_parameter(param_str)

                model = svm_train(prob, param)

                # Get predictions (this model has no probability estimates)
                (df['prediction'], _, _) = svm_predict(y, x, model, ' -q')

                spec = libsvm.convert(model, column_names, 'target')

                metrics = evaluate_classifier(spec, df, verbose=False)
                self.assertEqual(metrics['num_errors'], 0)
Example #6
    def _test_prob_model(self, param1, param2):
        probability_param = '-b 1'
        df = self.df

        param_str = ' '.join(
            [self.base_param, param1, param2, probability_param])
        param = svm_parameter(param_str)

        model = svm_train(self.prob, param)

        # Get predictions with probabilities as dictionaries
        (df['prediction'], _,
         probability_lists) = svm_predict(self.y, self.x, model,
                                          probability_param + ' -q')
        probability_dicts = [
            dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
        ]
        df['probabilities'] = probability_dicts

        spec = libsvm.convert(model, self.column_names, 'target',
                              'probabilities')

        if macos_version() >= (10, 13):
            metrics = evaluate_classifier_with_probabilities(spec,
                                                             df,
                                                             verbose=False)
            self.assertEqual(metrics['num_key_mismatch'], 0)
            self.assertLess(metrics['max_probability_error'], 0.00001)
Example #7
    def _test_evaluation(self, allow_slow):
        """
        Test that the converted model makes the same predictions as libsvm.
        """
        from svm import svm_parameter, svm_problem
        from svmutil import svm_train, svm_predict

        # Generate some smallish (poly kernels take too long on anything else) random data
        x, y = [], []
        for _ in range(50):
            cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
            x.append([cur_x1, cur_x2])
            y.append(1 + 2 * cur_x1 + 3 * cur_x2)

        input_names = ["x1", "x2"]
        df = pd.DataFrame(x, columns=input_names)
        prob = svm_problem(y, x)

        # Parameters
        base_param = "-s 3"  # model type is epsilon SVR
        non_kernel_parameters = [
            "", "-c 1.5 -p 0.5 -h 1", "-c 0.5 -p 0.5 -h 0"
        ]
        kernel_parameters = [
            "",
            "-t 2 -g 1.2",  # rbf kernel
            "-t 0",  # linear kernel
            "-t 1",
            "-t 1 -d 2",
            "-t 1 -g 0.75",
            "-t 1 -d 0 -g 0.9 -r 2",  # poly kernel
            "-t 3",
            "-t 3 -g 1.3",
            "-t 3 -r 0.8",
            "-t 3 -r 0.8 -g 0.5",  # sigmoid kernel
        ]

        for param1 in non_kernel_parameters:
            for param2 in kernel_parameters:
                param_str = " ".join([base_param, param1, param2])
                print(param_str)
                param = svm_parameter(param_str)

                model = svm_train(prob, param)
                (df["prediction"], _, _) = svm_predict(y, x, model)

                spec = libsvm.convert(model,
                                      input_names=input_names,
                                      target_name="target")

                if _is_macos() and _macos_version() >= (10, 13):
                    metrics = evaluate_regressor(spec, df)
                    self.assertAlmostEqual(metrics["max_error"], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
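For reference, the `-t` codes exercised above select libsvm's kernel types. A small illustrative lookup (the `KERNEL_NAMES` name is made up for this sketch; the codes themselves are libsvm's documented options):

    # libsvm "-t" kernel codes:
    KERNEL_NAMES = {0: "linear", 1: "polynomial", 2: "rbf", 3: "sigmoid"}
    # "-d", "-g", and "-r" set the kernel's degree, gamma, and coef0.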
Example #8
    def _test_evaluation(self, allow_slow):
        """
        Test that the converted model makes the same predictions as libsvm.
        """
        from svm import svm_parameter, svm_problem
        from svmutil import svm_train, svm_predict

        # Generate some smallish (poly kernels take too long on anything else) random data
        x, y = [], []
        for _ in range(50):
            cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
            x.append([cur_x1, cur_x2])
            y.append(1 + 2 * cur_x1 + 3 * cur_x2)

        input_names = ['x1', 'x2']
        df = pd.DataFrame(x, columns=input_names)
        prob = svm_problem(y, x)

        # Parameters
        base_param = '-s 3'  # model type is epsilon SVR
        non_kernel_parameters = [
            '', '-c 1.5 -p 0.5 -h 1', '-c 0.5 -p 0.5 -h 0'
        ]
        kernel_parameters = [
            '',
            '-t 2 -g 1.2',  # rbf kernel
            '-t 0',  # linear kernel
            '-t 1',
            '-t 1 -d 2',
            '-t 1 -g 0.75',
            '-t 1 -d 0 -g 0.9 -r 2',  # poly kernel
            '-t 3',
            '-t 3 -g 1.3',
            '-t 3 -r 0.8',
            '-t 3 -r 0.8 -g 0.5'  # sigmoid kernel
        ]

        for param1 in non_kernel_parameters:
            for param2 in kernel_parameters:
                param_str = ' '.join([base_param, param1, param2])
                print(param_str)
                param = svm_parameter(param_str)

                model = svm_train(prob, param)
                (df['prediction'], _, _) = svm_predict(y, x, model)

                spec = libsvm.convert(model,
                                      input_names=input_names,
                                      target_name='target')

                metrics = evaluate_regressor(spec, df)
                self.assertAlmostEqual(metrics['max_error'], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #9
    def _evaluation_test_helper_no_probability(self, labels, allow_slow):
        # Generate some random data.
        # This unit test should not rely on scikit learn for test data.
        x, y = [], []
        random.seed(42)
        for _ in range(50):
            x.append([
                random.gauss(200, 30),
                random.gauss(-100, 22),
                random.gauss(100, 42)
            ])
            y.append(random.choice(labels))
        # make sure first label is seen first, second is seen second, and so on.
        for i, val in enumerate(labels):
            y[i] = val
        column_names = ["x1", "x2", "x3"]
        prob = svmutil.svm_problem(y, x)

        df = pd.DataFrame(x, columns=column_names)

        for param1 in self.non_kernel_parameters:
            for param2 in self.kernel_parameters:
                param_str = " ".join([self.base_param, param1, param2])
                print("PARAMS: ", param_str)
                param = svm_parameter(param_str)

                model = svm_train(prob, param)

                # Get predictions (this model has no probability estimates)
                (df["prediction"], _, _) = svm_predict(y, x, model, " -q")

                spec = libsvm.convert(model, column_names, "target")

                if _is_macos() and _macos_version() >= (10, 13):
                    metrics = evaluate_classifier(spec, df, verbose=False)
                    self.assertEqual(metrics["num_errors"], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #10
    def _test_prob_model(self, param1, param2):
        probability_param = "-b 1"
        df = self.df

        param_str = " ".join([self.base_param, param1, param2, probability_param])
        param = svmutil.svm_parameter(param_str)
        model = svm_train(self.prob, param)

        # Get predictions with probabilities as dictionaries
        (df["prediction"], _, probability_lists) = svm_predict(
            self.y, self.x, model, probability_param + " -q"
        )
        probability_dicts = [
            dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
        ]
        df["probabilities"] = probability_dicts

        spec = libsvm.convert(model, self.column_names, "target", "probabilities")

        if _is_macos() and _macos_version() >= (10, 13):
            metrics = evaluate_classifier_with_probabilities(spec, df, verbose=False)
            self.assertEqual(metrics["num_key_mismatch"], 0)
            self.assertLess(metrics["max_probability_error"], 0.00001)
Example #11
    def test_input_names(self):
        data = load_boston()
        df = pd.DataFrame({'input': data['data'].tolist()})

        # Default values
        spec = libsvm.convert(self.libsvm_model)
        (df['prediction'], _, _) = svmutil.svm_predict(data['target'], data['data'].tolist(), self.libsvm_model)
        metrics = evaluate_regressor(spec, df)
        self.assertAlmostEqual(metrics['max_error'], 0)

        # One extra parameter. This is legal/possible.
        num_inputs = len(data['data'][0])
        spec = libsvm.convert(self.libsvm_model, input_length=num_inputs+1)

        # Not enough input names.
        input_names = ['this', 'is', 'not', 'enough', 'names']
        with self.assertRaises(ValueError):
            libsvm.convert(self.libsvm_model, input_names=input_names)
        with self.assertRaises(ValueError):
            libsvm.convert(self.libsvm_model, input_length=num_inputs-1)
Example #12
    def test_input_names(self):
        data = load_boston()
        df = pd.DataFrame({"input": data["data"].tolist()})
        df["input"] = df["input"].apply(np.array)

        # Default values
        spec = libsvm.convert(self.libsvm_model)
        if _is_macos() and _macos_version() >= (10, 13):
            (df["prediction"], _,
             _) = svmutil.svm_predict(data["target"], data["data"].tolist(),
                                      self.libsvm_model)
            metrics = evaluate_regressor(spec, df)
            self.assertAlmostEqual(metrics["max_error"], 0)

        # One extra parameter. This is legal/possible.
        num_inputs = len(data["data"][0])
        spec = libsvm.convert(self.libsvm_model, input_length=num_inputs + 1)

        # Not enough input names.
        input_names = ["this", "is", "not", "enough", "names"]
        with self.assertRaises(ValueError):
            libsvm.convert(self.libsvm_model, input_names=input_names)
        with self.assertRaises(ValueError):
            libsvm.convert(self.libsvm_model, input_length=num_inputs - 1)
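For contrast with the failing cases above, a sketch of a call that should succeed, with exactly one name per input column (the generated names are illustrative):

        # Exactly num_inputs names: expected to convert without raising.
        input_names = ["x{}".format(i) for i in range(num_inputs)]
        spec = libsvm.convert(self.libsvm_model, input_names=input_names)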
Example #13
    def test_conversion_bad_inputs(self):
        # Check the expected class during conversion.
        with self.assertRaises(TypeError):
            model = OneHotEncoder()
            spec = libsvm.convert(model, 'data', 'out')
Example #14
    def test_conversion_from_filesystem(self):
        libsvm_model_path = tempfile.mktemp(suffix='model.libsvm')
        svmutil.svm_save_model(libsvm_model_path, self.libsvm_model)
        spec = libsvm.convert(libsvm_model_path, 'data', 'target')
Example #15
    def test_conversion(self):
        spec = libsvm.convert(self.libsvm_model, 'data', 'target')
Example #16
    def test_conversion_from_filesystem(self):
        libsvm_model_path = tempfile.mktemp(suffix='model.libsvm')
        svmutil.svm_save_model(libsvm_model_path, self.libsvm_model)
        # libsvm's save(...) truncates floating point values, so the
        # reloaded model will no longer match self.libsvm_model exactly.
        spec = libsvm.convert(libsvm_model_path, self.column_names, 'target')
        self.assertIsNotNone(spec)
Example #17
    def test_conversion(self):
        spec = libsvm.convert(self.libsvm_model, "data", "target")
Example #18
    def test_conversion_from_filesystem(self):
        libsvm_model_path = tempfile.mktemp(suffix="model.libsvm")
        svmutil.svm_save_model(libsvm_model_path, self.libsvm_model)
        spec = libsvm.convert(libsvm_model_path,
                              input_names="data",
                              target_name="target")
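`tempfile.mktemp` is deprecated because the returned path can be raced before it is used. A minimal sketch of the same round trip using `tempfile.mkstemp` (the test name is hypothetical, and `os`/`tempfile` are assumed to be imported at module level):

    def test_conversion_from_filesystem_mkstemp(self):
        # mkstemp atomically creates the file and returns an open descriptor.
        fd, libsvm_model_path = tempfile.mkstemp(suffix="model.libsvm")
        os.close(fd)  # svm_save_model reopens the path itself
        svmutil.svm_save_model(libsvm_model_path, self.libsvm_model)
        spec = libsvm.convert(libsvm_model_path,
                              input_names="data",
                              target_name="target")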