Beispiel #1
0
    def test_conversion_bad_inputs(self):
        """Conversion must raise TypeError for bad model inputs."""

        # An untrained model cannot be converted.
        with self.assertRaises(TypeError):
            untrained = GradientBoostingRegressor()
            xgb_converter.convert(untrained, 'data', 'out')

        # A model of an unexpected class must be rejected as well.
        with self.assertRaises(TypeError):
            wrong_class = OneHotEncoder()
            xgb_converter.convert(wrong_class, 'data', 'out')
Beispiel #2
0
    def test_unsupported_conversion(self):
        """Objectives the converter does not support must raise ValueError."""
        feature_names = self.scikit_data.feature_names
        output_name = 'target'

        # Both of these regression objectives are expected to be rejected.
        for objective in ('reg:gamma', 'reg:tweedie'):
            booster = xgboost.XGBRegressor(objective=objective)
            booster.fit(self.scikit_data.data, self.scikit_data.target)
            with self.assertRaises(ValueError):
                xgb_converter.convert(booster, feature_names, 'target')
Beispiel #3
0
    def test_conversion_from_file(self):
        """Convert a booster from a JSON dump file and validate the spec.

        Checks the model description, the predicted feature name, the
        input/output types, and the expected number of tree nodes.
        """
        output_name = 'target'
        feature_names = self.feature_names

        # Dump the trained booster to a JSON file on disk.
        # tempfile.mktemp is deprecated and race-prone; NamedTemporaryFile
        # creates the file atomically and hands back an open handle.
        xgb_json_out = self.xgb_model.get_dump(dump_format = 'json')
        with tempfile.NamedTemporaryFile('w', suffix='tree_model.json',
                                         delete=False) as f:
            json.dump(xgb_json_out, f)
            xgb_model_json = f.name
        spec = xgb_converter.convert(xgb_model_json, feature_names, 'target').get_spec()
        self.assertIsNotNone(spec)

        # Test the model class
        self.assertIsNotNone(spec.description)
        self.assertIsNotNone(spec.treeEnsembleRegressor)

        # Test the interface class
        self.assertEqual(spec.description.predictedFeatureName,
                'target')

        # Test the inputs and outputs: one double output, all-double inputs
        # whose names match the dataset's feature names.
        self.assertEqual(len(spec.description.output), 1)
        self.assertEqual(spec.description.output[0].name, 'target')
        self.assertEqual(spec.description.output[0].type.WhichOneof('Type'),
                'doubleType')
        for input_type in spec.description.input:
            self.assertEqual(input_type.type.WhichOneof('Type'),
                    'doubleType')
        self.assertEqual(sorted(self.feature_names),
               sorted(map(lambda x: x.name, spec.description.input)))

        # Test the tree ensemble parameters.
        tr = spec.treeEnsembleRegressor.treeEnsemble
        self.assertIsNotNone(tr)
        self.assertEqual(len(tr.nodes), 23)
    def _train_convert_evaluate_assert(self, bt_params=None, allowed_error=None, **params):
        """
        Train an XGBRegressor on the cached dataset, convert it to CoreML,
        and (on supported macOS versions) check the regression metrics.
        """
        bt_params = {} if bt_params is None else bt_params
        allowed_error = {} if allowed_error is None else allowed_error

        # Fit the regressor.
        regressor = xgboost.XGBRegressor(**params)
        regressor.fit(self.X, self.target)

        # Convert the model (feature_names can't be given because of XGboost)
        spec = xgb_converter.convert(
            regressor, self.feature_names, self.output_name, force_32bit_float=False
        )

        # Prediction evaluation only runs on macOS 10.13+.
        if _is_macos() and _macos_version() >= (10, 13):
            frame = pd.DataFrame(self.X, columns=self.feature_names)
            frame["prediction"] = regressor.predict(self.X)

            # Compare CoreML output against xgboost's predictions.
            metrics = evaluate_regressor(spec, frame, target="target", verbose=False)
            self._check_metrics(metrics, bt_params, allowed_error)
Beispiel #5
0
    def test_conversion(self):
        """Convert the trained booster and validate the generated spec."""
        feature_names = self.scikit_data.feature_names
        output_name = 'target'
        spec = xgb_converter.convert(self.xgb_model, feature_names, 'target').get_spec()
        self.assertIsNotNone(spec)

        # Model class checks.
        self.assertIsNotNone(spec.description)
        self.assertIsNotNone(spec.treeEnsembleRegressor)

        # Interface checks.
        self.assertEqual(spec.description.predictedFeatureName, 'target')

        # Exactly one double-typed output named 'target'.
        outputs = spec.description.output
        self.assertEqual(len(outputs), 1)
        self.assertEqual(outputs[0].name, 'target')
        self.assertEqual(outputs[0].type.WhichOneof('Type'), 'doubleType')

        # Every input is a double and the names match the dataset features.
        for feature in spec.description.input:
            self.assertEqual(feature.type.WhichOneof('Type'), 'doubleType')
        input_names = [feature.name for feature in spec.description.input]
        self.assertEqual(sorted(self.feature_names), sorted(input_names))

        # Tree-ensemble parameter checks.
        ensemble = spec.treeEnsembleRegressor.treeEnsemble
        self.assertIsNotNone(ensemble)
        self.assertEqual(len(ensemble.nodes), 23)
    def test_conversion_bad_inputs(self):
        """Conversion must fail for untrained or wrong-class models."""
        # Converting an untrained classifier must raise.
        with self.assertRaises(Exception):
            untrained = xgboost.XGBClassifier()
            xgb_converter.convert(untrained,
                                  "data",
                                  "out",
                                  mode="classifier")

        # A regressor passed in classifier mode must be rejected.
        with self.assertRaises(Exception):
            regressor = xgboost.XGBRegressor()
            xgb_converter.convert(regressor,
                                  "data",
                                  "out",
                                  mode="classifier")
    def test_conversion(self):
        """Convert the trained classifier and validate the generated spec."""
        input_names = self.scikit_data.feature_names
        output_name = "target"
        spec = xgb_converter.convert(
            self.xgb_model,
            input_names,
            output_name,
            mode="classifier",
            n_classes=self.n_classes,
        ).get_spec()
        self.assertIsNotNone(spec)

        # Model description checks.
        self.assertIsNotNone(spec.description)
        self.assertEqual(spec.description.predictedFeatureName, output_name)

        # Two outputs; the first one is the int64 class label.
        outputs = spec.description.output
        self.assertEqual(len(outputs), 2)
        self.assertEqual(outputs[0].name, output_name)
        self.assertEqual(outputs[0].type.WhichOneof("Type"), "int64Type")

        # All inputs are doubles named after the dataset features.
        for feature in spec.description.input:
            self.assertEqual(feature.type.WhichOneof("Type"), "doubleType")
        self.assertEqual(sorted(input_names),
                         sorted(feature.name
                                for feature in spec.description.input))

        # Tree-ensemble parameter checks.
        ensemble = spec.treeEnsembleClassifier.treeEnsemble
        self.assertIsNotNone(ensemble)
    def _train_convert_evaluate_assert(self,
                                       bt_params=None,
                                       allowed_error=None,
                                       **params):
        """
        Set up the unit test by loading the dataset and training a model.

        Parameters
        ----------
        bt_params: dict or None
            Booster parameters forwarded to xgboost.train.
        allowed_error: dict or None
            Per-metric tolerances forwarded to _check_metrics.
        params:
            Extra keyword arguments forwarded to xgboost.train.
        """
        # Avoid mutable default arguments: a shared {} default would leak
        # state across calls if any callee ever mutated it.
        if bt_params is None:
            bt_params = {}
        if allowed_error is None:
            allowed_error = {}

        # Train a model
        xgb_model = xgboost.train(bt_params, self.dtrain, **params)

        # Convert the model
        spec = xgb_converter.convert(xgb_model,
                                     self.feature_names,
                                     self.output_name,
                                     force_32bit_float=False)

        if _is_macos() and _macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(self.X, columns=self.feature_names)
            df["prediction"] = xgb_model.predict(self.dtrain)

            # Evaluate it
            metrics = evaluate_regressor(spec,
                                         df,
                                         target="target",
                                         verbose=False)
            self._check_metrics(metrics, allowed_error, bt_params)
    def _train_convert_evaluate_assert(self, **xgboost_params):
        """
        Train an XGBoost classifier, convert it to CoreML, and compare the
        CoreML class probabilities against xgboost's predict_proba output.
        """
        classifier = xgboost.XGBClassifier(**xgboost_params)
        classifier.fit(self.X, self.target)

        # Convert the trained model to a CoreML classifier spec.
        spec = xgb_converter.convert(classifier,
                                     self.feature_names,
                                     self.output_name,
                                     mode="classifier")

        # Probability comparison only runs on macOS 10.13+.
        if _is_macos() and _macos_version() >= (10, 13):
            frame = pd.DataFrame(self.X, columns=self.feature_names)
            proba = classifier.predict_proba(self.X)
            frame["classProbability"] = [
                dict(zip(classifier.classes_, row)) for row in proba
            ]
            metrics = evaluate_classifier_with_probabilities(
                spec, frame, probabilities="classProbability", verbose=False)
            self.assertEqual(metrics["num_key_mismatch"], 0)
            self.assertLess(metrics["max_probability_error"], 1e-3)
    def _train_convert_evaluate_assert(self, **xgboost_params):
        """
        Train an XGBoost classifier, convert it to CoreML, and evaluate the
        converted model's class predictions.
        """
        classifier = xgboost.XGBClassifier(**xgboost_params)
        classifier.fit(self.X, self.target)

        # Convert the trained model.
        spec = xgb_converter.convert(
            classifier, self.feature_names, self.output_name, mode="classifier")

        # Evaluation requires macOS 10.13 or newer.
        if macos_version() >= (10, 13):
            frame = pd.DataFrame(self.X, columns=self.feature_names)
            frame['prediction'] = classifier.predict(self.X)

            # Compare CoreML predictions against xgboost's.
            metrics = evaluate_classifier(spec, frame)
            self._check_metrics(metrics)
    def test_conversion_from_file(self):
        """Convert a classifier from a JSON dump file and validate the spec.

        Checks the model description, the predicted feature name, and the
        input/output types of the converted classifier.
        """
        output_name = "target"
        feature_names = self.scikit_data.feature_names

        # Dump the trained booster to a JSON file on disk.
        # tempfile.mktemp is deprecated and race-prone; NamedTemporaryFile
        # creates the file atomically and hands back an open handle.
        xgb_json_out = self.xgb_model.get_dump(with_stats=True,
                                               dump_format="json")
        with tempfile.NamedTemporaryFile(
                "w", suffix="xgb_tree_model_classifier.json",
                delete=False) as f:
            json.dump(xgb_json_out, f)
            xgb_model_json = f.name
        spec = xgb_converter.convert(
            xgb_model_json,
            feature_names,
            output_name,
            mode="classifier",
            n_classes=self.n_classes,
        ).get_spec()
        self.assertIsNotNone(spec)

        # Test the model class. This is a classifier conversion, so check
        # the classifier message (the previous treeEnsembleRegressor check
        # passed vacuously: protobuf returns a default sub-message even for
        # an unset field, so it is never None).
        self.assertIsNotNone(spec.description)
        self.assertIsNotNone(spec.treeEnsembleClassifier)

        # Test the interface class
        self.assertEqual(spec.description.predictedFeatureName, output_name)

        # Test the inputs and outputs: two outputs whose first entry is the
        # int64 class label, and all-double inputs matching the features.
        self.assertEqual(len(spec.description.output), 2)
        self.assertEqual(spec.description.output[0].name, output_name)
        self.assertEqual(spec.description.output[0].type.WhichOneof("Type"),
                         "int64Type")
        for input_type in spec.description.input:
            self.assertEqual(input_type.type.WhichOneof("Type"), "doubleType")
        self.assertEqual(
            sorted(self.scikit_data.feature_names),
            sorted(map(lambda x: x.name, spec.description.input)),
        )

        # Test the tree ensemble parameters.
        tr = spec.treeEnsembleClassifier.treeEnsemble
        self.assertIsNotNone(tr)
Beispiel #12
0
    def _train_convert_evaluate(self, bt_params=None, **params):
        """
        Set up the unit test by loading the dataset and training a model.

        Parameters
        ----------
        bt_params: dict or None
            Booster parameters forwarded to xgboost.train.
        params:
            Extra keyword arguments forwarded to xgboost.train.

        Returns
        -------
        The metrics dict produced by evaluate_regressor.
        """
        # Avoid a mutable default argument: a shared {} default would leak
        # state across calls if it were ever mutated.
        if bt_params is None:
            bt_params = {}

        # Train a model
        xgb_model = xgboost.train(bt_params, self.dtrain, **params)

        # Convert the model
        spec = xgb_converter.convert(xgb_model,
                                     self.feature_names,
                                     self.output_name,
                                     force_32bit_float=False)

        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df['prediction'] = xgb_model.predict(self.dtrain)

        # Evaluate it
        metrics = evaluate_regressor(spec, df, target='target', verbose=False)
        return metrics
import pickle

import pandas as pd
import numpy as np
from coremltools.converters.xgboost import convert
from coremltools.models.utils import evaluate_regressor

# Load the data once; the raw numeric matrix feeds xgboost directly, while
# the renamed frame is used for the CoreML evaluation.
test = pd.read_csv("../data/train.csv")
test_xgb = (test.drop(['ID_code', 'target'], axis=1, inplace=False)).values

# CoreML expects xgboost's generic feature names f0..f199, so map the
# columns var_0..var_199 accordingly. Generate the mapping instead of
# spelling out all 200 entries by hand.
rename_dict = {"var_{0}".format(i): "f{0}".format(i) for i in range(200)}
test_coreml = test.rename(columns=rename_dict, inplace=False)
print(test_coreml.describe())

models = ["xgb_fold_regressor_{0}.dat".format(i) for i in range(1, 4)]
for idx, m in enumerate(models):
    print("Converting {0}".format(m))
    # Close the pickle file handle deterministically instead of leaking it.
    with open("./Models/" + m, "rb") as fh:
        xgb_model = pickle.load(fh)
    predictions_xgb = xgb_model.predict(test_xgb)

    test_coreml["prediction"] = pd.Series(predictions_xgb)

    # Convert to CoreML, compare its predictions against xgboost's, and
    # save the converted model next to the pickled one.
    coreml_model = convert(xgb_model)
    metrics = evaluate_regressor(coreml_model, test_coreml, target="target", verbose=False)
    print("coreml prediction metrics")
    print(metrics)
    coreml_model.save("./Models/XgbRegressor{0}.mlmodel".format(idx + 1))
Beispiel #14
0
# Only for coremltools version from PR at https://github.com/apple/coremltools/pull/293

import pickle

import pandas as pd
import numpy as np
from coremltools.converters.xgboost import convert
from coremltools.models.utils import evaluate_regressor

# Load the data once; the raw numeric matrix feeds xgboost directly, while
# the renamed frame is used for the CoreML evaluation.
test = pd.read_csv("../data/train.csv")
test_xgb = (test.drop(['ID_code', 'target'], axis=1, inplace=False)).values

# CoreML expects xgboost's generic feature names f0..f199, so map the
# columns var_0..var_199 accordingly. Generate the mapping instead of
# spelling out all 200 entries by hand.
rename_dict = {"var_{0}".format(i): "f{0}".format(i) for i in range(200)}
test_coreml = test.rename(columns=rename_dict, inplace=False)
print(test_coreml.describe())

models = ["xgb_fold{0}.dat".format(i) for i in range(1, 6)]
for idx, m in enumerate(models):
    print("Converting {0}".format(m))
    # Close the pickle file handle deterministically instead of leaking it.
    with open("./Models/" + m, "rb") as fh:
        xgb_model = pickle.load(fh)
    predictions_xgb = xgb_model.predict(test_xgb)

    test_coreml["prediction"] = pd.Series(predictions_xgb)

    # Convert in classifier mode, compare predictions, and save the model.
    coreml_model = convert(xgb_model, mode="classifier")
    metrics = evaluate_regressor(coreml_model, test_coreml, target="target", verbose=False)
    print("coreml prediction metrics")
    print(metrics)
    print("\n")
    coreml_model.save("./Models/XgbClassifier{0}.mlmodel".format(idx + 1))