def _train_convert_evaluate_assert(self, **scikit_params):
    """
    Train a GradientBoostingRegressor, convert it and, on macOS 10.13 or
    newer, evaluate the converted spec and check the resulting metrics.

    The keyword arguments are forwarded to the GradientBoostingRegressor
    constructor (next to random_state=1) and are also handed to
    self._check_metrics together with the metrics.
    """
    # Fit the reference scikit-learn model
    regressor = GradientBoostingRegressor(random_state=1, **scikit_params)
    regressor.fit(self.X, self.target)

    # Conversion is exercised on every platform
    coreml_spec = skl_converter.convert(regressor, self.feature_names, self.output_name)

    # Evaluation only runs when the platform guard passes
    if not (_is_macos() and _macos_version() >= (10, 13)):
        return

    # Reference predictions go into the "prediction" column
    frame = pd.DataFrame(self.X, columns=self.feature_names)
    frame["prediction"] = regressor.predict(self.X)

    metrics = evaluate_regressor(coreml_spec, frame, "target", verbose=False)
    self._check_metrics(metrics, scikit_params)
def test_linear_regression_evaluation(self):
    """
    Check that the evaluation results are the same in scikit learn and coremltools

    A LinearRegression model is fitted once with normalize=True and once
    with normalize=False; each one is converted and evaluated against the
    scikit-learn predictions, and max_error is asserted to be almost zero.
    """
    input_names = self.scikit_data.feature_names
    df = pd.DataFrame(self.scikit_data.data, columns=input_names)

    for normalize_value in (True, False):
        cur_model = LinearRegression(normalize=normalize_value)
        cur_model.fit(self.scikit_data['data'], self.scikit_data['target'])
        spec = convert(cur_model, input_names, 'target')

        # Reference predictions that the converted model is compared against
        df['prediction'] = cur_model.predict(self.scikit_data.data)

        metrics = evaluate_regressor(spec, df)
        # assertAlmostEqual is used because the assertAlmostEquals alias is
        # deprecated and no longer exists in recent Python versions.
        self.assertAlmostEqual(metrics['max_error'], 0)
def _train_convert_evaluate(self, **scikit_params):
    """
    Train a GradientBoostingRegressor, convert it and return the metrics
    produced by evaluate_regressor for the converted spec.

    The keyword arguments are forwarded to the GradientBoostingRegressor
    constructor next to random_state=1.
    """
    # Fit the reference scikit-learn model
    regressor = GradientBoostingRegressor(random_state=1, **scikit_params)
    regressor.fit(self.X, self.target)

    # Convert the fitted model
    coreml_spec = skl_converter.convert(regressor, self.feature_names, self.output_name)

    # Reference predictions go into the 'prediction' column
    frame = pd.DataFrame(self.X, columns=self.feature_names)
    frame['prediction'] = regressor.predict(self.X)

    # Hand the evaluation result straight back to the caller
    return evaluate_regressor(coreml_spec, frame, 'target', verbose=False)
def _test_evaluation(self, allow_slow):
    """
    Test that the same predictions are made

    Trains libsvm models over combinations of non-kernel and kernel
    parameter strings, converts each one and, on macOS 10.13 or newer,
    asserts that max_error against libsvm's own predictions is almost zero.
    When allow_slow is false only the first parameter combination is run.
    """
    from svm import svm_parameter, svm_problem
    from svmutil import svm_train, svm_predict

    # Generate some smallish (poly kernels take too long on anything else) random data
    x, y = [], []
    for _ in range(50):
        cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
        x.append([cur_x1, cur_x2])
        y.append(1 + 2 * cur_x1 + 3 * cur_x2)

    input_names = ['x1', 'x2']
    df = pd.DataFrame(x, columns=input_names)
    prob = svm_problem(y, x)

    # Parameters
    base_param = '-s 4'  # model type is nu-SVR
    non_kernel_parameters = ['', '-c 1.5 -p 0.5 -h 1', '-c 0.5 -p 0.5 -h 0']
    kernel_parameters = [
        '',
        '-t 2 -g 1.2',  # rbf kernel
        '-t 0',  # linear kernel
        '-t 1', '-t 1 -d 2', '-t 1 -g 0.75', '-t 1 -d 0 -g 0.9 -r 2',  # poly kernel
        '-t 3', '-t 3 -g 1.3', '-t 3 -r 0.8', '-t 3 -r 0.8 -g 0.5'  # sigmoid kernel
    ]

    for param1 in non_kernel_parameters:
        for param2 in kernel_parameters:
            param_str = ' '.join([base_param, param1, param2])
            param = svm_parameter(param_str)

            model = svm_train(prob, param)
            # Only the predicted labels (first element) are needed
            (df['prediction'], _, _) = svm_predict(y, x, model)

            spec = libsvm.convert(model, input_names, 'target')

            if is_macos() and macos_version() >= (10, 13):
                metrics = evaluate_regressor(spec, df)
                # assertAlmostEqual is used because the assertAlmostEquals
                # alias is deprecated and gone from recent Python versions.
                self.assertAlmostEqual(metrics['max_error'], 0)

            # Fast mode: stop after the first kernel parameter set ...
            if not allow_slow:
                break

        # ... and after the first non-kernel parameter set
        if not allow_slow:
            break
def _test_evaluation(self, allow_slow):
    """
    Test that the same predictions are made

    Fits NuSVR models over combinations of non-kernel and kernel
    parameters, converts each one and, on macOS 10.13 or newer, asserts
    that max_error against the scikit-learn predictions is almost zero.
    When allow_slow is false only the first parameter combination is run.
    """
    # Generate some smallish (some kernels take too long on anything else) random data
    x, y = [], []
    for _ in range(50):
        cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
        x.append([cur_x1, cur_x2])
        y.append(1 + 2 * cur_x1 + 3 * cur_x2)

    input_names = ['x1', 'x2']
    df = pd.DataFrame(x, columns=input_names)

    # Parameters to test
    kernel_parameters = [
        {},
        {'kernel': 'rbf', 'gamma': 1.2},
        {'kernel': 'linear'},
        {'kernel': 'poly'},
        {'kernel': 'poly', 'degree': 2},
        {'kernel': 'poly', 'gamma': 0.75},
        {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0': 2},
        {'kernel': 'sigmoid'},
        {'kernel': 'sigmoid', 'gamma': 1.3},
        {'kernel': 'sigmoid', 'coef0': 0.8},
        {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5},
    ]
    non_kernel_parameters = [
        {},
        {'C': 1},
        {'C': 1.5, 'shrinking': True},
        {'C': 0.5, 'shrinking': False, 'nu': 0.9},
    ]

    # Test
    for param1 in non_kernel_parameters:
        for param2 in kernel_parameters:
            # Kernel settings are layered on top of the non-kernel settings
            cur_params = param1.copy()
            cur_params.update(param2)

            cur_model = NuSVR(**cur_params)
            cur_model.fit(x, y)
            df['prediction'] = cur_model.predict(x)

            spec = scikit_converter.convert(cur_model, input_names, 'target')

            if is_macos() and macos_version() >= (10, 13):
                metrics = evaluate_regressor(spec, df)
                # assertAlmostEqual is used because the assertAlmostEquals
                # alias is deprecated and gone from recent Python versions.
                self.assertAlmostEqual(metrics['max_error'], 0)

            # Fast mode: stop after the first kernel parameter set ...
            if not allow_slow:
                break

        # ... and after the first non-kernel parameter set
        if not allow_slow:
            break
def _train_convert_evaluate_assert(self, **scikit_params):
    """
    Fit a RandomForestRegressor, convert it and, on macOS 10.13 or newer,
    evaluate the converted spec with CoreML and check the metrics.

    The keyword arguments are forwarded to the RandomForestRegressor
    constructor (next to random_state=1) and are also handed to
    self._check_metrics together with the metrics.
    """
    # Fit the reference scikit-learn model
    forest = RandomForestRegressor(random_state=1, **scikit_params)
    forest.fit(self.X, self.target)

    # Conversion is exercised on every platform
    coreml_spec = skl_converter.convert(forest, self.feature_names, self.output_name)

    # Evaluation only runs when the platform guard passes
    if not (_is_macos() and _macos_version() >= (10, 13)):
        return

    # Reference predictions go into the "prediction" column
    frame = pd.DataFrame(self.X, columns=self.feature_names)
    frame["prediction"] = forest.predict(self.X)

    metrics = evaluate_regressor(coreml_spec, frame, verbose=False)
    self._check_metrics(metrics, scikit_params)
def _train_convert_evaluate(self, **scikit_params):
    """
    Fit a DecisionTreeRegressor, convert it and return the metrics
    produced by evaluate_regressor for the converted spec.

    The keyword arguments are forwarded to the DecisionTreeRegressor
    constructor next to random_state=1.
    """
    # Fit the reference scikit-learn model
    tree = DecisionTreeRegressor(random_state=1, **scikit_params)
    tree.fit(self.X, self.target)

    # Convert the fitted model
    coreml_spec = skl_converter.convert(tree, self.feature_names, self.output_name)

    # Reference predictions go into the 'prediction' column
    frame = pd.DataFrame(self.X, columns=self.feature_names)
    frame['prediction'] = tree.predict(self.X)

    # Hand the evaluation result straight back to the caller
    return evaluate_regressor(coreml_spec, frame, target='target', verbose=False)
def _train_convert_evaluate_assert(self, bt_params=None, allowed_error=None, **params):
    """
    Train an xgboost model, convert it and, on macOS 10.13 or newer,
    evaluate the converted spec and check the resulting metrics.

    bt_params is passed to xgboost.train as the booster parameters and
    defaults to an empty dict. allowed_error is forwarded to
    self._check_metrics and also defaults to an empty dict. Any further
    keyword arguments are passed through to xgboost.train.
    """
    # Fresh dicts per call: a `{}` default would be one shared object that
    # is handed to xgboost.train and self._check_metrics on every call.
    bt_params = {} if bt_params is None else bt_params
    allowed_error = {} if allowed_error is None else allowed_error

    # Train a model
    xgb_model = xgboost.train(bt_params, self.dtrain, **params)

    # Convert the model
    spec = xgb_converter.convert(
        xgb_model, self.feature_names, self.output_name, force_32bit_float=False
    )

    if _is_macos() and _macos_version() >= (10, 13):
        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df["prediction"] = xgb_model.predict(self.dtrain)

        # Evaluate it
        metrics = evaluate_regressor(spec, df, target="target", verbose=False)
        self._check_metrics(metrics, allowed_error, bt_params)
def _train_convert_evaluate(self, bt_params=None, **params):
    """
    Train an xgboost model, convert it and return the metrics produced by
    evaluate_regressor for the converted spec.

    bt_params is passed to xgboost.train as the booster parameters and
    defaults to an empty dict. Any further keyword arguments are passed
    through to xgboost.train.
    """
    # Fresh dict per call: a `{}` default would be one shared object that
    # is handed to xgboost.train on every call.
    bt_params = {} if bt_params is None else bt_params

    # Train a model
    xgb_model = xgboost.train(bt_params, self.dtrain, **params)

    # Convert the model
    spec = xgb_converter.convert(
        xgb_model, self.feature_names, self.output_name, force_32bit_float=False
    )

    # Get predictions
    df = pd.DataFrame(self.X, columns=self.feature_names)
    df['prediction'] = xgb_model.predict(self.dtrain)

    # Evaluate it
    metrics = evaluate_regressor(spec, df, target='target', verbose=False)
    return metrics
def test_input_names(self):
    """
    Check libsvm.convert with default input names and with invalid
    input_names / input_length arguments.

    With defaults, the converted model is evaluated against libsvm's own
    predictions and max_error must be almost zero. A too-short list of
    input names, or an input_length one below the number of features,
    must raise ValueError.
    """
    data = load_boston()
    df = pd.DataFrame({'input': data['data'].tolist()})

    # Default values
    spec = libsvm.convert(self.libsvm_model)
    # Only the predicted labels (first element) are needed
    (df['prediction'], _, _) = svmutil.svm_predict(
        data['target'], data['data'].tolist(), self.libsvm_model
    )
    metrics = evaluate_regressor(spec, df)
    # assertAlmostEqual is used because the assertAlmostEquals alias is
    # deprecated and no longer exists in recent Python versions.
    self.assertAlmostEqual(metrics['max_error'], 0)

    # One extra parameter. This is legal/possible.
    num_inputs = len(data['data'][0])
    spec = libsvm.convert(self.libsvm_model, input_length=num_inputs+1)

    # Not enough input names.
    input_names = ['this', 'is', 'not', 'enought', 'names']
    with self.assertRaises(ValueError):
        libsvm.convert(self.libsvm_model, input_names=input_names)
    with self.assertRaises(ValueError):
        libsvm.convert(self.libsvm_model, input_length=num_inputs-1)
def test_boston_OHE_plus_trees(self):
    """
    Fit a OneHotEncoder + GradientBoostingRegressor pipeline on the Boston
    data, convert it and assert that the converted model's max_error
    against the pipeline's own predictions stays below 0.0001.
    """
    boston = load_boston()

    # Column 8 is one-hot encoded before being fed to the trees
    encoder = OneHotEncoder(categorical_features = [8], sparse=False)
    trees = GradientBoostingRegressor(random_state = 1)
    pipeline = Pipeline([("OHE", encoder), ("Trees", trees)])
    pipeline.fit(boston.data, boston.target)

    # Convert the fitted pipeline
    coreml_spec = convert(pipeline, boston.feature_names, 'target')

    # Reference predictions go into the 'prediction' column
    frame = pd.DataFrame(boston.data, columns=boston.feature_names)
    frame['prediction'] = pipeline.predict(boston.data)

    # Compare converted model and pipeline
    outcome = evaluate_regressor(coreml_spec, frame, 'target', verbose = False)
    assert outcome["max_error"] < 0.0001
def test_linear_svr_evaluation(self):
    """
    Verify that scikit-learn and coremltools agree for LinearSVR models.

    One LinearSVR is fitted per keyword-argument set below; each is
    converted and evaluated against the scikit-learn predictions, and
    max_error is asserted to be almost zero.
    """
    svr_configs = [
        {},
        {"C": 0.5, "epsilon": 0.25},
        {"dual": False, "loss": "squared_epsilon_insensitive"},
        {"tol": 0.005},
        {"fit_intercept": False},
        {"intercept_scaling": 1.5},
    ]

    dataset = self.scikit_data
    input_names = dataset.feature_names
    frame = pd.DataFrame(dataset.data, columns=input_names)

    for config in svr_configs:
        # Echo the configuration under test
        print(config)

        svr = LinearSVR(**config)
        svr.fit(dataset["data"], dataset["target"])
        coreml_spec = convert(svr, input_names, "target")

        # Reference predictions go into the "prediction" column
        frame["prediction"] = svr.predict(dataset.data)

        max_error = evaluate_regressor(coreml_spec, frame)["max_error"]
        self.assertAlmostEqual(max_error, 0)
def _test_boston_OHE_plus_trees(self, loss='ls'):
    """
    Fit a OneHotEncoder + GradientBoostingRegressor pipeline on the Boston
    data, convert it and, on macOS 10.13 or newer, assert that max_error
    against the pipeline's own predictions stays below 0.0001.

    loss is passed to the GradientBoostingRegressor constructor.
    """
    boston = load_boston()

    # Column 8 is one-hot encoded before being fed to the trees
    encoder = OneHotEncoder(categorical_features=[8], sparse=False)
    trees = GradientBoostingRegressor(random_state=1, loss=loss)
    pipeline = Pipeline([("OHE", encoder), ("Trees", trees)])
    pipeline.fit(boston.data, boston.target)

    # Conversion is exercised on every platform
    coreml_spec = convert(pipeline, boston.feature_names, "target")

    # Evaluation only runs when the platform guard passes
    if not (_is_macos() and _macos_version() >= (10, 13)):
        return

    # Reference predictions go into the "prediction" column
    frame = pd.DataFrame(boston.data, columns=boston.feature_names)
    frame["prediction"] = pipeline.predict(boston.data)

    outcome = evaluate_regressor(coreml_spec, frame, "target", verbose=False)
    assert outcome["max_error"] < 0.0001
def test_input_names(self):
    """
    Exercise libsvm.convert with default input names and with invalid
    input_names / input_length arguments.

    On macOS 10.13 or newer the default conversion is evaluated against
    libsvm's own predictions and max_error must be almost zero. A
    too-short list of input names, or an input_length one below the
    number of features, must raise ValueError.
    """
    boston = load_boston()
    rows = boston["data"].tolist()

    # Single "input" column holding one numpy array per sample
    frame = pd.DataFrame({"input": rows})
    frame["input"] = frame["input"].apply(np.array)

    # Conversion with default values
    default_spec = libsvm.convert(self.libsvm_model)

    if _is_macos() and _macos_version() >= (10, 13):
        # Only the predicted labels (first element) are needed
        predicted, _, _ = svmutil.svm_predict(
            boston["target"], boston["data"].tolist(), self.libsvm_model
        )
        frame["prediction"] = predicted
        max_error = evaluate_regressor(default_spec, frame)["max_error"]
        self.assertAlmostEqual(max_error, 0)

    feature_count = len(boston["data"][0])

    # One extra input is legal/possible, so this call must not raise
    libsvm.convert(self.libsvm_model, input_length=feature_count + 1)

    # Too few input names
    too_few_names = ["this", "is", "not", "enought", "names"]
    self.assertRaises(
        ValueError, libsvm.convert, self.libsvm_model, input_names=too_few_names
    )

    # Too small an input length
    self.assertRaises(
        ValueError, libsvm.convert, self.libsvm_model, input_length=feature_count - 1
    )
# Script: evaluate each Core ML model and its "Q" (quantized) counterpart
# on the same CSV data and print the metrics from evaluate_regressor.
import pandas as pd
import numpy as np
from coremltools.converters.xgboost import convert
from coremltools.models.utils import evaluate_regressor
from coremltools.models import MLModel

basePath = "../SantanderCreateML/SantanderCreateML/"

test = pd.read_csv(basePath + "Data/train_head_pre.csv")
test_xgb = (test.drop(['ID_code', 'target'], axis=1, inplace=False)).values

# Maps the CSV column names var_0 .. var_199 to f0 .. f199
rename_dict = {'var_{0}'.format(i): 'f{0}'.format(i) for i in range(200)}

test_coreml = test.rename(columns=rename_dict, inplace=False)
# The reference "prediction" column is a copy of the "target" column
test_coreml["prediction"] = test_coreml["target"]
print(test_coreml.describe())

models = [
    "SantanderBoostedTreeRegressor_100_000_it",
    "SantanderBoostedTreeRegressor_20_000_it",
    "SantanderBoostedTreeRegressor_2_500_it",
]

# (file suffix, heading printed before the metrics), non-quantized first
variants = (
    (".mlmodel", "non-quantized model metrics"),
    ("Q.mlmodel", "quantized model metrics"),
)

for idx, m in enumerate(models):
    print("Comparing non-quantized vs quantized version of model {0}".format(m))
    for suffix, heading in variants:
        loaded_model = MLModel(basePath + m + suffix)
        metrics = evaluate_regressor(loaded_model, test_coreml, target="target", verbose=False)
        print(heading)
        print(metrics)
# Script: load pickled xgboost regressors, convert each one to Core ML,
# print the metrics from evaluate_regressor and save the converted model.
import pandas as pd
import numpy as np
from coremltools.converters.xgboost import convert
from coremltools.models.utils import evaluate_regressor
import pickle

test = pd.read_csv("../data/train.csv")
# Feature matrix fed to the xgboost models (ID and target columns dropped)
test_xgb = (test.drop(['ID_code', 'target'], axis=1, inplace=False)).values

# Maps the CSV column names var_0 .. var_199 to f0 .. f199
rename_dict = {'var_{0}'.format(i): 'f{0}'.format(i) for i in range(200)}

test_coreml = test.rename(columns=rename_dict, inplace=False)
print(test_coreml.describe())

models = ["xgb_fold_regressor_{0}.dat".format(i) for i in range(1, 4)]

for idx, m in enumerate(models):
    print("Converting {0}".format(m))

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # run this against model files from a trusted source.
    # The context manager closes the file handle, which the bare
    # pickle.load(open(...)) form left open.
    with open("./Models/" + m, "rb") as model_file:
        xgb_model = pickle.load(model_file)

    # Reference predictions from the original xgboost model
    predictions_xgb = xgb_model.predict(test_xgb)
    test_coreml["prediction"] = pd.Series(predictions_xgb)

    coreml_model = convert(xgb_model)
    metrics = evaluate_regressor(coreml_model, test_coreml, target="target", verbose=False)
    print("coreml prediction metrics")
    print(metrics)

    coreml_model.save("./Models/XgbRegressor{0}.mlmodel".format(idx+1))
def _test_evaluation(self, allow_slow):
    """
    Check that converted SVR models make the same predictions.

    Fits SVR models over combinations of non-kernel and kernel parameters,
    converts each one and, on macOS 10.13 or newer, asserts that max_error
    against the scikit-learn predictions is almost zero. When allow_slow
    is false only the first parameter combination is run.
    """
    # Small random data set (some kernels take too long on anything larger)
    x, y = [], []
    for _ in range(50):
        first = random.gauss(2, 3)
        second = random.gauss(-1, 2)
        x.append([first, second])
        y.append(1 + 2 * first + 3 * second)

    input_names = ["x1", "x2"]
    frame = pd.DataFrame(x, columns=input_names)

    # Kernel-specific settings
    kernel_parameters = [
        {},
        {"kernel": "rbf", "gamma": 1.2},
        {"kernel": "linear"},
        {"kernel": "poly"},
        {"kernel": "poly", "degree": 2},
        {"kernel": "poly", "gamma": 0.75},
        {"kernel": "poly", "degree": 0, "gamma": 0.9, "coef0": 2},
        {"kernel": "sigmoid"},
        {"kernel": "sigmoid", "gamma": 1.3},
        {"kernel": "sigmoid", "coef0": 0.8},
        {"kernel": "sigmoid", "coef0": 0.8, "gamma": 0.5},
    ]
    # Settings independent of the kernel
    non_kernel_parameters = [
        {},
        {"C": 1},
        {"C": 1.5, "epsilon": 0.5, "shrinking": True},
        {"C": 0.5, "epsilon": 1.5, "shrinking": False},
    ]

    for base_settings in non_kernel_parameters:
        for kernel_settings in kernel_parameters:
            # Kernel settings are layered on top of the non-kernel settings
            cur_params = dict(base_settings, **kernel_settings)
            print("cur_params=" + str(cur_params))

            svr = SVR(**cur_params)
            svr.fit(x, y)
            frame["prediction"] = svr.predict(x)

            coreml_spec = sklearn_converter.convert(svr, input_names, "target")

            if _is_macos() and _macos_version() >= (10, 13):
                max_error = evaluate_regressor(coreml_spec, frame)["max_error"]
                self.assertAlmostEqual(max_error, 0)

            # Fast mode: nothing follows the loops, so returning here is
            # the same as breaking out of both of them
            if not allow_slow:
                return