def _train_convert_evaluate_assert(self, **scikit_params):
    """
    Train a scikit-learn model, convert it and then evaluate it with CoreML
    """
    scikit_model = GradientBoostingClassifier(random_state=1, **scikit_params)
    scikit_model.fit(self.X, self.target)

    # Convert the model
    spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

    if hasattr(scikit_model, '_init_decision_function') and scikit_model.n_classes_ > 2:
        import numpy as np

        # Fix the initial default prediction for multiclass classification
        # https://github.com/scikit-learn/scikit-learn/pull/12983
        if not hasattr(scikit_model, 'init_'):
            raise AssertionError
        if not hasattr(scikit_model.init_, 'priors'):
            raise AssertionError
        scikit_model.init_.priors = np.log(scikit_model.init_.priors)

    if _is_macos() and _macos_version() >= (10, 13):
        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df["prediction"] = scikit_model.predict(self.X)

        # Evaluate it
        metrics = evaluate_classifier(spec, df)
        self._check_metrics(metrics)
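# The helpers in this section assume per-class fixtures (self.X, self.target,
# self.feature_names, self.output_name) created in setUp. A minimal sketch of
# what such a setUp might look like -- the data, shapes, and names here are
# illustrative assumptions, not the suite's actual fixtures:
def _example_setup_sketch(self):
    import numpy as np
    np.random.seed(1)
    self.X = np.random.rand(100, 3)
    # Three classes, so the multiclass init_.priors fix above is exercised.
    self.target = np.random.randint(0, 3, 100)
    self.feature_names = ["x1", "x2", "x3"]
    self.output_name = "target"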
def test_default_names(self):
    df = pd.DataFrame({'input': self.x})

    # Test with probabilities
    spec = libsvm.convert(self.libsvm_model).get_spec()
    (_, _, probability_lists) = svm_predict(self.y, self.x, self.libsvm_model, '-b 1 -q')
    probability_dicts = [
        dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
    ]
    df['classProbability'] = probability_dicts
    metrics = evaluate_classifier_with_probabilities(
        spec, df, verbose=False, probabilities='classProbability')
    self.assertLess(metrics['max_probability_error'], 0.00001)

    # Test model without probabilities
    no_probability_model = svmutil.svm_train(self.prob, svmutil.svm_parameter())
    spec = libsvm.convert(no_probability_model).get_spec()
    self.assertEqual(len(spec.description.output), 1)
    self.assertEqual(spec.description.output[0].name, u'target')
    (df['prediction'], _, _) = svm_predict(self.y, self.x, no_probability_model, ' -q')
    metrics = evaluate_classifier(spec, df, verbose=False)
    self.assertEqual(metrics['num_errors'], 0)
def test_default_names(self): df = pd.DataFrame({"input": self.x}) df["input"] = df["input"].apply(np.array) # Test with probabilities spec = libsvm.convert(self.libsvm_model).get_spec() if _is_macos() and _macos_version() >= (10, 13): (_, _, probability_lists) = svm_predict(self.y, self.x, self.libsvm_model, "-b 1 -q") probability_dicts = [ dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists ] df["classProbability"] = probability_dicts metrics = evaluate_classifier_with_probabilities( spec, df, verbose=False, probabilities="classProbability") self.assertLess(metrics["max_probability_error"], 0.00001) # Test model without probabilities no_probability_model = svmutil.svm_train(self.prob, svmutil.svm_parameter()) spec = libsvm.convert(no_probability_model).get_spec() self.assertEqual(len(spec.description.output), 1) self.assertEqual(spec.description.output[0].name, u"target") if _is_macos() and _macos_version() >= (10, 13): (df["prediction"], _, _) = svm_predict(self.y, self.x, no_probability_model, " -q") metrics = evaluate_classifier(spec, df, verbose=False) self.assertEquals(metrics["num_errors"], 0)
def _conversion_and_evaluation_helper_for_linear_svc(self, class_labels):
    ARGS = [
        {},
        {'C': 0.75, 'loss': 'hinge'},
        {'penalty': 'l1', 'dual': False},
        {'tol': 0.001, 'fit_intercept': False},
        {'intercept_scaling': 1.5}
    ]
    x, y = GlmCassifierTest._generate_random_data(class_labels)
    column_names = ['x1', 'x2']
    df = pd.DataFrame(x, columns=column_names)

    for cur_args in ARGS:
        print(class_labels, cur_args)
        cur_model = LinearSVC(**cur_args)
        cur_model.fit(x, y)

        spec = convert(cur_model, input_features=column_names,
                       output_feature_names='target')

        if is_macos() and macos_version() >= (10, 13):
            df['prediction'] = cur_model.predict(x)
            cur_eval_metrics = evaluate_classifier(spec, df, verbose=False)
            self.assertEqual(cur_eval_metrics['num_errors'], 0)
def test_int_features_in_pipeline(self):
    import numpy.random as rn
    import pandas as pd

    rn.seed(0)

    # Each row is a sparse mapping from a random integer feature id to 1.
    x_train_dict = [
        dict((rn.randint(100), 1) for i in range(20)) for j in range(100)
    ]
    y_train = [0, 1] * 50

    from sklearn.pipeline import Pipeline
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.linear_model import LogisticRegression

    pl = Pipeline([("dv", DictVectorizer()), ("lm", LogisticRegression())])
    pl.fit(x_train_dict, y_train)

    import coremltools

    model = coremltools.converters.sklearn.convert(
        pl, input_features="features", output_feature_names="target")

    if _is_macos() and _macos_version() >= (10, 13):
        x = pd.DataFrame({
            "features": x_train_dict,
            "prediction": pl.predict(x_train_dict)
        })
        cur_eval_metrics = evaluate_classifier(model, x)
        self.assertEqual(cur_eval_metrics["num_errors"], 0)
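# For intuition about the pipeline above: DictVectorizer maps each integer
# feature id used as a dict key to its own column. A tiny standalone
# illustration (the feature ids and rows are made up for this example):
def _example_dict_vectorizer_sketch():
    from sklearn.feature_extraction import DictVectorizer
    dv = DictVectorizer(sparse=False)
    X = dv.fit_transform([{3: 1, 17: 1}, {17: 1, 42: 1}])
    # dv.feature_names_ is [3, 17, 42]; each row marks which ids were present.
    return dv.feature_names_, X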
def test_multi_class_without_probability(self):
    # Generate some random data.
    # This unit test should not rely on scikit learn for test data.
    x, y = [], []
    for _ in range(50):
        x.append([
            random.gauss(200, 30),
            random.gauss(-100, 22),
            random.gauss(100, 42)
        ])
        y.append(random.choice([1, 2, 10, 12]))
    # Pin the first four samples to distinct labels so every class is seen,
    # in a fixed order, regardless of the random draws above.
    y[0], y[1], y[2], y[3] = 1, 2, 10, 12
    column_names = ['x1', 'x2', 'x3']
    prob = svmutil.svm_problem(y, x)

    df = pd.DataFrame(x, columns=column_names)

    for param1 in self.non_kernel_parameters:
        for param2 in self.kernel_parameters:
            param_str = ' '.join([self.base_param, param1, param2])
            param = svm_parameter(param_str)
            model = svm_train(prob, param)

            # Get predictions (no probability estimates)
            (df['prediction'], _, _) = svm_predict(y, x, model, ' -q')

            spec = libsvm.convert(model, column_names, 'target')
            metrics = evaluate_classifier(spec, df, verbose=False)
            self.assertEqual(metrics['num_errors'], 0)
def _conversion_and_evaluation_helper_for_linear_svc(self, class_labels):
    ARGS = [
        {},
        {"C": 0.75, "loss": "hinge"},
        {"penalty": "l1", "dual": False},
        {"tol": 0.001, "fit_intercept": False},
        {"intercept_scaling": 1.5},
    ]
    x, y = GlmCassifierTest._generate_random_data(class_labels)
    column_names = ["x1", "x2"]
    df = pd.DataFrame(x, columns=column_names)

    for cur_args in ARGS:
        print(class_labels, cur_args)
        cur_model = LinearSVC(**cur_args)
        cur_model.fit(x, y)

        spec = convert(
            cur_model, input_features=column_names, output_feature_names="target"
        )

        if _is_macos() and _macos_version() >= (10, 13):
            df["prediction"] = cur_model.predict(x)
            cur_eval_metrics = evaluate_classifier(spec, df, verbose=False)
            self.assertEqual(cur_eval_metrics["num_errors"], 0)
def _evaluation_test_helper(self, class_labels, use_probability_estimates,
                            allow_slow, allowed_prob_delta=0.00001):
    # Parameters to test
    kernel_parameters = [
        {},
        {'kernel': 'rbf', 'gamma': 1.2},
        {'kernel': 'linear'},
        {'kernel': 'poly'},
        {'kernel': 'poly', 'degree': 2},
        {'kernel': 'poly', 'gamma': 0.75},
        {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0': 2},
        {'kernel': 'sigmoid'},
        {'kernel': 'sigmoid', 'gamma': 1.3},
        {'kernel': 'sigmoid', 'coef0': 0.8},
        {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5}
    ]
    non_kernel_parameters = [
        {},
        {'C': 1},
        {'C': 1.5, 'shrinking': True},
        {'C': 0.5, 'shrinking': False}
    ]

    # Generate some random data
    x, y = [], []
    random.seed(42)
    for _ in range(50):
        x.append([random.gauss(200, 30), random.gauss(-100, 22),
                  random.gauss(100, 42)])
        y.append(random.choice(class_labels))
    column_names = ['x1', 'x2', 'x3']
    # make sure first label is seen first, second is seen second, and so on.
    for i, val in enumerate(class_labels):
        y[i] = val
    df = pd.DataFrame(x, columns=column_names)

    # Test
    for param1 in non_kernel_parameters:
        for param2 in kernel_parameters:
            cur_params = param1.copy()
            cur_params.update(param2)
            cur_params['probability'] = use_probability_estimates
            cur_params['max_iter'] = 10  # Don't want test to take too long
            print("cur_params=" + str(cur_params))

            cur_model = SVC(**cur_params)
            cur_model.fit(x, y)

            spec = scikit_converter.convert(cur_model, column_names, 'target')

            if macos_version() >= (10, 13):
                if use_probability_estimates:
                    probability_lists = cur_model.predict_proba(x)
                    df['classProbability'] = [
                        dict(zip(cur_model.classes_, cur_vals))
                        for cur_vals in probability_lists
                    ]
                    metrics = evaluate_classifier_with_probabilities(
                        spec, df, probabilities='classProbability', verbose=True)
                    self.assertEqual(metrics['num_key_mismatch'], 0)
                    self.assertLess(metrics['max_probability_error'],
                                    allowed_prob_delta)
                else:
                    df['prediction'] = cur_model.predict(x)
                    metrics = evaluate_classifier(spec, df, verbose=False)
                    self.assertEqual(metrics['num_errors'], 0)

            if not allow_slow:
                break

        if not allow_slow:
            break
def _train_convert_evaluate(self, **scikit_params):
    scikit_model = DecisionTreeClassifier(random_state=1, **scikit_params)
    scikit_model.fit(self.X, self.target)

    # Convert the model
    spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

    # Get predictions
    df = pd.DataFrame(self.X, columns=self.feature_names)
    df['prediction'] = scikit_model.predict(self.X)

    # Evaluate it
    metrics = evaluate_classifier(spec, df)
    return metrics
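# A hypothetical caller of the helper above (the kwargs are illustrative; any
# DecisionTreeClassifier parameters could be passed through the same way):
def _example_caller_sketch(self):
    metrics = self._train_convert_evaluate(max_depth=2)
    # evaluate_classifier counts prediction mismatches between the converted
    # CoreML model and the original scikit-learn model.
    self.assertEqual(metrics["num_errors"], 0)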
def _train_convert_evaluate_assert(self, **scikit_params):
    scikit_model = RandomForestClassifier(random_state=1, **scikit_params)
    scikit_model.fit(self.X, self.target)

    # Convert the model
    spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

    if _is_macos() and _macos_version() >= (10, 13):
        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df["prediction"] = scikit_model.predict(self.X)

        # Evaluate it
        metrics = evaluate_classifier(spec, df, verbose=False)
        self._check_metrics(metrics, scikit_params)
def _train_convert_evaluate(self, **scikit_params):
    """
    Train a scikit-learn model, convert it and then evaluate it with CoreML
    """
    scikit_model = GradientBoostingClassifier(random_state=1, **scikit_params)
    scikit_model.fit(self.X, self.target)

    # Convert the model
    spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

    # Get predictions
    df = pd.DataFrame(self.X, columns=self.feature_names)
    df['prediction'] = scikit_model.predict(self.X)

    # Evaluate it
    metrics = evaluate_classifier(spec, df)
    return metrics
def _train_convert_evaluate_assert(self, **xgboost_params):
    """
    Train an xgboost model, convert it and then evaluate it with CoreML
    """
    xgb_model = xgboost.XGBClassifier(**xgboost_params)
    xgb_model.fit(self.X, self.target)

    # Convert the model
    spec = xgb_converter.convert(xgb_model, self.feature_names, self.output_name,
                                 mode="classifier")

    if macos_version() >= (10, 13):
        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df['prediction'] = xgb_model.predict(self.X)

        # Evaluate it
        metrics = evaluate_classifier(spec, df)
        self._check_metrics(metrics)
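# A self-contained sketch of the xgboost path above, outside the test harness.
# The data, parameter values, and seed are illustrative assumptions:
def _example_xgboost_convert_sketch():
    import numpy as np
    import pandas as pd
    import xgboost
    from coremltools.converters import xgboost as xgb_converter
    from coremltools.models.utils import evaluate_classifier

    np.random.seed(1)
    X = np.random.rand(80, 3)
    y = np.random.randint(0, 2, 80)
    feature_names = ["x1", "x2", "x3"]

    model = xgboost.XGBClassifier(n_estimators=10)
    model.fit(X, y)
    spec = xgb_converter.convert(model, feature_names, "target", mode="classifier")

    # Compare CoreML predictions against xgboost's own (macOS >= 10.13 only).
    df = pd.DataFrame(X, columns=feature_names)
    df["prediction"] = model.predict(X)
    return evaluate_classifier(spec, df)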
def _evaluation_test_helper_no_probability(self, labels, allow_slow):
    # Generate some random data.
    # This unit test should not rely on scikit learn for test data.
    x, y = [], []
    random.seed(42)
    for _ in range(50):
        x.append([
            random.gauss(200, 30),
            random.gauss(-100, 22),
            random.gauss(100, 42)
        ])
        y.append(random.choice(labels))
    # make sure first label is seen first, second is seen second, and so on.
    for i, val in enumerate(labels):
        y[i] = val
    column_names = ["x1", "x2", "x3"]

    prob = svmutil.svm_problem(y, x)
    df = pd.DataFrame(x, columns=column_names)

    for param1 in self.non_kernel_parameters:
        for param2 in self.kernel_parameters:
            param_str = " ".join([self.base_param, param1, param2])
            print("PARAMS: ", param_str)
            param = svm_parameter(param_str)

            model = svm_train(prob, param)

            # Get predictions (no probability estimates)
            (df["prediction"], _, _) = svm_predict(y, x, model, " -q")
            spec = libsvm.convert(model, column_names, "target")

            if _is_macos() and _macos_version() >= (10, 13):
                metrics = evaluate_classifier(spec, df, verbose=False)
                self.assertEqual(metrics["num_errors"], 0)

            if not allow_slow:
                break

        if not allow_slow:
            break
def _train_convert_evaluate_assert(self, **scikit_params):
    """
    Train a scikit-learn model, convert it and then evaluate it with CoreML
    """
    scikit_model = GradientBoostingClassifier(random_state=1, **scikit_params)
    scikit_model.fit(self.X, self.target)

    # Convert the model
    spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

    if _is_macos() and _macos_version() >= (10, 13):
        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df["prediction"] = scikit_model.predict(self.X)

        # Evaluate it
        metrics = evaluate_classifier(spec, df)
        self._check_metrics(metrics)
def _evaluation_test_helper(
    self,
    class_labels,
    use_probability_estimates,
    allow_slow,
    allowed_prob_delta=0.00001,
):
    # Parameters to test
    kernel_parameters = [
        {},
        {"kernel": "rbf", "gamma": 1.2},
        {"kernel": "linear"},
        {"kernel": "poly"},
        {"kernel": "poly", "degree": 2},
        {"kernel": "poly", "gamma": 0.75},
        {"kernel": "poly", "degree": 0, "gamma": 0.9, "coef0": 2},
        {"kernel": "sigmoid"},
        {"kernel": "sigmoid", "gamma": 1.3},
        {"kernel": "sigmoid", "coef0": 0.8},
        {"kernel": "sigmoid", "coef0": 0.8, "gamma": 0.5},
    ]
    non_kernel_parameters = [
        {},
        {"C": 1},
        {"C": 1.5, "shrinking": True},
        {"C": 0.5, "shrinking": False},
    ]

    # Generate some random data
    x, y = [], []
    random.seed(42)
    for _ in range(50):
        x.append([
            random.gauss(200, 30),
            random.gauss(-100, 22),
            random.gauss(100, 42)
        ])
        y.append(random.choice(class_labels))
    column_names = ["x1", "x2", "x3"]
    # make sure first label is seen first, second is seen second, and so on.
    for i, val in enumerate(class_labels):
        y[i] = val
    df = pd.DataFrame(x, columns=column_names)

    # Test
    for param1 in non_kernel_parameters:
        for param2 in kernel_parameters:
            cur_params = param1.copy()
            cur_params.update(param2)
            cur_params["probability"] = use_probability_estimates
            cur_params["max_iter"] = 10  # Don't want test to take too long
            print("cur_params=" + str(cur_params))

            cur_model = SVC(**cur_params)
            cur_model.fit(x, y)

            spec = scikit_converter.convert(cur_model, column_names, "target")

            if _is_macos() and _macos_version() >= (10, 13):
                if use_probability_estimates:
                    probability_lists = cur_model.predict_proba(x)
                    df["classProbability"] = [
                        dict(zip(cur_model.classes_, cur_vals))
                        for cur_vals in probability_lists
                    ]
                    metrics = evaluate_classifier_with_probabilities(
                        spec, df, probabilities="classProbability", verbose=True)
                    self.assertEqual(metrics["num_key_mismatch"], 0)
                    self.assertLess(metrics["max_probability_error"],
                                    allowed_prob_delta)
                else:
                    df["prediction"] = cur_model.predict(x)
                    metrics = evaluate_classifier(spec, df, verbose=False)
                    self.assertEqual(metrics["num_errors"], 0)

            if not allow_slow:
                break

        if not allow_slow:
            break
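# The classProbability column above pairs each class label with its predicted
# probability, which is the dictionary format that
# evaluate_classifier_with_probabilities expects. A toy illustration (the
# labels and probabilities are made up):
def _example_class_probability_sketch():
    import numpy as np
    classes = np.array([1, 2, 10])
    proba_row = np.array([0.1, 0.7, 0.2])
    # -> {1: 0.1, 2: 0.7, 10: 0.2}
    return dict(zip(classes, proba_row))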