def test_default_names(self):
    """Check default output names of the converted spec and verify that the
    Core ML model's predictions/probabilities match libsvm's own output.
    """
    df = pd.DataFrame({'input': self.x})

    # Test with probabilities
    spec = libsvm.convert(self.libsvm_model).get_spec()
    (_, _, probability_lists) = svm_predict(
        self.y, self.x, self.libsvm_model, '-b 1 -q')
    probability_dicts = [
        dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
    ]
    df['classProbability'] = probability_dicts
    metrics = evaluate_classifier_with_probabilities(
        spec, df, verbose=False, probabilities='classProbability')
    self.assertLess(metrics['max_probability_error'], 0.00001)

    # Test model without probabilities
    no_probability_model = svmutil.svm_train(self.prob, svmutil.svm_parameter())
    spec = libsvm.convert(no_probability_model).get_spec()
    self.assertEqual(len(spec.description.output), 1)
    self.assertEqual(spec.description.output[0].name, u'target')
    (df['prediction'], _, _) = svm_predict(
        self.y, self.x, no_probability_model, ' -q')
    metrics = evaluate_classifier(spec, df, verbose=False)
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(metrics['num_errors'], 0)
def test_default_names(self):
    """Check default output names of the converted spec; on macOS >= 10.13
    also verify prediction/probability parity with libsvm.
    """
    df = pd.DataFrame({"input": self.x})
    df["input"] = df["input"].apply(np.array)

    # Test with probabilities
    spec = libsvm.convert(self.libsvm_model).get_spec()
    if _is_macos() and _macos_version() >= (10, 13):
        (_, _, probability_lists) = svm_predict(
            self.y, self.x, self.libsvm_model, "-b 1 -q")
        probability_dicts = [
            dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
        ]
        df["classProbability"] = probability_dicts
        metrics = evaluate_classifier_with_probabilities(
            spec, df, verbose=False, probabilities="classProbability")
        self.assertLess(metrics["max_probability_error"], 0.00001)

    # Test model without probabilities
    no_probability_model = svmutil.svm_train(self.prob, svmutil.svm_parameter())
    spec = libsvm.convert(no_probability_model).get_spec()
    self.assertEqual(len(spec.description.output), 1)
    self.assertEqual(spec.description.output[0].name, u"target")
    if _is_macos() and _macos_version() >= (10, 13):
        (df["prediction"], _, _) = svm_predict(
            self.y, self.x, no_probability_model, " -q")
        metrics = evaluate_classifier(spec, df, verbose=False)
        # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
        self.assertEqual(metrics["num_errors"], 0)
def _evaluation_test_helper_with_probability(self, labels, allow_slow):
    """Train probability-enabled SVMs across the parameter grid and compare
    the converted model's class probabilities against libsvm's predictions.

    :param labels: class labels; each is forced to appear (in order) at the
        start of y so libsvm sees them in a deterministic order.
    :param allow_slow: when False, only the first parameter combination runs.
    """
    import copy
    df = pd.DataFrame(self.x, columns=self.column_names)
    y = copy.copy(self.y)
    # Make sure each label is seen, in order, at the start of y.
    for i, val in enumerate(labels):
        y[i] = val
    probability_param = '-b 1'
    for param1 in self.non_kernel_parameters:
        for param2 in self.kernel_parameters:
            param_str = ' '.join(
                [self.base_param, param1, param2, probability_param])
            param = svm_parameter(param_str)
            model = svm_train(self.prob, param)

            # Get predictions with probabilities as dictionaries
            (df['prediction'], _, probability_lists) = svm_predict(
                y, self.x, model, probability_param + ' -q')
            probability_dicts = [
                dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
            ]
            df['probabilities'] = probability_dicts

            spec = libsvm.convert(
                model, self.column_names, 'target', 'probabilities')

            if macos_version() >= (10, 13):
                metrics = evaluate_classifier_with_probabilities(
                    spec, df, verbose=False)
                # assertEquals is a deprecated alias; use assertEqual.
                self.assertEqual(metrics['num_key_mismatch'], 0)
                self.assertLess(metrics['max_probability_error'], 0.00001)

            if not allow_slow:
                break
        if not allow_slow:
            break
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    # Nothing to do if either dependency is missing.
    if not HAS_SKLEARN or not HAS_LIBSVM:
        return

    scikit_data = load_boston()
    # Binarize the regression target around its mean to get a 2-class problem.
    prob = svmutil.svm_problem(
        scikit_data['target'] > scikit_data['target'].mean(),
        scikit_data['data'].tolist())

    param = svmutil.svm_parameter()
    param.svm_type = svmutil.C_SVC
    param.kernel_type = svmutil.LINEAR
    param.eps = 1

    libsvm_model = svmutil.svm_train(prob, param)
    libsvm_spec = libsvm_converter.convert(
        libsvm_model, scikit_data.feature_names, 'target').get_spec()

    # Save the data and the model
    self.scikit_data = scikit_data
    self.libsvm_spec = libsvm_spec
def test_multi_class_without_probability(self):
    """Multi-class SVC without probability output: converted model predictions
    must exactly match libsvm's predictions across the parameter grid.
    """
    # Generate some random data.
    # This unit test should not rely on scikit learn for test data.
    x, y = [], []
    for _ in range(50):
        x.append([
            random.gauss(200, 30),
            random.gauss(-100, 22),
            random.gauss(100, 42)
        ])
        y.append(random.choice([1, 2, 10, 12]))
    # Force each class label to be seen, in order, at the start of y.
    y[0], y[1], y[2], y[3] = 1, 2, 10, 12
    column_names = ['x1', 'x2', 'x3']
    prob = svmutil.svm_problem(y, x)
    df = pd.DataFrame(x, columns=column_names)

    for param1 in self.non_kernel_parameters:
        for param2 in self.kernel_parameters:
            param_str = ' '.join([self.base_param, param1, param2])
            param = svm_parameter(param_str)
            model = svm_train(prob, param)

            # Get predictions with probabilities as dictionaries
            (df['prediction'], _, _) = svm_predict(y, x, model, ' -q')
            spec = libsvm.convert(model, column_names, 'target')
            metrics = evaluate_classifier(spec, df, verbose=False)
            # assertEquals is a deprecated alias; use assertEqual.
            self.assertEqual(metrics['num_errors'], 0)
def _test_prob_model(self, param1, param2):
    """Train a probability-enabled SVM with the given parameter strings and
    validate the converted model's class probabilities against libsvm's.

    :param param1: non-kernel libsvm option string (e.g. '-c 1.5').
    :param param2: kernel libsvm option string (e.g. '-t 2 -g 1.2').
    """
    probability_param = '-b 1'
    df = self.df
    param_str = ' '.join(
        [self.base_param, param1, param2, probability_param])
    param = svm_parameter(param_str)
    model = svm_train(self.prob, param)

    # Get predictions with probabilities as dictionaries
    (df['prediction'], _, probability_lists) = svm_predict(
        self.y, self.x, model, probability_param + ' -q')
    probability_dicts = [
        dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
    ]
    df['probabilities'] = probability_dicts

    spec = libsvm.convert(model, self.column_names, 'target', 'probabilities')

    if macos_version() >= (10, 13):
        metrics = evaluate_classifier_with_probabilities(
            spec, df, verbose=False)
        # assertEquals is a deprecated alias; use assertEqual.
        self.assertEqual(metrics['num_key_mismatch'], 0)
        self.assertLess(metrics['max_probability_error'], 0.00001)
def _test_evaluation(self, allow_slow):
    """
    Test that the same predictions are made
    """
    from svm import svm_parameter, svm_problem
    from svmutil import svm_train, svm_predict

    # Generate some smallish (poly kernels take too long on anything else) random data
    features, targets = [], []
    for _ in range(50):
        f1 = random.gauss(2, 3)
        f2 = random.gauss(-1, 2)
        features.append([f1, f2])
        targets.append(1 + 2 * f1 + 3 * f2)

    input_names = ["x1", "x2"]
    df = pd.DataFrame(features, columns=input_names)
    prob = svm_problem(targets, features)

    # Parameters
    base_param = "-s 3"  # model type is epsilon SVR
    non_kernel_parameters = ["", "-c 1.5 -p 0.5 -h 1", "-c 0.5 -p 0.5 -h 0"]
    kernel_parameters = [
        "",
        "-t 2 -g 1.2",  # rbf kernel
        "-t 0",  # linear kernel
        "-t 1",
        "-t 1 -d 2",
        "-t 1 -g 0.75",
        "-t 1 -d 0 -g 0.9 -r 2",  # poly kernel
        "-t 3",
        "-t 3 -g 1.3",
        "-t 3 -r 0.8",
        "-t 3 -r 0.8 -g 0.5",  # sigmoid kernel
    ]

    for extra_opts in non_kernel_parameters:
        for kernel_opts in kernel_parameters:
            param_str = " ".join([base_param, extra_opts, kernel_opts])
            print(param_str)
            param = svm_parameter(param_str)
            model = svm_train(prob, param)
            (df["prediction"], _, _) = svm_predict(targets, features, model)

            spec = libsvm.convert(
                model, input_names=input_names, target_name="target")

            if _is_macos() and _macos_version() >= (10, 13):
                metrics = evaluate_regressor(spec, df)
                self.assertAlmostEqual(metrics["max_error"], 0)

            if not allow_slow:
                break
        if not allow_slow:
            break
def _test_evaluation(self, allow_slow):
    """
    Test that the same predictions are made

    Trains epsilon-SVR models across the kernel/non-kernel parameter grid
    and checks the converted Core ML model reproduces libsvm's predictions.
    """
    from svm import svm_parameter, svm_problem
    from svmutil import svm_train, svm_predict

    # Generate some smallish (poly kernels take too long on anything else) random data
    x, y = [], []
    for _ in range(50):
        cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
        x.append([cur_x1, cur_x2])
        y.append(1 + 2 * cur_x1 + 3 * cur_x2)

    input_names = ['x1', 'x2']
    df = pd.DataFrame(x, columns=input_names)
    prob = svm_problem(y, x)

    # Parameters
    base_param = '-s 3'  # model type is epsilon SVR
    non_kernel_parameters = ['', '-c 1.5 -p 0.5 -h 1', '-c 0.5 -p 0.5 -h 0']
    kernel_parameters = [
        '',
        '-t 2 -g 1.2',  # rbf kernel
        '-t 0',  # linear kernel
        '-t 1',
        '-t 1 -d 2',
        '-t 1 -g 0.75',
        '-t 1 -d 0 -g 0.9 -r 2',  # poly kernel
        '-t 3',
        '-t 3 -g 1.3',
        '-t 3 -r 0.8',
        '-t 3 -r 0.8 -g 0.5'  # sigmoid kernel
    ]

    for param1 in non_kernel_parameters:
        for param2 in kernel_parameters:
            param_str = ' '.join([base_param, param1, param2])
            print(param_str)
            param = svm_parameter(param_str)
            model = svm_train(prob, param)
            (df['prediction'], _, _) = svm_predict(y, x, model)

            spec = libsvm.convert(
                model, input_names=input_names, target_name='target')

            metrics = evaluate_regressor(spec, df)
            # assertAlmostEquals is a deprecated alias; use assertAlmostEqual.
            self.assertAlmostEqual(metrics['max_error'], 0)

            if not allow_slow:
                break
        if not allow_slow:
            break
def _evaluation_test_helper_no_probability(self, labels, allow_slow):
    """Train classifiers (no probability output) across the parameter grid
    and check the converted model reproduces libsvm's predictions exactly.

    :param labels: class labels; each is forced to appear (in order) at the
        start of y so libsvm sees labels in a deterministic order.
    :param allow_slow: when False, only the first parameter combination runs.
    """
    # Generate some random data.
    # This unit test should not rely on scikit learn for test data.
    x, y = [], []
    random.seed(42)
    for _ in range(50):
        x.append([
            random.gauss(200, 30),
            random.gauss(-100, 22),
            random.gauss(100, 42)
        ])
        y.append(random.choice(labels))
    # make sure first label is seen first, second is seen second, and so on.
    for i, val in enumerate(labels):
        y[i] = val
    column_names = ["x1", "x2", "x3"]
    prob = svmutil.svm_problem(y, x)
    df = pd.DataFrame(x, columns=column_names)

    for param1 in self.non_kernel_parameters:
        for param2 in self.kernel_parameters:
            param_str = " ".join([self.base_param, param1, param2])
            print("PARAMS: ", param_str)
            param = svm_parameter(param_str)
            model = svm_train(prob, param)

            # Get predictions with probabilities as dictionaries
            (df["prediction"], _, _) = svm_predict(y, x, model, " -q")
            spec = libsvm.convert(model, column_names, "target")

            if _is_macos() and _macos_version() >= (10, 13):
                metrics = evaluate_classifier(spec, df, verbose=False)
                # assertEquals is a deprecated alias; use assertEqual.
                self.assertEqual(metrics["num_errors"], 0)

            if not allow_slow:
                break
        if not allow_slow:
            break
def _test_prob_model(self, param1, param2):
    """Fit a probability-enabled SVM for the given option strings and, on
    macOS >= 10.13, check the converted model's class probabilities match
    libsvm's output."""
    probability_param = "-b 1"
    df = self.df
    options = " ".join([self.base_param, param1, param2, probability_param])
    model = svm_train(self.prob, svmutil.svm_parameter(options))

    # libsvm returns one probability list per sample; key them by class label.
    predictions, _, prob_rows = svm_predict(
        self.y, self.x, model, probability_param + " -q"
    )
    df["prediction"] = predictions
    df["probabilities"] = [dict(zip([1, 2], row)) for row in prob_rows]

    spec = libsvm.convert(model, self.column_names, "target", "probabilities")

    if _is_macos() and _macos_version() >= (10, 13):
        metrics = evaluate_classifier_with_probabilities(spec, df, verbose=False)
        self.assertEqual(metrics["num_key_mismatch"], 0)
        self.assertLess(metrics["max_probability_error"], 0.00001)
def test_input_names(self):
    """Check converter behavior for default, extra, and invalid input names
    and input lengths."""
    data = load_boston()
    df = pd.DataFrame({'input': data['data'].tolist()})

    # Default values
    spec = libsvm.convert(self.libsvm_model)
    (df['prediction'], _, _) = svmutil.svm_predict(
        data['target'], data['data'].tolist(), self.libsvm_model)
    metrics = evaluate_regressor(spec, df)
    # assertAlmostEquals is a deprecated alias; use assertAlmostEqual.
    self.assertAlmostEqual(metrics['max_error'], 0)

    # One extra parameter. This is legal/possible.
    num_inputs = len(data['data'][0])
    spec = libsvm.convert(self.libsvm_model, input_length=num_inputs + 1)

    # Not enough input names.
    input_names = ['this', 'is', 'not', 'enought', 'names']
    with self.assertRaises(ValueError):
        libsvm.convert(self.libsvm_model, input_names=input_names)
    with self.assertRaises(ValueError):
        libsvm.convert(self.libsvm_model, input_length=num_inputs - 1)
def test_input_names(self):
    """Exercise the converter's input-name/input-length handling: default
    names, one extra input (legal), too few names, and too-short length
    (both must raise ValueError)."""
    data = load_boston()
    df = pd.DataFrame({"input": data["data"].tolist()})
    df["input"] = df["input"].apply(np.array)

    # Default values
    spec = libsvm.convert(self.libsvm_model)
    if _is_macos() and _macos_version() >= (10, 13):
        predictions, _, _ = svmutil.svm_predict(
            data["target"], data["data"].tolist(), self.libsvm_model)
        df["prediction"] = predictions
        metrics = evaluate_regressor(spec, df)
        self.assertAlmostEqual(metrics["max_error"], 0)

    # One extra parameters. This is legal/possible.
    num_inputs = len(data["data"][0])
    spec = libsvm.convert(self.libsvm_model, input_length=num_inputs + 1)

    # Not enought input names.
    input_names = ["this", "is", "not", "enought", "names"]
    with self.assertRaises(ValueError):
        libsvm.convert(self.libsvm_model, input_names=input_names)
    with self.assertRaises(ValueError):
        libsvm.convert(self.libsvm_model, input_length=num_inputs - 1)
def test_conversion_bad_inputs(self):
    """Passing a non-libsvm model to the converter must raise TypeError."""
    with self.assertRaises(TypeError):
        libsvm.convert(OneHotEncoder(), 'data', 'out')
def test_conversion_from_filesystem(self):
    """Save the trained model to disk and convert it from the saved file."""
    import os
    # tempfile.mktemp() is deprecated and race-prone; mkstemp() creates the
    # file atomically. Close the descriptor so libsvm can write the path.
    fd, libsvm_model_path = tempfile.mkstemp(suffix='model.libsvm')
    os.close(fd)
    try:
        svmutil.svm_save_model(libsvm_model_path, self.libsvm_model)
        spec = libsvm.convert(libsvm_model_path, 'data', 'target')
        self.assertIsNotNone(spec)
    finally:
        # Don't leak the temp file between test runs.
        os.remove(libsvm_model_path)
def test_conversion(self):
    """Smoke test: converting the trained model must not raise."""
    _ = libsvm.convert(self.libsvm_model, 'data', 'target')
def test_conversion_from_filesystem(self):
    """Save the trained model to disk and convert it from the saved path."""
    import os
    # tempfile.mktemp() is deprecated and race-prone; mkstemp() creates the
    # file atomically. Close the descriptor so libsvm can write the path.
    fd, libsvm_model_path = tempfile.mkstemp(suffix='model.libsvm')
    os.close(fd)
    try:
        svmutil.svm_save_model(libsvm_model_path, self.libsvm_model)
        # libsvm's save(...) truncates floating points. So it's not going to
        # match self.libsvm_model any more.
        spec = libsvm.convert(libsvm_model_path, self.column_names, 'target')
        self.assertIsNotNone(spec)
    finally:
        # Don't leak the temp file between test runs.
        os.remove(libsvm_model_path)
def test_conversion(self):
    """Smoke test: converting the trained model must not raise."""
    _ = libsvm.convert(self.libsvm_model, "data", "target")
def test_conversion_from_filesystem(self):
    """Save the trained model to disk and convert it from the saved file."""
    import os
    # tempfile.mktemp() is deprecated and race-prone; mkstemp() creates the
    # file atomically. Close the descriptor so libsvm can write the path.
    fd, libsvm_model_path = tempfile.mkstemp(suffix="model.libsvm")
    os.close(fd)
    try:
        svmutil.svm_save_model(libsvm_model_path, self.libsvm_model)
        spec = libsvm.convert(
            libsvm_model_path, input_names="data", target_name="target")
        self.assertIsNotNone(spec)
    finally:
        # Don't leak the temp file between test runs.
        os.remove(libsvm_model_path)