def test_convert_nusvmc(self):
    """Convert a two-class libsvm model of type ``NuSVC`` and dump the result.

    The model is trained with an RBF kernel and ``probability = 1`` on the
    first two iris feature columns, after relabelling class 2 as class 1.
    The test asserts that ``convert`` returns something other than ``None``
    and then hands the first five samples to ``dump_data_and_model``.
    """
    dataset = load_iris()
    features = dataset.data[:, :2]
    labels = dataset.target
    # Fold class 2 into class 1 so only the labels 0 and 1 remain.
    labels[labels == 2] = 1
    problem = svmutil.svm_problem(labels, features.tolist())

    settings = svmutil.svm_parameter()
    settings.svm_type = NuSVC
    settings.kernel_type = svmutil.RBF
    settings.eps = 1
    settings.probability = 1
    if noprint:
        # Install the module-level ``noprint`` callback as libsvm's print hook.
        settings.print_func = noprint
    trained = svmutil.svm_train(problem, settings)

    input_spec = [('input', FloatTensorType(shape=['None', 'None']))]
    node = convert(trained, "LibSvmNuSvmc", input_spec)
    self.assertTrue(node is not None)

    sample = features[:5].astype(numpy.float32)
    dump_data_and_model(
        sample,
        SkAPIClProba2(trained),
        node,
        basename="LibSvmNuSvmc-Dec2",
        allow_failure="StrictVersion(onnxruntime.__version__) <= StrictVersion('0.1.3')")
def setUpClass(self):
    """
    Train a linear C-SVC on the Boston data and keep its converted spec.

    Does nothing when either scikit-learn or libsvm is unavailable. The
    targets are binarised as "above the mean target value". The loaded
    dataset is stored as ``scikit_data`` and the spec obtained from
    ``libsvm_converter.convert(...).get_spec()`` as ``libsvm_spec``.
    """
    if not (_HAS_SKLEARN and _HAS_LIBSVM):
        return

    boston = load_boston()
    # Boolean labels: True where the target exceeds the mean target.
    above_mean = boston["target"] > boston["target"].mean()
    problem = svmutil.svm_problem(above_mean, boston["data"].tolist())

    settings = svmutil.svm_parameter()
    settings.svm_type = svmutil.C_SVC
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1
    trained = svmutil.svm_train(problem, settings)

    converted = libsvm_converter.convert(trained, boston.feature_names, "target")
    spec = converted.get_spec()

    # Save the data and the model
    self.scikit_data = boston
    self.libsvm_spec = spec
def test_convert_svmc_raw(self):
    """Convert a two-class libsvm model of type ``SVC`` trained without probabilities.

    The model uses an RBF kernel and ``probability = 0`` on the first two
    iris feature columns, with class 2 relabelled as class 1. The test
    asserts that ``convert`` returns something other than ``None`` and then
    hands the first five samples to ``dump_data_and_model``.
    """
    dataset = load_iris()
    features = dataset.data[:, :2]
    labels = dataset.target
    # Fold class 2 into class 1 so only the labels 0 and 1 remain.
    labels[labels == 2] = 1
    problem = svmutil.svm_problem(labels, features.tolist())

    settings = svmutil.svm_parameter()
    settings.svm_type = SVC
    settings.kernel_type = svmutil.RBF
    settings.eps = 1
    settings.probability = 0
    if noprint:
        # Install the module-level ``noprint`` callback as libsvm's print hook.
        settings.print_func = noprint
    trained = svmutil.svm_train(problem, settings)

    # known svm runtime dimension error in ONNX Runtime
    input_spec = [('input', FloatTensorType(shape=['None', 'None']))]
    node = convert(trained, "LibSvmSvmcRaw", input_spec)
    self.assertTrue(node is not None)

    sample = features[:5].astype(numpy.float32)
    dump_data_and_model(
        sample,
        SkAPICl(trained),
        node,
        basename="LibSvmSvmcRaw",
        allow_failure="StrictVersion(onnxruntime.__version__) < StrictVersion('0.5.0')")
def test_convert_svmc_linear_raw_multi(self):
    """Convert a multi-class linear libsvm model of type ``SVC`` without probabilities.

    The last five iris samples are relabelled as class 3 before training on
    the first two feature columns. Eight rows taken from four positions of
    the data set are passed to ``dump_data_and_model``.
    """
    dataset = load_iris()
    features = dataset.data[:, :2]
    labels = dataset.target
    # Give the final five samples an extra label.
    labels[-5:] = 3
    problem = svmutil.svm_problem(labels, features.tolist())

    settings = svmutil.svm_parameter()
    settings.svm_type = SVC
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1
    settings.probability = 0
    if noprint:
        # Install the module-level ``noprint`` callback as libsvm's print hook.
        settings.print_func = noprint
    trained = svmutil.svm_train(problem, settings)

    input_spec = [('input', FloatTensorType(shape=['None', 2]))]
    node = convert(trained, "LibSvmNuSvmcMultiRaw", input_spec)
    self.assertTrue(node is not None)

    # Two rows from each of four regions of the data set.
    row_ranges = (slice(0, 2), slice(60, 62), slice(110, 112), slice(147, 149))
    X2 = numpy.vstack([features[rows] for rows in row_ranges])
    # 5x0, 5x1
    dump_data_and_model(
        X2.astype(numpy.float32),
        SkAPICl(trained),
        node,
        basename="LibSvmSvmcRaw-Dec3",
        verbose=False,
        allow_failure="StrictVersion(onnxruntime.__version__) <= StrictVersion('0.1.3')")
def test_multi_class_without_probability(self):
    """Check converted four-class models for every parameter combination.

    For each pairing of ``self.non_kernel_parameters`` and
    ``self.kernel_parameters`` a model is trained, its libsvm predictions
    are stored in the ``"prediction"`` column, and the converted spec must
    produce ``num_errors == 0`` from ``evaluate_classifier``.
    """
    # Generate some random data.
    # This unit test should not rely on scikit learn for test data.
    class_labels = [1, 2, 10, 12]
    gauss_args = ((200, 30), (-100, 22), (100, 42))
    x, y = [], []
    for _ in range(50):
        x.append([random.gauss(mu, sigma) for mu, sigma in gauss_args])
        y.append(random.choice(class_labels))
    # Pin the first four labels so every class occurs, in this order.
    y[:4] = [1, 2, 10, 12]

    column_names = ["x1", "x2", "x3"]
    prob = svmutil.svm_problem(y, x)
    df = pd.DataFrame(x, columns=column_names)

    for param1 in self.non_kernel_parameters:
        for param2 in self.kernel_parameters:
            options = " ".join([self.base_param, param1, param2])
            model = svm_train(prob, svmutil.svm_parameter(options))

            # Keep only the predicted labels (first element of the result).
            predicted, _, _ = svm_predict(y, x, model, " -q")
            df["prediction"] = predicted

            spec = libsvm.convert(model, column_names, "target")
            metrics = evaluate_classifier(spec, df, verbose=False)
            self.assertEqual(metrics["num_errors"], 0)
def setUpClass(self):
    """
    Build a seeded random two-class problem and train a linear NU_SVC on it.

    Stores ``x``, ``y``, ``column_names``, ``prob``, ``libsvm_model`` and
    ``df`` for the tests. Does nothing when libsvm is unavailable.
    """
    if not _HAS_LIBSVM:
        # setUpClass is still called even if class is skipped.
        return

    # Generate some random data.
    # This unit test should not rely on scikit learn for test data.
    random.seed(42)
    samples, labels = [], []
    for _ in range(50):
        samples.append([random.gauss(200, 30), random.gauss(-100, 22)])
        labels.append(random.choice([1, 2]))
    # Make sure 1 is always the first label it sees, followed by 2.
    labels[:2] = [1, 2]

    self.x, self.y = samples, labels
    self.column_names = ["x1", "x2"]
    self.prob = svmutil.svm_problem(self.y, self.x)

    settings = svmutil.svm_parameter()
    settings.svm_type = svmutil.NU_SVC
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1
    settings.probability = 1

    # Save the data and the model
    self.libsvm_model = svmutil.svm_train(self.prob, settings)
    self.df = pd.DataFrame(self.x, columns=self.column_names)
def test_convert_svmc(self):
    """Convert a two-class libsvm model of type ``SVC`` trained with probabilities.

    The model uses an RBF kernel and ``probability = 1`` on the first two
    iris feature columns, with class 2 relabelled as class 1. The test
    asserts that ``convert`` returns something other than ``None`` and then
    hands the first five samples to ``dump_data_and_model``.
    """
    dataset = load_iris()
    features = dataset.data[:, :2]
    labels = dataset.target
    # Fold class 2 into class 1 so only the labels 0 and 1 remain.
    labels[labels == 2] = 1
    problem = svmutil.svm_problem(labels, features.tolist())

    settings = svmutil.svm_parameter()
    settings.svm_type = SVC
    settings.kernel_type = svmutil.RBF
    settings.eps = 1
    settings.probability = 1
    if noprint:
        # Install the module-level ``noprint`` callback as libsvm's print hook.
        settings.print_func = noprint
    trained = svmutil.svm_train(problem, settings)

    input_spec = [('input', FloatTensorType())]
    node = convert(trained, "LibSvmSvmc", input_spec)
    self.assertTrue(node is not None)

    sample = features[:5].astype(numpy.float32)
    dump_data_and_model(sample, SkAPIClProba2(trained), node,
                        basename="LibSvmSvmc-Dec2")
def get_SVM_trained_classifer(self, training_datafile, classifier_dumpfile):
    """Train a linear-kernel libsvm classifier on tweets and save it.

    Reads (text, sentiment) pairs via ``common.get_filtered_training_data``,
    lower-cases the whitespace-separated tokens that pass
    ``common.is_ascii``, and builds feature vectors and labels with
    ``helper.get_SVM_feature_vector_and_labels``. Both are kept on the
    instance as ``feature_vectors`` and ``labels``.

    :param training_datafile: passed to ``common.get_filtered_training_data``.
    :param classifier_dumpfile: file name the trained model is saved to.
    :return: the model returned by ``svm_train``.
    """
    # read all tweets and labels
    labelled_tweets = common.get_filtered_training_data(training_datafile)
    tweets = [
        ([token.lower() for token in text.split() if common.is_ascii(token)],
         sentiment)
        for (text, sentiment) in labelled_tweets
    ]

    extracted = helper.get_SVM_feature_vector_and_labels(self.feature_list, tweets)
    self.feature_vectors = extracted['feature_vector']
    self.labels = extracted['labels']

    # '-q' option suppress console output
    settings = svm_parameter('-q')
    settings.kernel_type = LINEAR
    training_problem = svm_problem(self.labels, self.feature_vectors)

    trained = svm_train(training_problem, settings)
    svm_save_model(classifier_dumpfile, trained)
    return trained
def generate_svm_model(rows, param, kmers=None):
    """Train an epsilon-SVR libsvm model on ``rows``.

    Args:
        rows: input rows, passed to ``libsvm_generate_matrix``.
        param: dictionary mapping a libsvm option letter to its value; each
            entry is appended to the option string as ``-<key> <value>``.
        kmers: list of kmers passed to ``libsvm_generate_matrix``;
            defaults to ``[1, 2, 3]``.

    Return:
        the model returned by ``svmutil.svm_train``.
    """
    # A fresh list per call: a list literal in the signature would be one
    # shared object that any callee could mutate for all later calls.
    if kmers is None:
        kmers = [1, 2, 3]

    y, x = libsvm_generate_matrix(rows, kmers, dense=True)
    prob = svmutil.svm_problem(y, x)

    # -s 3: epsilon-SVR, -b 1: probability estimates on, -q: quiet mode
    param_str = "-s 3 -b 1 -q "
    param_str += " ".join(["-{} {}".format(k, v) for k, v in param.items()])
    params = svmutil.svm_parameter(param_str)

    return svmutil.svm_train(prob, params)
def train(self, input_data_path, params="-t 0 -c 4 -b 1", is_eval=True):
    """Train a libsvm model from ``<input_data_path>/Train.txt``.

    The parsed parameters, the trained model and an initialised flag are
    stored as ``self._params``, ``self._model`` and ``self._init``.

    :param input_data_path: directory prefix; "/Train.txt" is appended to it.
    :param params: libsvm option string handed to ``svmutil.svm_parameter``.
    :param is_eval: only when this is exactly ``True`` the model is run on
        its own training data and the accuracy and MSE are logged.
    """
    with commons.PhaseLogger("LIBSVM.train.read_problem"):
        Y, X = svmutil.svm_read_problem(input_data_path + "/Train.txt")
        prob = svmutil.svm_problem(Y, X)

    self._params = svmutil.svm_parameter(params)

    with commons.PhaseLogger("LIBSVM.train.svm_train"):
        self._model = svmutil.svm_train(prob, self._params)
    self._init = True

    if is_eval is not True:
        return

    # Second element of the prediction result carries (accuracy, mse, ...).
    evaluation = svmutil.svm_predict(Y, X, self._model)[1]
    acc, mse, _ = evaluation
    logging.info("[%s]: train with Acc[%.4f] Mse[%.4f]" % (self._get_class_name(), acc, mse))
def setUpClass(self):
    """
    Train a linear NU_SVR on the Boston data and keep it as ``libsvm_model``.

    Does nothing when either scikit-learn or libsvm is unavailable.
    """
    if not (_HAS_SKLEARN and _HAS_LIBSVM):
        return

    boston = load_boston()
    targets = boston["target"]
    samples = boston["data"].tolist()
    problem = svmutil.svm_problem(targets, samples)

    settings = svmutil.svm_parameter()
    settings.svm_type = svmutil.NU_SVR
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1

    self.libsvm_model = svmutil.svm_train(problem, settings)
def predict(tr_data_arr, tr_label_arr, pred_data_arr, pred_label_arr):
    """Train an SVM on the training arrays and score it on the prediction arrays.

    Both data arrays are converted with ``data_format`` first. Each
    prediction sample is classified on its own and compared, as an integer,
    with its expected label.

    :return: tuple ``(number of labels, number of wrong predictions,
        accuracy in percent)``.
    """
    train_vectors = data_format(tr_data_arr)
    problem = svm_problem(tr_label_arr, train_vectors)
    # The c and g parameters below were obtained through cross-validation.
    settings = svm_parameter('-c 2048.0 -g 0.001953125')
    svm_model = svm_train(problem, settings)

    test_vectors = data_format(pred_data_arr)
    total = len(pred_label_arr)
    wrong = 0
    for idx, vector in enumerate(test_vectors):
        expected = pred_label_arr[idx]
        predicted_labels, _, _ = svm_predict([expected], [vector], svm_model)
        if int(predicted_labels[0]) != int(expected):
            wrong += 1

    correct = total - wrong
    accuracy = correct * 100.0 / total
    return total, wrong, accuracy
def process(training_file, test_file, check, draw):
    """Train an RBF-kernel SVM on pickled 2D points and evaluate or plot it.

    Each file holds three consecutive pickles: the points of class 1, the
    points of class 2 and the labels.

    Args:
        training_file: path of the pickle file used for training.
        test_file: path of the pickle file used for checking and drawing.
        check: if true, run ``svm_predict`` on the test data (libsvm prints
            the accuracy to stdout).
        draw: if true, plot the decision boundary over [-6, 6] x [-6, 6]
            together with the test points and call ``show()``.
    """
    def load_points(path):
        # Pickle streams are binary data, so the file is opened with 'rb';
        # text mode breaks on Python 3 and can corrupt data on Windows.
        # NOTE(review): pickle.load can execute arbitrary code; only use
        # this on files from a trusted source.
        with open(path, 'rb') as f:
            class_1 = pickle.load(f)
            class_2 = pickle.load(f)
            labels = pickle.load(f)
        # Convert data to real lists for libsvm. Comprehensions instead of
        # map() so the results can be concatenated on Python 3 as well.
        return ([list(point) for point in class_1],
                [list(point) for point in class_2],
                list(labels))

    # Load training data.
    class_1, class_2, labels = load_points(training_file)
    problem = svmutil.svm_problem(labels, class_1 + class_2)

    # Don't print to stdout, use radial basis functions.
    param = svmutil.svm_parameter('-q -t 2')
    model = svmutil.svm_train(problem, param)

    # Load test data.
    class_1, class_2, labels = load_points(test_file)

    if check:
        # Sadly, this prints to stdout too :-/
        svmutil.svm_predict(labels, class_1 + class_2, model)  # Prints accuracy.

    if draw:
        def classify(x, y, model=model):
            # Labels are dummies; only the predicted labels are returned.
            points = [list(point) for point in zip(x, y)]
            return array(svmutil.svm_predict([0] * len(x), points, model)[0])

        imtools.plot_2d_boundary(
            [-6, 6, -6, 6], [array(class_1), array(class_2)], classify, [1, -1])
        show()
def process(training_file, test_file, check, draw):
    """Train an RBF-kernel SVM on pickled 2D points and evaluate or plot it.

    Each file holds three consecutive pickles: the points of class 1, the
    points of class 2 and the labels.

    Args:
        training_file: path of the pickle file used for training.
        test_file: path of the pickle file used for checking and drawing.
        check: if true, run ``svm_predict`` on the test data (libsvm prints
            the accuracy to stdout).
        draw: if true, plot the decision boundary over [-6, 6] x [-6, 6]
            together with the test points and call ``show()``.
    """
    def load_points(path):
        # Pickle streams are binary data, so the file is opened with 'rb';
        # text mode breaks on Python 3 and can corrupt data on Windows.
        # NOTE(review): pickle.load can execute arbitrary code; only use
        # this on files from a trusted source.
        with open(path, 'rb') as f:
            class_1 = pickle.load(f)
            class_2 = pickle.load(f)
            labels = pickle.load(f)
        # Convert data to real lists for libsvm. Comprehensions instead of
        # map() so the results can be concatenated on Python 3 as well.
        return ([list(point) for point in class_1],
                [list(point) for point in class_2],
                list(labels))

    # Load training data.
    class_1, class_2, labels = load_points(training_file)
    problem = svmutil.svm_problem(labels, class_1 + class_2)

    # Don't print to stdout, use radial basis functions.
    param = svmutil.svm_parameter('-q -t 2')
    model = svmutil.svm_train(problem, param)

    # Load test data.
    class_1, class_2, labels = load_points(test_file)

    if check:
        # Sadly, this prints to stdout too :-/
        svmutil.svm_predict(labels, class_1 + class_2, model)  # Prints accuracy.

    if draw:
        def classify(x, y, model=model):
            # Labels are dummies; only the predicted labels are returned.
            points = [list(point) for point in zip(x, y)]
            return array(svmutil.svm_predict([0] * len(x), points, model)[0])

        imtools.plot_2d_boundary(
            [-6, 6, -6, 6], [array(class_1), array(class_2)], classify, [1, -1])
        show()
def _test_prob_model(self, param1, param2):
    """Train with probability estimates ("-b 1") and compare the converted spec.

    The libsvm predictions and per-class probabilities (keyed by the labels
    1 and 2) are written into ``self.df``. The comparison against the
    converted spec only runs on macOS 10.13 or newer.

    :param param1: option string joined after ``self.base_param``.
    :param param2: option string joined after ``param1``.
    """
    probability_flag = "-b 1"
    frame = self.df

    options = " ".join([self.base_param, param1, param2, probability_flag])
    model = svm_train(self.prob, svmutil.svm_parameter(options))

    # Get predictions with probabilities as dictionaries
    predicted, _, probability_lists = svm_predict(
        self.y, self.x, model, probability_flag + " -q"
    )
    frame["prediction"] = predicted
    frame["probabilities"] = [
        dict(zip([1, 2], row)) for row in probability_lists
    ]

    spec = libsvm.convert(model, self.column_names, "target", "probabilities")

    if not (_is_macos() and _macos_version() >= (10, 13)):
        return

    metrics = evaluate_classifier_with_probabilities(spec, frame, verbose=False)
    self.assertEqual(metrics["num_key_mismatch"], 0)
    self.assertLess(metrics["max_probability_error"], 0.00001)
def test_convert_svmr_linear(self):
    """Convert a linear libsvm model of type ``SVR`` and dump the result.

    The model is trained on the first two iris feature columns with the
    unmodified iris targets. The test asserts that ``convert`` returns
    something other than ``None`` and then hands the first five samples to
    ``dump_data_and_model``.
    """
    dataset = load_iris()
    features = dataset.data[:, :2]
    targets = dataset.target
    problem = svmutil.svm_problem(targets, features.tolist())

    settings = svmutil.svm_parameter()
    settings.svm_type = SVR
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1
    if noprint:
        # Install the module-level ``noprint`` callback as libsvm's print hook.
        settings.print_func = noprint
    trained = svmutil.svm_train(problem, settings)

    input_spec = [('input', FloatTensorType())]
    node = convert(trained, "LibSvmSvmrLinear", input_spec)
    self.assertTrue(node is not None)

    sample = features[:5].astype(numpy.float32)
    dump_data_and_model(sample, SkAPIReg(trained), node,
                        basename="LibSvmSvmrLinear-Dec3")
def run_kfold(param_dict, rows, numfold, kmers=None):
    """
    Run shuffled k-fold cross validation with an epsilon-SVR libsvm model.

    Args:
        param_dict: dictionary mapping a libsvm option letter to its value;
            each entry is appended to the option string as ``-<key> <value>``
        rows: input rows
        numfold: k for cross validation
        kmers: list of kmers, default [1,2,3]
    Return:
        list with one dictionary per fold:
        ``{"test": <held-out rows>, "svmpred": <result of svm_predict>}``
    """
    # A fresh list per call: a list literal in the signature would be one
    # shared object that any callee could mutate for all later calls.
    if kmers is None:
        kmers = [1, 2, 3]

    kf = KFold(numfold, shuffle=True)

    # -s 3: epsilon-SVR, -b 1: probability estimates on, -q: quiet mode
    param_str = "-s 3 -b 1 -q "
    param_str += " ".join(["-{} {}".format(k, v) for k, v in param_dict.items()])
    params = svmutil.svm_parameter(param_str)

    fold_results = []
    for train_idx, test_idx in kf.split(rows):
        train_list = [rows[i] for i in train_idx]
        test_list = [rows[i] for i in test_idx]

        y_train, x_train = libsvm_generate_matrix(train_list, kmers)
        y_test, x_test = libsvm_generate_matrix(test_list, kmers)

        train_prob = svmutil.svm_problem(y_train, x_train)
        model = svmutil.svm_train(train_prob, params)

        # y is only needed when we need the model performance
        svmpred = svmutil.svm_predict(y_test, x_test, model, options="-q")
        fold_results.append({"test": test_list, "svmpred": svmpred})
    return fold_results
# Fragment: the start of this code (including the definition of `pos_loc`,
# `label`, `x1` and `x2`) lies outside the visible source.

# Indices of the samples with a negative label.
neg_loc = np.where(label < 0)
# Split both coordinates into positive and negative samples.
pos_x1 = x1[pos_loc]
pos_x2 = x2[pos_loc]
neg_x1 = x1[neg_loc]
neg_x2 = x2[neg_loc]
# Positive samples as blue triangles, negative samples as red circles.
plt.plot(pos_x1, pos_x2, 'b^')
plt.plot(neg_x1, neg_x2, 'ro')
plt.xlim(0, 1)
plt.ylim(0, 1)
ax = plt.gca()
# Same scale on both axes.
ax.set_aspect(1)
plt.show()

# Train an SVM on the plotted samples and save it to 'test.model'.
y = list(label)
x = svm_data_format(x1, x2)
prob = svm.svm_problem(y, x)
# Alternative settings that were tried:
# param = svm.svm_parameter('-t 0 -c 1')
# param = svm.svm_parameter('-t 1 -d 2')
param = svm.svm_parameter('-t 2')
model = svm.svm_train(prob, param)
svm.svm_save_model('test.model', model)

# test
# y0 = [-1]
# x0 = [{1: 1, 2: 0}]
# p_label, p_acc, p_val = svm.svm_predict(y0, x0, model)
# print('p_label = ', p_label)
# print('p_acc = ', p_acc)
# print('p_val = ', p_val)
# Fragment: `res`, `labels`, `features`, `test_labels`, `test_features` and
# `classnames` are defined before the visible source.

# Fraction of predictions that match the test labels.
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print 'Bayes Accuracy:', acc
print_confusion(res, test_labels, classnames)

# FIXME: Bayes accuracy gets very bad if the input dimensions aren't reduced
# enough. Probably some float underflow due to things not using log
# probabilities?

# Test SVM.
features = map(list, features)
test_features = map(list, test_features)

# One dictionary used in both directions: class name -> index and
# index -> class name.
str_int_map = {}  # libSVM needs int labels.
for i, c in enumerate(classnames):
    str_int_map[c], str_int_map[i] = i, c

def convert_labels(labels, str_int_map):
    """Return the labels translated one by one through ``str_int_map``."""
    return [str_int_map[l] for l in labels]

problem = svmutil.svm_problem(convert_labels(labels, str_int_map), features)
# Use a linear kernel, radial basis functions have horrible results (~20% acc)
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)
# Keep only the predicted labels (first element of the result).
res = svmutil.svm_predict(convert_labels(test_labels, str_int_map),
                          test_features, model)[0]
# Map the predicted integer labels back to class names.
res = convert_labels(res, str_int_map)
# NOTE(review): `res` is a plain list here, so this comparison is only
# element-wise if `test_labels` is a numpy array - confirm.
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print 'SVM Accuracy:', acc
print_confusion(res, test_labels, classnames)
def __init__(self, kernel=None, classifier=None, probability=True, params=None,
             input_dim=None, output_dim=None, dtype=None):
    """Create a 'LibSVMClassifier' node.

    :param kernel: Name of the kernel, forwarded to ``set_kernel`` when
        given.
    :type kernel: str

    :param classifier: Name of the SVM type, forwarded to
        ``set_classifier`` when given.
    :type classifier: str

    :param probability: When true, probability estimates are switched on
        in the libsvm parameter object.
    :type probability: bool

    :param params: Additional ``svm_parameter`` fields as name/value
        pairs.
    :type params: dict

    :param input_dim: The input dimensionality.
    :type input_dim: int

    :param output_dim: The output dimensionality.
    :type output_dim: int

    :param dtype: The datatype.
    :type dtype: numpy.dtype or str

    :raises mdp.NodeException: If a key of ``params`` is not listed in the
        parameter object's ``_names``.
    :raises AttributeError: If the parameter object lacks an attribute for
        a key of ``params``.
    """
    extra_params = params if params else {}

    # Start from a quiet ("-q") libsvm parameter object.
    self.parameter = libsvmutil.svm_parameter("-q")
    if probability:
        # allow for probability estimates
        self.parameter.probability = 1

    super(LibSVMClassifier, self).__init__(input_dim=input_dim,
                                           output_dim=output_dim,
                                           dtype=dtype)

    if kernel:
        self.set_kernel(kernel)
    if classifier:
        self.set_classifier(classifier)

    # Apply the remaining options, rejecting unknown names first.
    for name, value in extra_params.items():
        if name not in self.parameter._names:
            raise mdp.NodeException(
                "'{}' is not a valid parameter for libsvm".format(name))
        if not hasattr(self.parameter, name):
            raise AttributeError(
                "'svm_parameter' has no attribute {}".format(name))
        setattr(self.parameter, name, value)
"""Train a linear-kernel libsvm model on the OCR data and print its fit."""
import os
from libsvm import svmutil
import ocr

OCR_PATH = '/Users/thakis/Downloads/data/sudoku_images/ocr_data/'

features, labels = ocr.load_ocr_data(os.path.join(OCR_PATH, 'training'))
test_features, test_labels = \
    ocr.load_ocr_data(os.path.join(OCR_PATH, 'testing'))

# libsvm wants plain lists. Comprehensions rather than map() so that the
# result is a real list on Python 3 as well, where map() is lazy.
features = [list(sample) for sample in features]
test_features = [list(sample) for sample in test_features]

problem = svmutil.svm_problem(labels, features)
# -q: quiet training, -t 0: linear kernel
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
# svm_predict itself prints the accuracy to stdout.
print('Training data fit:')
svmutil.svm_predict(labels, features, model)
print('Testing data fit:')
svmutil.svm_predict(test_labels, test_features, model)
def train(y, x):
    """Fit a libsvm model on the given labels and samples and return it.

    The option string selects kernel type 2 with ``-c 4``, probability
    estimates (``-b 1``) and a stopping tolerance of ``1e-12``.

    :param y: labels; sliced up to the training cutoff.
    :param x: samples; sliced up to the same cutoff.
    :return: the model returned by ``svmutil.svm_train``.
    """
    # The whole input is used for training (fraction 1.0).
    train_fraction = 1.0
    cutoff = int(train_fraction * len(y))

    problem = svmutil.svm_problem(y[:cutoff], x[:cutoff])
    options = svmutil.svm_parameter('-t 2 -c 4 -b 1 -e 1e-12')
    return svmutil.svm_train(problem, options)
# Fragment: `pca`, `n_components`, `h`, `w`, `X_train`, `X_test`, `y_train`
# and `y_test` are defined before the visible source.

# View each principal component as an (h, w) image.
eigenfaces = pca.components_.reshape((n_components, h, w))

print("Projecting the input data on the eigenfaces orthonormal basis")
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

################################################################################
# Train a SVM classification model

print("Fitting the classifier to the training set")
# NOTE(review): param_grid is not used by any live code in this fragment;
# the grid search that consumed it is commented out below.
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
prob = svm_problem(y_train,X_train_pca)
# "-q": quiet mode.
param = svm_parameter("-q")
# NOTE(review): libsvm's parameter object names this field `kernel_type`;
# `kernel` looks like it only adds an unused attribute - confirm.
param.kernel='rbf'
#param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
#              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
param.C=32
param.gamma=0.0001
print("Pass")
#parameters = GridSearchCV( param_grid)
model= svm_train(prob,param)
#clf = clf.fit(X_train_pca, y_train)
#print("Best estimator found by grid search:")
#print(m.best_estimator_)
# Predict on the projected test set with the trained model.
y_pred, pred_acc, pred_val = svm_predict(y_test,X_test_pca,model)
################################################################################