def self_training2(X, y, X_unLabeled, param, th):
    """Self-train an SVM by repeatedly pseudo-labelling the unlabeled samples.

    A model is first trained on the labeled data only.  Each iteration then
    predicts labels for ``X_unLabeled`` with the current model, retrains on
    the labeled data plus the pseudo-labeled data, and stops as soon as the
    objective value changes by less than ``th`` between two iterations.

    Args:
        X: Labeled samples; must support ``.tolist()`` and ``np.append``
            (presumably a 2-D numpy array -- confirm against callers).
        y: Labels for ``X``; must support ``.tolist()``.
        X_unLabeled: Unlabeled samples, same interface as ``X``.
        param: Training parameters, forwarded unchanged to ``svm_train``.
        th: Convergence threshold on the absolute objective change.

    Returns:
        Tuple ``(model, y_unlabeled, obj_new, itr_num)``: the last trained
        model, its predictions for ``X_unLabeled`` wrapped in a masked array
        whose entries are all masked, the last objective value, and the
        number of retraining iterations performed.
    """
    # These conversions do not depend on the loop state, so do them once
    # instead of on every iteration.
    unlabeled_rows = X_unLabeled.tolist()
    placeholder_labels = [1] * len(X_unLabeled)
    combined_rows = np.append(X, X_unLabeled, axis=0).tolist()

    model = svmutil.svm_train(
        svmutil.svm_problem(x=X.tolist(), y=y.tolist()), param)
    obj = model.get_objective_value()[0]
    itr_num = 0

    # NOTE(review): there is no iteration cap; the loop only ends when the
    # objective change drops below ``th``.
    while True:
        predicted_labels = np.array(
            svmutil.svm_predict(x=unlabeled_rows, y=placeholder_labels,
                                m=model, options="-q")[0])
        model = svmutil.svm_train(
            svmutil.svm_problem(
                x=combined_rows,
                y=np.append(y, predicted_labels).tolist()),
            param)
        obj_new = model.get_objective_value()[0]
        itr_num += 1
        if abs(obj_new - obj) < th:
            break
        obj = obj_new

    final_labels = np.array(
        svmutil.svm_predict(x=unlabeled_rows, y=placeholder_labels,
                            m=model, options="-q")[0])
    y_unlabeled = ma.array(data=final_labels,
                           mask=[True] * len(X_unLabeled))
    return model, y_unlabeled, obj_new, itr_num
def get_SVM_trained_classifer(self, training_datafile, classifier_dumpfile):
    """Train a linear SVM on the tweets in a training file and save it.

    The feature vectors and labels produced along the way are stored on
    ``self.feature_vectors`` and ``self.labels``.

    Args:
        training_datafile: Path handed to
            ``common.get_filtered_training_data``.
        classifier_dumpfile: Path the trained model is written to.

    Returns:
        The trained libsvm model.
    """
    # Lower-case every ASCII token of each tweet; drop the other tokens.
    tweets = [
        ([token.lower() for token in text.split() if common.is_ascii(token)],
         sentiment)
        for (text, sentiment)
        in common.get_filtered_training_data(training_datafile)
    ]

    extracted = helper.get_SVM_feature_vector_and_labels(
        self.feature_list, tweets)
    self.feature_vectors = extracted['feature_vector']
    self.labels = extracted['labels']

    # '-q' suppresses console output.
    options = svm_parameter('-q')
    options.kernel_type = LINEAR

    model = svm_train(svm_problem(self.labels, self.feature_vectors), options)
    svm_save_model(classifier_dumpfile, model)
    return model
def test_convert_nusvmc(self):
    """Train a NuSVC/RBF model with probabilities, convert it and dump it."""
    dataset = load_iris()
    X = dataset.data[:, :2]
    y = dataset.target
    # Relabel class 2 as class 1 so only two distinct labels remain.
    y[y == 2] = 1
    problem = svmutil.svm_problem(y, X.tolist())

    settings = svmutil.svm_parameter()
    settings.svm_type = NuSVC
    settings.kernel_type = svmutil.RBF
    settings.eps = 1
    settings.probability = 1
    if noprint:
        settings.print_func = noprint

    libsvm_model = svmutil.svm_train(problem, settings)

    input_types = [('input', FloatTensorType(shape=['None', 'None']))]
    node = convert(libsvm_model, "LibSvmNuSvmc", input_types)
    self.assertTrue(node is not None)
    dump_data_and_model(
        X[:5].astype(numpy.float32),
        SkAPIClProba2(libsvm_model),
        node,
        basename="LibSvmNuSvmc-Dec2",
        allow_failure="StrictVersion(onnxruntime.__version__) <= StrictVersion('0.1.3')")
def setUpClass(self):
    """
    Prepare shared fixtures: load the Boston data, train a linear C-SVC on
    "target above its mean" labels and convert the model to a spec.

    Does nothing when scikit-learn or libsvm is unavailable.
    """
    if not (_HAS_SKLEARN and _HAS_LIBSVM):
        return

    scikit_data = load_boston()
    # Boolean labels: True where the target exceeds its mean.
    above_mean = scikit_data["target"] > scikit_data["target"].mean()
    prob = svmutil.svm_problem(above_mean, scikit_data["data"].tolist())

    param = svmutil.svm_parameter()
    param.svm_type = svmutil.C_SVC
    param.kernel_type = svmutil.LINEAR
    param.eps = 1

    trained = svmutil.svm_train(prob, param)
    converted = libsvm_converter.convert(
        trained, scikit_data.feature_names, "target")

    # Keep the data and the converted spec for the tests.
    self.scikit_data = scikit_data
    self.libsvm_spec = converted.get_spec()
def test_multi_class_without_probability(self):
    """Converted four-class models must reproduce libsvm's own predictions.

    Runs over every combination of ``self.non_kernel_parameters`` and
    ``self.kernel_parameters`` on randomly generated data.
    """
    labels_pool = [1, 2, 10, 12]
    column_names = ["x1", "x2", "x3"]

    # Random data on purpose: this test should not depend on scikit-learn
    # for its inputs.
    x, y = [], []
    for _ in range(50):
        x.append(
            [random.gauss(200, 30), random.gauss(-100, 22), random.gauss(100, 42)]
        )
        y.append(random.choice(labels_pool))
    # Pin the first four labels so every class appears, in a fixed order.
    y[0], y[1], y[2], y[3] = 1, 2, 10, 12

    prob = svmutil.svm_problem(y, x)
    df = pd.DataFrame(x, columns=column_names)

    for non_kernel_opt in self.non_kernel_parameters:
        for kernel_opt in self.kernel_parameters:
            options = " ".join([self.base_param, non_kernel_opt, kernel_opt])
            model = svm_train(prob, svmutil.svm_parameter(options))

            # Reference predictions from libsvm itself.
            predictions, _, _ = svm_predict(y, x, model, " -q")
            df["prediction"] = predictions

            spec = libsvm.convert(model, column_names, "target")
            metrics = evaluate_classifier(spec, df, verbose=False)
            self.assertEqual(metrics["num_errors"], 0)
def setUpClass(self):
    """
    Prepare shared fixtures: seeded random two-class data and a linear
    NU_SVC model trained on it with probability estimates enabled.
    """
    if not _HAS_LIBSVM:
        # setUpClass still runs when the class itself is skipped.
        return

    # Random data on purpose: this test should not depend on scikit-learn
    # for its inputs.  The seed keeps the data reproducible.
    random.seed(42)
    self.x, self.y = [], []
    for _ in range(50):
        self.x.append([random.gauss(200, 30), random.gauss(-100, 22)])
        self.y.append(random.choice([1, 2]))
    # Force label 1 to be the first one seen, followed by label 2.
    self.y[0] = 1
    self.y[1] = 2

    self.column_names = ["x1", "x2"]
    self.prob = svmutil.svm_problem(self.y, self.x)

    settings = svmutil.svm_parameter()
    settings.svm_type = svmutil.NU_SVC
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1
    settings.probability = 1

    # Keep the model and a DataFrame view of the data for the tests.
    self.libsvm_model = svmutil.svm_train(self.prob, settings)
    self.df = pd.DataFrame(self.x, columns=self.column_names)
def test_convert_svmc_raw(self):
    """Train an SVC/RBF model without probabilities, convert and dump it."""
    dataset = load_iris()
    X = dataset.data[:, :2]
    y = dataset.target
    # Relabel class 2 as class 1 so only two distinct labels remain.
    y[y == 2] = 1
    problem = svmutil.svm_problem(y, X.tolist())

    settings = svmutil.svm_parameter()
    settings.svm_type = SVC
    settings.kernel_type = svmutil.RBF
    settings.eps = 1
    settings.probability = 0
    if noprint:
        settings.print_func = noprint

    libsvm_model = svmutil.svm_train(problem, settings)

    # Known svm runtime dimension error in ONNX Runtime (hence the
    # allow_failure condition below).
    input_types = [('input', FloatTensorType(shape=['None', 'None']))]
    node = convert(libsvm_model, "LibSvmSvmcRaw", input_types)
    self.assertTrue(node is not None)
    dump_data_and_model(
        X[:5].astype(numpy.float32),
        SkAPICl(libsvm_model),
        node,
        basename="LibSvmSvmcRaw",
        allow_failure="StrictVersion(onnxruntime.__version__) < StrictVersion('0.5.0')")
def test_convert_svmc_linear_raw_multi(self):
    """Train a multi-class linear SVC without probabilities, convert, dump."""
    dataset = load_iris()
    X = dataset.data[:, :2]
    y = dataset.target
    # Give the last five samples an extra label (3).
    y[-5:] = 3
    problem = svmutil.svm_problem(y, X.tolist())

    settings = svmutil.svm_parameter()
    settings.svm_type = SVC
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1
    settings.probability = 0
    if noprint:
        settings.print_func = noprint

    libsvm_model = svmutil.svm_train(problem, settings)

    input_types = [('input', FloatTensorType(shape=['None', 2]))]
    node = convert(libsvm_model, "LibSvmNuSvmcMultiRaw", input_types)
    self.assertTrue(node is not None)

    # Two rows from each of four different regions of the data set.
    row_slices = [X[:2], X[60:62], X[110:112], X[147:149]]
    X2 = numpy.vstack(row_slices)
    dump_data_and_model(
        X2.astype(numpy.float32),
        SkAPICl(libsvm_model),
        node,
        basename="LibSvmSvmcRaw-Dec3",
        verbose=False,
        allow_failure="StrictVersion(onnxruntime.__version__) <= StrictVersion('0.1.3')")
def test_convert_svmc(self):
    """Train an SVC/RBF model with probabilities, convert it and dump it."""
    dataset = load_iris()
    X = dataset.data[:, :2]
    y = dataset.target
    # Relabel class 2 as class 1 so only two distinct labels remain.
    y[y == 2] = 1
    problem = svmutil.svm_problem(y, X.tolist())

    settings = svmutil.svm_parameter()
    settings.svm_type = SVC
    settings.kernel_type = svmutil.RBF
    settings.eps = 1
    settings.probability = 1
    if noprint:
        settings.print_func = noprint

    libsvm_model = svmutil.svm_train(problem, settings)

    input_types = [('input', FloatTensorType())]
    node = convert(libsvm_model, "LibSvmSvmc", input_types)
    self.assertTrue(node is not None)
    dump_data_and_model(
        X[:5].astype(numpy.float32),
        SkAPIClProba2(libsvm_model),
        node,
        basename="LibSvmSvmc-Dec2")
def generate_svm_model(rows, param, kmers=None):
    """Train an epsilon-SVR libsvm model on features generated from *rows*.

    Args:
        rows: Input rows handed to ``libsvm_generate_matrix``.
        param: Mapping of libsvm option letter to value; each item is
            rendered as ``-<key> <value>`` and appended to the base options.
        kmers: List of k-mer sizes passed to ``libsvm_generate_matrix``.
            Defaults to ``[1, 2, 3]``; a fresh list is created per call so
            the default cannot be shared or mutated across calls.

    Returns:
        The trained libsvm model.
    """
    if kmers is None:
        kmers = [1, 2, 3]

    y, x = libsvm_generate_matrix(rows, kmers, dense=True)
    prob = svmutil.svm_problem(y, x)

    # -s 3: epsilon-SVR, -b 1: probability estimates, -q: quiet mode.
    extra_options = " ".join(
        "-{} {}".format(k, v) for k, v in param.items())
    param_str = "-s 3 -b 1 -q " + extra_options

    params = svmutil.svm_parameter(param_str)
    return svmutil.svm_train(prob, params)
def _stop_training(self):
    """Finish the parent's training phase, then fit the libsvm model.

    Stores a label normalizer built from ``self.labels`` on
    ``self.normalizer`` and the trained model on ``self.model``.
    """
    super(LibSVMClassifier, self)._stop_training()

    self.normalizer = _LabelNormalizer(self.labels)
    normalized_labels = self.normalizer.normalize(self.labels.tolist())

    # Build the libsvm problem from the normalized labels and stored data,
    # then train with the configured parameters.
    problem = libsvmutil.svm_problem(normalized_labels, self.data.tolist())
    self.model = libsvmutil.svm_train(problem, self.parameter)
def train(self, input_data_path, params="-t 0 -c 4 -b 1", is_eval=True):
    """Train a libsvm model from ``<input_data_path>/Train.txt``.

    The parsed parameters are stored on ``self._params``, the model on
    ``self._model``, and ``self._init`` is set to True.

    Args:
        input_data_path: Directory containing ``Train.txt`` in libsvm format.
        params: libsvm option string used for training.
        is_eval: When exactly ``True``, predict on the training data and log
            the resulting accuracy and mean squared error.
    """
    train_file = input_data_path + "/Train.txt"

    with commons.PhaseLogger("LIBSVM.train.read_problem"):
        Y, X = svmutil.svm_read_problem(train_file)
        prob = svmutil.svm_problem(Y, X)

    self._params = svmutil.svm_parameter(params)

    with commons.PhaseLogger("LIBSVM.train.svm_train"):
        self._model = svmutil.svm_train(prob, self._params)
    self._init = True

    if is_eval is not True:
        return

    # Evaluate on the training set itself; only the metrics are used.
    _labels, metrics, _values = svmutil.svm_predict(Y, X, self._model)
    acc, mse, _ = metrics
    logging.info("[%s]: train with Acc[%.4f] Mse[%.4f]" % (self._get_class_name(), acc, mse))
def setUpClass(self):
    """
    Prepare the shared fixture: a linear NU_SVR model trained on the
    Boston data.

    Does nothing when scikit-learn or libsvm is unavailable.
    """
    if not (_HAS_SKLEARN and _HAS_LIBSVM):
        return

    scikit_data = load_boston()
    targets = scikit_data["target"]
    samples = scikit_data["data"].tolist()
    prob = svmutil.svm_problem(targets, samples)

    settings = svmutil.svm_parameter()
    settings.svm_type = svmutil.NU_SVR
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1

    self.libsvm_model = svmutil.svm_train(prob, settings)
def predict(tr_data_arr, tr_label_arr, pred_data_arr, pred_label_arr):
    """Train an SVM on the training set and score it on the evaluation set.

    Args:
        tr_data_arr: Training samples, converted with ``data_format``.
        tr_label_arr: Labels for the training samples.
        pred_data_arr: Evaluation samples, converted with ``data_format``.
        pred_label_arr: Expected labels for the evaluation samples.

    Returns:
        Tuple ``(count, wrong, accuracy)``: number of evaluation labels,
        number of misclassified samples and accuracy in percent.  An empty
        evaluation set yields ``(0, 0, 0.0)`` instead of dividing by zero.
    """
    data_arr = data_format(tr_data_arr)
    prob = svm_problem(tr_label_arr, data_arr)
    # The c and g values below were obtained through cross-validation.
    param = svm_parameter('-c 2048.0 -g 0.001953125')
    svm_model = svm_train(prob, param)

    pred_data_arr = data_format(pred_data_arr)
    pred_data_len = len(pred_label_arr)
    if pred_data_len == 0:
        # Nothing to evaluate; avoid ZeroDivisionError below.
        return 0, 0, 0.0

    wrong = 0
    for idx, data in enumerate(pred_data_arr):
        expected = pred_label_arr[idx]
        # Samples are predicted one at a time, as before.
        p_label, p_acc, p_val = svm_predict([expected], [data], svm_model)
        if int(p_label[0]) != int(expected):
            wrong += 1

    accuracy = (pred_data_len - wrong) * 100.0 / pred_data_len
    return pred_data_len, wrong, accuracy
def _load_pickled_classes(path):
    """Load two sample sets and their labels pickled back-to-back in *path*.

    Returns ``(class_1, class_2, labels)`` as real lists so they can be
    concatenated and reused.

    NOTE(security): unpickling can execute arbitrary code; only use files
    from a trusted source.
    """
    # Pickle streams are binary data, so the file is opened in binary mode.
    with open(path, 'rb') as f:
        class_1 = pickle.load(f)
        class_2 = pickle.load(f)
        labels = pickle.load(f)
    # Build concrete lists rather than lazy map objects: they are
    # concatenated with + and read more than once below.
    return ([list(sample) for sample in class_1],
            [list(sample) for sample in class_2],
            list(labels))


def process(training_file, test_file, check, draw):
    """Train an RBF SVM on pickled 2-D points and optionally check/plot it.

    Args:
        training_file: Pickle file holding class-1 samples, class-2 samples
            and labels, in that order.
        test_file: Pickle file with the same layout, used for evaluation.
        check: When true, run prediction on the test data (libsvm prints the
            accuracy to stdout).
        draw: When true, plot the test points and the decision boundary.
    """
    class_1, class_2, labels = _load_pickled_classes(training_file)
    problem = svmutil.svm_problem(labels, class_1 + class_2)

    # Don't print to stdout, use radial basis functions.
    param = svmutil.svm_parameter('-q -t 2')
    model = svmutil.svm_train(problem, param)

    class_1, class_2, labels = _load_pickled_classes(test_file)

    if check:
        # Sadly, this prints to stdout too (it reports the accuracy).
        svmutil.svm_predict(labels, class_1 + class_2, model)

    if draw:
        def classify(x, y, model=model):
            points = [list(point) for point in zip(x, y)]
            return array(svmutil.svm_predict([0] * len(x), points, model)[0])

        imtools.plot_2d_boundary(
            [-6, 6, -6, 6], [array(class_1), array(class_2)], classify, [1, -1])
        show()
def _load_pickled_classes(path):
    """Load two sample sets and their labels pickled back-to-back in *path*.

    Returns ``(class_1, class_2, labels)`` as real lists so they can be
    concatenated and reused.

    NOTE(security): unpickling can execute arbitrary code; only use files
    from a trusted source.
    """
    # Pickle streams are binary data, so the file is opened in binary mode.
    with open(path, 'rb') as f:
        class_1 = pickle.load(f)
        class_2 = pickle.load(f)
        labels = pickle.load(f)
    # Build concrete lists rather than lazy map objects: they are
    # concatenated with + and read more than once below.
    return ([list(sample) for sample in class_1],
            [list(sample) for sample in class_2],
            list(labels))


def process(training_file, test_file, check, draw):
    """Train an RBF SVM on pickled 2-D points and optionally check/plot it.

    Args:
        training_file: Pickle file holding class-1 samples, class-2 samples
            and labels, in that order.
        test_file: Pickle file with the same layout, used for evaluation.
        check: When true, run prediction on the test data (libsvm prints the
            accuracy to stdout).
        draw: When true, plot the test points and the decision boundary.
    """
    class_1, class_2, labels = _load_pickled_classes(training_file)
    problem = svmutil.svm_problem(labels, class_1 + class_2)

    # Don't print to stdout, use radial basis functions.
    param = svmutil.svm_parameter('-q -t 2')
    model = svmutil.svm_train(problem, param)

    class_1, class_2, labels = _load_pickled_classes(test_file)

    if check:
        # Sadly, this prints to stdout too (it reports the accuracy).
        svmutil.svm_predict(labels, class_1 + class_2, model)

    if draw:
        def classify(x, y, model=model):
            points = [list(point) for point in zip(x, y)]
            return array(svmutil.svm_predict([0] * len(x), points, model)[0])

        imtools.plot_2d_boundary(
            [-6, 6, -6, 6], [array(class_1), array(class_2)], classify, [1, -1])
        show()
def test_convert_svmr_linear(self):
    """Train a linear SVR model on iris, convert it and dump it."""
    dataset = load_iris()
    X = dataset.data[:, :2]
    y = dataset.target
    problem = svmutil.svm_problem(y, X.tolist())

    settings = svmutil.svm_parameter()
    settings.svm_type = SVR
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1
    if noprint:
        settings.print_func = noprint

    libsvm_model = svmutil.svm_train(problem, settings)

    input_types = [('input', FloatTensorType())]
    node = convert(libsvm_model, "LibSvmSvmrLinear", input_types)
    self.assertTrue(node is not None)
    dump_data_and_model(
        X[:5].astype(numpy.float32),
        SkAPIReg(libsvm_model),
        node,
        basename="LibSvmSvmrLinear-Dec3")
def run_kfold(param_dict, rows, numfold, kmers=None):
    """Run shuffled k-fold cross validation with an epsilon-SVR model.

    Args:
        param_dict: dictionary mapping libsvm option letter to its value;
            each item is rendered as ``-<key> <value>``
        rows: input rows
        numfold: k for cross validation
        kmers: list of kmers, default [1,2,3] (a fresh list per call)

    Return:
        list with one dictionary per fold: ``"test"`` holds the fold's test
        rows and ``"svmpred"`` the raw return value of ``svm_predict`` on
        them
    """
    if kmers is None:
        kmers = [1, 2, 3]

    # -s 3: epsilon-SVR, -b 1: probability estimates, -q: quiet mode.
    param_str = "-s 3 -b 1 -q " + " ".join(
        "-{} {}".format(k, v) for k, v in param_dict.items())
    params = svmutil.svm_parameter(param_str)

    kf = KFold(numfold, shuffle=True)
    fold_results = []
    for train_idx, test_idx in kf.split(rows):
        train_list = [rows[i] for i in train_idx]
        test_list = [rows[i] for i in test_idx]

        y_train, x_train = libsvm_generate_matrix(train_list, kmers)
        y_test, x_test = libsvm_generate_matrix(test_list, kmers)

        train_prob = svmutil.svm_problem(y_train, x_train)
        model = svmutil.svm_train(train_prob, params)

        # y_test only matters for the performance figures libsvm reports.
        svmpred = svmutil.svm_predict(y_test, x_test, model, options="-q")
        fold_results.append({"test": test_list, "svmpred": svmpred})
    return fold_results
import os

from libsvm import svmutil

import ocr

# Train a linear SVM on the OCR training set and report how well it fits
# both the training and the testing data.
OCR_PATH = '/Users/thakis/Downloads/data/sudoku_images/ocr_data/'

features, labels = ocr.load_ocr_data(os.path.join(OCR_PATH, 'training'))
test_features, test_labels = \
    ocr.load_ocr_data(os.path.join(OCR_PATH, 'testing'))

# libsvm wants plain lists.  Build concrete lists rather than map objects:
# `features` is used twice below, and on Python 3 a map object is a
# one-shot iterator.
features = [list(f) for f in features]
test_features = [list(f) for f in test_features]

problem = svmutil.svm_problem(labels, features)
# -q: quiet, -t 0: linear kernel.
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)

# Single-argument print(...) produces the same output on Python 2 and 3.
print('Training data fit:')
svmutil.svm_predict(labels, features, model)
print('Testing data fit:')
svmutil.svm_predict(test_labels, test_features, model)
tic.k('start')

SUDOKU_PATH = '/Users/thakis/Downloads/data/sudoku_images/sudokus/'
imname = os.path.join(SUDOKU_PATH, 'sudoku18.jpg')
vername = os.path.join(SUDOKU_PATH, 'sudoku18.sud')

# Load the sudoku image as grayscale and locate the grid lines on each axis.
im = numpy.array(Image.open(imname).convert('L'))
x = sudoku.find_sudoku_edges(im, axis=0)
y = sudoku.find_sudoku_edges(im, axis=1)
tic.k('found edges')

# Extract cells, run OCR.
OCR_PATH = '/Users/thakis/Downloads/data/sudoku_images/ocr_data/'
features, labels = ocr.load_ocr_data(os.path.join(OCR_PATH, 'training'))
# Pass a concrete list of lists: on Python 3 a map object is a lazy
# iterator, not a list.
problem = svmutil.svm_problem(labels, [list(f) for f in features])
# -q: quiet, -t 0: linear kernel.
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)
tic.k('built OCR model')

# Cut the image into 9x9 cells along the detected edges and compute one
# feature vector per cell.
crops = []
for col in range(9):
    for row in range(9):
        crop = im[y[col]:y[col + 1], x[row]:x[row + 1]]
        crops.append(ocr.compute_feature(crop))
tic.k('extracted cells')

# The values loaded from the .sud file are passed as the expected labels.
res = svmutil.svm_predict(
    numpy.loadtxt(vername), [list(c) for c in crops], model)[0]
tic.k('recognized cells')
res = numpy.array(res).reshape(9, 9)
eigenfaces = pca.components_.reshape((n_components, h, w))

print("Projecting the input data on the eigenfaces orthonormal basis")
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

###############################################################################
# Train a SVM classification model

print("Fitting the classifier to the training set")
# Candidate values for a grid search; not used by the training below.
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
prob = svm_problem(y_train, X_train_pca)
# The previous `param.kernel = 'rbf'` set an attribute libsvm never reads
# (the field is `kernel_type`), so the kernel was only RBF by default.
# Spell the configuration out in the option string instead:
# -q quiet, -t 2 RBF kernel, -c cost C=32, -g gamma=0.0001.
param = svm_parameter("-q -t 2 -c 32 -g 0.0001")
print("Pass")
model = svm_train(prob, param)
y_pred, pred_acc, pred_val = svm_predict(y_test, X_test_pca, model)
res = bc.classify(test_features)[0]
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
# '%s' formatting in a single-argument print(...) gives the same output on
# Python 2 and Python 3.
print('Bayes Accuracy: %s' % (acc,))
print_confusion(res, test_labels, classnames)
# FIXME: Bayes accuracy gets very bad if the input dimensions aren't reduced
#        enough. Probably some float underflow due to things not using log
#        probabilities?

# Test SVM.
# libsvm wants plain lists; build concrete lists because on Python 3 a map
# object is a lazy iterator, not a list.
features = [list(f) for f in features]
test_features = [list(f) for f in test_features]

# libSVM needs int labels.  The map works in both directions:
# class name -> index and index -> class name.
str_int_map = {}
for i, c in enumerate(classnames):
    str_int_map[c], str_int_map[i] = i, c


def convert_labels(labels, str_int_map):
    """Translate every label through *str_int_map*."""
    return [str_int_map[l] for l in labels]


problem = svmutil.svm_problem(convert_labels(labels, str_int_map), features)
# Use a linear kernel, radial basis functions have horrible results (~20% acc)
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)

res = svmutil.svm_predict(
    convert_labels(test_labels, str_int_map), test_features, model)[0]
# Map the predicted indices back to class names before comparing.
res = convert_labels(res, str_int_map)
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print('SVM Accuracy: %s' % (acc,))
print_confusion(res, test_labels, classnames)
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
# '%s' formatting in a single-argument print(...) gives the same output on
# Python 2 and Python 3.
print('Bayes Accuracy: %s' % (acc,))
print_confusion(res, test_labels, classnames)
# FIXME: Bayes accuracy gets very bad if the input dimensions aren't reduced
#        enough. Probably some float underflow due to things not using log
#        probabilities?

# Test SVM.
# libsvm wants plain lists; build concrete lists because on Python 3 a map
# object is a lazy iterator, not a list.
features = [list(f) for f in features]
test_features = [list(f) for f in test_features]

# libSVM needs int labels.  The map works in both directions:
# class name -> index and index -> class name.
str_int_map = {}
for i, c in enumerate(classnames):
    str_int_map[c], str_int_map[i] = i, c


def convert_labels(labels, str_int_map):
    """Translate every label through *str_int_map*."""
    return [str_int_map[l] for l in labels]


problem = svmutil.svm_problem(convert_labels(labels, str_int_map), features)
# Use a linear kernel, radial basis functions have horrible results (~20% acc)
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)

res = svmutil.svm_predict(
    convert_labels(test_labels, str_int_map), test_features, model)[0]
# Map the predicted indices back to class names before comparing.
res = convert_labels(res, str_int_map)
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print('SVM Accuracy: %s' % (acc,))
print_confusion(res, test_labels, classnames)
def train(y, x):
    """Train a libsvm model on the labels *y* and samples *x*.

    Args:
        y: Sequence of labels (must support ``len`` and slicing).
        x: Sequence of samples, sliced to the same length as ``y``.

    Returns:
        The trained libsvm model.
    """
    # The 1.0 factor is the share of the data used for training; at 1.0
    # the slices below keep every sample.
    cutoff = int(1.0 * len(y))
    problem = svmutil.svm_problem(y[:cutoff], x[:cutoff])
    options = svmutil.svm_parameter('-t 2 -c 4 -b 1 -e 1e-12')
    return svmutil.svm_train(problem, options)
# Split the points by the sign of their label (pos_loc is computed earlier).
neg_loc = np.where(label < 0)
pos_x1, pos_x2 = x1[pos_loc], x2[pos_loc]
neg_x1, neg_x2 = x1[neg_loc], x2[neg_loc]

# Scatter plot: blue triangles for one selection, red circles for the
# negative-label points, on a unit square with equal aspect ratio.
plt.plot(pos_x1, pos_x2, 'b^')
plt.plot(neg_x1, neg_x2, 'ro')
plt.xlim(0, 1)
plt.ylim(0, 1)
ax = plt.gca()
ax.set_aspect(1)
plt.show()

# Train an SVM on the same points and save the model to disk.
y = list(label)
x = svm_data_format(x1, x2)
prob = svm.svm_problem(y, x)
# Alternative kernels tried before:
#   svm.svm_parameter('-t 0 -c 1')
#   svm.svm_parameter('-t 1 -d 2')
param = svm.svm_parameter('-t 2')
model = svm.svm_train(prob, param)
svm.svm_save_model('test.model', model)

# Manual smoke test, kept for reference:
#   y0 = [-1]
#   x0 = [{1: 1, 2: 0}]
#   p_label, p_acc, p_val = svm.svm_predict(y0, x0, model)
#   print('p_label = ', p_label)
#   print('p_acc = ', p_acc)
#   print('p_val = ', p_val)