예제 #1
0
def self_training2(X, y, X_unLabeled, param, th):
    model = svmutil.svm_train(svmutil.svm_problem(x=X.tolist(), y=y.tolist()), param)
    obj = model.get_objective_value()[0]
    itr_num = 0

    while True:
        predicted_labels = np.array(svmutil.svm_predict(x=X_unLabeled.tolist(),
                                                        y=[1]*len(X_unLabeled),
                                                        m=model,
                                                        options="-q")[0])
        model = svmutil.svm_train(svmutil.svm_problem(x=np.append(X, X_unLabeled, axis=0).tolist(),
                                                      y=np.append(y, predicted_labels).tolist()), param)
        obj_new = model.get_objective_value()[0]
        itr_num += 1

        if abs(obj_new - obj) < th:
            break
        else:
            obj = obj_new

    y_unlabeled = ma.array(data=np.array(svmutil.svm_predict(x=X_unLabeled.tolist(),
                                                             y=[1]*len(X_unLabeled),
                                                             m=model,
                                                             options="-q")[0]),
                           mask=[True]*len(X_unLabeled))

    return model, y_unlabeled, obj_new, itr_num
예제 #2
0
    def get_SVM_trained_classifer(self, training_datafile,
                                  classifier_dumpfile):
        # read all tweets and labels
        tweet_items = common.get_filtered_training_data(training_datafile)

        tweets = []
        for (words, sentiment) in tweet_items:
            words_filtered = [
                e.lower() for e in words.split() if (common.is_ascii(e))
            ]
            tweets.append((words_filtered, sentiment))

        results = helper.get_SVM_feature_vector_and_labels(
            self.feature_list, tweets)
        self.feature_vectors = results['feature_vector']
        self.labels = results['labels']

        problem = svm_problem(self.labels, self.feature_vectors)
        # '-q' option suppress console output
        param = svm_parameter('-q')
        param.kernel_type = LINEAR
        # param.show()
        classifier = svm_train(problem, param)
        svm_save_model(classifier_dumpfile, classifier)
        return classifier
예제 #3
0
    def test_convert_nusvmc(self):
        iris = load_iris()

        X = iris.data[:, :2]
        y = iris.target
        y[y == 2] = 1

        prob = svmutil.svm_problem(y, X.tolist())

        param = svmutil.svm_parameter()
        param.svm_type = NuSVC
        param.kernel_type = svmutil.RBF
        param.eps = 1
        param.probability = 1
        if noprint:
            param.print_func = noprint

        libsvm_model = svmutil.svm_train(prob, param)

        node = convert(libsvm_model, "LibSvmNuSvmc",
                       [('input', FloatTensorType(shape=['None', 'None']))])
        self.assertTrue(node is not None)
        dump_data_and_model(
            X[:5].astype(numpy.float32),
            SkAPIClProba2(libsvm_model),
            node,
            basename="LibSvmNuSvmc-Dec2",
            allow_failure=
            "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.1.3')")
예제 #4
0
    def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        if not _HAS_SKLEARN:
            return
        if not _HAS_LIBSVM:
            return

        scikit_data = load_boston()
        prob = svmutil.svm_problem(
            scikit_data["target"] > scikit_data["target"].mean(),
            scikit_data["data"].tolist(),
        )
        param = svmutil.svm_parameter()
        param.svm_type = svmutil.C_SVC
        param.kernel_type = svmutil.LINEAR
        param.eps = 1

        libsvm_model = svmutil.svm_train(prob, param)
        libsvm_spec = libsvm_converter.convert(
            libsvm_model, scikit_data.feature_names, "target"
        ).get_spec()

        # Save the data and the model
        self.scikit_data = scikit_data
        self.libsvm_spec = libsvm_spec
예제 #5
0
    def test_multi_class_without_probability(self):
        # Generate some random data.
        # This unit test should not rely on scikit learn for test data.
        x, y = [], []
        for _ in range(50):
            x.append(
                [random.gauss(200, 30), random.gauss(-100, 22), random.gauss(100, 42)]
            )
            y.append(random.choice([1, 2, 10, 12]))
        y[0], y[1], y[2], y[3] = 1, 2, 10, 12
        column_names = ["x1", "x2", "x3"]
        prob = svmutil.svm_problem(y, x)

        df = pd.DataFrame(x, columns=column_names)

        for param1 in self.non_kernel_parameters:
            for param2 in self.kernel_parameters:
                param_str = " ".join([self.base_param, param1, param2])
                param = svmutil.svm_parameter(param_str)

                model = svm_train(prob, param)

                # Get predictions with probabilities as dictionaries
                (df["prediction"], _, _) = svm_predict(y, x, model, " -q")

                spec = libsvm.convert(model, column_names, "target")

                metrics = evaluate_classifier(spec, df, verbose=False)
                self.assertEqual(metrics["num_errors"], 0)
예제 #6
0
    def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        if not _HAS_LIBSVM:
            # setUpClass is still called even if class is skipped.
            return

        # Generate some random data.
        # This unit test should not rely on scikit learn for test data.
        self.x, self.y = [], []
        random.seed(42)
        for _ in range(50):
            self.x.append([random.gauss(200, 30), random.gauss(-100, 22)])
            self.y.append(random.choice([1, 2]))
        self.y[0] = 1  # Make sure 1 is always the first label it sees
        self.y[1] = 2
        self.column_names = ["x1", "x2"]
        self.prob = svmutil.svm_problem(self.y, self.x)

        param = svmutil.svm_parameter()
        param.svm_type = svmutil.NU_SVC
        param.kernel_type = svmutil.LINEAR
        param.eps = 1
        param.probability = 1

        # Save the data and the model
        self.libsvm_model = svmutil.svm_train(self.prob, param)

        self.df = pd.DataFrame(self.x, columns=self.column_names)
예제 #7
0
    def test_convert_svmc_raw(self):
        iris = load_iris()

        X = iris.data[:, :2]
        y = iris.target
        y[y == 2] = 1

        prob = svmutil.svm_problem(y, X.tolist())

        param = svmutil.svm_parameter()
        param.svm_type = SVC
        param.kernel_type = svmutil.RBF
        param.eps = 1
        param.probability = 0
        if noprint:
            param.print_func = noprint

        libsvm_model = svmutil.svm_train(prob, param)

        # known svm runtime dimension error in ONNX Runtime
        node = convert(libsvm_model, "LibSvmSvmcRaw",
                       [('input', FloatTensorType(shape=['None', 'None']))])
        self.assertTrue(node is not None)
        dump_data_and_model(
            X[:5].astype(numpy.float32),
            SkAPICl(libsvm_model),
            node,
            basename="LibSvmSvmcRaw",
            allow_failure=
            "StrictVersion(onnxruntime.__version__) < StrictVersion('0.5.0')")
예제 #8
0
    def test_convert_svmc_linear_raw_multi(self):
        iris = load_iris()

        X = iris.data[:, :2]
        y = iris.target
        y[-5:] = 3

        prob = svmutil.svm_problem(y, X.tolist())

        param = svmutil.svm_parameter()
        param.svm_type = SVC
        param.kernel_type = svmutil.LINEAR
        param.eps = 1
        param.probability = 0
        if noprint:
            param.print_func = noprint

        libsvm_model = svmutil.svm_train(prob, param)

        node = convert(libsvm_model, "LibSvmNuSvmcMultiRaw",
                       [('input', FloatTensorType(shape=['None', 2]))])
        self.assertTrue(node is not None)
        X2 = numpy.vstack([X[:2], X[60:62], X[110:112],
                           X[147:149]])  # 5x0, 5x1
        dump_data_and_model(
            X2.astype(numpy.float32),
            SkAPICl(libsvm_model),
            node,
            basename="LibSvmSvmcRaw-Dec3",
            verbose=False,
            allow_failure=
            "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.1.3')")
예제 #9
0
    def test_convert_svmc(self):
        iris = load_iris()

        X = iris.data[:, :2]
        y = iris.target
        y[y == 2] = 1

        prob = svmutil.svm_problem(y, X.tolist())

        param = svmutil.svm_parameter()
        param.svm_type = SVC
        param.kernel_type = svmutil.RBF
        param.eps = 1
        param.probability = 1
        if noprint:
            param.print_func = noprint

        libsvm_model = svmutil.svm_train(prob, param)

        node = convert(libsvm_model, "LibSvmSvmc",
                       [('input', FloatTensorType())])
        self.assertTrue(node is not None)
        dump_data_and_model(X[:5].astype(numpy.float32),
                            SkAPIClProba2(libsvm_model),
                            node,
                            basename="LibSvmSvmc-Dec2")
예제 #10
0
def generate_svm_model(rows, param, kmers=[1,2,3]):
    y, x = libsvm_generate_matrix(rows, kmers, dense=True)
    prob  = svmutil.svm_problem(y, x)
    # s = 3 -- epsilon-SVR
    param_str = "-s 3 -b 1 -q " # epsilon-SVR, prob estimate true, quiet mode
    param_str += " ".join(["-{} {}".format(k,v) for k,v in param.items()])
    params = svmutil.svm_parameter(param_str)
    model = svmutil.svm_train(prob, params)
    return model
예제 #11
0
    def _stop_training(self):
        super(LibSVMClassifier, self)._stop_training()
        self.normalizer = _LabelNormalizer(self.labels)

        labels = self.normalizer.normalize(self.labels.tolist())
        features = self.data

        # Call svm training method.
        prob = libsvmutil.svm_problem(labels, features.tolist())
        # Train
        self.model = libsvmutil.svm_train(prob, self.parameter)
예제 #12
0
 def train(self, input_data_path, params="-t 0 -c 4 -b 1", is_eval=True):
     with commons.PhaseLogger("LIBSVM.train.read_problem"):
         Y, X = svmutil.svm_read_problem(input_data_path + "/Train.txt")
         prob = svmutil.svm_problem(Y, X)
         #Y, X = svmutil.svm_read_problem(input_data_path + "\\Train.txt")
     self._params = svmutil.svm_parameter(params)
     with commons.PhaseLogger("LIBSVM.train.svm_train"):
         self._model = svmutil.svm_train(prob, self._params)
     self._init = True
     if is_eval is True:
         p_labels, p_acc, p_vals = svmutil.svm_predict(Y, X, self._model)
         acc, mse, _ = p_acc
         logging.info("[%s]: train with Acc[%.4f] Mse[%.4f]" %
                      (self._get_class_name(), acc, mse))
예제 #13
0
    def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        if not _HAS_SKLEARN:
            return
        if not _HAS_LIBSVM:
            return

        scikit_data = load_boston()
        prob = svmutil.svm_problem(scikit_data["target"],
                                   scikit_data["data"].tolist())
        param = svmutil.svm_parameter()
        param.svm_type = svmutil.NU_SVR
        param.kernel_type = svmutil.LINEAR
        param.eps = 1

        self.libsvm_model = svmutil.svm_train(prob, param)
예제 #14
0
def predict(tr_data_arr, tr_label_arr, pred_data_arr, pred_label_arr):
    data_arr = data_format(tr_data_arr)
    prob = svm_problem(tr_label_arr, data_arr)
    # 以下参数c和g通过交叉验证得到
    param = svm_parameter('-c 2048.0 -g 0.001953125')
    svm_model = svm_train(prob, param)

    pred_data_arr = data_format(pred_data_arr)
    pred_data_len = len(pred_label_arr)
    wrong = 0
    for idx, data in enumerate(pred_data_arr):
        p_label, p_acc, p_val = svm_predict(
            [pred_label_arr[idx]], [data], svm_model)
        if int(p_label[0]) != int(pred_label_arr[idx]):
            wrong += 1

    accuracy = (pred_data_len - wrong) * 100.0 / pred_data_len
    return pred_data_len, wrong, accuracy
예제 #15
0
def process(training_file, test_file, check, draw):
    # Load training data.
    with open(training_file) as f:
        class_1 = pickle.load(f)
        class_2 = pickle.load(f)
        labels = pickle.load(f)

    # Convert data to lists for libsvm.
    class_1 = map(list, class_1)
    class_2 = map(list, class_2)
    labels = list(labels)
    samples = class_1 + class_2
    problem = svmutil.svm_problem(labels, samples)
    # Don't print to stdout, use radial basis functions.
    param = svmutil.svm_parameter('-q -t 2')
    model = svmutil.svm_train(problem, param)

    # Load test data.
    with open(test_file) as f:
        class_1 = pickle.load(f)
        class_2 = pickle.load(f)
        labels = pickle.load(f)

    class_1 = map(list, class_1)
    class_2 = map(list, class_2)
    labels = list(labels)

    if check:
        # Sadly, this prints to stdout too :-/
        svmutil.svm_predict(labels, class_1 + class_2,
                            model)  # Prints accuracy.

    if draw:

        def classify(x, y, model=model):
            return array(
                svmutil.svm_predict([0] * len(x), map(list, zip(x, y)),
                                    model)[0])

        imtools.plot_2d_boundary(
            [-6, 6, -6, 6], [array(class_1), array(class_2)], classify,
            [1, -1])
        show()
예제 #16
0
def process(training_file, test_file, check, draw):
  # Load training data.
  with open(training_file) as f:
    class_1 = pickle.load(f)
    class_2 = pickle.load(f)
    labels = pickle.load(f)

  # Convert data to lists for libsvm.
  class_1 = map(list, class_1)
  class_2 = map(list, class_2)
  labels = list(labels)
  samples = class_1 + class_2
  problem = svmutil.svm_problem(labels, samples)
  # Don't print to stdout, use radial basis functions.
  param = svmutil.svm_parameter('-q -t 2')
  model = svmutil.svm_train(problem, param)

  # Load test data.
  with open(test_file) as f:
    class_1 = pickle.load(f)
    class_2 = pickle.load(f)
    labels = pickle.load(f)

  class_1 = map(list, class_1)
  class_2 = map(list, class_2)
  labels = list(labels)

  if check:
    # Sadly, this prints to stdout too :-/
    svmutil.svm_predict(labels, class_1 + class_2, model)  # Prints accuracy.

  if draw:
    def classify(x, y, model=model):
      return array(svmutil.svm_predict([0] * len(x), map(list, zip(x, y)),
                                       model)[0])
    imtools.plot_2d_boundary(
        [-6, 6, -6, 6], [array(class_1), array(class_2)], classify, [1, -1])
    show()
예제 #17
0
    def test_convert_svmr_linear(self):
        iris = load_iris()

        X = iris.data[:, :2]
        y = iris.target
        prob = svmutil.svm_problem(y, X.tolist())

        param = svmutil.svm_parameter()
        param.svm_type = SVR
        param.kernel_type = svmutil.LINEAR
        param.eps = 1
        if noprint:
            param.print_func = noprint

        libsvm_model = svmutil.svm_train(prob, param)

        node = convert(libsvm_model, "LibSvmSvmrLinear",
                       [('input', FloatTensorType())])
        self.assertTrue(node is not None)
        dump_data_and_model(X[:5].astype(numpy.float32),
                            SkAPIReg(libsvm_model),
                            node,
                            basename="LibSvmSvmrLinear-Dec3")
예제 #18
0
def run_kfold(param_dict, rows, numfold, kmers=[1,2,3]):
    """
    Run k KFold

    Args:
        param_dict: dictionary mapping param string to its value
        rows: input rows
        numfold: k for cross validation
        kmers: list of kmers, default [1,2,3]
    Return:
        dictionary of model performance (SCC, MSE) if benchmark is True, else
        return predictions for each fold
    """
    kf = KFold(numfold, shuffle=True)
    splitted = kf.split(rows)
    param_str = "-s 3 -b 1 -q " # epsilon-SVR, prob estimate true, quiet mode
    param_str += " ".join(["-{} {}".format(k,v) for k,v in param_dict.items()])
    params = svmutil.svm_parameter(param_str)

    foldidx = 1
    fold_results = []
    for train_idx, test_idx in splitted:
        train_list = [rows[i] for i in train_idx]
        test_list = [rows[i] for i in test_idx]

        y_train, x_train = libsvm_generate_matrix(train_list, kmers)
        y_test, x_test = libsvm_generate_matrix(test_list, kmers)

        train_prob  = svmutil.svm_problem(y_train, x_train)

        model = svmutil.svm_train(train_prob, params)
        #svmutil.svm_save_model('model_name.model', m)
        # y is only needed when we need the model performance
        svmpred = svmutil.svm_predict(y_test, x_test, model, options="-q")
        fold_results.append({"test":test_list, "svmpred":svmpred})
    return fold_results
예제 #19
0
import os

from libsvm import svmutil

import ocr

OCR_PATH = '/Users/thakis/Downloads/data/sudoku_images/ocr_data/'
features, labels = ocr.load_ocr_data(os.path.join(OCR_PATH, 'training'))
test_features, test_labels = \
    ocr.load_ocr_data(os.path.join(OCR_PATH, 'testing'))

features = map(list, features)
test_features = map(list, test_features)


problem = svmutil.svm_problem(labels, features)
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)

print 'Training data fit:'
svmutil.svm_predict(labels, features, model)
print 'Testing data fit:'
svmutil.svm_predict(test_labels, test_features, model)
예제 #20
0
tic.k('start')
SUDOKU_PATH = '/Users/thakis/Downloads/data/sudoku_images/sudokus/'
imname = os.path.join(SUDOKU_PATH, 'sudoku18.jpg')
vername = os.path.join(SUDOKU_PATH, 'sudoku18.sud')

im = numpy.array(Image.open(imname).convert('L'))

x = sudoku.find_sudoku_edges(im, axis=0)
y = sudoku.find_sudoku_edges(im, axis=1)
tic.k('found edges')


# Extract cells, run OCR.
OCR_PATH = '/Users/thakis/Downloads/data/sudoku_images/ocr_data/'
features, labels = ocr.load_ocr_data(os.path.join(OCR_PATH, 'training'))
problem = svmutil.svm_problem(labels, map(list, features))
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)
tic.k('built OCR model')

crops = []
for col in range(9):
  for row in range(9):
    crop = im[y[col]:y[col + 1], x[row]:x[row + 1]]
    crops.append(ocr.compute_feature(crop))
tic.k('extracted cells')

res = svmutil.svm_predict(numpy.loadtxt(vername), map(list, crops), model)[0]
tic.k('recognized cells')

res = numpy.array(res).reshape(9, 9)
eigenfaces = pca.components_.reshape((n_components, h, w))

print("Projecting the input data on the eigenfaces orthonormal basis")

X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

#
################################################################################
## Train a SVM classification model
#
print("Fitting the classifier to the training set")
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
prob = svm_problem(y_train,X_train_pca)
param = svm_parameter("-q")
param.kernel='rbf'
#param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
 #             'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
param.C=32
param.gamma=0.0001
print("Pass")
#parameters = GridSearchCV( param_grid)
model= svm_train(prob,param)
#clf = clf.fit(X_train_pca, y_train)

#print("Best estimator found by grid search:")
#print(m.best_estimator_)
y_pred, pred_acc, pred_val = svm_predict(y_test,X_test_pca,model)
res = bc.classify(test_features)[0]
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print 'Bayes Accuracy:', acc
print_confusion(res, test_labels, classnames)
# FIXME: Bayes accuracy gets very bad if the input dimensions aren't reduced
# enough. Probably some float underflow due to things not using log
# probabilities?


# Test SVM.
features = map(list, features)
test_features = map(list, test_features)

str_int_map = {}  # libSVM needs int labels.
for i, c in enumerate(classnames):
  str_int_map[c], str_int_map[i] = i, c

def convert_labels(labels, str_int_map):
  return [str_int_map[l] for l in labels]

problem = svmutil.svm_problem(convert_labels(labels, str_int_map), features)
# Use a linear kernel, radial basis functions have horrible results (~20% acc)
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)
res = svmutil.svm_predict(
    convert_labels(test_labels, str_int_map), test_features, model)[0]
res = convert_labels(res, str_int_map)
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print 'SVM Accuracy:', acc
print_confusion(res, test_labels, classnames)
예제 #23
0
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print 'Bayes Accuracy:', acc
print_confusion(res, test_labels, classnames)
# FIXME: Bayes accuracy gets very bad if the input dimensions aren't reduced
# enough. Probably some float underflow due to things not using log
# probabilities?

# Test SVM.
features = map(list, features)
test_features = map(list, test_features)

str_int_map = {}  # libSVM needs int labels.
for i, c in enumerate(classnames):
    str_int_map[c], str_int_map[i] = i, c


def convert_labels(labels, str_int_map):
    return [str_int_map[l] for l in labels]


problem = svmutil.svm_problem(convert_labels(labels, str_int_map), features)
# Use a linear kernel, radial basis functions have horrible results (~20% acc)
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)
res = svmutil.svm_predict(convert_labels(test_labels, str_int_map),
                          test_features, model)[0]
res = convert_labels(res, str_int_map)
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print 'SVM Accuracy:', acc
print_confusion(res, test_labels, classnames)
예제 #24
0
def train(y, x):
    train_len = int(1.0 * len(y))
    prob = svmutil.svm_problem(y[:train_len], x[:train_len])
    param = svmutil.svm_parameter('-t 2 -c 4 -b 1 -e 1e-12')
    m = svmutil.svm_train(prob, param)
    return m
예제 #25
0
    neg_loc = np.where(label < 0)
    pos_x1 = x1[pos_loc]
    pos_x2 = x2[pos_loc]
    neg_x1 = x1[neg_loc]
    neg_x2 = x2[neg_loc]

    plt.plot(pos_x1, pos_x2, 'b^')
    plt.plot(neg_x1, neg_x2, 'ro')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    ax = plt.gca()
    ax.set_aspect(1)
    plt.show()

    y = list(label)
    x = svm_data_format(x1, x2)
    prob = svm.svm_problem(y, x)
    #  param = svm.svm_parameter('-t 0 -c 1')
    #  param = svm.svm_parameter('-t 1 -d 2')
    param = svm.svm_parameter('-t 2')
    model = svm.svm_train(prob, param)
    svm.svm_save_model('test.model', model)

    # test
    #  y0 = [-1]
    #  x0 = [{1: 1, 2: 0}]
    #  p_label, p_acc, p_val = svm.svm_predict(y0, x0, model)
    #  print('p_label = ', p_label)
    #  print('p_acc = ', p_acc)
    #  print('p_val = ', p_val)
예제 #26
0
import os

from libsvm import svmutil

import ocr

OCR_PATH = '/Users/thakis/Downloads/data/sudoku_images/ocr_data/'
features, labels = ocr.load_ocr_data(os.path.join(OCR_PATH, 'training'))
test_features, test_labels = \
    ocr.load_ocr_data(os.path.join(OCR_PATH, 'testing'))

features = map(list, features)
test_features = map(list, test_features)

problem = svmutil.svm_problem(labels, features)
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)

print 'Training data fit:'
svmutil.svm_predict(labels, features, model)
print 'Testing data fit:'
svmutil.svm_predict(test_labels, test_features, model)