Example #1
def train(self):
    smoAlgor = SMO(dataset=self.dataset,
                   labels=self.labels,
                   C=self.C,
                   tolerance=self.tolerance,
                   maxIter=self.maxIter,
                   kernel=self.kern)
    smoAlgor.mainRoutine()
    self.alphaArr = smoAlgor.alphaArr
    self.b = smoAlgor.b
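
Once mainRoutine has run, self.alphaArr and self.b define the standard SVM decision function f(x) = sum_i alpha_i * y_i * K(x_i, x) + b. A minimal prediction sketch, assuming self.kern is a callable kernel and self.dataset/self.labels are indexable as above (the method name predict is hypothetical, not part of the snippet):

def predict(self, x):
    # f(x) = sum_i alpha_i * y_i * K(x_i, x) + b; the sign gives the class
    # assumes self.kern is callable: kern(x1, x2) -> float
    f = self.b
    for i, alpha in enumerate(self.alphaArr):
        if alpha != 0:  # only support vectors contribute
            f += alpha * self.labels[i] * self.kern(self.dataset[i], x)
    return 1 if f >= 0 else -1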
Example #2
	def train(self, cost=5, gamma=0.5, kernel_type='rbf'):
		smo = SMO(self.__y, self.__x, len(self.__y), cost, gamma, kernel_type)
		resolve = smo.solve()
		
		model = {}
		model['kernel_type'] = kernel_type
		model['gamma'] = gamma
		model['cost'] = cost
		model['rho'] = resolve[1]
		model['sv'] = []
		for i, v in resolve[0]:
			point = {}
			point['ya'] = self.__y[i] * v
			point['vector'] = self.__x[i]
			model['sv'].append(point)

		self.__model = model
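
The dict built here mirrors the libsvm model layout (ya = y_i * alpha_i, rho the decision offset). As a hedged sketch of how such a model could be evaluated, assuming the RBF kernel implied by kernel_type and gamma, and that each stored vector is a plain sequence of floats (the function name predict is hypothetical):

import math

def predict(model, x):
    # decision value: sum_i ya_i * exp(-gamma * ||sv_i - x||^2) - rho
    dec = 0.0
    for point in model['sv']:
        sq_dist = sum((a - b) ** 2 for a, b in zip(point['vector'], x))
        dec += point['ya'] * math.exp(-model['gamma'] * sq_dist)
    dec -= model['rho']
    return 1 if dec > 0 else -1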
Example #3
    def simulation(self, slot, count=1000, interval=117):
        counter = 0
        self.ui.progressBar.setMinimum(0)
        self.ui.progressBar.setMaximum(int(self.ui.request_count.text()))
        smo = SMO(self.ui.progressBar, int(self.ui.request_count.text()),
                  self.ui.outer_info)
        smo.add_channel(Channel('channel_1', self.ui.state_1, 0.6 / 4))
        smo.add_channel(Channel('channel_2', self.ui.state_2, 0.6 / 4))
        smo.add_channel(Channel('channel_3', self.ui.state_3, 0.6 / 4))
        smo.add_channel(Channel('channel_4', self.ui.state_4, 0.6 / 4))
        requests = [
            Request() for i in range(int(self.ui.request_count.text()))
        ]

        def handler():
            nonlocal counter
            counter += 1
            slot(smo, requests.pop(), counter, count)
            if counter >= count:
                timer.stop()
                timer.deleteLater()

        timer = QtCore.QTimer()
        timer.timeout.connect(handler)
        timer.start(interval)
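
The slot argument is any callable invoked once per timer tick with (smo, request, counter, count). A hypothetical example slot, shown only to illustrate the expected signature:

def demo_slot(smo, request, counter, count):
    # a real slot would hand the request to the queueing system here;
    # the exact SMO entry point is not shown in the snippet above
    print('tick {}/{}: dispatched {!r}'.format(counter, count, request))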
Example #4
def gen_results(X, y, numruns, loss=False):
    results = []
    if loss:
        plt.figure()
        plt.xlabel('Iteration (every 10th saved)')
        plt.ylabel('Loss')
        plt.title(
            'Objective function progression across {} runs'.format(numruns))
    for i in range(numruns):
        smo = SMO(calc_loss=loss)
        smo.fit(X, y)
        results.append(smo.fit_time)
        if loss:
            plt.plot(smo.training_loss, label='Run {}'.format(i))
        print('Run {} time: {}'.format(i, round(results[-1], 3)))
    if loss:
        plt.legend(loc='upper right')
        plt.savefig('plots/smo/loss_vs_ite_{}.pdf'.format(
            strftime("%Y.%m.%d_%H.%M.%S", localtime())), format='pdf')
        plt.close('all')
    return results
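
A typical call, assuming X_train and y_train are NumPy arrays prepared elsewhere (both names are placeholders); passing loss=True additionally writes the loss plot under plots/smo/:

import numpy as np

times = gen_results(X_train, y_train, numruns=5)
print('mean fit time: {:.3f}s (std {:.3f})'.format(np.mean(times), np.std(times)))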
Example #5
#!/usr/bin/env python
from smo import SMO
import cPickle as pickle
import time
import sys

if len(sys.argv) == 5:
    args = sys.argv
    A = SMO.SMO(args[2], args[3])
    start = time.clock()
    print "Training on", args[2]
    A.train(int(args[1]))
    stop = time.clock()
    print "Time taken to train:", str(stop - start)
    print "Accuracy: ", A.get_accu()
    with open(args[4], "wb") as f:
        pickle.dump(A, f, pickle.HIGHEST_PROTOCOL)
else:
    print "Usage: ./script_smo.py <stopping_criterion> <train_data> <test_data> <model_output_file>"
    print "Outputs the info about the training"
Example #6
def __init__(self, C, kernel=LinKernel()):
  self.k = kernel
  self.C = C
  self.optimizer = SMO(kernel, C)
Example #7
class SVM:
  # Support Vector Machine Classifier
  def __init__(self, C, kernel=LinKernel()):
    self.k = kernel
    self.C = C
    self.optimizer = SMO(kernel, C)

  def train(self, X, y):
    # store training examples
    self.supv = X
    self.supv_y = y

    # use the SMO module to compute alphas and b
    self.alphas = self.optimizer.compute_alphas(X,y)
    self.b = self.optimizer.b

  def _eval(self, x):
    # evaluate the SVM on a single example
    ret = 0
    for i, a in enumerate(self.alphas):
      # ignore non-support vectors
      if a != 0:
        ret += a * self.supv_y[i] * self.k.eval(x,self.supv[i])
    return ret + self.b

  def eval(self, X):
    # evaluate a matrix of example vectors
    result = np.zeros(len(X))
    for i in xrange(len(X)):
      result[i] = self._eval(X[i])
    return result

  def classify(self, X):
    # classify a matrix of example vectors
    return np.sign(self.eval(X))

  def test(self, X, y):
    # find the percentage of misclassified examples
    error = np.zeros(len(X))
    guess = self.classify(X)
    error[guess != y] = 1
    return np.float(np.sum(error)) / len(X)

  def countSupVectors(self):
    count = 0
    for a in self.alphas:
      if a != 0:
        count += 1
    return count

  def findC(self, X, y, count=50, kfolds=5):
    # find a good estimate of C with kfold cross validation

    yt = y.reshape(len(y), 1)
    data = np.hstack([yt,X])
    np.random.shuffle(data)
    partitions = np.array_split(data, kfolds)
    candidates = np.logspace(0, 5, num=count, base=np.e)
    err = np.zeros(count)
    svec = err.copy()

    minErr = np.inf
    C = candidates[0]
    for index, c in enumerate(candidates):
      errors = np.zeros(kfolds)
      supvec_count = errors.copy()
      for i in range(kfolds):
        test = partitions[i]
        train = np.vstack([partitions[x] for x in range(kfolds) if x != i])
        testy = test[:,0]
        testx = test[:,1:]
        trainy = train[:,0]
        trainx = train[:,1:]
        temp = SVM(c, self.k)
        temp.train(trainx, trainy)
        errors[i] = temp.test(testx, testy)
        supvec_count[i] = temp.countSupVectors()
      err[index] = np.mean(errors)
      svec[index] = np.mean(supvec_count)
      print c, err[index]
      if err[index] < minErr:
        C = c
        minErr = err[index]

    print "C, err, #svec"
    for i in range(count):
      print candidates[i], err[i], svec[i]
    print "Final value: ", C
    return C
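
A toy usage sketch for the class above, assuming LinKernel and SMO from the surrounding codebase are importable; the data is synthetic and the numbers are illustrative only:

import numpy as np

# two linearly separable blobs labelled +1 / -1
X = np.vstack([np.random.randn(50, 2) + 2, np.random.randn(50, 2) - 2])
y = np.hstack([np.ones(50), -np.ones(50)])

clf = SVM(C=1.0)             # defaults to LinKernel()
clf.train(X, y)
print clf.test(X, y)         # fraction misclassified on the training set
print clf.countSupVectors()  # number of support vectors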
Example #8
def looPairBrute(params,data):
  """Same as looPair but does it brute force style - no approximation here."""
  dataMatrix,y = data

  # First train on all the data...
  smo = SMO()
  smo.setParams(params)
  smo.setData(dataMatrix,y)
  smo.solve()
  onAll = copy.deepcopy(smo.getModel())

  # Now iterate and retrain without each of the vectors, collating the statistics...
  correct = 0
  for i in xrange(y.shape[0]):
    noIndex = numpy.array(range(i)+range(i+1,y.shape[0]))
    smo.setData(dataMatrix[noIndex],y[noIndex])
    smo.solve()
    res = smo.getModel().classify(dataMatrix[i]) * y[i]
    if res>0: correct += 1

  # Return the loo and initial training on all the data...
  return (float(correct)/float(y.shape[0]),onAll)
Example #9
def looPairRange(params, data, dist=1.1):
  """Identical to looPair, except you specify a distance from the boundary: it retrains for all points within that range, but not for ones outside it. For a value of one, ignoring rounding error, it should be identical to looPair, though in practice you should never do this - dist should always be >1.0. This also has a better-than optimisation - if it knows its result is going to be worse than betterThan it gives up and saves computation."""
  dataMatrix,y = data

  # First train on all the data...
  smo = SMO()
  smo.setParams(params)
  smo.setData(dataMatrix,y)
  smo.solve()
  onAll = copy.deepcopy(smo.getModel())

  # Get set of indices to retrain with, collate statistics for all the non-supporting vectors...
  scores = onAll.multiClassify(dataMatrix)*y
  indices = numpy.nonzero(scores<dist)[0]
  correct = (scores>0).sum() - (scores[indices]>0).sum()

  # Now iterate and retrain without each of the supporting vectors, collating the statistics...
  for i in xrange(indices.shape[0]):
    index = indices[i]
    noIndex = numpy.array(range(index)+range(index+1,y.shape[0]))
    smo.setData(dataMatrix[noIndex],y[noIndex])
    smo.solve()
    res = smo.getModel().classify(dataMatrix[index]) * y[index]
    if res>0: correct += 1

  # Return the loo and initial training on all the data...
  return (float(correct)/float(y.shape[0]),onAll)
Example #10
def looPair(params, data):
  """Given a parameters object and a (data matrix, y) pair (as returned by dataset.getTrainData), this returns a (good) approximation of the leave-one-out score - the fraction classified correctly - and a model trained on *all* the data, as a pair. It assumes that dropping a non-supporting vector does not require retraining, which is correct the vast majority of the time, and as a bonus avoids retraining for most of the data, making this relatively fast."""
  dataMatrix,y = data

  # First train on all the data...
  smo = SMO()
  smo.setParams(params)
  smo.setData(dataMatrix,y)
  smo.solve()
  onAll = copy.deepcopy(smo.getModel())
  indices = smo.getIndices()

  # Collate statistics for all the non-supporting vectors...
  scores = onAll.multiClassify(dataMatrix)*y
  correct = (scores>0).sum() - (scores[indices]>0).sum()

  # Now iterate and retrain without each of the supporting vectors, collating the statistics...
  for i in xrange(indices.shape[0]):
    index = indices[i]
    noIndex = numpy.array(range(index)+range(index+1,y.shape[0]))
    smo.setData(dataMatrix[noIndex],y[noIndex])
    smo.solve()
    res = smo.getModel().classify(dataMatrix[index]) * y[index]
    if res>0: correct += 1

  # Return the loo and initial training on all the data...
  return (float(correct)/float(y.shape[0]),onAll)
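
These helpers lend themselves to simple model selection: score each candidate parameter set by its leave-one-out estimate and keep the best. A sketch, where candidate_params is a hypothetical iterable of parameter objects accepted by smo.setParams, and (dataMatrix, y) comes from dataset.getTrainData as the docstring describes:

best_score, best_model = -1.0, None
for params in candidate_params:
  score, model = looPair(params, (dataMatrix, y))
  if score > best_score:
    best_score, best_model = score, model
print best_score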