Example #1
import copy

import numpy

# SMO is assumed to be the solver class from the surrounding module.

def looPairRange(params, data, dist=1.1):
  """Identical to looPair, except you specify a distance from the boundary and it retrains for all points within that range, but not for ones outside it. For a value of 1.0, ignoring rounding error, it should be identical to looPair, though in practice you should never do this - dist should always be >1.0."""
  dataMatrix, y = data

  # First train on all the data...
  smo = SMO()
  smo.setParams(params)
  smo.setData(dataMatrix,y)
  smo.solve()
  onAll = copy.deepcopy(smo.getModel())

  # Get set of indices to retrain with, collate statistics for all the non-supporting vectors...
  scores = onAll.multiClassify(dataMatrix)*y
  indices = numpy.nonzero(scores<dist)[0]
  correct = (scores>0).sum() - (scores[indices]>0).sum()

  # Now iterate and retrain without each of the in-range points, collating the statistics...
  for index in indices:
    noIndex = numpy.delete(numpy.arange(y.shape[0]), index)
    smo.setData(dataMatrix[noIndex],y[noIndex])
    smo.solve()
    res = smo.getModel().classify(dataMatrix[index]) * y[index]
    if res>0: correct += 1

  # Return the LOO score and the model trained on all the data...
  return (float(correct)/float(y.shape[0]),onAll)
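The heart of the approximation is the selection step: only points whose margin score y*f(x) falls below dist get a retrain. A minimal self-contained sketch of just that step, using made-up scores in place of a real SMO model:

import numpy

# Hypothetical margin scores y*f(x) for six points; values below dist
# mark the points close enough to the boundary to need retraining.
scores = numpy.array([2.3, 0.7, -0.2, 1.05, 1.8, 0.99])
dist = 1.1

retrain = numpy.nonzero(scores < dist)[0]
trusted_correct = (scores > 0).sum() - (scores[retrain] > 0).sum()

print(retrain)           # [1 2 3 5]
print(trusted_correct)   # 2 - points 0 and 4 are counted without retraining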
Example #2
import copy

import numpy

def looPair(params, data):
  """Given a parameters object and a pair of data matrix and y (as returned by dataset.getTrainData) this returns a (good) approximation of the leave-one-out performance (the fraction of points classified correctly), and a model trained on *all* the data, as a pair. Makes the assumption that losing a non-supporting vector does not require retraining, which is correct the vast majority of the time, and as a bonus avoids retraining for most of the data, making this relatively fast."""
  dataMatrix, y = data

  # First train on all the data...
  smo = SMO()
  smo.setParams(params)
  smo.setData(dataMatrix,y)
  smo.solve()
  onAll = copy.deepcopy(smo.getModel())
  indices = smo.getIndices()

  # Collate statistics for all the non-supporting vectors...
  scores = onAll.multiClassify(dataMatrix)*y
  correct = (scores>0).sum() - (scores[indices]>0).sum()

  # Now iterate and retrain without each of the supporting vectors, collating the statistics...
  for index in indices:
    noIndex = numpy.delete(numpy.arange(y.shape[0]), index)
    smo.setData(dataMatrix[noIndex],y[noIndex])
    smo.solve()
    res = smo.getModel().classify(dataMatrix[index]) * y[index]
    if res>0: correct += 1

  # Return the LOO score and the model trained on all the data...
  return (float(correct)/float(y.shape[0]),onAll)
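A hedged usage sketch; params and dataset here are placeholders for whatever parameters object and dataset wrapper the surrounding library provides, as implied by the docstring:

# Hypothetical usage - `params` and `dataset` stand in for the
# library's parameter object and dataset wrapper.
data = dataset.getTrainData()            # (dataMatrix, y) pair
looScore, model = looPair(params, data)
print('LOO fraction correct: %.3f' % looScore)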
Example #3
import copy

import numpy

def looPairBrute(params, data):
  """Same as looPair but does it brute-force style - no approximation here."""
  dataMatrix, y = data

  # First train on all the data...
  smo = SMO()
  smo.setParams(params)
  smo.setData(dataMatrix,y)
  smo.solve()
  onAll = copy.deepcopy(smo.getModel())

  # Now iterate and retrain without each of the vectors, collating the statistics...
  correct = 0
  for i in range(y.shape[0]):
    noIndex = numpy.delete(numpy.arange(y.shape[0]), i)
    smo.setData(dataMatrix[noIndex],y[noIndex])
    smo.solve()
    res = smo.getModel().classify(dataMatrix[i]) * y[i]
    if res>0: correct += 1

  # Return the LOO score and the model trained on all the data...
  return (float(correct)/float(y.shape[0]),onAll)
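Since looPairBrute retrains once per data point, it costs a full solve for every point, against roughly one solve per near-boundary point for looPair. A hedged sketch of using it to sanity-check the approximation on a small random subset (dataMatrix, y and params are assumed from the surrounding context):

# Hypothetical check: compare the approximate and exact LOO scores
# on a random subset small enough for brute force to be affordable.
subset = numpy.random.permutation(y.shape[0])[:50]
small = (dataMatrix[subset], y[subset])

approx, _ = looPair(params, small)
exact, _ = looPairBrute(params, small)
print('approx = %.3f, exact = %.3f' % (approx, exact))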
Example #4
File: svm.py Project: yalog/misc
	def train(self, cost=5, gamma=0.5, kernel_type='rbf'):
		# Run the SMO solver; resolve[0] yields (index, coefficient)
		# pairs for the support vectors, resolve[1] the bias term rho.
		smo = SMO(self.__y, self.__x, len(self.__y), cost, gamma, kernel_type)
		resolve = smo.solve()
		
		model = {}
		model['kernel_type'] = kernel_type
		model['gamma'] = gamma
		model['cost'] = cost
		model['rho'] = resolve[1]
		model['sv'] = []
		# Store each support vector with its weight 'ya' = y[i] times its
		# solver coefficient, the term it contributes to the decision function.
		for i, v in resolve[0]:
			point = {}
			point['ya'] = self.__y[i] * v
			point['vector'] = self.__x[i]
			model['sv'].append(point)

		self.__model = model
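The resulting dictionary carries everything needed to evaluate the usual SVM decision function f(x) = sum_i ya_i * K(sv_i, x) - rho. A minimal sketch of a matching predictor for the 'rbf' case, written against the dict layout above; the sign convention for rho is an assumption, following the common libsvm layout:

import math

def predict(model, x):
    # f(x) = sum_i ya_i * exp(-gamma * ||sv_i - x||^2) - rho,
    # with the rho sign convention assumed to match libsvm.
    total = 0.0
    for point in model['sv']:
        sq = sum((a - b) ** 2 for a, b in zip(point['vector'], x))
        total += point['ya'] * math.exp(-model['gamma'] * sq)
    total -= model['rho']
    return 1 if total > 0 else -1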