def simulation(self, slot, count=1000, interval=117):
    """Feed one request per timer tick to ``slot`` until the run is over.

    Builds an ``SMO`` with four channels bound to the UI state widgets,
    creates as many ``Request`` objects as the ``request_count`` field
    holds, and starts a ``QTimer`` that calls
    ``slot(smo, request, tick_number, count)`` on every timeout.

    The timer stops (and is scheduled for deletion) as soon as either
    ``count`` ticks have been delivered or the request pool is empty.
    The second condition prevents ``IndexError: pop from empty list`` when
    ``count`` is larger than the number of requests entered in the UI.

    Args:
        slot: Callable invoked as ``slot(smo, request, counter, count)``.
        count: Maximum number of ticks to deliver.
        interval: Timer period handed to ``QTimer.start`` (milliseconds).

    Raises:
        ValueError: If the ``request_count`` field does not hold an integer.
    """
    counter = 0

    # NOTE(review): plain attribute assignment only configures the bar when
    # the Qt binding exposes properties this way; with the classic API this
    # would need setMinimum()/setMaximum() - confirm against the binding used.
    self.ui.progressBar.minimum = 0
    # Parse the field once so the bar, the SMO and the pool always agree.
    total_requests = int(self.ui.request_count.text())
    self.ui.progressBar.maximum = total_requests

    smo = SMO(self.ui.progressBar, total_requests, self.ui.outer_info)
    channel_widgets = (
        ('channel_1', self.ui.state_1),
        ('channel_2', self.ui.state_2),
        ('channel_3', self.ui.state_3),
        ('channel_4', self.ui.state_4),
    )
    for channel_name, widget in channel_widgets:
        smo.add_channel(Channel(channel_name, widget, 0.6 / 4))

    requests = [Request() for _ in range(total_requests)]

    def stop_timer():
        timer.stop()
        timer.deleteLater()

    def handler():
        nonlocal counter
        if not requests:
            # Pool was empty from the start (request_count == 0).
            stop_timer()
            return
        counter += 1
        slot(smo, requests.pop(), counter, count)
        if counter >= count or not requests:
            stop_timer()

    timer = QtCore.QTimer()
    timer.timeout.connect(handler)
    timer.start(interval)
def looPairRange(params, data, dist=1.1):
    """Approximate leave-one-out accuracy, retraining only near the boundary.

    Like looPair, but the points that get a full retrain are chosen by
    score rather than by the solver's own index list: every point whose
    score (classifier output times its label) is below ``dist`` is left out
    and retrained for; points at or beyond ``dist`` are assumed to keep the
    classification they get from the model trained on all the data.

    For ``dist`` of one, ignoring rounding error, this should match looPair,
    though in practice ``dist`` should always be > 1.0.

    NOTE: earlier documentation mentioned a "betterThan" early exit. This
    function takes no such parameter and always runs to completion.

    Args:
        params: Parameters object handed to ``SMO.setParams``.
        data: Pair ``(dataMatrix, y)`` of feature rows and labels.
        dist: Score threshold below which a point is retrained for.

    Returns:
        Tuple ``(accuracy, model)``: the fraction of points counted as
        correctly classified, and the model trained on *all* the data.
    """
    dataMatrix, y = data
    total = y.shape[0]

    # First train on all the data...
    smo = SMO()
    smo.setParams(params)
    smo.setData(dataMatrix, y)
    smo.solve()
    # Copy the model because the same solver object is re-solved below
    # (presumably getModel() is not a stable snapshot - verify against SMO).
    onAll = copy.deepcopy(smo.getModel())

    # Pick the points to retrain for, and count the correct answers among
    # the points that are trusted without retraining...
    scores = onAll.multiClassify(dataMatrix) * y
    indices = numpy.nonzero(scores < dist)[0]
    correct = (scores > 0).sum() - (scores[indices] > 0).sum()

    # Now retrain without each selected point and test on the held-out one.
    # numpy.delete on a prebuilt arange replaces the Python 2 only
    # ``range(a) + range(b)`` list concatenation.
    everyIndex = numpy.arange(total)
    for index in indices:
        noIndex = numpy.delete(everyIndex, index)
        smo.setData(dataMatrix[noIndex], y[noIndex])
        smo.solve()
        res = smo.getModel().classify(dataMatrix[index]) * y[index]
        if res > 0:
            correct += 1

    # Return the loo and initial training on all the data...
    return (float(correct) / float(total), onAll)
def looPair(params, data):
    """Approximate leave-one-out accuracy, retraining only for selected points.

    Trains once on all the data, then retrains only for the points listed
    by ``smo.getIndices()`` (per the original author's notes these are the
    supporting vectors). Every other point is assumed to keep the
    classification it gets from the full model, i.e. losing a
    non-supporting vector is assumed not to require retraining. That is
    correct the vast majority of the time and avoids retraining for most of
    the data, which makes this relatively fast.

    Args:
        params: Parameters object handed to ``SMO.setParams``.
        data: Pair ``(dataMatrix, y)`` of feature rows and labels (as
            returned by dataset.getTrainData).

    Returns:
        Tuple ``(accuracy, model)``: the fraction of points counted as
        correctly classified (not a log likelihood, despite what older
        documentation said), and the model trained on *all* the data.
    """
    dataMatrix, y = data
    total = y.shape[0]

    # First train on all the data...
    smo = SMO()
    smo.setParams(params)
    smo.setData(dataMatrix, y)
    smo.solve()
    # Copy the model because the same solver object is re-solved below
    # (presumably getModel() is not a stable snapshot - verify against SMO).
    onAll = copy.deepcopy(smo.getModel())
    indices = smo.getIndices()

    # Collate statistics for all the points that are not retrained for:
    # a positive score (classifier output times label) counts as correct.
    scores = onAll.multiClassify(dataMatrix) * y
    correct = (scores > 0).sum() - (scores[indices] > 0).sum()

    # Now retrain without each selected point and test on the held-out one.
    # numpy.delete on a prebuilt arange replaces the Python 2 only
    # ``range(a) + range(b)`` list concatenation.
    everyIndex = numpy.arange(total)
    for index in indices:
        noIndex = numpy.delete(everyIndex, index)
        smo.setData(dataMatrix[noIndex], y[noIndex])
        smo.solve()
        res = smo.getModel().classify(dataMatrix[index]) * y[index]
        if res > 0:
            correct += 1

    # Return the loo and initial training on all the data...
    return (float(correct) / float(total), onAll)
def train(self):
    """Run the SMO solver on this instance's data and keep its solution.

    Builds an ``SMO`` from the stored dataset, labels and hyper-parameters,
    runs its ``mainRoutine()``, then copies the solver's ``alphaArr`` and
    ``b`` onto ``self``.
    """
    solver_options = dict(
        dataset=self.dataset,
        labels=self.labels,
        C=self.C,
        tolerance=self.tolerance,
        maxIter=self.maxIter,
        kernel=self.kern,
    )
    solver = SMO(**solver_options)
    solver.mainRoutine()

    # Keep only the solved quantities; the solver itself is discarded.
    self.alphaArr, self.b = solver.alphaArr, solver.b
def gen_results(X, y, numruns, loss=False):
    """Fit a fresh SMO ``numruns`` times on ``(X, y)`` and collect fit times.

    Each run's ``fit_time`` is printed (rounded to 3 places) and appended
    to the result list. When ``loss`` is true, every run's
    ``training_loss`` is also plotted on one figure, which is saved as a
    timestamped PDF under ``plots/smo/`` and then closed.

    Args:
        X: Training inputs passed to ``SMO.fit``.
        y: Training targets passed to ``SMO.fit``.
        numruns: Number of independent fits to perform.
        loss: If true, ask SMO to record the loss and plot it per run.

    Returns:
        List with the ``fit_time`` of every run, in run order.
    """
    results = []

    if loss:
        plt.figure()
        plt.xlabel('Iteration (every 10th saved)')
        plt.ylabel('Loss')
        plt.title(
            'Objective function progression across {} runs'.format(numruns))

    for i in range(numruns):
        smo = SMO(calc_loss=loss)
        smo.fit(X, y)
        results.append(smo.fit_time)
        if loss:
            plt.plot(smo.training_loss, label='Run {}'.format(i))
        print('Run {} time: {}'.format(i, round(results[-1], 3)))

    if loss:
        plt.legend(loc='upper right')
        timestamp = strftime("%Y.%m.%d_%H.%M.%S", localtime())
        # ``format`` belongs to savefig; it used to be passed to str.format
        # by a misplaced parenthesis, where it was silently ignored.
        plt.savefig('plots/smo/loss_vs_ite_{}.pdf'.format(timestamp),
                    format='pdf')
        plt.close('all')

    return results
def looPairBrute(params, data):
    """Exact leave-one-out accuracy: retrain once per data point.

    Same contract as looPair but brute force - no approximation here.
    Every point is held out in turn, the solver is retrained on the rest,
    and the held-out point is counted as correct when its score
    (classifier output times its label) is positive.

    Args:
        params: Parameters object handed to ``SMO.setParams``.
        data: Pair ``(dataMatrix, y)`` of feature rows and labels.

    Returns:
        Tuple ``(accuracy, model)``: the fraction of held-out points
        classified correctly, and the model trained on *all* the data.
    """
    dataMatrix, y = data
    total = y.shape[0]

    # First train on all the data...
    smo = SMO()
    smo.setParams(params)
    smo.setData(dataMatrix, y)
    smo.solve()
    # Copy the model because the same solver object is re-solved below
    # (presumably getModel() is not a stable snapshot - verify against SMO).
    onAll = copy.deepcopy(smo.getModel())

    # Now retrain without each of the vectors, collating the statistics.
    # numpy.delete on a prebuilt arange replaces the Python 2 only
    # ``range(a) + range(b)`` list concatenation.
    correct = 0
    everyIndex = numpy.arange(total)
    for i in range(total):
        noIndex = numpy.delete(everyIndex, i)
        smo.setData(dataMatrix[noIndex], y[noIndex])
        smo.solve()
        res = smo.getModel().classify(dataMatrix[i]) * y[i]
        if res > 0:
            correct += 1

    # Return the loo and initial training on all the data...
    return (float(correct) / float(total), onAll)
#!/bin/python from smo import SMO import cPickle as pickle import time import sys if len(sys.argv) == 5: args = sys.argv A = SMO.SMO(args[2], args[3]) start = time.clock() print "Training on", args[2] A.train(int(args[1])) stop = time.clock() print "Time taken to train:", str(stop - start) print "Accuracy: ", A.get_accu() with open(args[4], "w+") as f: pickle.dump(A, f, pickle.HIGHEST_PROTOCOL) else: print "Usage: ./script_smo.py <stopping_criterion> <train_data> <test_data> <model_output_file>" print "Outputs the info about the training"