def loadData(fname):
    """
    Read the feature vector file and remap its labels.

    The file is parsed by ``convertSEQFileToArray``; the labels it yields are
    converted from {-1,+1} to {0,1}, while the vectors are passed through
    untouched.

    Args:
        fname: Name of the feature vector file, forwarded as-is to
            ``convertSEQFileToArray``.

    Returns:
        A tuple ``(convLabels, vects)``: the list of remapped labels and the
        vectors exactly as returned by ``convertSEQFileToArray``.
    """
    labels, vects = convertSEQFileToArray(fname)
    # Floor division keeps the -1 -> 0 / +1 -> 1 mapping independent of
    # whether true division is in effect (a plain `/` would yield 0.0 / 1.0).
    convLabels = [(y + 1) // 2 for y in labels]
    return (convLabels, vects)
def train(self, trainFileName):
    """
    Run the main training routine of the Pegasos algorithm.

    Labels and feature vectors are loaded from ``trainFileName`` through
    ``convertSEQFileToArray``. The routine then makes ``self.T`` full passes
    over the data. In each pass it collects every instance whose signed score
    ``y * (dot(w, x) + b)`` is negative, takes a step of size
    ``1 / (self.l * t)`` along the averaged collected instances, and rescales
    the weights so that their norm never exceeds ``1 / sqrt(self.l)``.
    Per-pass timing, weight norm and training accuracy are printed.

    A pass in which no instance is misclassified only applies the shrinkage
    factor to the weights and leaves the bias untouched.

    Args:
        trainFileName: Name of the training file, forwarded as-is to
            ``convertSEQFileToArray``. Labels are presumably in {-1,+1};
            verify against the loader.

    Side effects:
        Sets ``self.d`` (vector dimensionality), ``self.lw`` (learnt weight
        vector) and ``self.bias`` (learnt bias). Returns nothing.
    """
    # Print the stats. The single-argument print form produces the same
    # output as the print statement.
    print("L2 parameter = %s" % (self.l,))
    print("Total iterations = %s" % (self.T,))

    # Load the feature vectors.
    (labels, vects) = convertSEQFileToArray(trainFileName)

    # Get the dimensionality d of the vectors (taken from the first one).
    d = vects[0].size
    self.d = d

    # Get the total no. of training vectors.
    n = len(vects)
    print("Total no. of training instances = %d" % n)

    # Construct the initial weight vector; its norm is 1 / sqrt(self.l).
    factor = 1.0 / sqrt(d * self.l)
    w = factor * ones(d)
    b = 0

    # Iterative procedure.
    for t in range(1, self.T + 1):
        startTime = time()

        # Accumulate the misclassified instances of this pass.
        z = zeros(d)
        bSum = 0
        k = 0
        for i, x in enumerate(vects):
            y = labels[i]
            if (y * (dot(w, x) + b)) < 0:
                # Misclassification.
                z += (y * x)
                bSum += y
                k += 1

        eta = 1.0 / (self.l * t)
        whalf = (1.0 - (eta * self.l)) * w
        if k > 0:
            # Only step when something was misclassified; dividing by
            # k == 0 would otherwise abort (or corrupt) the training.
            step = eta / k
            whalf = whalf + (step * z)
            b = b + (step * bSum)

        # Project back so that the weight norm is at most 1 / sqrt(self.l).
        # A zero-norm vector is already inside that ball, so leave it as is
        # instead of dividing by zero.
        scale = sqrt(self.l) * sqrt(dot(whalf, whalf))
        if scale > 0:
            scale = min(1.0, 1.0 / scale)
        else:
            scale = 1.0
        w = scale * whalf

        endTime = time()
        accuracy = 100 * (1.0 - (float(k) / n))
        print("Iteration: %d" % t)
        print("Time taken for this iteration %.3fs" % (endTime - startTime))
        print("Weight norm = %f" % (sqrt(dot(w, w))))
        print("Training accuracy = %f\n" % accuracy)

    # Set the learnt weight vector.
    self.lw = w
    self.bias = b
def train(self, trainFileName):
    """
    Run the main training routine of the Pegasos algorithm.

    Labels and feature vectors are loaded from ``trainFileName`` through
    ``convertSEQFileToArray``. The routine then makes ``self.T`` full passes
    over the data. In each pass it collects every instance whose signed score
    ``y * (dot(w, x) + b)`` is negative, takes a step of size
    ``1 / (self.l * t)`` along the averaged collected instances, and rescales
    the weights so that their norm never exceeds ``1 / sqrt(self.l)``.
    Per-pass timing, weight norm and training accuracy are printed.

    A pass in which no instance is misclassified only applies the shrinkage
    factor to the weights and leaves the bias untouched.

    Args:
        trainFileName: Name of the training file, forwarded as-is to
            ``convertSEQFileToArray``. Labels are presumably in {-1,+1};
            verify against the loader.

    Side effects:
        Sets ``self.d`` (vector dimensionality), ``self.lw`` (learnt weight
        vector) and ``self.bias`` (learnt bias). Returns nothing.
    """
    # Print the stats. The single-argument print form produces the same
    # output as the print statement.
    print("L2 parameter = %s" % (self.l,))
    print("Total iterations = %s" % (self.T,))

    # Load the feature vectors.
    (labels, vects) = convertSEQFileToArray(trainFileName)

    # Get the dimensionality d of the vectors (taken from the first one).
    d = vects[0].size
    self.d = d

    # Get the total no. of training vectors.
    n = len(vects)
    print("Total no. of training instances = %d" % n)

    # Construct the initial weight vector; its norm is 1 / sqrt(self.l).
    factor = 1.0 / sqrt(d * self.l)
    w = factor * ones(d)
    b = 0

    # Iterative procedure.
    for t in range(1, self.T + 1):
        startTime = time()

        # Accumulate the misclassified instances of this pass.
        z = zeros(d)
        bSum = 0
        k = 0
        for i, x in enumerate(vects):
            y = labels[i]
            if (y * (dot(w, x) + b)) < 0:
                # Misclassification.
                z += (y * x)
                bSum += y
                k += 1

        eta = 1.0 / (self.l * t)
        whalf = (1.0 - (eta * self.l)) * w
        if k > 0:
            # Only step when something was misclassified; dividing by
            # k == 0 would otherwise abort (or corrupt) the training.
            step = eta / k
            whalf = whalf + (step * z)
            b = b + (step * bSum)

        # Project back so that the weight norm is at most 1 / sqrt(self.l).
        # A zero-norm vector is already inside that ball, so leave it as is
        # instead of dividing by zero.
        scale = sqrt(self.l) * sqrt(dot(whalf, whalf))
        if scale > 0:
            scale = min(1.0, 1.0 / scale)
        else:
            scale = 1.0
        w = scale * whalf

        endTime = time()
        accuracy = 100 * (1.0 - (float(k) / n))
        print("Iteration: %d" % t)
        print("Time taken for this iteration %.3fs" % (endTime - startTime))
        print("Weight norm = %f" % (sqrt(dot(w, w))))
        print("Training accuracy = %f\n" % accuracy)

    # Set the learnt weight vector.
    self.lw = w
    self.bias = b