def train(self, X, Y, initStep=1., stopTol=1e-4, stopEpochs=5000, plot=None):
    """ Train the logistic regression using stochastic gradient descent """
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                    # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))           # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)               # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    while not done:
        stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1   # update stepsize
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            ri = XX[i].dot(self.theta)                                # compute linear response r(x)
            si = 1. / (1. + np.exp(-ri))                              # logistic response sig(ri)
            gradi = -(1 - si) * XX[i, :] if YY[i] else si * XX[i, :]  # gradient of NLL loss
            self.theta -= stepsize * gradi                            # take a gradient step

        J01.append(self.err(X, Y))                                    # evaluate the current error rate
        # compute surrogate loss (logistic negative log-likelihood):
        #   Jsur = -mean_i [ (log si) if yi==1 else (log(1-si)) ]
        S = 1. / (1. + np.exp(-(XX.dot(self.theta))))
        Jsur = -np.mean(YY * np.log(S) + (1 - YY) * np.log(1 - S))
        Jnll.append(Jsur)                                             # evaluate the current NLL loss

        ## For debugging: you may want to print current parameters & losses
        # print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
        # input()  # pause for keystroke

        # check stopping criteria: exit if exceeded # of epochs ( > stopEpochs )
        # or if Jnll not changing between epochs ( < stopTol )
        done = epoch >= stopEpochs or (epoch > 1 and abs(Jnll[-1] - Jnll[-2]) < stopTol)

        plt.figure(1); plt.clf(); plt.plot(Jnll, 'b-', J01, 'r-'); plt.draw()   # plot losses
        if N == 2:
            plt.figure(2); plt.clf(); self.plotBoundary(X, Y); plt.draw()       # & predictor if 2D
        plt.pause(.01)
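# Why the per-example gradient above splits into -(1-si)*x (when yi==1) and si*x (when yi==0):
# the per-example surrogate loss is  NLL_i = -[ yi*log(si) + (1-yi)*log(1-si) ]  with
# si = sigma(ri) and ri = theta·x_i.  Using d(sigma)/dr = sigma*(1-sigma) and dr/dtheta = x_i,
#     dNLL_i/dtheta = (si - yi) * x_i,
# which equals -(1-si)*x_i for yi=1 and si*x_i for yi=0 -- the same update the other variants
# below write as (sigma - y)*x.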
def train(self, X, Y, initStep=1, stopTol=1e-4, stopEpochs=200, plot=None):
    """ Train the logistic regression using stochastic gradient descent """
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                  # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))         # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)             # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    while not done:
        stepsize = initStep * 2.0 / (2.0 + epoch)   # update stepsize
        epoch += 1
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            ri = XX[i].dot(self.theta)              # compute linear response r(x)
            si = 1. / (1. + np.exp(-ri))            # logistic response sig(ri)
            gradi = (si - YY[i]) * XX[i]            # gradient of NLL loss
            self.theta -= stepsize * gradi          # take a gradient step

        J01.append(self.err(X, Y))                  # evaluate the current error rate
        # surrogate loss (logistic negative log-likelihood):
        #   Jsur = -mean_i [ (log si) if yi==1 else (log(1-si)) ]
        sigma = 1. / (1. + np.exp(-(XX.dot(self.theta))))
        Jsur = -np.mean(YY * np.log(sigma) + (1 - YY) * np.log(1 - sigma))
        Jnll.append(Jsur)                           # evaluate the current NLL loss

        plt.figure(1); plt.plot(Jnll, 'b-', J01, 'r-'); plt.draw()   # plot losses
        if N == 2:
            plt.figure(2); self.plotBoundary(X, Y); plt.draw()        # & predictor if 2D
        plt.pause(.01)                              # let OS draw the plot
        plt.gcf().clear()

        ## For debugging: you may want to print current parameters & losses
        # print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
        # input()  # pause for keystroke

        # stop if exceeded # of epochs ( > stopEpochs )
        # or if Jnll not changing between epochs ( < stopTol )
        if epoch > stopEpochs or (epoch > 1 and abs(Jnll[-2] - Jnll[-1]) < stopTol):
            done = True
    plt.show()
def trainL2(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None, alpha=2):
    """ Train L2-regularized logistic regression using stochastic gradient descent """
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)
    XX = np.hstack((np.ones((M, 1)), X))
    YY = ml.toIndex(Y, self.classes)
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    while not done:
        stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1   # update stepsize
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            ri = XX[i, :].dot(self.theta)                                 # linear response r(x)
            sigma = self.sigmoid(ri)                                      # logistic response
            gradi = (sigma - YY[i]) * XX[i, :] + 2 * alpha * self.theta   # NLL gradient + L2 penalty
            self.theta -= stepsize * gradi

        J01.append(self.err(X, Y))                                        # evaluate the current error rate
        # surrogate loss: mean NLL plus the L2 penalty alpha*||theta||^2
        J = np.zeros(M)
        for j in range(M):
            rj = XX[j, :].dot(self.theta)
            if YY[j] == 1:
                J[j] = -YY[j] * np.log(self.sigmoid(rj))
            else:
                J[j] = -(1 - YY[j]) * np.log(1 - self.sigmoid(rj))
        Jsur = np.mean(J) + alpha * self.theta.dot(self.theta)
        Jnll.append(Jsur)

        plt.figure(1); plt.plot(Jnll, 'b-', J01, 'r-'); plt.draw()        # plot losses
        if N == 2:
            plt.figure(2); self.plotBoundary(X, Y); plt.draw()             # & predictor if 2D
        plt.pause(.01)

        ## For debugging: you may want to print current parameters & losses
        # print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
        # input()  # pause for keystroke

        # stopping criteria: exceeded # of epochs, or Jnll not changing between epochs ( < stopTol )
        if epoch > stopEpochs:
            done = True
        if epoch > 2 and abs(Jnll[-1] - Jnll[-2]) < stopTol:
            done = True
def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None):
    """ Train the logistic regression using stochastic gradient descent """
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                  # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))         # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)             # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    while not done:
        stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1   # update stepsize
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            ri = XX[i, :].dot(self.theta)          # linear response r(x)
            sigma = self.sigmoid(ri)               # logistic response
            gradi = (sigma - YY[i]) * XX[i, :]     # gradient of NLL loss
            self.theta -= stepsize * gradi         # take a gradient step

        J01.append(self.err(X, Y))                 # evaluate the current error rate
        # surrogate loss (logistic negative log-likelihood):
        J = np.zeros(M)
        for j in range(M):
            rj = XX[j, :].dot(self.theta)
            if YY[j] == 1:
                J[j] = -YY[j] * np.log(self.sigmoid(rj))
            else:
                J[j] = -(1 - YY[j]) * np.log(1 - self.sigmoid(rj))
        Jnll.append(np.mean(J))

        plt.figure(1); plt.plot(Jnll, 'b-', J01, 'r-'); plt.draw()     # plot losses
        if N == 2:
            plt.figure(2); self.plotBoundary(X, Y); plt.draw()          # & predictor if 2D
        plt.pause(.01)                             # let OS draw the plot

        ## For debugging: you may want to print current parameters & losses
        # print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
        # input()  # pause for keystroke

        # stopping criteria: exceeded # of epochs, or Jnll not changing between epochs ( < stopTol )
        if epoch > stopEpochs:
            done = True
        if epoch > 2 and abs(Jnll[-1] - Jnll[-2]) < stopTol:
            done = True
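# Several variants in this collection call a sigmoid helper (self.sigmoid / self.sig) that is
# defined elsewhere in their class.  A minimal, numerically stable standalone sketch of such a
# helper (an assumption about its behavior, not any submission's exact code):

import numpy as np

def sigmoid_sketch(r):
    """Logistic function 1/(1+exp(-r)), computed stably for large |r| (scalar or array)."""
    r = np.asarray(r, dtype=float)
    e = np.exp(-np.abs(r))                       # exp of a non-positive number: never overflows
    return np.where(r >= 0, 1.0 / (1.0 + e), e / (1.0 + e))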
def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None):
    """ Train the logistic regression using stochastic gradient descent """
    def sigmoid(z):
        return 1 / (1 + np.exp(-z))

    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                  # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))         # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)             # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    while not done:
        stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1   # update stepsize
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            si = sigmoid(self.theta.dot(XX[i]))  # logistic response sig(r(x))
            gradi = XX[i] * (si - YY[i])         # gradient of NLL loss
            self.theta -= stepsize * gradi       # take a gradient step

        J01.append(self.err(X, Y))               # evaluate the current error rate
        # surrogate loss (logistic negative log-likelihood):
        #   Jsur = -mean_i [ (log si) if yi==1 else (log(1-si)) ]
        S = sigmoid(XX.dot(self.theta))
        Jnll.append(-np.mean(YY * np.log(S) + (1 - YY) * np.log(1 - S)))   # current NLL loss

        if plot:
            plt.figure(1); plt.plot(Jnll, 'b-', J01, 'r-'); plt.draw()   # plot losses
            if N == 2:
                plt.figure(2); self.plotBoundary(X, Y); plt.draw()        # & predictor if 2D
            plt.pause(.01)                       # let OS draw the plot

        ## For debugging: you may want to print current parameters & losses
        # print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
        # input()  # pause for keystroke

        # stopping criteria: exceeded # of epochs ( > stopEpochs )
        # or Jnll not changing between epochs ( < stopTol )
        done = (epoch > stopEpochs) or (epoch >= 2 and np.abs(Jnll[-1] - Jnll[-2]) < stopTol)

    plt.figure(1); plt.plot(Jnll, 'b-', J01, 'r-'); plt.draw()
    if N == 2:
        plt.figure(2); self.plotBoundary(X, Y); plt.draw()
def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None):
    """ Train the logistic regression using stochastic gradient descent """
    from IPython import display
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                  # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))         # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)             # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    while not done:
        stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1   # update stepsize
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            ri = XX[i].dot(self.theta)           # compute linear response r(x)
            si = 1. / (1. + np.exp(-ri))         # compute logistic response sig(ri)
            gradi = (si - YY[i]) * XX[i]         # compute gradient of NLL loss
            self.theta -= stepsize * gradi       # take a gradient step

        J01.append(self.err(X, Y))               # evaluate the current error rate
        # compute surrogate loss (logistic negative log-likelihood):
        #   Jsur = -mean_i [ (log si) if yi==1 else (log(1-si)) ]
        S = 1. / (1. + np.exp(-XX.dot(self.theta)))
        Jnll.append(-np.mean(YY * np.log(S) + (1 - YY) * np.log(1 - S)))   # current NLL loss

        display.clear_output(wait=True)          # clear display if using jupyter
        plt.subplot(1, 2, 1); plt.cla(); plt.plot(Jnll, 'b-', J01, 'r-')   # plot losses
        if N == 2:
            plt.subplot(1, 2, 2); plt.cla(); self.plotBoundary(X, Y)       # & predictor if 2D
        plt.pause(.01)                           # let OS draw the plot

        ## For debugging: you may want to print current parameters & losses
        # print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
        # input()  # pause for keystroke

        # stopping criteria: exit if exceeded # of epochs ( > stopEpochs )
        # or if Jnll not changing between epochs ( < stopTol )
        done = epoch > stopEpochs or (epoch > 1 and abs(Jnll[-1] - Jnll[-2]) < stopTol)
def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None):
    """ Train the logistic regression using stochastic gradient descent """
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                  # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))         # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)             # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    while not done:
        stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1   # update stepsize
        Jsur = 0
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            ri = XX[i, :].dot(self.theta)        # linear response r(x)
            si = self.sigmoid(ri)
            # gradient of NLL loss: ((1-y)*si - y*(1-si)) * x = (si - y) * x
            gradi = ((1 - YY[i]) * si - YY[i] * (1 - si)) * XX[i, :]
            self.theta -= stepsize * gradi       # take a gradient step
            # running estimate of the surrogate loss (logistic negative log-likelihood)
            if YY[i] == 1:
                Jsur += -np.log(si)
            else:
                Jsur += -np.log(1 - si)

        J01.append(self.err(X, Y))               # evaluate the current error rate
        Jsur = Jsur / M
        Jnll.append(Jsur)                        # evaluate the current NLL loss

        plt.figure(1); plt.plot(Jnll, 'b-', J01, 'r-'); plt.draw()    # plot losses
        if N == 2:
            plt.figure(2); self.plotBoundary(X, Y); plt.draw()         # & predictor if 2D
        plt.pause(.01)                           # let OS draw the plot

        ## For debugging: you may want to print current parameters & losses
        # print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
        # input()  # pause for keystroke

        # stopping criteria: exceeded # of epochs ( > stopEpochs )
        # or Jnll not changing between epochs ( < stopTol )
        if epoch > stopEpochs:
            done = True
        if len(Jnll) > 2 and abs(Jnll[-1] - Jnll[-2]) < stopTol:
            done = True
def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None, alpha=0):
    """ Train the logistic regression using stochastic gradient descent """
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                  # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))         # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)             # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    while not done:
        stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1   # update stepsize
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            si = self.sigmoid(np.dot(XX[i], self.theta))        # logistic response sig(r(x))
            gradi = -(YY[i] - si) * XX[i] + alpha * self.theta  # NLL gradient (+ optional L2 term)
            self.theta -= stepsize * gradi                      # take a gradient step

        J01.append(self.err(X, Y))                              # evaluate the current error rate
        # surrogate loss (logistic negative log-likelihood):
        #   Jsur = -mean_i [ (log si) if yi==1 else (log(1-si)) ]; 1e-4 keeps the logs finite
        Jsur = 0
        for i in range(M):
            si = self.sigmoid(np.dot(XX[i], self.theta))
            Jsur += np.log(si + 1e-4) if YY[i] == 1 else np.log(1 - si + 1e-4)
        Jnll.append(-Jsur / M)                                   # evaluate the current NLL loss

        ## For debugging: you may want to print current parameters & losses
        # input()  # pause for keystroke

        # stopping criteria: exceeded # of epochs, or Jnll not changing between epochs ( < stopTol )
        if epoch > stopEpochs or (epoch > 1 and np.abs(Jnll[-2] - Jnll[-1]) < stopTol):
            done = True

    # plot when training is over
    plt.figure(1); plt.plot(Jnll, 'b-', J01, 'r-'); plt.draw()   # plot losses
    if N == 2:
        plt.figure(2); self.plotBoundary(X, Y); plt.draw()        # & predictor if 2D
    plt.pause(.01)                                               # let OS draw the plot
    print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
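# The per-example gradient used by most of these variants, (sigma(theta·x) - y) * x, can be
# sanity-checked numerically.  This standalone sketch (illustrative helper names, not part of
# the assignment code) compares the analytic gradient against centered finite differences.

import numpy as np

def nll_single(theta, x, y):
    """Per-example logistic negative log-likelihood."""
    s = 1.0 / (1.0 + np.exp(-x.dot(theta)))
    return -(y * np.log(s) + (1 - y) * np.log(1 - s))

def nll_grad_single(theta, x, y):
    """Analytic gradient of nll_single with respect to theta: (sigma - y) * x."""
    s = 1.0 / (1.0 + np.exp(-x.dot(theta)))
    return (s - y) * x

def check_gradient(eps=1e-6):
    rng = np.random.default_rng(0)
    theta = rng.normal(size=3)
    x = np.hstack(([1.0], rng.normal(size=2)))     # constant (bias) feature plus two inputs
    y = 1
    numeric = np.array([(nll_single(theta + eps * e, x, y) -
                         nll_single(theta - eps * e, x, y)) / (2 * eps)
                        for e in np.eye(3)])
    analytic = nll_grad_single(theta, x, y)
    print("max abs difference:", np.max(np.abs(numeric - analytic)))

# check_gradient()   # should print a very small value (near round-off level)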
def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None):
    """ Train the logistic regression using stochastic gradient descent """
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                  # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))         # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)             # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    while not done:
        stepsize = initStep * 2.0 / (2.0 + epoch)    # update stepsize
        epoch += 1
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            ri = self.theta[0] + self.theta[1] * X[i, 0] + self.theta[2] * X[i, 1]   # linear response (2-feature case)
            si = self.sig(ri)
            # gradient of NLL loss, coordinate by coordinate: (si - yi) * x_j
            theta1Grad = -YY[i] * (1 - si) + (1 - YY[i]) * si
            theta2Grad = -YY[i] * (1 - si) * X[i, 0] + (1 - YY[i]) * si * X[i, 0]
            theta3Grad = -YY[i] * (1 - si) * X[i, 1] + (1 - YY[i]) * si * X[i, 1]
            gradi = np.array([theta1Grad, theta2Grad, theta3Grad])
            self.theta -= stepsize * gradi           # take a gradient step

        J01.append(self.err(X, Y))                   # evaluate the current error rate
        # surrogate loss (logistic negative log-likelihood):
        #   Jsur = -mean_i [ (log si) if yi==1 else (log(1-si)) ]
        S = np.array([self.sig(XX[i].dot(self.theta)) for i in range(M)])
        Jsur = -np.mean([np.log(S[i]) if YY[i] == 1 else np.log(1 - S[i]) for i in range(M)])
        Jnll.append(Jsur)

        ## For debugging: you may want to print current parameters & losses
        # print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
        # input()  # pause for keystroke

        # stopping criteria: exceeded # of epochs ( > stopEpochs )
        # or Jnll not changing between epochs ( < stopTol )
        if epoch > stopEpochs:
            done = True
        if epoch > 1 and abs(Jnll[-2] - Jnll[-1]) < stopTol:
            done = True

    plt.figure(1); plt.plot(Jnll, 'b-', J01, 'r-'); plt.draw()   # plot losses
    if N == 2:
        plt.figure(2); self.plotBoundary(X, Y); plt.draw()        # & predictor if 2D
    plt.pause(.01)
def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None):
    """ Train the logistic regression using stochastic gradient descent """
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                  # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))         # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)             # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    while not done:
        stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1   # update stepsize
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            ri = XX[i, :].dot(self.theta)              # linear response r(x)
            gradi = self.compGrad(XX[i, :], YY[i], ri) # gradient of NLL loss
            self.theta -= stepsize * np.ravel(gradi)   # take a gradient step

        J01.append(self.err(X, Y))                     # evaluate the current error rate
        # surrogate loss (logistic negative log-likelihood):
        total = 0
        for i in range(len(YY)):
            total += YY[i] * np.log(self.act(XX[i, :])) + (1 - YY[i]) * np.log(1 - self.act(XX[i, :]))
        Jsur = np.ravel(total)[0] / len(YY)
        Jnll.append(-Jsur)                             # evaluate the current NLL loss

        # plt.figure(1); plt.plot(Jnll,'b-',J01,'r-'); plt.draw()       # plot losses
        # if N==2: plt.figure(2); self.plotBoundary(X,Y); plt.draw()    # & predictor if 2D
        plt.pause(.01)                                 # let OS draw the plot

        ## For debugging: you may want to print current parameters & losses
        # print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
        # input()  # pause for keystroke

        # stopping criteria: exceeded # of epochs ( > stopEpochs )
        # or Jnll not changing between epochs ( < stopTol )
        done = (epoch > stopEpochs) or (epoch > 1 and abs(Jnll[-1] - Jnll[-2]) < stopTol)

    plt.figure(); plt.plot(Jnll, 'b-', J01, 'r-')
    plt.figure(); self.plotBoundary(X, Y)
def train(self, X, Y, init_step=1, min_change=1e-4, iteration_limit=5000, plot=None):
    """ Train the logistic regression using stochastic gradient descent """
    # preparation
    data_point = X.shape[0]
    if Y.shape[0] != data_point:
        raise ValueError("Y must have the same number of data (rows) as X")
    self.classes = np.unique(Y)
    if len(self.classes) != 2:
        raise ValueError("Y should have exactly two classes (binary problem expected)")
    features = np.concatenate((np.ones((data_point, 1)), X), axis=1)
    targets = ml.toIndex(Y, self.classes)
    if self.theta.shape[0] != features.shape[1]:
        self.theta = np.random.rand(features.shape[1])

    # training
    negative_log_likely = []
    error = []
    for i in range(0, iteration_limit):
        step = self.step_k * init_step / (self.step_k + i)
        # stochastic gradient descent pass over the data
        for j in range(0, data_point):
            sigma = 1 / (1 + np.exp(-np.dot(features[j], self.theta)))
            # NLL = -avg( y*log(sigma) + (1-y)*log(1-sigma) )
            # descent direction = -(sigma - y)*x, minus the L2 term alpha*theta
            # (L2 regularization need not be enabled here, since there are no
            #  higher-order polynomial features)
            gradient = -(sigma - targets[j]) * features[j] - self.alpha * self.theta
            self.theta += step * gradient

        # record current error rate and surrogate loss
        error.append(self.err(X, Y))
        sigma = 1 / (1 + np.exp(-(np.dot(features, self.theta))))
        negative_log_likely.append(-np.mean(targets * np.log(sigma) + (1 - targets) * np.log(1 - sigma)))

        # plot
        # TODO: this clear-and-re-plot method is slow; consider a dynamic update instead
        if plot:
            plt.figure(plot, (15, 7))
            plt.clf()
            plt.subplot(121)
            plt.plot(negative_log_likely, 'b-')
            plt.title('surrogate loss (logistic NLL)')
            plt.subplot(122)
            plt.plot(error, 'r-')
            plt.title('error rate')
            plt.draw()
            plt.pause(.01)

        # abort if there is no significant change in surrogate loss
        if (i > 1) and (abs(negative_log_likely[-1] - negative_log_likely[-2]) < min_change):
            break

    if self.report:
        print('\n--- training report ---')
        print('parameters:')
        print('\talpha (L2 Regu): %.1f\n\tstep constant: %.1f' % (self.alpha, self.step_k))
        print('\tinitial step size: %.1f\n\tminimum change: %.1e' % (init_step, min_change))
        print('iterations taken: %d' % len(error))
        print('min/final error: %.2f%%/%.2f%%' % (min(error) * 100, self.err(X, Y) * 100))
        print('min/final sur. loss: %.3f/%.3f' % (min(negative_log_likely), negative_log_likely[-1]))
        print('----------------------')
def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None):
    """ Train the logistic regression using stochastic gradient descent """
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                  # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))         # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)             # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    while not done:
        stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1   # update stepsize
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            ri = np.dot(self.theta, XX[i])                      # linear response r(x)
            # gradient of NLL loss: (-y*(1-s) + (1-y)*s) * x = (s - y) * x
            gradi = -YY[i] * (1 - self.sig(ri)) * XX[i] - (1 - YY[i]) * (-self.sig(ri)) * XX[i]
            self.theta -= stepsize * gradi                      # take a gradient step

        J01.append(self.err(X, Y))                              # evaluate the current error rate
        # surrogate loss (logistic negative log-likelihood):
        #   Jsur = -mean_i [ (log si) if yi==1 else (log(1-si)) ]
        Jsur = 0
        for i in range(M):
            si = self.sig(np.dot(self.theta, XX[i]))
            Jsur += -YY[i] * np.log(si) - (1 - YY[i]) * np.log(1 - si)
        Jsur = Jsur / M
        Jnll.append(Jsur)                                       # evaluate the current NLL loss

        # stopping criteria: exceeded # of epochs, or Jnll not changing between epochs ( < stopTol )
        if stopEpochs <= epoch or (len(Jnll) >= 2 and np.abs(Jnll[-1] - Jnll[-2]) < stopTol):
            done = True
            plt.figure(1); plt.plot(Jnll, 'b-', J01, 'r-'); plt.draw()   # plot losses
            if N == 2:
                plt.figure(2); self.plotBoundary(X, Y)                    # & predictor if 2D
            plt.pause(.01)                                                # let OS draw the plot
def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None, alpha=0):
    """ Train the logistic regression using stochastic gradient descent """
    import matplotlib.patches as mpatches        # legend proxies for the loss/error curves
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                  # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))         # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)             # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    sigma = lambda r: 1 / (1 + np.exp(-r))
    # init loop variables:
    epoch = 0
    done = False
    Jnll = [float('inf')]
    J01 = [float('inf')]
    fig, [ax1, ax2, ax3] = plt.subplots(nrows=3, ncols=1, figsize=(10, 10))
    recs = [mpatches.Rectangle((0, 0), 1, 1, fc=c) for c in ['b', 'r']]
    plt.subplots_adjust(hspace=.7)
    while not done:
        stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1   # update stepsize
        Jsurr_i = 0
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            ri = np.dot(self.theta, XX[i, :])                              # linear response r(x)
            gradi = (-YY[i] + sigma(ri)) * XX[i, :] + alpha * self.theta   # NLL gradient (+ L2 term)
            self.theta -= stepsize * gradi                                 # take a gradient step
            Jsurr_i += (-YY[i] * np.log(sigma(np.dot(self.theta, XX[i, :])))
                        - (1 - YY[i]) * np.log(1 - sigma(np.dot(self.theta, XX[i, :]))))

        J01.append(self.err(X, Y))                                         # evaluate the current error rate
        Jsur = Jsurr_i / M
        if alpha:
            Jsur += alpha * np.dot(self.theta, self.theta)                 # L2 penalty alpha*||theta||^2
        Jnll.append(Jsur)

        ax1.plot(Jnll, 'b-', J01, 'r-')
        ax1.set_xlabel("Epoch")
        ax1.set_title("Convergence of surrogate loss and error rate")
        ax1.legend(recs, ["Surrogate loss", "Error rate"])
        if N == 2:
            ax2.set_title("Convergence of classifier")
            self.plotBoundary(X, Y, ax2)
        plt.pause(.01)                                                     # let OS draw the plot

        ## For debugging: you may want to print current parameters & losses
        # print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
        # input()  # pause for keystroke

        done = (epoch > stopEpochs
                or (epoch > 1 and abs(Jnll[-2] - Jnll[-1]) < stopTol))

    ax3.set_title("Final classifier")
    self.plotBoundary(X, Y, ax3)
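# The regularized variants in this collection add an L2 penalty to the surrogate loss.  A
# minimal standalone sketch (hypothetical helper name) of the per-example objective and its
# gradient under the convention J_i = NLL_i + alpha*||theta||^2, whose penalty contributes
# 2*alpha*theta to the gradient:

import numpy as np

def regularized_nll_and_grad(theta, x, y, alpha):
    """Per-example L2-regularized logistic loss and its gradient with respect to theta."""
    s = 1.0 / (1.0 + np.exp(-x.dot(theta)))
    loss = -(y * np.log(s) + (1 - y) * np.log(1 - s)) + alpha * theta.dot(theta)
    grad = (s - y) * x + 2 * alpha * theta
    return loss, grad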
def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None):
    """ Train the logistic regression using stochastic gradient descent """
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                  # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))         # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)             # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    alpha = 0.5                                  # L2 regularization strength
    while not done:
        stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1   # update stepsize
        Jsur = 0
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            sigma = 1 / (1 + np.exp(-(XX[i].dot(self.theta))))        # logistic response
            sigma = min(max(sigma, 0.001), 0.999)                     # keep the logs finite
            # accumulate the regularized surrogate loss
            Jsur += -YY[i] * np.log(sigma) - (1 - YY[i]) * np.log(1 - sigma)
            Jsur += alpha * sum(self.theta ** 2)
            # gradient of the regularized NLL: (sigma - y)*x + 2*alpha*theta
            gradient = (sigma - YY[i]) * XX[i] + 2 * alpha * self.theta
            self.theta -= stepsize * gradient

        J01.append(self.err(X, Y))               # evaluate the current error rate
        Jsur /= M
        Jnll.append(Jsur)

        plt.figure(1); plt.plot(Jnll, 'b-', J01, 'r-'); plt.draw()    # plot losses
        if N == 2:
            plt.figure(2); self.plotBoundary(X, Y); plt.draw()         # & predictor if 2D
        plt.pause(0.01)

        ## For debugging: you may want to print current parameters & losses
        print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])

        # stopping criteria: exceeded # of epochs, or Jnll not changing between epochs ( < stopTol )
        if epoch != 1:
            done = (epoch >= stopEpochs) or (np.absolute(Jnll[-1] - Jnll[-2]) < stopTol)
def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None):
    """ Train the logistic regression using stochastic gradient descent """
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                  # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))         # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)             # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    while not done:
        stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1   # update stepsize
        # Do an SGD pass through the entire data set:
        for i in np.random.permutation(M):
            ri = sum([self.theta[j] * XX[i][j] for j in range(N + 1)])   # linear response r(x)
            sig_2 = self.sig(ri)
            # gradient of NLL loss, coordinate by coordinate:
            #   -(1-si)*x_j if yi==1, else si*x_j
            gradi = []
            for j in range(N + 1):
                if YY[i] == 1:
                    gradi.append(-(1 - sig_2) * XX[i][j])
                else:
                    gradi.append(sig_2 * XX[i][j])
            gradi = np.array(gradi)
            self.theta -= stepsize * gradi       # take a gradient step

        J01.append(self.err(X, Y))               # evaluate the current error rate
        # surrogate loss (logistic negative log-likelihood):
        #   Jsur = -mean_i [ (log si) if yi==1 else (log(1-si)) ]
        Jsur = 0
        for i in range(M):
            ri = sum([self.theta[j] * XX[i][j] for j in range(N + 1)])
            sig_2 = self.sig(ri)
            if YY[i] == 1:
                Jsur += -np.log(sig_2)
            else:
                Jsur += -np.log(1.0 - sig_2)
        Jnll.append(Jsur / M)                    # evaluate the current NLL loss

        plt.figure(1); plt.plot(Jnll, 'b-', J01, 'r-'); plt.draw()    # plot losses
        if N == 2:
            plt.figure(2); self.plotBoundary(X, Y); plt.draw()         # & predictor if 2D
        plt.pause(.01)                           # let OS draw the plot

        ## For debugging: you may want to print current parameters & losses
        # print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
        # input()  # pause for keystroke

        # stopping criteria: exceeded # of epochs ( > stopEpochs )
        # or Jnll not changing between epochs ( < stopTol )
        if (epoch > stopEpochs) or (len(Jnll) > 2 and abs(Jnll[-1] - Jnll[-2]) < stopTol):
            done = True
def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None,
          plotname="", regularization=False, alpha=2):
    """ Train the logistic regression using stochastic gradient descent """
    M, N = X.shape
    # initialize the model if necessary:
    self.classes = np.unique(Y)                  # Y may have two classes, any values
    XX = np.hstack((np.ones((M, 1)), X))         # XX is X, but with an extra column of ones
    YY = ml.toIndex(Y, self.classes)             # YY is Y, but with canonical values 0 or 1
    if len(self.theta) != N + 1:
        self.theta = np.random.rand(N + 1)
    tmpname = plotname
    if regularization:
        tmpname += ' with regularization'
    # init loop variables:
    epoch = 0
    done = False
    Jnll = []
    J01 = []
    while not done:
        stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1   # update stepsize
        # Do an SGD pass through the entire data set:
        ji = 0
        for i in np.random.permutation(M):
            ri = np.dot(XX[i], self.theta)                    # linear response r(x)
            si = 1 / (1 + np.exp(-ri))                        # logistic response
            gradi = (si - YY[i]) * XX[i]                      # gradient of NLL loss
            if regularization:
                gradi += (2 * alpha / M) * self.theta         # gradient of the L2 penalty term
            self.theta -= stepsize * gradi                    # take a gradient step
            ji += YY[i] * np.log(si) + (1 - YY[i]) * np.log(1 - si)   # running log-likelihood

        J01.append(self.err(X, Y))                            # evaluate the current error rate
        # surrogate loss (logistic negative log-likelihood), plus the L2 penalty if enabled
        if regularization:
            ji -= alpha * np.dot(self.theta, self.theta)
        Jsur = -ji / M
        Jnll.append(Jsur)                                     # evaluate the current NLL loss

        plt.figure(tmpname + ' convergence of surrogate loss and error rate')
        plt.title(tmpname + ' convergence of surrogate loss and error rate')
        plt.plot(Jnll, 'b-', J01, 'r-')
        plt.draw()                                            # plot losses
        if N == 2:
            plt.figure(tmpname + ' final converged classifier with the data')
            plt.title(tmpname + ' final converged classifier with the data')
            self.plotBoundary(X, Y)
            plt.draw()                                        # & predictor if 2D
        plt.pause(.01)                                        # let OS draw the plot

        ## For debugging: you may want to print current parameters & losses
        # print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
        # input("Press Enter to continue...")                 # pause for keystroke

        # stopping criteria: exceeded # of epochs ( > stopEpochs )
        if epoch > stopEpochs:
            break
        # or Jnll not changing between epochs ( < stopTol )
        if len(Jnll) >= 2:
            done = abs(Jnll[-1] - Jnll[-2]) < stopTol
            if done:
                print(plotname + " stopped at epoch: ", epoch)
                # plt.show()
        else:
            done = False
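# A condensed, dependency-free sketch of the pattern shared by the variants above (decaying
# step size, one gradient step per example, mean NLL tracked per epoch, stop on epoch limit or
# small change in NLL).  Function and variable names are illustrative only; the classifier
# class, plotting, and mltools helpers are omitted.  Y01 is assumed to already be 0/1 labels.

import numpy as np

def sgd_logistic_sketch(X, Y01, initStep=1.0, stopTol=1e-4, stopEpochs=5000, seed=0):
    rng = np.random.default_rng(seed)
    M, N = X.shape
    XX = np.hstack((np.ones((M, 1)), X))          # prepend a constant (bias) feature
    theta = rng.random(N + 1)
    Jnll = []
    for epoch in range(1, stopEpochs + 1):
        step = initStep * 2.0 / (2.0 + epoch)     # decaying step size
        for i in rng.permutation(M):
            s = 1.0 / (1.0 + np.exp(-XX[i].dot(theta)))
            theta -= step * (s - Y01[i]) * XX[i]  # per-example NLL gradient step
        S = 1.0 / (1.0 + np.exp(-XX.dot(theta)))
        S = np.clip(S, 1e-12, 1 - 1e-12)          # keep the logs finite
        Jnll.append(-np.mean(Y01 * np.log(S) + (1 - Y01) * np.log(1 - S)))
        if epoch > 1 and abs(Jnll[-1] - Jnll[-2]) < stopTol:
            break                                 # surrogate loss has stopped changing
    return theta, Jnll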