Example #1
    def train(self,
              X,
              Y,
              initStep=1.,
              stopTol=1e-4,
              stopEpochs=5000,
              plot=None):
        """ Train the logistic regression using stochastic gradient descent """
        M, N = X.shape
        # initialize the model if necessary:
        self.classes = np.unique(Y)
        # Y may have two classes, any values
        XX = np.hstack((np.ones(
            (M, 1)), X))  # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y, self.classes)
        # YY is Y, but with canonical values 0 or 1
        if len(self.theta) != N + 1: self.theta = np.random.rand(N + 1)
        # init loop variables:
        epoch = 0
        done = False
        Jnll = []
        J01 = []
        while not done:
            stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1
            # update stepsize
            # Do an SGD pass through the entire data set:
            for i in np.random.permutation(M):
                ri = XX[i].dot(
                    self.theta)  # TODO: compute linear response r(x)
                si = 1. / (1. + np.exp(-ri))
                gradi = -(1 - si) * XX[i, :] if YY[i] else si * XX[i, :]
                # TODO: compute gradient of NLL loss
                self.theta -= stepsize * gradi
                # take a gradient step

            J01.append(self.err(X, Y))  # evaluate the current error rate
            ## TODO: compute surrogate loss (logistic negative log-likelihood)
            ##  Jsur = sum_i [ (log si) if yi==1 else (log(1-si)) ]
            S = 1. / (1. + np.exp(-(XX.dot(self.theta))))
            Jsur = -np.mean(YY * np.log(S) + (1 - YY) * np.log(1 - S))
            Jnll.append(Jsur)  # TODO evaluate the current NLL loss
            ## For debugging: you may want to print current parameters & losses
            # print self.theta, ' => ', Jsur[-1], ' / ', J01[-1]
            # raw_input()   # pause for keystroke
            # TODO check stopping criteria: exit if exceeded # of epochs ( > stopEpochs)
            # or if Jnll not changing between epochs ( < stopTol )
            done = epoch >= stopEpochs or (epoch > 1 and
                                           abs(Jnll[-1] - Jnll[-2]) < stopTol)
        plt.figure(1)
        plt.clf()
        plt.plot(Jnll, 'b-', J01, 'r-')
        plt.draw()
        # plot losses
        if N == 2:
            plt.figure(2)
            plt.clf()
            self.plotBoundary(X, Y)
            plt.draw()
            # & predictor if 2D
        plt.pause(.01)
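The per-example gradient used above, gradi = (sigma(theta·x_i) - y_i)·x_i, is the derivative of the per-example negative log-likelihood; both branches of the conditional expression reduce to it. A small standalone check (my own sketch, plain NumPy, independent of the class code) comparing the analytic gradient against a finite-difference estimate:

import numpy as np

def nll(theta, x, y):
    """Per-example logistic negative log-likelihood."""
    s = 1.0 / (1.0 + np.exp(-x.dot(theta)))
    return -(y * np.log(s) + (1 - y) * np.log(1 - s))

def grad_nll(theta, x, y):
    """Analytic gradient: (sigma(theta . x) - y) * x."""
    s = 1.0 / (1.0 + np.exp(-x.dot(theta)))
    return (s - y) * x

rng = np.random.default_rng(0)
theta = rng.standard_normal(3)
x = np.hstack(([1.0], rng.standard_normal(2)))   # constant feature plus two inputs
y = 1
eps = 1e-6
numeric = np.array([(nll(theta + eps * e, x, y) - nll(theta - eps * e, x, y)) / (2 * eps)
                    for e in np.eye(3)])
print(np.allclose(numeric, grad_nll(theta, x, y), atol=1e-6))   # expected: True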
Example #2
    def train(self, X, Y, initStep=1, stopTol=1e-4, stopEpochs=200, plot=None):
        """ Train the logistic regression using stochastic gradient descent """
        M,N = X.shape;                     # initialize the model if necessary:
        self.classes = np.unique(Y);       # Y may have two classes, any values
        XX = np.hstack(((np.ones((M,1))),X)); # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y,self.classes);   # YY is Y, but with canonical values 0 or 1

##        print(XX)
##        print(YY)
        
        if len(self.theta)!=N+1:
            self.theta=np.random.rand(N+1);
            
        # init loop variables:
        epoch=0; done=False; Jnll=[]; J01=[];
        
        while not done:
            stepsize = (initStep*2.0)/(2.0+epoch)
            epoch = epoch+1; # update stepsize

            # Do an SGD pass through the entire data set:
            for i in np.random.permutation(M):
                ri    = XX[i].dot(self.theta.T);     # TODO: compute linear response r(x)
                sigmoid = 1/(1 + np.exp(-ri))
                gradi = (sigmoid - YY[i]) * XX[i]     # gradient of the NLL loss
                self.theta -= stepsize * gradi;  # take a gradient step

            J01.append( self.err(X,Y) )  # evaluate the current error rate 

            ## TODO: compute surrogate loss (logistic negative log-likelihood)
            ##  Jsur = sum_i [ (log si) if yi==1 else (log(1-si)) ] / M

            sigma = 1/(1 + np.exp(-(XX.dot(self.theta.T))))
            #print(sigma)
            Jsur = -np.mean(YY * np.log(sigma) + (1 - YY) * np.log(1 - sigma))
            #print(Jsur)
            Jnll.append( Jsur ) # TODO evaluate the current NLL loss
            plt.figure(1); plt.plot(Jnll,'b-',J01,'r-'); plt.draw();    # plot losses
            if N==2: plt.figure(2); self.plotBoundary(X,Y); plt.draw(); # & predictor if 2D
            plt.pause(.01);                    # let OS draw the plot

            #print(epoch)
            #plt.show()
            plt.gcf().clear()
            ## For debugging: you may want to print current parameters & losses
            #print (self.theta, ' => ', Jsur[-1], ' / ', J01[-1]  )
##            print(self.theta)
##            if (epoch > 2):
##                print(abs(Jnll[-2]-Jsur))
            # raw_input()   # pause for keystroke

            # check stopping criteria: exit if exceeded # of epochs ( > stopEpochs)
            # or if Jnll not changing between epochs ( < stopTol )
            if epoch > stopEpochs or (epoch > 1 and abs(Jnll[-2] - Jnll[-1]) < stopTol):
                done = True;
        plt.show()
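One fragility shared by this example (math.exp) and several others (np.exp) is overflow when the linear response ri is a large negative number. A numerically stable sigmoid is a common workaround; a minimal sketch (my addition, not part of the assignment code):

import numpy as np

def stable_sigmoid(r):
    """Logistic function that avoids overflow in exp for large |r|."""
    r = np.atleast_1d(np.asarray(r, dtype=float))
    out = np.empty_like(r)
    pos = r >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-r[pos]))   # exponent <= 0, cannot overflow
    e = np.exp(r[~pos])                        # exponent < 0, cannot overflow
    out[~pos] = e / (1.0 + e)
    return out

print(stable_sigmoid(np.array([-1000.0, 0.0, 1000.0])))   # [0.  0.5 1. ]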
Example #3
    def trainL2(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None, alpha=2):

        M, N = X.shape
        self.classes = np.unique(Y)
        XX = np.hstack((np.ones((M, 1)), X))
        YY = ml.toIndex(Y, self.classes)
        if len(self.theta) != N + 1:
            self.theta = np.random.rand(N + 1)
        # init loop variables:
        epoch = 0
        done = False
        Jnll = []
        J01 = []
        while not done:
            stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1
            # Do an SGD pass through the entire data set:
            for i in np.random.permutation(M):
                ri = XX[i, :].dot(self.theta)
                sigma = self.sigmoid(ri)
                gradi = (sigma - YY[i]) * XX[i, :] + alpha * 2 * self.theta
                self.theta -= stepsize * gradi

            J01.append(self.err(X, Y))

            J = np.zeros(M)
            for j in range(M):
                rj = XX[j, :].dot(self.theta)
                if YY[j] == 1:
                    J[j] = -YY[j] * np.log(self.sigmoid(rj))
                else:
                    J[j] = -(1 - YY[j]) * np.log(1 - self.sigmoid(rj))

            Jsur = np.mean(J) + self.theta.dot(self.theta) * alpha
            Jnll.append(Jsur)

            plt.figure(1)
            plt.plot(Jnll, 'b-', J01, 'r-')
            plt.draw()
            if N == 2:
                plt.figure(2); self.plotBoundary(X, Y); plt.draw()  # & predictor if 2D
            plt.pause(.01)

            ## For debugging: you may want to print current parameters & losses
            # print self.theta, ' => ', Jsur[-1], ' / ', J01[-1]
            # raw_input()   # pause for keystroke

            if epoch > stopEpochs:
                done = True
            if epoch > 2 and abs(Jnll[-1] - Jnll[-2]) < stopTol:  # or if Jnll not changing between epochs ( < stopTol )
                done = True
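In trainL2, the alpha·||theta||^2 penalty contributes the extra 2·alpha·theta term to every per-example gradient, so each SGD step is equivalent to shrinking the weights ("weight decay") and then applying the plain logistic update. A tiny sketch verifying that equivalence on made-up numbers (my own illustration):

import numpy as np

alpha, step = 2.0, 0.05
theta = np.array([0.3, -1.2, 0.8])
x = np.array([1.0, 0.4, -0.7])    # augmented feature vector [1, x1, x2]
y = 1.0
sigma = 1.0 / (1.0 + np.exp(-x.dot(theta)))

# one regularized SGD step as written in trainL2
regularized = theta - step * ((sigma - y) * x + 2 * alpha * theta)

# the same step seen as weight decay followed by the unregularized update
decayed = (1 - 2 * alpha * step) * theta - step * (sigma - y) * x

print(np.allclose(regularized, decayed))   # expected: True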
Example #4
    def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None):
        """ Train the logistic regression using stochastic gradient descent """
        M,N = X.shape                     # initialize the model if necessary:
        self.classes = np.unique(Y)       # Y may have two classes, any values
        XX = np.hstack((np.ones((M,1)),X))   # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y,self.classes)  # YY is Y, but with canonical values 0 or 1
        if len(self.theta)!=N+1: self.theta=np.random.rand(N+1);
        # init loop variables:
        epoch=0; done=False; Jnll=[]; J01=[]; 
        while not done:
            stepsize, epoch = initStep*2.0/(2.0+epoch), epoch+1; # update stepsize
            # Do an SGD pass through the entire data set:
            for i in np.random.permutation(M):
                ri    = XX[i, :].dot(self.theta)
                sigma = self.sigmoid(ri)
                gradi = (sigma - YY[i]) * XX[i, :]
                self.theta -= stepsize * gradi;  # take a gradient step

            J01.append( self.err(X,Y) )  # evaluate the current error rate

            J = np.zeros(M)
            for j in range(M):
                rj = XX[j, :].dot(self.theta)
                if YY[j] == 1:
                    J[j] = -YY[j] * np.log(self.sigmoid(rj))
                else:
                    J[j] = -(1-YY[j]) * np.log(1-self.sigmoid(rj))

            Jnll.append(np.mean(J))

            plt.figure(1); plt.plot(Jnll,'b-',J01,'r-'); plt.draw();    # plot losses
            if N==2: plt.figure(2); self.plotBoundary(X,Y); plt.draw() # & predictor if 2D
            plt.pause(.01);                    # let OS draw the plot

            ## For debugging: you may want to print current parameters & losses
            # print self.theta, ' => ', Jsur[-1], ' / ', J01[-1]  
            # raw_input()   # pause for keystroke

            if epoch > stopEpochs:
                done = True
            if epoch > 2 and abs(Jnll[-1] - Jnll[-2]) < stopTol:  # or if Jnll not changing between epochs ( < stopTol )
                done = True
    def train(self, X, Y, initStep=1.0, stopTol=1e-4, stopEpochs=5000, plot=None):
        """ Train the logistic regression using stochastic gradient descent """
        def sigmoid(z):
            return 1 / (1 + np.exp(-z))
        
        M,N = X.shape;                     # initialize the model if necessary:
        self.classes = np.unique(Y);       # Y may have two classes, any values
        XX = np.hstack((np.ones((M,1)),X)) # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y,self.classes);   # YY is Y, but with canonical values 0 or 1
        if len(self.theta)!=N+1: self.theta=np.random.rand(N+1);
        # init loop variables:
        epoch=0; done=False; Jnll=[]; J01=[];
        while not done:
            stepsize, epoch = initStep*2.0/(2.0+epoch), epoch+1; # update stepsize
            # Do an SGD pass through the entire data set:
            for i in np.random.permutation(M):
                si = sigmoid(self.theta.dot(XX[i]));     # logistic response sig(theta . x_i)
                gradi = XX[i] * (si - YY[i]);     # gradient of NLL loss
                self.theta -= stepsize * gradi;  # take a gradient step

            J01.append( self.err(X,Y) )  # evaluate the current error rate

            ## TODO: compute surrogate loss (logistic negative log-likelihood)
            ##  Jsur = sum_i [ (log si) if yi==1 else (log(1-si)) ]
            S = sigmoid(XX.dot(self.theta))
            Jnll.append(-np.sum(YY*np.log(S) + (1-YY)*np.log(1-S))) # evaluate the current NLL loss
            if plot:
                plt.figure(1); plt.plot(Jnll,'b-',J01,'r-'); plt.draw();    # plot losses
                if N==2: plt.figure(2); self.plotBoundary(X,Y); plt.draw(); # & predictor if 2D
                plt.pause(.01);                    # let OS draw the plot

            ## For debugging: you may want to print current parameters & losses
            # print (self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
            # input()   # pause for keystroke

            # TODO check stopping criteria: exit if exceeded # of epochs ( > stopEpochs)
            done = (epoch > stopEpochs) or (epoch >= 2 and (np.abs(Jnll[-1] - Jnll[-2]) < stopTol));   # or if Jnll not changing between epochs ( < stopTol )
        plt.figure(1); plt.plot(Jnll,'b-',J01,'r-'); plt.draw();  
        if N==2: plt.figure(2); self.plotBoundary(X,Y); plt.draw()
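Several of the snippets compute the surrogate loss from the template comment Jsur = sum_i [ (log si) if yi==1 else (log(1-si)) ], and the sign and the second term are easy to get wrong. A compact vectorized form of the (averaged, negated) loss, with clipping so log(0) cannot occur, is sketched below (my own helper with hypothetical argument names, not part of the assignment code):

import numpy as np

def logistic_nll(theta, XX, YY, eps=1e-12):
    """Mean logistic negative log-likelihood for 0/1 targets YY and augmented data XX."""
    S = 1.0 / (1.0 + np.exp(-XX.dot(theta)))
    S = np.clip(S, eps, 1 - eps)              # keep both logs finite
    return -np.mean(YY * np.log(S) + (1 - YY) * np.log(1 - S))

# tiny usage example with made-up data
XX = np.array([[1.0, 0.2], [1.0, -1.5], [1.0, 0.9]])
YY = np.array([1, 0, 1])
print(logistic_nll(np.array([0.1, 0.4]), XX, YY))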
################################################################################
################################################################################
################################################################################
    def train(self,
              X,
              Y,
              initStep=1.0,
              stopTol=1e-4,
              stopEpochs=5000,
              plot=None):
        """ Train the logistic regression using stochastic gradient descent """
        from IPython import display
        M, N = X.shape
        # initialize the model if necessary:
        self.classes = np.unique(Y)
        # Y may have two classes, any values
        XX = np.hstack((np.ones(
            (M, 1)), X))  # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y, self.classes)
        # YY is Y, but with canonical values 0 or 1
        if len(self.theta) != N + 1: self.theta = np.random.rand(N + 1)
        # init loop variables:
        epoch = 0
        done = False
        Jnll = []
        J01 = []
        while not done:
            stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1
            # update stepsize
            # Do an SGD pass through the entire data set:
            for i in np.random.permutation(M):
                ri = XX[i].dot(self.theta)
                # compute linear response r(x)
                si = 1. / (1. + np.exp(-ri))
                # compute logistic response sig(ri)
                gradi = (si - YY[i]) * XX[i]
                # compute gradient of NLL loss
                self.theta -= stepsize * gradi
                # take a gradient step

            J01.append(self.err(X, Y))  # evaluate the current error rate

            ## TODO: compute surrogate loss (logistic negative log-likelihood)
            ##  Jsur = sum_i [ (log si) if yi==1 else (log(1-si)) ]
            S = 1. / (1. + np.exp(-XX.dot(self.theta)))
            Jnll.append(-np.mean(YY * np.log(S) +
                                 (1 - YY) * np.log(1 - S)))  # evaluate the current NLL loss
            display.clear_output(wait=True)
            # clear display if using jupyter
            plt.subplot(1, 2, 1)
            plt.cla()
            plt.plot(Jnll, 'b-', J01, 'r-')
            # plot losses
            if N == 2:
                plt.subplot(1, 2, 2)
                plt.cla()
                self.plotBoundary(X, Y)
                # & predictor if 2D
            plt.pause(.01)
            # let OS draw the plot

            ## For debugging: you may want to print current parameters & losses
            # print self.theta, ' => ', Jnll[-1], ' / ', J01[-1]
            # raw_input()   # pause for keystroke

            # TODO check stopping criteria: exit if exceeded # of epochs ( > stopEpochs)
            done = epoch >= stopEpochs or (epoch > 1 and
                                           abs(Jnll[-1] - Jnll[-2]) < stopTol)
    def train(self,
              X,
              Y,
              initStep=1.0,
              stopTol=1e-4,
              stopEpochs=5000,
              plot=None):
        """ Train the logistic regression using stochastic gradient descent """
        M, N = X.shape
        # initialize the model if necessary:
        self.classes = np.unique(Y)
        # Y may have two classes, any values
        XX = np.hstack((np.ones(
            (M, 1)), X))  # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y, self.classes)
        # YY is Y, but with canonical values 0 or 1
        if len(self.theta) != N + 1: self.theta = np.random.rand(N + 1)
        # init loop variables:
        epoch = 0
        done = False
        Jnll = []
        J01 = []
        while not done:
            stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1
            # update stepsize
            # Do an SGD pass through the entire data set:
            Jsur = 0
            for i in np.random.permutation(M):
                ri = XX[i, :].dot(self.theta)
                # compute linear response r(x)
                si = self.sigmoid(ri)
                gradi = (si - YY[i]) * XX[i, :]
                # gradient of NLL loss: (sigma(r) - y) * x
                self.theta -= stepsize * gradi
                # take a gradient step
                if YY[i] == 1:
                    Jsur -= np.log(si)
                else:
                    Jsur -= np.log(1 - si)

            J01.append(self.err(X, Y))  # evaluate the current error rate

            ## TODO: compute surrogate loss (logistic negative log-likelihood)
            ##  Jsur = sum_i [ (log si) if yi==1 else (log(1-si)) ]
            Jsur = Jsur / M
            Jnll.append(Jsur)  # TODO evaluate the current NLL loss
            plt.figure(1)
            plt.plot(Jnll, 'b-', J01, 'r-')
            plt.draw()
            # plot losses
            if N == 2:
                plt.figure(2)
                self.plotBoundary(X, Y)
                plt.draw()
                # & predictor if 2D
            plt.pause(.01)
            # let OS draw the plot

            ## For debugging: you may want to print current parameters & losses
            # print self.theta, ' => ', Jnll[-1], ' / ', J01[-1]
            # raw_input()   # pause for keystroke

            # TODO check stopping criteria: exit if exceeded # of epochs ( > stopEpochs)
            # or if Jnll not changing between epochs ( < stopTol )
            if epoch > stopEpochs:
                done = True
            if len(Jnll) > 2:
                if abs(Jnll[-1] - Jnll[-2]) < stopTol:
                    done = True
    def train(self,
              X,
              Y,
              initStep=1.0,
              stopTol=1e-4,
              stopEpochs=5000,
              plot=None,
              alpha=0):
        """ Train the logistic regression using stochastic gradient descent """
        M, N = X.shape
        # initialize the model if necessary:
        self.classes = np.unique(Y)
        # Y may have two classes, any values
        XX = np.hstack((np.ones(
            (M, 1)), X))  # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y, self.classes)
        # YY is Y, but with canonical values 0 or 1
        if len(self.theta) != N + 1: self.theta = np.random.rand(N + 1)
        # init loop variables:
        epoch = 0
        done = False
        Jnll = []
        J01 = []
        while not done:
            stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1
            # update stepsize
            # Do an SGD pass through the entire data set:
            for i in np.random.permutation(M):
                si = self.sigmoid(np.dot(XX[i], self.theta))
                # logistic response sig(theta . x_i)
                gradi = -(YY[i] - si) * XX[i] + alpha * self.theta
                # gradient of NLL loss plus the L2 term alpha * theta
                self.theta -= stepsize * gradi
                # take a gradient step

            J01.append(self.err(X, Y))  # evaluate the current error rate

            ## TODO: compute surrogate loss (logistic negative log-likelihood)
            ##  Jsur = sum_i [ (log si) if yi==1 else (log(1-si)) ]
            Jsur = 0
            for i in np.random.permutation(M):
                ri = self.sigmoid(np.dot(XX[i], self.theta))
                Jsur += np.log(ri + 1e-4) if YY[i] == 1 else np.log(1 - ri +
                                                                    1e-4)

            Jnll.append(-Jsur / M)  # TODO evaluate the current NLL loss

            ## For debugging: you may want to print current parameters & losses

            # raw_input()   # pause for keystroke
            #
            if epoch > 1:
                if epoch > stopEpochs or np.abs(Jnll[-2] - Jnll[-1]) < stopTol:
                    done = True
        # plot when training is over
        plt.figure(1)
        plt.plot(Jnll, 'b-', J01, 'r-')
        plt.draw()
        # plot losses
        if N == 2:
            plt.figure(2)
            self.plotBoundary(X, Y)
            plt.draw()
            # & predictor if 2D
        plt.pause(.01)
        # let OS draw the plot

        print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
Example #9
    def train(self,
              X,
              Y,
              initStep=1.0,
              stopTol=1e-4,
              stopEpochs=5000,
              plot=None):
        """ Train the logistic regression using stochastic gradient descent """

        M, N = X.shape
        # initialize the model if necessary:
        self.classes = np.unique(Y)
        # Y may have two classes, any values
        XX = np.hstack((np.ones(
            (M, 1)), X))  # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y, self.classes)
        # YY is Y, but with canonical values 0 or 1
        if len(self.theta) != N + 1: self.theta = np.random.rand(N + 1)
        # init loop variables:
        epoch = 0
        done = False
        Jnll = []
        J01 = []
        while not done:
            stepsize = initStep * 2.0 / (2.0 + epoch)
            epoch += 1
            # update stepsize

            # Do an SGD pass through the entire data set:
            for i in np.random.permutation(M):
                ri = self.theta[0] + self.theta[1] * X[
                    i, 0] + self.theta[2] * X[i, 1]

                si = self.sig(ri)
                theta1Grad = -YY[i] * (1 - si) + (1 - YY[i]) * si
                theta2Grad = -YY[i] * (1 - si) * X[i, 0] + (
                    1 - YY[i]) * si * X[i, 0]
                theta3Grad = -YY[i] * (1 - si) * X[i, 1] + (
                    1 - YY[i]) * si * X[i, 1]
                gradi = np.array([theta1Grad, theta2Grad, theta3Grad])

                self.theta -= stepsize * gradi
                # take a gradient step

            J01.append(self.err(X, Y))  # evaluate the current error rate

            ## TODO: compute surrogate loss (logistic negative log-likelihood)
            ##  Jsur = sum_i [ (log si) if yi==1 else (log(1-si)) ]

            S = self.sig(XX.dot(self.theta))
            Jsur = -np.mean(np.where(YY == 1, np.log(S), np.log(1 - S)))
            Jnll.append(Jsur)

            #plt.figure(1); plt.plot(Jnll,'b-',J01,'r-'); plt.draw();    # plot losses
            #if N==2: plt.figure(2); self.plotBoundary(X,Y); plt.draw(); # & predictor if 2D
            #plt.pause(.01);                    # let OS draw the plot

            ## For debugging: you may want to print current parameters & losses
            # print self.theta, ' => ', Jsur[-1], ' / ', J01[-1]
            # raw_input()   # pause for keystroke

            # TODO check stopping criteria: exit if exceeded # of epochs ( > stopEpochs)
            if (epoch > stopEpochs):
                done = True
            if (epoch > 1 and abs(Jnll[-2] - Jnll[-1]) < stopTol):
                done = True

        plt.figure(1)
        plt.plot(Jnll, 'b-', J01, 'r-')
        plt.draw()
        # plot losses
        if N == 2:
            plt.figure(2)
            self.plotBoundary(X, Y)
            plt.draw()
            # & predictor if 2D
        plt.pause(.01)
Example #10
    def train(self,
              X,
              Y,
              initStep=1.0,
              stopTol=1e-4,
              stopEpochs=5000,
              plot=None):
        """ Train the logistic regression using stochastic gradient descent """
        M, N = X.shape
        # initialize the model if necessary:
        self.classes = np.unique(Y)
        # Y may have two classes, any values
        XX = np.hstack((np.ones(
            (M, 1)), X))  # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y, self.classes)
        # YY is Y, but with canonical values 0 or 1
        if len(self.theta) != N + 1: self.theta = np.random.rand(N + 1)
        # init loop variables:
        epoch = 0
        done = False
        Jnll = []
        J01 = []
        while not done:
            stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1
            # update stepsize
            # Do an SGD pass through the entire data set:
            for i in np.random.permutation(M):
                ri = XX[i, :].dot(self.theta)
                # TODO: compute linear response r(x)

                gradi = self.compGrad(XX[i, :], YY[i], ri)
                # TODO: compute gradient of NLL loss
                gradi = gradi.reshape(self.theta.shape)  # match theta's shape instead of hard-coding 3
                self.theta -= stepsize * gradi
                # take a gradient step

            J01.append(self.err(X, Y))
            # evaluate the current error rate

            ## TODO: compute surrogate loss (logistic negative log-likelihood)
            total = 0
            for j in range(len(YY)):
                total += YY[j] * np.log(self.act(
                    XX[j, :])) + (1 - YY[j]) * np.log(1 - self.act(XX[j, :]))
            Jsur = np.ravel(total)[0] / len(YY)

            Jnll.append(-Jsur)
            # TODO evaluate the current NLL loss
            #plt.figure(); plt.plot(Jnll,'b-', J01, 'r-');
            #            plt.figure(1); plt.plot(Jnll,'b-',J01,'r-'); plt.draw();    # plot losses
            #            if N==2: plt.figure(2); self.plotBoundary(X,Y); plt.draw(); # & predictor if 2D
            plt.pause(.01)
            # let OS draw the plot

            ## For debugging: you may want to print current parameters & losses
            # print self.theta, ' => ', Jsur[-1], ' / ', J01[-1]
            # raw_input()   # pause for keystroke

            # TODO check stopping criteria: exit if exceeded # of epochs ( > stopEpochs)
            done = (epoch > stopEpochs)  #or (Jsur < stopTol);
            #done = NotImplementedError;   # or if Jnll not changing between epochs ( < stopTol )
        plt.figure()
        plt.plot(Jnll, 'b-', J01, 'r-')
        plt.figure()
        self.plotBoundary(X, Y)
Example #11
    def train(self,
              X,
              Y,
              init_step=1,
              min_change=1e-4,
              iteration_limit=5000,
              plot=None):
        """ Train the logistic regression using stochastic gradient descent """
        # preparation
        data_point = X.shape[0]
        if Y.shape[0] != data_point:
            raise ValueError("Y must have the same number of data (rows) as X")
        self.classes = np.unique(Y)
        if len(self.classes) != 2:
            raise ValueError(
                "Y should have exactly two classes (binary problem expected)")
        features = np.concatenate((np.ones((data_point, 1)), X), axis=1)
        targets = ml.toIndex(Y, self.classes)
        if self.theta.shape[0] != features.shape[1]:
            self.theta = np.random.rand(features.shape[1])

        # training
        negative_log_likely = []
        error = []
        for i in range(0, iteration_limit):
            step = self.step_k * init_step / (self.step_k + i)

            # gradient descent
            for j in range(0, data_point):
                sigma = 1 / (1 + np.exp(-np.dot(features[j], self.theta)))
                # NLL = -avg(y*log(sigma) + (1-y)*log(1-sigma))
                # gradient = -avg(sigma-y)*x
                # L2 regularization should not be needed here since there are no higher-order polynomial features
                gradient = -(
                    sigma - targets[j]) * features[j] - self.alpha * self.theta
                self.theta += step * gradient

            # record current error rate and surrogate loss
            error.append(self.err(X, Y))
            sigma = 1 / (1 + np.exp(-(np.dot(features, self.theta))))
            negative_log_likely.append(
                -np.mean(targets * np.log(sigma) +
                         (1 - targets) * np.log(1 - sigma)))

            # plot
            # TODO: this clear-and-re-plot method is slow, consider using dynamic update instead
            if plot:
                plt.figure(plot, (15, 7))
                plt.clf()
                plt.subplot(121)
                plt.plot(negative_log_likely, 'b-')
                plt.title('surrogate loss (logistic NLL)')
                plt.subplot(122)
                plt.plot(error, 'r-')
                plt.title('error rate')
                plt.draw()
                plt.pause(.01)

            # abort if there is no significant change in surrogate loss
            if (i > 1) and (abs(negative_log_likely[-1] -
                                negative_log_likely[-2]) < min_change):
                break

        if self.report:
            print('\n--- training report ---')
            print('parameters:')
            print('\talpha (L2 Regu): %.1f\n\tstep constant: %.1f' % (
                self.alpha, self.step_k))
            print('\tinitial step size: %.1f\n\tminimum change: %.1e' % (
                init_step, min_change))
            print('iterations taken: %d' % len(error))
            print('min/final error: %.2f%%/%.2f%%' % (min(error) * 100,
                                                      self.err(X, Y) * 100))
            print('min/final sur. loss: %.3f/%.3f' % (min(negative_log_likely),
                                                      negative_log_likely[-1]))
            print('----------------------')
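Example #11 decays the step size as step_k·init_step/(step_k + i), while most of the other snippets use initStep·2/(2 + epoch); these are the same harmonic schedule with a different constant. A short sketch printing the first few values (illustrative only); a larger step_k keeps the step near init_step for more epochs before the 1/epoch decay takes over:

init_step, step_k = 1.0, 2.0
for epoch in range(5):
    step = step_k * init_step / (step_k + epoch)
    print(epoch, round(step, 3))   # 1.0, 0.667, 0.5, 0.4, 0.333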
Example #12
    def train(self,
              X,
              Y,
              initStep=1.0,
              stopTol=1e-4,
              stopEpochs=5000,
              plot=None):
        """ Train the logistic regression using stochastic gradient descent """
        M, N = X.shape
        # initialize the model if necessary:
        self.classes = np.unique(Y)
        # Y may have two classes, any values
        XX = np.hstack((np.ones(
            (M, 1)), X))  # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y, self.classes)
        # YY is Y, but with canonical values 0 or 1
        if len(self.theta) != N + 1: self.theta = np.random.rand(N + 1)
        # init loop variables:
        epoch = 0
        done = False
        Jnll = []
        J01 = []
        while not done:
            stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1
            # update stepsize
            # Do an SGD pass through the entire data set:
            for i in np.random.permutation(M):
                ri = np.dot(self.theta, XX[i])
                # TODO: compute linear response r(x)
                gradi = -YY[i] * (1 - self.sig(ri)) * (XX[i]) - (1 - YY[i]) * (
                    -self.sig(ri)) * XX[i]
                # TODO: compute gradient of NLL loss
                self.theta -= stepsize * gradi
                # take a gradient step

            J01.append(self.err(X, Y))  # evaluate the current error rate

            ## TODO: compute surrogate loss (logistic negative log-likelihood)
            ##  Jsur = sum_i [ (log si) if yi==1 else (log(1-si)) ]

            Jsur = 0
            for i in np.random.permutation(M):
                Jsur += -YY[i] * np.log(self.sig(np.dot(
                    self.theta, XX[i]))) - (1 - YY[i]) * np.log(
                        1 - self.sig(np.dot(self.theta, XX[i])))

            Jsur = Jsur / M
            Jnll.append(Jsur)  # record the current NLL loss
            if stopEpochs <= epoch or (len(Jnll) >= 2 and
                                       np.abs(Jnll[-1] - Jnll[-2]) < stopTol):
                done = True
                plt.figure(1)
                plt.plot(Jnll, 'b-', J01, 'r-')
                plt.draw()
                # plot losses
                if N == 2:
                    plt.figure(2)
                    self.plotBoundary(X, Y)
                    # & predictor if 2D
                plt.pause(.01)
                # let OS draw the plot
            else:
                done = False
Example #13
    def train(self,
              X,
              Y,
              initStep=1.0,
              stopTol=1e-4,
              stopEpochs=5000,
              plot=None,
              alpha=0):
        """ Train the logistic regression using stochastic gradient descent """
        M, N = X.shape
        # initialize the model if necessary:
        self.classes = np.unique(Y)
        # Y may have two classes, any values
        XX = np.hstack((np.ones(
            (M, 1)), X))  # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y, self.classes)
        # YY is Y, but with canonical values 0 or 1
        if len(self.theta) != N + 1: self.theta = np.random.rand(N + 1)
        sigma = lambda r: 1 / (1 + np.exp(-r))
        # init loop variables:
        epoch = 0
        done = False
        Jnll = [float('inf')]
        J01 = [float('inf')]
        fig, [ax1, ax2, ax3] = plt.subplots(nrows=3, ncols=1, figsize=(10, 10))
        recs = [mpatches.Rectangle((0, 0), 1, 1, fc=c) for c in ['b', 'r']]
        plt.subplots_adjust(hspace=.7)
        while not done:
            stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1
            # update stepsize
            Jsurr_i = 0
            # Do an SGD pass through the entire data set:
            for i in np.random.permutation(M):
                ri = np.dot(self.theta, XX[i, :])
                gradi = (-YY[i] + sigma(ri)) * XX[i, :] + alpha * self.theta
                self.theta -= stepsize * gradi
                # take a gradient step
                Jsurr_i += (
                    -YY[i] * np.log(sigma(np.dot(self.theta, XX[i, :]))) -
                    ((1 - YY[i]) *
                     np.log(1 - sigma(np.dot(self.theta, XX[i, :])))))

            J01.append(self.err(X, Y))  # evaluate the current error rate

            Jsur = Jsurr_i / M
            L2 = 0
            if alpha:
                L2 = alpha * self.theta.dot(self.theta)  # L2 penalty on the weights, not the gradient

            Jsur += L2
            Jnll.append(Jsur)
            ax1.plot(Jnll, 'b-', J01, 'r-')
            ax1.set_xlabel("Epoch")
            ax1.set_title("Convergence of Surrogate loss and Error rate")
            ax1.legend(recs, ["Surrogate loss", "Error rate"])
            if N == 2:
                ax2.set_title("Convergence of classifier")
                self.plotBoundary(X, Y, ax2)
            plt.pause(.01)
            # let OS draw the plot

            ## For debugging: you may want to print current parameters & losses
            # print self.theta, ' => ', Jnll[-1], ' / ', J01[-1]
            # raw_input()   # pause for keystroke

            done = (epoch > stopEpochs
                    or epoch > 1 and abs(Jnll[-2] - Jnll[-1]) < stopTol)

        ax3.set_title("Final classifier")
        self.plotBoundary(X, Y, ax3)
Example #14
    def train(self,
              X,
              Y,
              initStep=1.0,
              stopTol=1e-4,
              stopEpochs=5000,
              plot=None):
        """ Train the logistic regression using stochastic gradient descent """
        M, N = X.shape
        # initialize the model if necessary:
        self.classes = np.unique(Y)
        # Y may have two classes, any values
        XX = np.hstack((np.ones(
            (M, 1)), X))  # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y, self.classes)
        # YY is Y, but with canonical values 0 or 1

        if len(self.theta) != N + 1:
            self.theta = np.random.rand(N + 1)

        # init loop variables:
        epoch = 0
        done = False
        Jnll = []
        J01 = []
        while not done:
            stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1
            # update stepsize
            Jsur = 0

            # Do an SGD pass through the entire data set:
            for i in np.random.permutation(M):
                alpha = 0.5
                sigma = 1 / (1 + np.exp(-XX[i].dot(self.theta)))
                sigma = np.clip(sigma, 1e-10, 1 - 1e-10)  # keep the logs finite

                Jsur += (-(YY[i]) * np.log(sigma) -
                         (1 - YY[i]) * np.log(1 - sigma))
                Jsur += alpha * np.sum(self.theta ** 2)  # L2 penalty

                gradient = (sigma - YY[i]) * XX[i]
                gradient += 2 * alpha * self.theta  # gradient of the L2 penalty
                self.theta -= stepsize * gradient

            J01.append(self.err(X, Y))  # evaluate the current error rate
            Jsur /= M
            Jnll.append(Jsur)

            plt.figure(1)
            plt.plot(Jnll, 'b-', J01, 'r-')
            plt.draw()
            # plot losses
            if N == 2:
                plt.figure(2)
                self.plotBoundary(X, Y)
                plt.draw()
            plt.pause(0.01)

            ## For debugging: you may want to print current parameters & losses
            print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])

            if (epoch != 1):
                done = (epoch >= stopEpochs) or (
                    np.absolute(Jnll[-1] - Jnll[-2]) < stopTol)
Example #15
    def train(self,
              X,
              Y,
              initStep=1.0,
              stopTol=1e-4,
              stopEpochs=5000,
              plot=None):
        """ Train the logistic regression using stochastic gradient descent """
        M, N = X.shape
        # initialize the model if necessary:
        self.classes = np.unique(Y)
        # Y may have two classes, any values
        XX = np.hstack((np.ones((M, 1)), X))
        # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y, self.classes)
        # YY is Y, but with canonical values 0 or 1
        if len(self.theta) != N + 1: self.theta = np.random.rand(N + 1)
        # init loop variables:
        epoch = 0
        done = False
        Jnll = []
        J01 = []
        while not done:
            stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1
            # update stepsize
            # Do an SGD pass through the entire data set:
            for i in np.random.permutation(M):
                ri = sum(self.theta[j] * XX[i][j]
                         for j in range(N + 1))  # linear response r(x)
                sig_2 = self.sig(ri)
                gradi = []  # gradient of NLL loss, built feature by feature
                for j in range(N + 1):
                    if YY[i] == 1:
                        dji = -(1 - sig_2) * XX[i][j]
                        gradi.append(dji)
                    else:
                        dji = (sig_2) * XX[i][j]
                        gradi.append(dji)
                gradi = np.array(gradi)
                self.theta -= stepsize * gradi
                # take a gradient step
            J01.append(self.err(X, Y))  # evaluate the current error rate

            ## compute surrogate loss (logistic negative log-likelihood):
            ##   Jsur = mean_i [ -log(si) if yi==1 else -log(1-si) ]
            ## and append this epoch's value to Jnll

            Jsur = 0
            for i in range(M):
                ri = sum([self.theta[j] * XX[i][j] for j in range(N + 1)])
                sig_2 = self.sig(ri)
                if YY[i] == 1:
                    Jsur += -np.log(sig_2)
                else:
                    Jsur += -np.log(1.0 - sig_2)
            Jnll.append(Jsur / M)
            # TODO evaluate the current NLL loss
            plt.figure(1)
            plt.plot(Jnll, 'b-', J01, 'r-')
            plt.draw()
            # plot losses
            if N == 2:
                plt.figure(2)
                self.plotBoundary(X, Y)
                plt.draw()
                # & predictor if 2D
            plt.pause(.01)
            # let OS draw the plot

            ## For debugging: you may want to print current parameters & losses
            # print self.theta, ' => ', Jsur[-1], ' / ', J01[-1]
            # input()   # pause for keystroke

            # TODO check stopping criteria: exit if exceeded # of epochs ( > stopEpochs)
            if (epoch > stopEpochs) or (len(Jnll) > 2 and
                                        abs(Jnll[-1] - Jnll[-2]) < stopTol):
                done = True  # or if Jnll not changing between epochs ( < stopTol )
Example #16
    def train(self,
              X,
              Y,
              initStep=1.0,
              stopTol=1e-4,
              stopEpochs=5000,
              plot=None,
              plotname="",
              regularization=False,
              alpha=2):
        """ Train the logistic regression using stochastic gradient descent """
        M, N = X.shape
        # initialize the model if necessary:
        self.classes = np.unique(Y)
        # Y may have two classes, any values
        XX = np.hstack((np.ones(
            (M, 1)), X))  # XX is X, but with an extra column of ones
        YY = ml.toIndex(Y, self.classes)
        # YY is Y, but with canonical values 0 or 1
        if len(self.theta) != N + 1: self.theta = np.random.rand(N + 1)
        tmpname = plotname
        if regularization: tmpname += ' with regularization'
        # init loop variables:
        epoch = 0
        done = False
        Jnll = []
        J01 = []
        while not done:
            stepsize, epoch = initStep * 2.0 / (2.0 + epoch), epoch + 1
            # update stepsize
            # Do an SGD pass through the entire data set:
            ji = 0
            for i in np.random.permutation(M):
                ri = np.dot(XX[i], self.theta)
                # TODO: compute linear response r(x)
                gradi = (1 / (1 + np.exp(-ri)) - YY[i]) * XX[i]
                # gradient of NLL loss
                if regularization:
                    gradi += 2 * alpha * self.theta  # gradient of the alpha*||theta||^2 penalty
                self.theta -= stepsize * gradi
                # take a gradient step
                ji += (YY[i] * np.log(1 / (1 + np.exp(-ri))) +
                       (1 - YY[i]) * np.log(1 - (1 / (1 + np.exp(-ri)))))

            J01.append(self.err(X, Y))
            ## TODO: compute surrogate loss (logistic negative log-likelihood)
            ##  Jsur = sum_i [ (log si) if yi==1 else (log(1-si)) ]
            if regularization:
                ji -= alpha * np.dot(self.theta, self.theta)
            Jsur = -ji / M
            Jnll.append(Jsur)  # TODO evaluate the current NLL loss
            plt.figure(tmpname +
                       ' convergence of surrogate loss and error rate')
            plt.title(tmpname + ' convergence of surrogate loss and error rate')
            plt.plot(Jnll, 'b-', J01, 'r-')
            plt.draw()
            # plot losses
            if N == 2:
                plt.figure(tmpname +
                           ' final converged classifier with the data')
                plt.title(tmpname +
                          ' final converged classifier with the data')
                self.plotBoundary(X, Y)
                plt.draw()
                # & predictor if 2D
            plt.pause(.01)
            # let OS draw the plot

            ## For debugging: you may want to print current parameters & losses
            #print(self.theta, ' => ', Jnll[-1], ' / ', J01[-1])
            #raw_input()   # pause for keystroke
            # input("Press Enter to continue...") #python 3 version of raw_input

            # TODO check stopping criteria: exit if exceeded # of epochs ( > stopEpochs)
            if epoch > stopEpochs: break
            # or if Jnll not changing between epochs ( < stopTol )
            if len(Jnll) >= 2:
                done = abs(Jnll[-1] - Jnll[-2]) < stopTol
                if done:
                    print(plotname + " stopped at epoch: ", epoch)
                    #plt.show()
            else:
                done = False
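All of the examples above rely on helpers that are not shown in this listing: ml.toIndex (maps the two class values to the canonical 0/1 targets mentioned in the comments), self.err (0/1 error rate), a sigmoid method, and self.plotBoundary. A rough standalone sketch of what the first two might look like, reconstructed from the comments rather than taken from the actual mltools source:

import numpy as np

def to_index(Y, classes):
    """Map arbitrary class labels to canonical values 0..C-1 (0/1 for binary problems)."""
    classes = np.asarray(classes)
    return np.array([int(np.where(classes == y)[0][0]) for y in Y])

def error_rate(theta, X, Y, classes):
    """Fraction of points whose predicted class differs from the label in Y."""
    XX = np.hstack((np.ones((X.shape[0], 1)), X))      # prepend the constant feature
    S = 1.0 / (1.0 + np.exp(-XX.dot(theta)))           # P(y = 1 | x)
    yhat = np.asarray(classes)[(S > 0.5).astype(int)]  # threshold at 0.5
    return float(np.mean(yhat != np.asarray(Y)))

# tiny usage example with made-up labels
Y = np.array([-1, 1, 1, -1])
print(to_index(Y, np.unique(Y)))   # [0 1 1 0]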