Example #1
    def sgd(self,
            batch_size=50,
            epsilon=0.01,
            epochs=1000):
            # epochs=10):      # Quickly check the performance of the model

        """ Mini-batch gradient descent on training data.

            batch_size: number of training examples between each weight update
            epsilon:    learning rate
            epochs:     the number of times to go through the entire training data
        """
        
        # Compute the number of training examples and number of mini-batches.
        N = min(len(self.trainX), len(self.trainY))
        num_batches = int(N/batch_size)

        # Variables to keep track of statistics
        loss_log      = []
        test_acc_log  = []
        train_acc_log = []

        timestamp = time.time()
        timestamp2 = time.time()

        predictions_not_shown = True
        
        # In each "epoch", the network is exposed to the entire training set.
        for t in range(epochs):

            # We will order the training data using a random permutation.
            permutation = np.random.permutation(N)
            
            # Evaluate the accuracy on 1000 samples from the training and test data
            test_acc_log.append( self.evaluate(self.testX, self.testY, 1000) )
            train_acc_log.append( self.evaluate(self.trainX, self.trainY, 1000))
            batch_loss = 0

            for k in range(num_batches):
                
                # Reset the buffers that accumulate the partial derivatives over one batch
                # (see slide l05-backpropagation, page 18).
                dw_buffer = [np.zeros_like(w) for w in self.dw]
                db_buffer = [np.zeros_like(b) for b in self.db]
                
                # Mini-batch loop
                for i in range(batch_size):

                    # Select the next training example (x,y)
                    x = self.trainX[permutation[k*batch_size+i]]
                    y = self.trainY[permutation[k*batch_size+i]]

                    # Feed forward inputs and compute gradients
                    # (self.backward runs self.forward internally).
                    self.backward(x, y)
                    
                    # Accumulate the partial derivatives for each parameter
                    # (see slide l05-backpropagation, page 18).
                    for l in range(self.L):
                        dw_buffer[l] += self.dw[l]
                        db_buffer[l] += self.db[l]

                    # Update loss log
                    batch_loss += self.loss(self.a[self.L-1], y)

                    for l in range(self.L):
                        self.batch_a[l] += self.a[l] / batch_size
                                    
                # Update the weights at the end of the mini-batch using gradient descent
                for l in range(1, self.L):
                    # Apply the averaged gradients (see slide l05-backpropagation, page 18).
                    self.w[l] -= epsilon * (dw_buffer[l] / batch_size)
                    self.b[l] -= epsilon * (db_buffer[l] / batch_size)
                
                # Update logs
                loss_log.append( batch_loss / batch_size )
                batch_loss = 0

                # Update plot of statistics every 10 seconds.
                if time.time() - timestamp > 10:
                    timestamp = time.time()
                    fnn_utils.plot_stats(self.batch_a,
                                         loss_log,
                                         test_acc_log,
                                         train_acc_log)

                # Display predictions every 20 seconds.
                if (time.time() - timestamp2 > 20) or predictions_not_shown:
                    predictions_not_shown = False
                    timestamp2 = time.time()
                    fnn_utils.display_predictions(self,show_pct=True)

                # Reset batch average
                for l in range(self.L):
                    self.batch_a[l].fill(0.0)
        
        # Save the graph automatically
        # fnn_utils.save_pic(epochs, epsilon, batch_size, self.network_shape)
        return test_acc_log, train_acc_log, loss_log
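
The weight update at the end of each mini-batch applies the average of the gradients accumulated in dw_buffer and db_buffer. A minimal sketch of that step on plain NumPy arrays follows; apply_minibatch_update, w and grads are illustrative names not taken from the example.

    import numpy as np

    # Sketch of the averaged gradient-descent step: `grads` is a list of
    # per-example gradients with the same shape as the weight array `w`.
    def apply_minibatch_update(w, grads, epsilon):
        mean_grad = sum(grads) / len(grads)   # average gradient over the mini-batch
        return w - epsilon * mean_grad

    # Tiny usage example: the two per-example gradients average to 1.0,
    # so every entry of w moves from 1.0 to 0.99.
    w = np.ones((3, 2))
    grads = [np.full((3, 2), 0.5), np.full((3, 2), 1.5)]
    w = apply_minibatch_update(w, grads, epsilon=0.01)
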
Example #2
    def sgd(self, batch_size=50, epsilon=0.01, epochs=5):
        """ Mini-batch gradient descent on training data.

            batch_size: number of training examples between each weight update
            epsilon:    learning rate
            epochs:     the number of times to go through the entire training data
        """

        # Compute the number of training examples and number of mini-batches.
        N = min(len(self.trainX), len(self.trainY))
        num_batches = int(N / batch_size)

        # Variables to keep track of statistics
        loss_log = []
        test_acc_log = []
        train_acc_log = []

        timestamp = time.time()
        timestamp2 = time.time()

        predictions_not_shown = True

        # In each "epoch", the network is exposed to the entire training set.
        for t in range(epochs):

            # We will order the training data using a random permutation.
            permutation = np.random.permutation(N)

            # Evaluate the accuracy on 1000 samples from the training and test data
            test_acc_log.append(self.evaluate(self.testX, self.testY, 1000))
            train_acc_log.append(self.evaluate(self.trainX, self.trainY, 1000))
            batch_loss = 0

            for k in range(num_batches):

                # Reset the buffers containing the updates
                batch_dw = [np.zeros(w.shape) for w in self.w]
                batch_db = [np.zeros(b.shape) for b in self.b]

                # Mini-batch loop
                for i in range(batch_size):

                    # Select the next training example (x,y)
                    x = self.trainX[permutation[k * batch_size + i]]
                    y = self.trainY[permutation[k * batch_size + i]]

                    # Feed forward inputs
                    self.forward(x)

                    # Compute gradients
                    self.backward(x, y)

                    # Update loss log
                    batch_loss += self.loss(self.a[self.L - 1], y)

                    for l in range(self.L):
                        self.batch_a[l] += self.a[l] / batch_size
                        batch_db[l] += self.db[l]
                        batch_dw[l] += self.dw[l]

                # Update the weights at the end of the mini-batch using gradient descent
                for l in range(1, self.L):
                    self.w[l] = self.w[l] - epsilon * (batch_dw[l] /
                                                       batch_size)
                    self.b[l] = self.b[l] - epsilon * (batch_db[l] /
                                                       batch_size)

                # Update logs
                loss_log.append(batch_loss / batch_size)
                batch_loss = 0

                # Update plot of statistics every 10 seconds.
                if time.time() - timestamp > 10:
                    timestamp = time.time()
                    fnn_utils.plot_stats(self.batch_a, loss_log, test_acc_log,
                                         train_acc_log)

                # Display predictions every 20 seconds.
                if (time.time() - timestamp2 > 20) or predictions_not_shown:
                    predictions_not_shown = False
                    timestamp2 = time.time()
                    fnn_utils.display_predictions(self, show_pct=True)

                # Reset batch average
                for l in range(self.L):
                    self.batch_a[l].fill(0.0)
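
Example #2 zeroes one gradient buffer per parameter, adds each example's gradients into it, and applies the averaged buffer at the end of the batch. The following self-contained sketch shows the same buffer-accumulate pattern; params, buffers and example_grads stand in for self.w/self.b, batch_dw/batch_db and the output of backward, and the random values only illustrate the data flow.

    import numpy as np

    epsilon, batch_size = 0.01, 5
    params = [np.random.randn(4, 3), np.random.randn(3)]    # stand-in for self.w / self.b
    buffers = [np.zeros(p.shape) for p in params]           # reset at the start of each batch

    for _ in range(batch_size):
        # Stand-in for self.backward(x, y): one gradient per parameter.
        example_grads = [np.random.randn(*p.shape) for p in params]
        for buf, grad in zip(buffers, example_grads):
            buf += grad                                      # accumulate per-example gradients

    for p, buf in zip(params, buffers):
        p -= epsilon * (buf / batch_size)                    # averaged gradient-descent step
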
Example #3
    def sgd(self, batch_size=50, epsilon=0.01, epochs=1000):
        """ Mini-batch gradient descent on training data.

            batch_size: number of training examples between each weight update
            epsilon:    learning rate
            epochs:     the number of times to go through the entire training data
        """

        # Overwrite the default hyperparameters
        batch_size = 5
        epsilon = 0.01
        epochs = 150

        # Compute the number of training examples and number of mini-batches.
        N = min(len(self.trainX), len(self.trainY))
        num_batches = int(N / batch_size)

        # Variables to keep track of statistics
        loss_log = []
        test_acc_log = []
        train_acc_log = []

        timestamp = time.time()
        timestamp2 = time.time()

        predictions_not_shown = True

        # In each "epoch", the network is exposed to the entire training set.
        for t in range(epochs):
            print("epoch ", t)
            # We will order the training data using a random permutation.

            permutation = np.random.permutation(N)
            # Evaluate the accuracy on 1000 samples from the training and test data
            test_acc_log.append(self.evaluate(self.testX, self.testY, 1000))
            train_acc_log.append(self.evaluate(self.trainX, self.trainY, 1000))
            batch_loss = 0

            print(test_acc_log[-1])

            for k in range(num_batches):
                # Reset the buffers containing the updates
                self.dw = [np.zeros((m1, m0)) for (m0, m1) in self.crossings]
                self.db = [np.zeros(m) for m in self.network_shape]
                self.delta = [np.zeros(m) for m in self.network_shape]
                self.z = [np.zeros(m) for m in self.network_shape]
                self.a = [np.zeros(m) for m in self.network_shape]

                # Mini-batch loop
                for i in range(batch_size):
                    # Select the next training example (x,y)
                    x = self.trainX[permutation[k * batch_size + i]]
                    y = self.trainY[permutation[k * batch_size + i]]

                    # Feed forward inputs
                    x_pred = self.predict(x)

                    # Compute gradients
                    self.backward(x_pred, y)

                    # Update loss log
                    batch_loss += self.loss(self.a[self.L - 1], y)

                    for l in range(self.L):
                        self.batch_a[l] += self.a[l] / batch_size

                # Update the weights at the end of the mini-batch using gradient descent
                for l in range(1, self.L):
                    self.w[l] -= epsilon * self.dw[l]
                    self.b[l] -= epsilon * self.db[l]

                # Update logs
                loss_log.append(batch_loss / batch_size)
                batch_loss = 0

                # Update plot of statistics every 10 seconds.
                if time.time() - timestamp > 10:
                    timestamp = time.time()
                    fnn_utils.plot_stats(self.batch_a, loss_log, test_acc_log,
                                         train_acc_log)
                    with open("stats.txt", "a") as outfile:
                        outfile.write("Epoch: " + str(t) + "\nTime elapsed: " +
                                      str(time.time() - self.starttime) +
                                      " seconds" + "\nLoss: " +
                                      str(loss_log[-1]) + "\nTest Accuracy: " +
                                      str(test_acc_log[-1]) +
                                      "\nTrain Accuracy: " +
                                      str(train_acc_log[-1]) + "\n\n")

                # Display predictions every 20 seconds.
                if (time.time() - timestamp2 > 20) or predictions_not_shown:
                    predictions_not_shown = False
                    timestamp2 = time.time()
                    fnn_utils.display_predictions(self, show_pct=True)

                # Reset batch average
                for l in range(self.L):
                    self.batch_a[l].fill(0.0)
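
All three examples estimate accuracy once per epoch with self.evaluate(..., 1000), i.e. on 1000 random samples rather than the full data set. The real evaluate is defined elsewhere in the class; a hypothetical sketch of such a subsampled accuracy check, with predict standing in for the network's prediction routine, could look like this:

    import numpy as np

    # Hypothetical subsampled accuracy check; `predict` maps one input to a class label.
    def evaluate_subset(X, Y, predict, num_samples=1000):
        idx = np.random.permutation(len(X))[:num_samples]   # random subset of indices
        correct = sum(1 for i in idx if predict(X[i]) == Y[i])
        return correct / len(idx)

    # Hypothetical usage: acc = evaluate_subset(net.testX, net.testY, net.predict)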