Example 1
    def SGD(self, train_X, train_y, test_X, test_y):
        trainLossVec = []
        testAccVec = []
        testLossVec = []
        trainAccVec = []
        # Epoch loop
        for i in range(1, EPOCHS + 1):
            self.correct = 0
            self.used = 0
            # Batch loop
            for j in range(
                    1,
                    int(len(train_X) * USED_SHARE_OF_DATA / BATCH_SIZE) + 1):
                dw, db = self.stochasticGradient(train_X, train_y,
                                                 trainLossVec)
                self.used += BATCH_SIZE
                # Layer loop
                for l in range(1, self.L):
                    self.weights[l - 1] -= self.lr * dw[l - 1]
                    self.biases[l - 1] -= self.lr * db[l - 1]
                hf.progress_bar(
                    j, int(len(train_X) * USED_SHARE_OF_DATA / BATCH_SIZE),
                    EPOCHS, i, round(100 * self.correct / (self.used), 4))
                testAcc, testLoss = hf.test(self, 50, test_X, test_y)
                testAccVec.append(testAcc)
                testLossVec.append(testLoss)
                trainAcc = self.correct / self.used
                trainAccVec.append(trainAcc)

            self.updateLearningRate(i)

        #loss = hf.averageLoss(self.lossFuncValues)
        return trainAccVec, trainLossVec, testAccVec, testLossVec
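For reference, the layer loop above is the plain mini-batch SGD step: with dw and db the batch-averaged gradients returned by stochasticGradient and \(\eta\) = self.lr,

\[
W^{(l)} \leftarrow W^{(l)} - \eta\, \mathrm{dw}^{(l)}, \qquad
b^{(l)} \leftarrow b^{(l)} - \eta\, \mathrm{db}^{(l)}.
\]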
Example 2
 def updateLearningRate(self, epoch):
     if self.sc == "exponentialDecay":
         self.lr = hf.exponentialDecay(self.initialLR, self.dr, epoch)
     elif self.sc == "polynomialDecay":
         self.lr = hf.polynomialDecay(self.initialLR, END_LEARNING_RATE,
                                      epoch, EPOCHS)
     elif self.sc == "inverseTimeDecay":
         self.lr = hf.inverseTimeDecay(self.initialLR, self.dr, epoch)
     elif self.sc == "piecewiseConstantDecay":
         self.lr = hf.piecewiseConstantDecay(self.initialLR, epoch)
     else:
         # No recognised schedule: keep the learning rate constant
         pass
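The scheduling helpers in hf are not included in these examples. Below is a minimal sketch of what they might look like, assuming definitions analogous to the TensorFlow schedules of the same names; the function names come from the calls above, but the exact formulas, boundaries and default arguments are assumptions.

def exponentialDecay(initial_lr, decay_rate, epoch):
    # lr_e = lr_0 * decay_rate ** epoch
    return initial_lr * decay_rate ** epoch

def inverseTimeDecay(initial_lr, decay_rate, epoch):
    # lr_e = lr_0 / (1 + decay_rate * epoch)
    return initial_lr / (1 + decay_rate * epoch)

def polynomialDecay(initial_lr, end_lr, epoch, total_epochs, power=1.0):
    # Interpolates from lr_0 down to end_lr over total_epochs (linear when power=1)
    fraction = min(epoch, total_epochs) / total_epochs
    return (initial_lr - end_lr) * (1 - fraction) ** power + end_lr

def piecewiseConstantDecay(initial_lr, epoch, boundaries=(5, 10), factors=(1.0, 0.5, 0.1)):
    # Constant learning rate on each epoch interval; boundaries/factors are assumed values
    for boundary, factor in zip(boundaries, factors):
        if epoch <= boundary:
            return initial_lr * factor
    return initial_lr * factors[-1]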
Example 3
    def SGDADAM(self, train_X, train_y, test_X, test_y):
        trainLossVec = []
        testAccVec = []
        testLossVec = []
        trainAccVec = []
        beta1 = 0.9
        beta2 = 0.999
        epsilon = 1e-8
        alpha = self.lr
        t = 1
        Vdw = [0] * (self.L - 1)
        Sdw = [0] * (self.L - 1)
        Vdb = [0] * (self.L - 1)
        Sdb = [0] * (self.L - 1)
        # Epoch loop
        for i in range(1, EPOCHS + 1):
            self.correct = 0
            self.used = 0
            # Minibatch loop
            for j in range(
                    1,
                    int(len(train_X) * USED_SHARE_OF_DATA / BATCH_SIZE + 1)):
                dw, db = self.stochasticGradient(train_X, train_y,
                                                 trainLossVec)
                self.used += BATCH_SIZE
                # Layer loop
                for l in range(1, self.L):
                    # Update first and second moments
                    Vdw[l - 1] = beta1 * Vdw[l - 1] + (1 - beta1) * dw[l - 1]
                    Vdb[l - 1] = beta1 * Vdb[l - 1] + (1 - beta1) * db[l - 1]
                    Sdw[l - 1] = beta2 * Sdw[l - 1] + (1 - beta2) * (np.square(
                        dw[l - 1]))
                    Sdb[l - 1] = beta2 * Sdb[l - 1] + (1 - beta2) * (np.square(
                        db[l - 1]))
                    # Get corrected values
                    Vdwcor = Vdw[l - 1] / (1 - beta1**t)
                    Vdbcor = Vdb[l - 1] / (1 - beta1**t)
                    Sdwcor = Sdw[l - 1] / (1 - beta2**t)
                    Sdbcor = Sdb[l - 1] / (1 - beta2**t)
                    # Update weights and biases
                    cw = np.divide(Vdwcor, np.sqrt(Sdwcor) + epsilon)
                    cb = np.divide(Vdbcor, np.sqrt(Sdbcor) + epsilon)
                    self.weights[l - 1] -= alpha * cw
                    self.biases[l - 1] -= alpha * cb
                t += 1
                #hf.progress_bar(j, int(len(train_X)*USED_SHARE_OF_DATA/BATCH_SIZE), EPOCHS, i, round(100*self.correct/(self.used), 2))
            testAcc, testLoss = hf.test(self, 10000, test_X, test_y)
            trainAcc = self.correct / self.used
            trainAccVec.append(trainAcc)
            testAccVec.append(testAcc)
            testLossVec.append(testLoss)

        #loss = hf.averageLoss(self.lossFuncValues)
        return trainAccVec, trainLossVec, testAccVec, testLossVec
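For reference, the layer loop above implements the standard Adam update with bias-corrected moment estimates. For each gradient \(g_t\) (dw or db of a layer), with t incremented once per mini-batch as in the code:

\[
V_t = \beta_1 V_{t-1} + (1-\beta_1)\, g_t, \qquad
S_t = \beta_2 S_{t-1} + (1-\beta_2)\, g_t^2,
\]
\[
\hat V_t = \frac{V_t}{1-\beta_1^{\,t}}, \qquad
\hat S_t = \frac{S_t}{1-\beta_2^{\,t}}, \qquad
\theta_t = \theta_{t-1} - \alpha\, \frac{\hat V_t}{\sqrt{\hat S_t} + \epsilon}.
\]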
Example 4
 def stochasticGradient(self, train_X, train_y, trainLossVec):
     changeWeights = [0] * (self.L - 1)
     changeBiases = [0] * (self.L - 1)
     lossFuncSum = 0
     for i in range(0, BATCH_SIZE):
         aVec = []
         DVec = []
         deltaVec = []
         k = random.randint(0, len(train_X) - 1)
         xk = train_X[k]
         yk = train_y[k]
         a = hf.flatten(xk)
         aVec.append(a)
         # Forward pass: store activations and diagonal ReLU-derivative matrices for each layer
         for l in range(0, self.L - 1):
             z = np.matmul(self.weights[l], a) + self.biases[l]
             a = hf.relu(z)
             D = np.diag(hf.reluPrim(z))
             #a = hf.sigmoid(z)
             #D = np.diag(hf.sigmoidPrim(z))
             aVec.append(a)
             DVec.append(D)
         # Output-layer error for the quadratic loss: delta_L = D_L (a_L - y)
         delta_L = np.matmul(DVec[-1], (a - hf.formatY(yk)))
         deltaVec.append(delta_L)
         # Back-propagate the error through the hidden layers
         for l in reversed(range(-self.L + 1, -1)):
             delta_l = np.matmul(
                 DVec[l],
                 np.matmul(np.transpose(self.weights[l + 1]),
                           deltaVec[l + 1]))
             deltaVec.insert(0, delta_l)
         # Accumulate this sample's weight and bias gradients
         for l in reversed(range(-self.L + 1, 0)):
             changeBiases[l] += deltaVec[l]
             changeWeights[l] += np.outer(deltaVec[l], aVec[l - 1])
         prediction = max(aVec[-1])
         index = aVec[-1].index(prediction)
         if (index == int(yk)):
             self.correct += 1
         lossFuncSum += hf.lossFunc(aVec[-1], yk)
     trainLossVec.append(lossFuncSum / BATCH_SIZE)
     # Calculates average values
     dw = [cw / BATCH_SIZE for cw in changeWeights]
     db = [cb / BATCH_SIZE for cb in changeBiases]
     return dw, db
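stochasticGradient relies on several hf helpers that are not shown. Below is a minimal sketch under the following assumptions: ReLU activations, one-hot targets and a quadratic loss (consistent with the output-layer error above); relu returns a plain list so the max(...) / .index(...) prediction lookup in the caller works. The actual helper module may differ.

import numpy as np

def flatten(image):
    # 28x28 MNIST image -> flat vector of length 784
    return np.asarray(image, dtype=float).flatten()

def relu(z):
    # Plain list, so the caller can use max(...) and .index(...) on the output layer
    return [max(0.0, float(v)) for v in z]

def reluPrim(z):
    # Elementwise derivative of ReLU, used to build the diagonal matrix D
    return [1.0 if v > 0 else 0.0 for v in z]

def formatY(y, num_classes=10):
    # One-hot encoding of the integer label
    target = np.zeros(num_classes)
    target[int(y)] = 1.0
    return target

def lossFunc(a, y, num_classes=10):
    # Quadratic loss, matching the output-layer error used above
    return 0.5 * np.sum((np.asarray(a) - formatY(y, num_classes)) ** 2)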
Example 5
def main():
    (train_X, train_y), (test_X, test_y) = mnist.load_data()
    train_X = train_X / 255.0
    test_X = test_X / 255.0
    nameList = ['trainLoss', 'testLoss', 'trainAcc', 'testAcc']
    steppingSchedules = [
        "inverseTimeDecay", "inverseTimeDecay", "inverseTimeDecay",
        "piecewiseConstantDecay", "polynomialDecay", "ADAM"
    ]
    learningRates = [0.5, 0.5, 0.5, 0.3, 0.3, 0.003]
    decayRates = [0.1, 0.5, 2, 0, 2.5, 0]
    # Loop through stepping schedule
    for i, steppingSchedule in enumerate(steppingSchedules):
        trainLossMtrx = []
        testLossMtrx = []
        trainAccMtrx = []
        testAccMtrx = []
        learningRate = learningRates[i]
        decayRate = decayRates[i]
        # Each scheme is run 10 times
        for run in range(1, 11):
            NN = Network()
            print("-" * 50)
            print('Run ', run)
            trainAccVec, trainLossVec, testAccVec, testLossVec = NN.train(
                train_X, train_y, test_X, test_y, steppingSchedule,
                learningRate, decayRate)
            trainLossMtrx.append(trainLossVec)
            testLossMtrx.append(testLossVec)
            trainAccMtrx.append(trainAccVec)
            testAccMtrx.append(testAccVec)
        # Write data to file
        nameEnd = '_' + steppingSchedule + '_' + str(learningRate) + '_' + str(
            decayRate)
        hf.writeToFile(trainLossMtrx, 'TrainLoss' + nameEnd)
        hf.writeToFile(testLossMtrx, 'TestLoss' + nameEnd)
        hf.writeToFile(trainAccMtrx, 'TrainAcc' + nameEnd)
        hf.writeToFile(testAccMtrx, 'TestAcc' + nameEnd)
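These examples assume a shared module preamble with the imports, the hyperparameter constants and the Network class (not shown). A plausible sketch of that preamble follows; the concrete values are placeholders, not taken from the original.

import random

import numpy as np
from tensorflow.keras.datasets import mnist  # provides mnist.load_data() as used in main()

import helpFunctions as hf  # assumed name of the helper module referenced as "hf"

EPOCHS = 10                # number of training epochs (assumed value)
BATCH_SIZE = 32            # mini-batch size (assumed value)
USED_SHARE_OF_DATA = 1.0   # fraction of the training set consumed per epoch (assumed value)
END_LEARNING_RATE = 1e-4   # final learning rate for polynomialDecay (assumed value)

if __name__ == "__main__":
    main()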
Example 6
 def predict(self, image):
     a = hf.flatten(image)
     for l in range(0, self.L - 1):
         a = self.nextLayer(a, l)
     return a
Example 7
 def nextLayer(self, a, layer):
     b = self.biases[layer]
     w = self.weights[layer]
     #a1 = hf.sigmoid(np.matmul(w, a) + b)
     a1 = hf.relu(np.matmul(w, a) + b)
     return a1
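A possible usage sketch for predict, after training a Network instance as in main(); converting the output activations to a digit label with argmax is an assumption, mirroring the prediction step in stochasticGradient.

output = NN.predict(test_X[0])              # activations of the output layer
predicted_digit = int(np.argmax(output))    # index of the strongest activation
print("Predicted:", predicted_digit, "Actual label:", int(test_y[0]))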