def BatchGradientDecent(net,
                        trainData,
                        trainTargets,
                        eta,
                        itr,
                        valData=None,
                        valTargets=None,
                        testData=None,
                        testTargets=None,
                        annel=False):
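    """Train `net` with full-batch gradient descent.

    Each of the `itr` iterations feeds the whole training set forward,
    back-propagates the loss gradients and updates every weight matrix with a
    step of size eta / number_of_training_samples. Validation loss is tracked
    when validation data is supplied, a log entry is written every 250
    iterations when `net.logDir` is set, and the loss curves are plotted once
    training finishes. The `annel` flag is accepted for interface parity but is
    not used here.
    """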
    eta, _ = SetInitialETA(net, trainData, trainTargets, eta)
    lossToPlotTrain = []
    lossToPlotVal = []
    for i in range(0, itr):
        networkOutput, layerOutputs = net.FeedForward(trainData)
        print(
            'Loss:', net.LossFunction[net.lossFunctionName](networkOutput,
                                                            trainTargets))
        gradients = net.BackProbGradients(trainTargets, networkOutput,
                                          layerOutputs)
        for j in range(0, net.noOfLayers + 1):
            net.weights[j] = net.weights[j] - (
                eta / trainData.shape[1]) * gradients[j]
        lossToPlotTrain.append(fns.CrossEntropy(networkOutput, trainTargets))

        # Track validation loss each iteration when validation data is supplied.
        if valData is not None:
            valOutput, _ = net.FeedForward(valData)
            valLoss = fns.CrossEntropy(valOutput, valTargets)
            lossToPlotVal.append(valLoss)
            print('Val Loss: ', valLoss)
        if net.logDir is not None and i % 250 == 0:
            fns.WriteLog(net, trainData, trainTargets, i, i, eta, valData,
                         valTargets, testData, testTargets)

    # Plot the recorded loss curves once training has finished.
    plot.close('all')
    plot.plot(lossToPlotTrain)
    if lossToPlotVal:
        plot.plot(lossToPlotVal)
        plot.legend(['TrainErr', 'ValErr'])
    else:
        plot.legend(['TrainErr'])
    plot.show()
    return net
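
# Usage sketch (hypothetical values, not taken from this repository): assumes a
# `net` object exposing FeedForward / BackProbGradients / weights as used above,
# with data laid out as (features, samples) and targets as (classes, samples).
#
#     net = BatchGradientDecent(net, trainData, trainTargets, eta=0.1, itr=1000,
#                               valData=valData, valTargets=valTargets)
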
def AdamOptimizer(net,
                  trainData,
                  trainTargets,
                  itr,
                  batchSize,
                  eta=0.5,
                  b1=0.9,
                  b2=0.999,
                  valData=None,
                  valTargets=None,
                  testData=None,
                  testTargets=None,
                  annel=False,
                  regularization=False,
                  lamda=0.1):
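    """Train `net` with mini-batch Adam.

    Slices `trainData` into batches of `batchSize` columns and, for every layer,
    keeps exponential moving averages of the gradient (decay `b1`) and of its
    square (decay `b2`), applying bias-corrected updates scaled by
    eta / batchSize. With `regularization` an L2 penalty weighted by `lamda` is
    added to the gradients. When `annel` is True, HandleAneeling is consulted at
    the end of each epoch; if it returns a replacement network, that network is
    adopted, eta is scaled by 3/4, and training stops after more than three such
    reductions. Mini-batch and validation loss curves are plotted once training
    finishes.
    """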
    # Per-layer first (mt) and second (vt) moment estimates for Adam, allocated
    # lazily once the first gradients (and hence the weight shapes) are known.
    mt = [None] * (net.noOfLayers + 1)
    vt = [None] * (net.noOfLayers + 1)
    batchStart = 0
    step = 0
    epoch = 0
    aneelCount = 0
    previousEpochValLoss = np.inf
    eta, _ = SetInitialETA(net, trainData[:, 0:batchSize],
                           trainTargets[:, 0:batchSize], eta)

    lossToPlotTrain = []
    lossToPlotVal = []
    for i in range(0, itr):
        step = step + 1
        batchData = trainData[:, batchStart:batchStart + batchSize]
        batchTargets = trainTargets[:, batchStart:batchStart + batchSize]
        batchStart = batchSize + batchStart
        networkOutput, layerOutputs = net.FeedForward(batchData)
        if batchStart >= trainData.shape[1]:
            # A full pass over the training data has completed.
            epoch = epoch + 1
            batchStart = batchStart - trainData.shape[1]
            step = 0
            if annel and valData is not None:
                previousEpochValLoss, tempNet = HandleAneeling(
                    net, valData, valTargets, previousEpochValLoss)
                if tempNet is not None:
                    # Adopt the network returned by HandleAneeling, shrink the
                    # learning rate, and give up after more than three anneals.
                    net = tempNet
                    eta = eta * 3.0 / 4.0
                    aneelCount += 1
                    if aneelCount > 3:
                        return net
        print(
            'Mini Batch Loss:',
            net.LossFunction[net.lossFunctionName](networkOutput,
                                                   batchTargets))
        gradients = net.BackProbGradients(batchTargets, networkOutput,
                                          layerOutputs)
        # Adam update: add the optional L2 penalty, refresh the exponential
        # moving averages of the gradient (mt) and its square (vt), then take a
        # bias-corrected step.
        for j in range(0, net.noOfLayers + 1):
            if regularization:
                gradients[j] += lamda * net.weights[j]
            if mt[j] is None:
                mt[j] = (1 - b1) * gradients[j]
                vt[j] = (1 - b2) * np.square(gradients[j])
            else:
                mt[j] = b1 * mt[j] + (1 - b1) * gradients[j]
                vt[j] = b2 * vt[j] + (1 - b2) * np.square(gradients[j])
            mHat = mt[j] / (1 - b1**(i + 1))
            vHat = vt[j] / (1 - b2**(i + 1))
            net.weights[j] = net.weights[j] - (
                eta / batchSize) * mHat / (np.sqrt(vHat) + 1e-8)
        lossToPlotTrain.append(fns.CrossEntropy(networkOutput, batchTargets))
        if valData is not None:
            valOutput, _ = net.FeedForward(valData)
            valLoss = fns.CrossEntropy(valOutput, valTargets)
            lossToPlotVal.append(valLoss)
            print('Val Loss: ', valLoss)
        if net.logDir is not None and step % 250 == 0:
            fns.WriteLog(net, batchData, batchTargets, step, epoch, eta,
                         valData, valTargets, testData, testTargets)

    # Plot the recorded mini-batch and validation loss curves.
    plot.close('all')
    plot.plot(lossToPlotTrain)
    if lossToPlotVal:
        plot.plot(lossToPlotVal)
        plot.legend(['TrainErr', 'ValErr'])
    else:
        plot.legend(['TrainErr'])
    plot.show()
    return net
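
# Usage sketch (hypothetical values, not taken from this repository): mini-batch
# Adam with L2 regularization and learning-rate annealing enabled.
#
#     net = AdamOptimizer(net, trainData, trainTargets, itr=5000, batchSize=64,
#                         eta=0.01, valData=valData, valTargets=valTargets,
#                         annel=True, regularization=True, lamda=0.001)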