def test(testX, testY, parameters, activation="sigmoid"):
    # Evaluate cost and classification error on a held-out set.
    cache = forwardPropagation(testX, parameters, activation)
    L = len(parameters) // 2
    cost = hp.crossEntropy(cache["H" + str(L)], testY)
    classificationError = hp.meanClassificationError(cache["H" + str(L)], testY)
    return cost, classificationError
def testBatch(testX, testY, testParameters, parameters, epsilon=0.001, activation="sigmoid"):
    # Evaluate a batch-normalized network, reusing the averaged
    # batch-norm statistics collected during training (testParameters).
    L = len(parameters) // 4
    cache = testBatchForwardPropagation(testX, parameters, testParameters,
                                        epsilon=epsilon, activation=activation)
    cost = hp.crossEntropy(cache["H" + str(L)], testY)
    meanError = hp.meanClassificationError(cache["H" + str(L)], testY)
    return cost, meanError
def train(X, Y, validX, validY, layerInfo, numIterations, parameters=None,
          learningRate=0.1, printCost=False, momentum=0, lambd=0,
          activation="sigmoid"):
    errorTrain = []
    errorValid = []
    meanError = []
    meanErrorValid = []
    prevGrads = {}
    grads = {}
    if parameters is None:
        parameters = initializeParameters(layerInfo)
    L = len(parameters) // 2
    m = X.shape[0]
    batchSize = 32
    batchNum = m // batchSize

    for i in range(numIterations):
        cost = 0
        validCost = 0
        classificationError = 0
        validClassificationError = 0
        hp.shuffleXYData(X, Y)
        for b in range(batchNum):
            start = batchSize * b
            end = batchSize * (b + 1)
            XBatch = X[start:end]
            YBatch = Y[start:end]
            prevGrads = grads
            # Forward pass on the mini-batch and accumulate training metrics.
            cache = forwardPropagation(XBatch, parameters, activation)
            cost += hp.crossEntropy(cache["H" + str(L)], YBatch)
            classificationError += hp.meanClassificationError(
                cache["H" + str(L)], YBatch)
            # Track validation metrics with the current parameters.
            validCache = forwardPropagation(validX, parameters, activation)
            validClassificationError += hp.meanClassificationError(
                validCache["H" + str(L)], validY)
            validCost += hp.crossEntropy(validCache["H" + str(L)], validY)
            # Backward pass, momentum, and parameter update.
            grads = backpropagation(parameters, cache, XBatch, YBatch, lambd, activation)
            gradsMomentum = addMomentum(prevGrads, grads, momentum)
            parameters = updateParameters(parameters, gradsMomentum, learningRate)
        errorTrain.append(cost / float(batchNum))
        errorValid.append(validCost / float(batchNum))
        meanError.append(classificationError / float(batchNum))
        meanErrorValid.append(validClassificationError / float(batchNum))
        if printCost and i % 10 == 0:
            print("Cost at iteration %i: %f : %f" %
                  (i, cost / float(batchNum), validCost / float(batchNum)))
            print("Mean classification Err %i: %f : %f" %
                  (i, classificationError / float(batchNum),
                   validClassificationError / float(batchNum)))
    error = {
        "train": errorTrain,
        "valid": errorValid,
        "trainMean": meanError,
        "validMean": meanErrorValid
    }
    return parameters, error
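# Hedged usage sketch (not part of the original module): the data shapes,
# one-hot label encoding, and layerInfo layout below are assumptions made
# only to illustrate how train() and test() above might be called. Adjust
# them to whatever forwardPropagation and the hp helpers actually expect.
def _exampleTrainUsage():
    import numpy as np
    np.random.seed(0)
    # Assumed shapes: 256 training examples, 784 features, 10 one-hot classes.
    X = np.random.randn(256, 784)
    Y = np.eye(10)[np.random.randint(0, 10, 256)]
    validX = np.random.randn(64, 784)
    validY = np.eye(10)[np.random.randint(0, 10, 64)]
    layerInfo = [784, 100, 10]  # assumed [input, hidden, output] layout
    parameters, error = train(X, Y, validX, validY, layerInfo,
                              numIterations=10, learningRate=0.1,
                              printCost=True, momentum=0.9, lambd=0.001)
    testCost, testErr = test(validX, validY, parameters)
    return testCost, testErr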
def batchTrain(X, Y, validX, validY, layerInfo, numIterations, learningRate=0.01,
               printCost=False, momentum=0, lambd=0, batchSize=32, epsilon=0.01,
               activation="sigmoid"):
    errorTrain = []
    errorValid = []
    meanError = []
    meanErrorValid = []
    testParameters = {}
    prevGrads = {}
    grads = {}
    parameters = batchInitializeParameters(layerInfo)
    m = X.shape[0]
    batchNum = m // batchSize
    L = len(parameters) // 4

    for i in range(numIterations):
        # Running sums of the batch-norm statistics, averaged over the epoch.
        mean1 = 0
        mean2 = 0
        variance1 = 0
        variance2 = 0
        cost = 0
        validCost = 0
        classificationError = 0
        validClassificationError = 0
        hp.shuffleData(X, Y)
        for b in range(batchNum):
            prevGrads = grads
            start = batchSize * b
            end = batchSize * (b + 1)
            XBatch = X[start:end]
            YBatch = Y[start:end]
            cache = batchForwardPropagation(XBatch, parameters, epsilon=epsilon,
                                            activation=activation)
            mean1 += cache["mean1"]
            mean2 += cache["mean2"]
            variance1 += cache["variance1"]
            variance2 += cache["variance2"]
            grads = batchBackpropagation(parameters, cache, XBatch, YBatch,
                                         lambd=lambd, epsilon=epsilon,
                                         activation=activation)
            grads = addMomentum(prevGrads, grads, momentum)
            parameters = batchUpdateParameters(parameters, grads, learningRate)
            cost += hp.crossEntropy(cache["H" + str(L)], YBatch)
            classificationError += hp.meanClassificationError(
                cache["H" + str(L)], YBatch)
        # Average the per-batch statistics; these are reused at test time.
        testParameters = {
            "mean1": mean1 / float(batchNum),
            "mean2": mean2 / float(batchNum),
            "variance1": variance1 / float(batchNum),
            "variance2": variance2 / float(batchNum)
        }
        validCache = testBatchForwardPropagation(validX, parameters, testParameters,
                                                 epsilon=epsilon, activation=activation)
        validClassificationError = hp.meanClassificationError(
            validCache["H" + str(L)], validY)
        validCost = hp.crossEntropy(validCache["H" + str(L)], validY)
        errorTrain.append(cost / float(batchNum))
        errorValid.append(validCost)
        meanError.append(classificationError / float(batchNum))
        meanErrorValid.append(validClassificationError)
        if printCost and i % 10 == 0:
            print("Cost at iteration %i: %f : %f" %
                  (i, cost / float(batchNum), validCost))
            print("Mean classification Err %i: %f : %f" %
                  (i, classificationError / float(batchNum), validClassificationError))
    error = {
        "train": errorTrain,
        "valid": errorValid,
        "trainMean": meanError,
        "validMean": meanErrorValid
    }
    return parameters, error, testParameters
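# Hedged usage sketch (not part of the original module): shows how batchTrain()
# and testBatch() above fit together. The testParameters returned by batchTrain()
# (the averaged batch-norm means and variances) must be passed back into
# testBatch(); the data shapes and layerInfo layout here are assumptions.
def _exampleBatchTrainUsage():
    import numpy as np
    np.random.seed(0)
    X = np.random.randn(256, 784)
    Y = np.eye(10)[np.random.randint(0, 10, 256)]
    validX = np.random.randn(64, 784)
    validY = np.eye(10)[np.random.randint(0, 10, 64)]
    layerInfo = [784, 100, 10]  # assumed [input, hidden, output] layout
    parameters, error, testParameters = batchTrain(
        X, Y, validX, validY, layerInfo, numIterations=10,
        learningRate=0.01, printCost=True, momentum=0.9,
        lambd=0.001, batchSize=32, epsilon=0.01)
    cost, meanErr = testBatch(validX, validY, testParameters, parameters,
                              epsilon=0.01)
    return cost, meanErr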