Example #1
import numpy as np

def backprop(y, cache):
    # cache is expected to hold the forward-pass activations: cache[0] is the
    # network input, cache[i] the input to layers[i], cache[-1] the final output.
    # `layers` and `Loss` are assumed to be defined elsewhere in the script.
    derivative_w = []
    derivative_b = []

    loss = Loss(cache[-1], y)
    loss_value = loss.forward()

    # Gradient of the loss with respect to the network output
    dA = loss.backward()

    # Walk the layers from last to second, accumulating gradients
    for index, layer in reversed(list(enumerate(layers))[1:]):
        derivative_w.append(np.dot(cache[index].T, dA))
        derivative_b.append(np.sum(dA, axis=0, keepdims=True))

        # Push the gradient through the weights, then through the activation
        dZ = np.dot(dA, layer.w.T)
        dA = dZ * layer.backward(cache[index])

    # First layer: its input is the raw network input, cache[0]
    derivative_w.append(np.dot(cache[0].T, dA))
    derivative_b.append(np.sum(dA, axis=0, keepdims=True))  # keepdims for a consistent bias-gradient shape

    # Reverse so the gradients line up with the order of `layers`
    derivative_w = derivative_w[::-1]
    derivative_b = derivative_b[::-1]

    return derivative_w, derivative_b, loss_value
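
Below is a minimal sketch of how the gradients returned by backprop might be applied, assuming cache is the list of activations collected during the forward pass and that each layer exposes w and b arrays matching the gradient shapes. The plain gradient-descent step, the layer.b attribute, and the learning-rate value are illustrative assumptions, not part of the original code:

# Hypothetical usage of backprop(): apply the returned gradients with a
# plain gradient-descent step. layer.b and the learning rate are assumptions.
learning_rate = 0.05

derivative_w, derivative_b, loss_value = backprop(y, cache)
for layer, dw, db in zip(layers, derivative_w, derivative_b):
    layer.w -= learning_rate * dw
    layer.b -= learning_rate * db
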
optimizer = Adam(learningRate=0.05, decay=4e-8)

for epoch in range(10001):

	# Forward pass through the network
	layer1.passForward(X)

	activation1.forward(layer1.output)

	layer2.passForward(activation1.output)

	activation2.forward(layer2.output)

	

	# Compute the loss for this epoch
	loss = lossFunc.forward(activation2.output, y)

	# Calculate accuracy from the output of activation2 and the targets
	predictions = np.argmax(activation2.output, axis=1)  # predicted class index for each sample
	accuracy = np.mean(predictions == y)

	

	#-------------------------------------------------------------------------------------------------------
	# Backpropagation (backward pass through the network)

	lossFunc.backward(activation2.output, y)
	activation2.backward(lossFunc.dvalues)
	layer2.backward(activation2.dvalues)
	activation1.backward(layer2.dvalues)
	layer1.backward(activation1.dvalues)
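
The loop above ends after the backward pass; the parameter update that would normally follow is not shown. Below is a minimal sketch of that step inside the epoch loop, assuming the Adam optimizer created earlier exposes a per-layer update method. The method name updateParams and the progress printout are assumptions for illustration, not the actual API of the classes above:

	# Hypothetical continuation of the epoch loop: apply the gradients
	# computed by the backward calls above. `updateParams` is an assumed
	# method name, not necessarily the real API of the Adam class used here.
	optimizer.updateParams(layer1)
	optimizer.updateParams(layer2)

	# Occasional progress report (illustrative)
	if epoch % 1000 == 0:
		print(f'epoch: {epoch}, loss: {np.mean(loss):.4f}, accuracy: {accuracy:.4f}')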