def test_single_backprop_step_for_single_input_target_pair(self):
    # ARRANGE
    layer = Layer(ActivationRectifiedLinearUnit())
    layer.weights = np.random.rand(3, 3)
    layer.bias = np.random.rand(3, 1)
    network = Network()
    network.add(layer)

    # ACT
    input_vector = column([0.2, 0.4, 0.8])
    target_vector = column([1, 0, 0])
    output_vector = network.run(input_vector)
    loss = LossCategoricalCrossEntropy
    cost = loss.evaluate_loss(output_vector, target_vector)
    dC_dA = loss.gradient_wrt_output(output_vector, target_vector)
    network.calculate_update(dC_dA, 0.001)
    network.apply_update()

    # ASSERT
    new_output_vector = network.run(input_vector)
    new_cost = loss.evaluate_loss(new_output_vector, target_vector)
    assert new_cost < cost
def test_calculates_weight_change_using_gradient_wrt_weights(self):
    # ARRANGE
    layer = Layer(ActivationRectifiedLinearUnit())
    layer.bias = column([0, 0, 0])
    layer.weights = np.array([
        [-1, -1, -1],
        [1, 1, 1],
        [2, 2, 2]
    ])

    # ACT
    input_vector = column([1, 2, 3])
    layer.run(input_vector)
    dC_dA = np.array([[0.25, 0.5, 1]])
    layer.calculate_update(dC_dA, 1)
    # output = relu(weights * input) = [0, 6, 12]
    # dA_dZ = relu_gradient(output) = diag([0, 1, 1])
    # dZ_dW = input = [1 2 3]
    # dC_dW = T(dZ_dW * dC_dA * dA_dZ)
    #       = T([1 2 3]T * [0.25 0.5 1] * diag([0 1 1]))

    # ASSERT
    expected_weight_change = -np.array([
        [0, 0, 0],
        [0.5, 1, 1.5],
        [1, 2, 3]
    ])
    np.testing.assert_array_equal(layer.weight_change, expected_weight_change)
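# Hedged sketch (assumption, not part of the suite): replays the chain-rule
# arithmetic from the comments above with explicit NumPy, assuming the layer's
# forward pass is A = relu(W @ X) and the stored change is -learning_rate * dC_dW.
def sketch_weight_gradient_by_chain_rule():
    X = np.array([[1], [2], [3]])
    W = np.array([[-1, -1, -1], [1, 1, 1], [2, 2, 2]])
    dC_dA = np.array([[0.25, 0.5, 1]])
    Z = W @ X                                       # pre-activation: [-6, 6, 12]
    dA_dZ = np.diag((Z > 0).astype(float).ravel())  # relu gradient: diag([0, 1, 1])
    dC_dW = (X @ dC_dA @ dA_dZ).T                   # chain rule from the comments
    np.testing.assert_array_equal(
        -dC_dW,
        -np.array([[0, 0, 0], [0.5, 1, 1.5], [1, 2, 3]]))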
def test_calculates_gradient_wrt_inputs(self):
    # ARRANGE
    layer = Layer(ActivationRectifiedLinearUnit())
    layer.bias = column([0, 0, 0])
    layer.weights = np.array([
        [-1, -1, -1],
        [1, 1.2, 1.5],
        [2, 2.5, 3]
    ])

    # ACT
    input_vector = column([1, 2, 3])
    layer.run(input_vector)
    dC_dA = np.array([[0.25, 0.5, 1]])
    gradient_wrt_input = layer.calculate_update(dC_dA, 1)
    # output = relu(weights * input) = [0, 7.9, 16]
    # dA_dZ = relu_gradient(output) = diag([0, 1, 1])
    # dZ_dX = weights
    # dC_dX = dC_dA * dA_dZ * dZ_dX
    #       = [0.25 0.5 1] * diag([0 1 1]) * weights
    #       = [0 0.5 1] * weights

    # ASSERT
    expected_gradient_wrt_input = np.array([
        [2.5, 3.1, 3.75]
    ])
    np.testing.assert_array_equal(gradient_wrt_input, expected_gradient_wrt_input)
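# Hedged sketch (assumption, not part of the suite): checks the input-gradient
# arithmetic from the comments above with explicit NumPy, under the same assumed
# forward pass A = relu(W @ X).
def sketch_input_gradient_by_chain_rule():
    X = np.array([[1], [2], [3]])
    W = np.array([[-1, -1, -1], [1, 1.2, 1.5], [2, 2.5, 3]])
    dC_dA = np.array([[0.25, 0.5, 1]])
    Z = W @ X                                       # pre-activation: [-6, 7.9, 16]
    dA_dZ = np.diag((Z > 0).astype(float).ravel())  # relu gradient: diag([0, 1, 1])
    dC_dX = dC_dA @ dA_dZ @ W                       # row vector, one entry per input
    np.testing.assert_array_almost_equal(
        dC_dX, np.array([[2.5, 3.1, 3.75]]))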
def test_calculation_of_absolute_error_loss(self):
    loss_function = LossAbsoluteError
    output_vector = column([0, 1, 3])
    target_vector = column([1, 1, 1])

    result = loss_function.evaluate_loss(output_vector, target_vector)

    # sum of absolute differences: |0-1| + |1-1| + |3-1| = 3
    expected = 3
    assert result == expected
def test_calculation_of_squared_error_loss(self):
    loss_function = LossSquaredError
    output_vector = column([0, 1, 4])
    target_vector = column([1, 1, 1])

    result = loss_function.evaluate_loss(output_vector, target_vector)

    # half the sum of squared differences: ((0-1)^2 + (1-1)^2 + (4-1)^2) / 2 = 5
    expected = 5
    assert result == expected
def test_calculation_of_gradient_wrt_output(self):
    loss_function = LossSquaredError()
    output_vector = column([0, 1, 3])
    target_vector = column([1, 1, 1])

    result = loss_function.gradient_wrt_output(output_vector, target_vector)

    # d/do_i of (1/2) * sum((o - t)^2) = o_i - t_i
    expected = np.array([[-1, 0, 2]])
    np.testing.assert_array_equal(result, expected)
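# Hedged sketch (assumption, not part of the suite): a finite-difference check
# that the analytic gradient o_i - t_i matches the loss C = (1/2) * sum((o - t)^2)
# implied by the two squared-error tests above.
def sketch_finite_difference_gradient_check():
    o = np.array([0.0, 1.0, 3.0])
    t = np.array([1.0, 1.0, 1.0])

    def cost(v):
        return 0.5 * np.sum((v - t) ** 2)

    eps = 1e-6
    for i in range(o.size):
        o_plus = o.copy()
        o_plus[i] += eps
        numeric = (cost(o_plus) - cost(o)) / eps
        analytic = o[i] - t[i]
        assert abs(numeric - analytic) < 1e-4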
def test_calculation_of_gradient_wrt_output(self):
    loss_function = LossCategoricalCrossEntropy()
    output_vector = column([0, 0, 1, 3, 4])
    target_vector = column([0, 1, 1, 1, 0])

    result = loss_function.gradient_wrt_output(output_vector, target_vector)

    # -t_i / o_i
    # division-by-zero cases:
    #   target = 0, output = 0  -> derivative = 0
    #   target != 0, output = 0 -> derivative = -1e100 (clamped; might want
    #                              something else eventually)
    expected = np.array([[0, -1e100, -1, -1 / 3, 0]])
    np.testing.assert_array_equal(result, expected)
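# Hedged sketch (assumption, not the library's actual implementation): one way
# to realize the guarded -t/o gradient the test above describes, with the two
# division-by-zero cases handled explicitly.
def sketch_guarded_cross_entropy_gradient(output_vector, target_vector):
    o = output_vector.ravel().astype(float)
    t = target_vector.ravel().astype(float)
    grad = np.zeros_like(o)
    nonzero = o != 0
    grad[nonzero] = -t[nonzero] / o[nonzero]
    # target != 0 but output = 0: clamp the infinite slope to a huge finite value
    grad[(o == 0) & (t != 0)] = -1e100
    return grad.reshape(1, -1)  # row vector, matching the expected shape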
def test_zero_weights_and_bias_yield_zero_output(self):
    layer = Layer(ActivationRectifiedLinearUnit())
    layer.weights = np.array([
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]
    ])
    layer.bias = column([0, 0, 0])

    input_vector = column([1, 2, 3])
    output = layer.run(input_vector)

    expected = column([0, 0, 0])
    np.testing.assert_array_equal(output, expected)
def test_applies_activation_function_to_product_of_inputs_and_weights_with_bias(self):
    layer = Layer(ActivationRectifiedLinearUnit())
    layer.weights = np.array([
        [-1, -1/2, -1/3],
        [1, 1/2, 1/3],
        [2, 2/2, 2/3],
        [3, 3/2, 3/3]
    ])
    layer.bias = column([-4, -3, -2, -1])

    input_vector = column([1, 2, 3])
    output = layer.run(input_vector)

    # weights @ input = [-3, 3, 6, 9]; plus bias = [-7, 0, 4, 8]; relu = [0, 0, 4, 8]
    expected = column([0, 0, 4, 8])
    np.testing.assert_array_equal(output, expected)
def test_single_step_backpropagation_using_single_input_target_pair(self):
    # ARRANGE
    activation = ActivationRectifiedLinearUnit
    loss = LossAbsoluteError
    network, trainer = create_test_network_trainer(activation, loss)
    input_vector = column([0.2, 0.4, 0.8])
    target_vector = column([1, 0, 0])
    output_vector_i = network.run(input_vector)
    cost_i = loss.evaluate_loss(output_vector_i, target_vector)

    # ACT
    trainer.train([input_vector], [target_vector], 0.01, 1)

    # ASSERT
    output_vector_f = network.run(input_vector)
    cost_f = loss.evaluate_loss(output_vector_f, target_vector)
    assert cost_f < cost_i
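# Hedged sketch (assumption; the real helper lives elsewhere in the suite):
# a plausible create_test_network_trainer for these tests, building a small
# 3-in/3-out network with randomized parameters, mirroring the arrangement of
# the single-layer backprop test above. The Trainer name and its constructor
# signature are assumptions, not confirmed by the source.
def sketch_create_test_network_trainer(activation, loss):
    layer = Layer(activation())
    layer.weights = np.random.rand(3, 3)
    layer.bias = np.random.rand(3, 1)
    network = Network()
    network.add(layer)
    trainer = Trainer(network, loss)  # assumed constructor
    return network, trainer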
def test_single_epoch_backpropagation_using_specific_set_of_input_target_pairs(
        self):
    # ARRANGE
    activation = ActivationRectifiedLinearUnit
    loss = LossAbsoluteError
    network, trainer = create_test_network_trainer(activation, loss)
    input_vectors = [
        column([0.2, 0.4, 0.8]),
        column([0.1, 0.3, 0.7]),
        column([3, 2, 1])
    ]
    target_vectors = [
        column([1, 0, 0]),
        column([0, 1, 0]),
        column([0, 0, 1])
    ]
    epoch_cost_i = compute_epoch_cost(input_vectors, target_vectors, network, loss)

    # ACT
    trainer.train(input_vectors, target_vectors, 0.01, 1)

    # ASSERT
    epoch_cost_f = compute_epoch_cost(input_vectors, target_vectors, network, loss)
    assert epoch_cost_f < epoch_cost_i
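# Hedged sketch (assumption; the real helper lives elsewhere in the suite):
# compute_epoch_cost as these tests appear to use it, i.e. the total loss over
# one pass through all input/target pairs. Whether the suite sums or averages
# is an assumption; either works for the "cost decreased" assertions above.
def sketch_compute_epoch_cost(input_vectors, target_vectors, network, loss):
    total = 0
    for input_vector, target_vector in zip(input_vectors, target_vectors):
        output_vector = network.run(input_vector)
        total += loss.evaluate_loss(output_vector, target_vector)
    return total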
def test_calculates_bias_change_using_gradient_wrt_bias(self):
    # ARRANGE
    layer = Layer(ActivationRectifiedLinearUnit())
    layer.bias = column([-1, 1, 2])
    layer.weights = np.array([
        [-1, -1],
        [1, 1],
        [2, 2]
    ])

    # ACT
    input_vector = column([2, 3])
    layer.run(input_vector)
    dC_dA = np.array([[0.25, 0.5, 1]])
    layer.calculate_update(dC_dA, 1)

    # ASSERT
    # Same case as the weight-change test above, just with bias instead of the
    # first weights column
    expected_bias_change = -column([0, 0.5, 1])
    np.testing.assert_array_equal(layer.bias_change, expected_bias_change)
def run_assertion(a, b, expected):
    # Helper: expects `loss_function` to be defined in the enclosing scope
    output_vector = column(a)
    target_vector = column(b)

    result = loss_function.evaluate_loss(output_vector, target_vector)

    assert result == expected
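# Hedged usage sketch (assumption): with a loss in scope, the helper above
# condenses the loss tests, e.g. the absolute-error case from earlier:
#
#     loss_function = LossAbsoluteError
#     run_assertion([0, 1, 3], [1, 1, 1], 3)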