def _build_architecture_get_prediction_and_regularization_cost(
        architecture, weight_decay, current_input):
    """Assemble the layer graph described by ``architecture``.

    Each entry of ``architecture`` is a dict with ``input``, ``output``,
    ``nonlinear`` and ``regularization`` keys; layers are chained so that
    every layer consumes the previous layer's output node, starting from
    ``current_input``.

    Returns a tuple ``(layers, prediction_node, regularization_cost_node)``
    where ``regularization_cost_node`` is the weight-decay-scaled sum of each
    layer's regularization penalty.
    """
    layers = []
    reg_cost = Variable(0.0)
    decay_variable = Variable(weight_decay)  # TODO: constant
    expected_inputs = architecture[0]['input']
    for spec in architecture:
        # Consecutive layers must agree on their shared dimension.
        assert expected_inputs == spec["input"], \
            'Inconsistent architecture: can not feed {} outputs to {} inputs'.format(
                expected_inputs, spec['input']
            )
        nonlinearity = activation_function_name_to_class[spec["nonlinear"]]
        regularizer = regularization_method_name_to_class[spec["regularization"]]
        layer = FullyConnectedLayer(
            spec["input"], spec["output"], nonlinearity, current_input)
        # Accumulate weight_decay * penalty(W) for this layer's weights.
        reg_cost = Add(
            reg_cost,
            Multiply(decay_variable, regularizer(layer.get_weight())))
        layers.append(layer)
        current_input = layer
        expected_inputs = spec['output']
    return layers, current_input, reg_cost
def test_reduce_mean_splitter_broadcasting(self):
    """MSEWithSplitter forward/backward with a broadcast scalar bias.

    Checks the loss value and the gradients w.r.t. the bias, the input
    matrix and the weight vector. Gradients follow the convention already
    pinned by the bias check: the loss averages over the batch dimension
    (``x.shape[0]``), so d(loss)/d(prediction) = dl * 2 * (pred - y) / N.
    """
    x = np.arange(6).reshape(3, 2)
    w = np.arange(6, 8).reshape(2, 1)
    b = 12.0
    y = np.arange(8, 11).reshape(3, 1)
    dl_mse = 11.0
    x_variable = Variable(x)
    w_variable = Variable(w)
    b_variable = Variable(b)
    y_variable = Variable(y)
    xw_node = Multiply(x_variable, w_variable)
    xwb_node = Add(xw_node, b_variable)
    xwb_mse_node = MSEWithSplitter(y_variable, xwb_node)
    prediction = (x @ w) + np.full((3, 1), b)
    xwb_mse_desired = mean_squared_error(y, prediction)
    xwb_mean_actual = xwb_mse_node.forward()
    np.testing.assert_allclose(xwb_mean_actual, xwb_mse_desired)
    xwb_mse_node.backward(dl_mse)
    dl_db_actual = b_variable.get_gradient()
    dl_db_desired = dl_mse * 2.0 * np.sum(prediction - y) / x.shape[0]
    np.testing.assert_allclose(dl_db_actual, dl_db_desired)
    # FIX: these gradients were previously fetched but never asserted,
    # leaving the broadcasting behavior this test is named for unverified.
    dl_dprediction = dl_mse * 2.0 * (prediction - y) / x.shape[0]
    dl_dx_actual = x_variable.get_gradient()
    dl_dx_desired = dl_dprediction @ w.T
    np.testing.assert_allclose(dl_dx_actual, dl_dx_desired)
    dl_dw_actual = w_variable.get_gradient()
    dl_dw_desired = x.T @ dl_dprediction
    np.testing.assert_allclose(dl_dw_actual, dl_dw_desired)
def test_forward(self):
    """Multiply.forward returns the matrix product of its two operands."""
    left = np.array([[1, 2, 3, 4],
                     [5, 6, 7, 8],
                     [9, 10, 11, 12]])
    right = np.array([[13, 14],
                      [15, 16],
                      [17, 18],
                      [19, 20]])
    # Hand-computed (3, 4) @ (4, 2) product.
    expected = np.array([[170, 180],
                         [426, 452],
                         [682, 724]])
    product_node = Multiply(Variable(left), Variable(right))
    np.testing.assert_allclose(product_node.forward(), expected)
def test_backward_1(self):
    """With an all-ones upstream gradient, dL/dx is the column sums of w,
    broadcast across x's columns."""
    w = np.array([[1, 2],
                  [3, 4],
                  [5, 6],
                  [7, 8]])
    x = np.array([[9, 10, 11],
                  [12, 13, 14]])
    upstream = np.ones((w.shape[0], x.shape[1]))
    w_var = Variable(w)
    x_var = Variable(x)
    product = Multiply(w_var, x_var)
    # Sanity-check the forward pass before propagating gradients.
    np.testing.assert_allclose(product.forward(), w @ x)
    product.backward(grad=upstream)
    # Column sums of w: [1+3+5+7, 2+4+6+8] = [16, 20], one row per x row.
    dl_dx_desired = np.array([[16, 16, 16],
                              [20, 20, 20]])
    dl_dx_actual = x_var.get_gradient()
    self.assertEqual(dl_dx_desired.shape, dl_dx_actual.shape)
    np.testing.assert_allclose(dl_dx_actual, dl_dx_desired)
def __init__(self, inputs_num: int, outputs_num: int,
             activation_function: ActivationFunction.__class__,
             input_variable=None):
    """Create a fully connected layer: activation(input @ W + b).

    Weights are drawn uniformly from [-1/sqrt(inputs_num), 1/sqrt(inputs_num)]
    and the bias starts at zero.
    """
    super().__init__()
    bound = 1 / math.sqrt(inputs_num)
    initial_weights = np.random.uniform(-bound, bound, (inputs_num, outputs_num))
    self._af = activation_function
    self._w = Variable(initial_weights)
    self._b = Variable(np.zeros(outputs_num))
    self._input = input_variable
    # Wire the layer's computation graph: activation(input @ W + b).
    pre_activation = Add(Multiply(self._input, self._w), self._b)
    self._output = self._af(pre_activation)
def test_backward_2(self):
    """With a non-uniform upstream gradient, dL/dw equals upstream @ x.T;
    the expected matrix is spelled out element by element."""
    w = np.array([[1, 2],
                  [3, 4],
                  [5, 6],
                  [7, 8]])
    x = np.array([[9, 10, 11],
                  [12, 13, 14]])
    rows, cols = w.shape[0], x.shape[1]
    # Upstream gradient 1..12 laid out in the product's (4, 3) shape.
    upstream = np.arange(1, 1 + rows * cols).reshape(rows, cols)
    w_var = Variable(w)
    x_var = Variable(x)
    product = Multiply(w_var, x_var)
    # Sanity-check the forward pass before propagating gradients.
    np.testing.assert_allclose(product.forward(), w @ x)
    product.backward(grad=upstream)
    dl_dw_desired = np.array(
        [[1 * 9 + 2 * 10 + 3 * 11, 1 * 12 + 2 * 13 + 3 * 14],
         [4 * 9 + 5 * 10 + 6 * 11, 4 * 12 + 5 * 13 + 6 * 14],
         [7 * 9 + 8 * 10 + 9 * 11, 7 * 12 + 8 * 13 + 9 * 14],
         [10 * 9 + 11 * 10 + 12 * 11, 10 * 12 + 11 * 13 + 12 * 14]])
    dl_dw_actual = w_var.get_gradient()
    self.assertEqual(dl_dw_desired.shape, dl_dw_actual.shape)
    np.testing.assert_allclose(dl_dw_actual, dl_dw_desired)