Example 1
    def _build_architecture_get_prediction_and_regularization_cost(
            architecture, weight_decay, current_input):
        architecture_built = []
        regularization_cost = Variable(0.0)
        weight_decay_variable = Variable(weight_decay)  # TODO: constant
        previous_layer_output = architecture[0]['input']

        for layer_dictionary in architecture:
            # Each layer must consume exactly as many values as the
            # previous layer produced.
            assert previous_layer_output == layer_dictionary['input'], \
                'Inconsistent architecture: cannot feed {} outputs to {} inputs'.format(
                    previous_layer_output,
                    layer_dictionary['input']
                )
            activation_function = activation_function_name_to_class[
                layer_dictionary['nonlinear']]
            regularization_method = regularization_method_name_to_class[
                layer_dictionary['regularization']]
            layer = FullyConnectedLayer(layer_dictionary['input'],
                                        layer_dictionary['output'],
                                        activation_function, current_input)
            # Accumulate weight_decay * penalty(W) over all layers.
            regularization_cost = Add(
                regularization_cost,
                Multiply(weight_decay_variable,
                         regularization_method(layer.get_weight())))
            architecture_built.append(layer)
            current_input = layer
            previous_layer_output = layer_dictionary['output']

        return architecture_built, current_input, regularization_cost
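
For context, `architecture` is a list of layer dictionaries whose `input`/`output` sizes must chain, and the second return value is the last layer built (it stands in for the prediction node). A minimal call sketch; the 'relu' and 'l2' registry keys are assumptions about what `activation_function_name_to_class` and `regularization_method_name_to_class` contain:

    # Hypothetical two-layer spec: 784 -> 128 -> 10.
    architecture = [
        {'input': 784, 'output': 128, 'nonlinear': 'relu', 'regularization': 'l2'},
        {'input': 128, 'output': 10, 'nonlinear': 'relu', 'regularization': 'l2'},
    ]
    x_variable = Variable(np.zeros((1, 784)))  # one example with 784 features
    layers, last_layer, reg_cost = _build_architecture_get_prediction_and_regularization_cost(
        architecture, weight_decay=1e-4, current_input=x_variable)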
Example 2
    def test_reduce_mean_splitter_broadcasting(self):
        x = np.arange(6).reshape(3, 2)
        w = np.arange(6, 8).reshape(2, 1)
        b = 12.0
        y = np.arange(8, 11).reshape(3, 1)
        dl_mse = 11.0

        x_variable = Variable(x)
        w_variable = Variable(w)
        b_variable = Variable(b)
        y_variable = Variable(y)

        xw_node = Multiply(x_variable, w_variable)
        xwb_node = Add(xw_node, b_variable)
        xwb_mse_node = MSEWithSplitter(y_variable, xwb_node)

        xwb_mse_desired = mean_squared_error(y, (x @ w) + np.full((3, 1), b))
        xwb_mse_actual = xwb_mse_node.forward()
        np.testing.assert_allclose(xwb_mse_actual, xwb_mse_desired)
        xwb_mse_node.backward(dl_mse)

        dl_db_actual = b_variable.get_gradient()
        dl_db_desired = dl_mse * 2.0 * np.sum((x @ w) + np.full((3, 1), b) - y) / x.shape[0]

        np.testing.assert_allclose(dl_db_actual, dl_db_desired)

        dl_dx_actual = x_variable.get_gradient()
        dl_dw_actual = w_variable.get_gradient()

        # Analytic gradients through pred = x @ w + b, using the same
        # 1 / x.shape[0] scaling as dl_db_desired above.
        dl_dpred = dl_mse * 2.0 * ((x @ w) + np.full((3, 1), b) - y) / x.shape[0]
        np.testing.assert_allclose(dl_dx_actual, dl_dpred @ w.T)
        np.testing.assert_allclose(dl_dw_actual, x.T @ dl_dpred)
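
The hard-coded dl_db_desired can be sanity-checked with a central finite difference; a plain-numpy sketch, assuming (as the formula above does) that the loss divides by the batch size x.shape[0]:

    import numpy as np

    x = np.arange(6).reshape(3, 2)
    w = np.arange(6, 8).reshape(2, 1)
    y = np.arange(8, 11).reshape(3, 1)
    dl_mse, b, eps = 11.0, 12.0, 1e-6

    def loss(b):
        pred = x @ w + b                 # scalar b broadcasts over the (3, 1) output
        return np.mean((pred - y) ** 2)  # the mean divides by x.shape[0] == 3

    numeric = dl_mse * (loss(b + eps) - loss(b - eps)) / (2 * eps)
    analytic = dl_mse * 2.0 * np.sum(x @ w + b - y) / x.shape[0]
    print(numeric, analytic)  # both 792, up to float noise in the difference quotient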
Example 3
    def test_forward(self):
        w = np.array([[1, 2, 3, 4],
                      [5, 6, 7, 8],
                      [9, 10, 11, 12]])
        x = np.array([[13, 14],
                      [15, 16],
                      [17, 18],
                      [19, 20]])

        wx_desired = np.array([[170, 180],
                               [426, 452],
                               [682, 724]])

        w_variable = Variable(w)
        x_variable = Variable(x)
        wx_variable = Multiply(w_variable, x_variable)

        wx_actual = wx_variable.forward()

        np.testing.assert_allclose(wx_actual, wx_desired)
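
Across these tests, Multiply behaves as a matrix product, with backward applying the standard matmul rules. A self-contained sketch of such nodes; it mirrors the behaviour the tests check but is not the library's own implementation:

    import numpy as np

    class VariableSketch:
        """Leaf node holding a value and accumulating its gradient."""
        def __init__(self, value):
            self.value = np.asarray(value, dtype=float)
            self.gradient = np.zeros_like(self.value)

        def forward(self):
            return self.value

        def backward(self, grad):
            self.gradient += grad

    class MultiplySketch:
        """Matrix-product node: forward is a @ b, and backward applies
        dL/da = grad @ b.T and dL/db = a.T @ grad."""
        def __init__(self, a, b):
            self._a, self._b = a, b

        def forward(self):
            self._a_value = self._a.forward()
            self._b_value = self._b.forward()
            return self._a_value @ self._b_value

        def backward(self, grad):
            self._a.backward(grad @ self._b_value.T)
            self._b.backward(self._a_value.T @ grad)

    w = VariableSketch([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
    x = VariableSketch([[13, 14], [15, 16], [17, 18], [19, 20]])
    wx = MultiplySketch(w, x)
    print(wx.forward())            # matches wx_desired above
    wx.backward(np.ones((3, 2)))
    print(x.gradient)              # w.T @ ones: each row is a column sum of w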
Example 4
    def test_backward_1(self):
        w = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
        x = np.array([[9, 10, 11], [12, 13, 14]])

        dl_dwx = np.ones((w.shape[0], x.shape[1]))

        w_variable = Variable(w)
        x_variable = Variable(x)
        wx_variable = Multiply(w_variable, x_variable)

        wx_desired = w @ x
        wx_actual = wx_variable.forward()

        np.testing.assert_allclose(wx_actual, wx_desired)
        wx_variable.backward(grad=dl_dwx)

        dl_dx_actual = x_variable.get_gradient()
        dl_dx_desired = np.array([[16, 16, 16], [20, 20, 20]])

        self.assertEqual(dl_dx_desired.shape, dl_dx_actual.shape)
        np.testing.assert_allclose(dl_dx_actual, dl_dx_desired)
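
The hard-coded dl_dx_desired follows from the matmul rule dL/dx = w.T @ dL/dwx; with an all-ones upstream gradient, every row of dL/dx collapses to a column sum of w. A quick check in plain numpy:

    import numpy as np

    w = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    dl_dwx = np.ones((4, 3))
    print(w.T @ dl_dwx)
    # [[16. 16. 16.]
    #  [20. 20. 20.]]  <- matches dl_dx_desired above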
Example 5
    def __init__(self,
                 inputs_num: int,
                 outputs_num: int,
                 activation_function: ActivationFunction.__class__,
                 input_variable=None):
        super().__init__()
        self._af = activation_function
        # Weights drawn uniformly from [-1/sqrt(fan_in), 1/sqrt(fan_in)].
        self._w = Variable(
            np.random.uniform(-1 / math.sqrt(inputs_num),
                              1 / math.sqrt(inputs_num),
                              (inputs_num, outputs_num)))
        self._b = Variable(np.zeros(outputs_num))
        self._input = input_variable
        self._output = self._af(Add(Multiply(self._input, self._w), self._b))
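
A construction sketch for the layer; Sigmoid is an assumed activation class from the same framework (any value of activation_function_name_to_class would do), and reading the result through the private _output is purely illustrative, since no public accessor appears in the snippet:

    x_variable = Variable(np.array([[0.5, -1.0, 2.0],
                                    [1.5, 0.0, -0.5]]))  # batch of 2, 3 features each
    layer = FullyConnectedLayer(3, 4, Sigmoid, input_variable=x_variable)
    activations = layer._output.forward()  # shape (2, 4)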
Example 6
    def test_backward_2(self):
        w = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
        x = np.array([[9, 10, 11], [12, 13, 14]])

        dl_dwx = np.arange(1, 1 + w.shape[0] * x.shape[1]).reshape(w.shape[0], x.shape[1])

        w_variable = Variable(w)
        x_variable = Variable(x)
        wx_variable = Multiply(w_variable, x_variable)

        wx_desired = w @ x
        wx_actual = wx_variable.forward()

        np.testing.assert_allclose(wx_actual, wx_desired)
        wx_variable.backward(grad=dl_dwx)

        dl_dw_actual = w_variable.get_gradient()
        dl_dw_desired = np.array([[1 * 9 + 2 * 10 + 3 * 11, 1 * 12 + 2 * 13 + 3 * 14],
                                  [4 * 9 + 5 * 10 + 6 * 11, 4 * 12 + 5 * 13 + 6 * 14],
                                  [7 * 9 + 8 * 10 + 9 * 11, 7 * 12 + 8 * 13 + 9 * 14],
                                  [10 * 9 + 11 * 10 + 12 * 11, 10 * 12 + 11 * 13 + 12 * 14]])

        self.assertEqual(dl_dw_desired.shape, dl_dw_actual.shape)
        np.testing.assert_allclose(dl_dw_actual, dl_dw_desired)
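
As in test_backward_1, the expected gradient is just the matmul rule, here dL/dw = dL/dwx @ x.T; the sums written out above are exactly the entries of that product:

    import numpy as np

    x = np.array([[9, 10, 11], [12, 13, 14]])
    dl_dwx = np.arange(1, 13).reshape(4, 3)
    print(dl_dwx @ x.T)
    # [[ 62  80]
    #  [152 197]
    #  [242 314]
    #  [332 431]]  <- matches dl_dw_desired above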