예제 #1
0
def test_gradient_finite_differences(net, input_data):
    """Check the network's analytic gradients against finite differences.

    One training forward/backward pass is run on ``input_data`` and the
    content of ``net.buffer.gradients`` is compared with the estimate that
    ``approx_fprime`` produces for the "total_loss" as a function of
    ``net.buffer.parameters``.

    Args:
        net: Network under test. It has to offer ``provide_external_data``,
            ``forward_pass``, ``backward_pass``, ``get_loss_values``,
            ``layers`` and a ``buffer`` holding ``parameters`` and
            ``gradients``.
        input_data: Array passed to the network as its "default" external
            data. ``input_data.shape[1]`` is taken as the number of
            sequences when normalising the squared error.

    Raises:
        AssertionError: If the summed squared difference divided by the
            number of sequences is not below 1e-4. In that case the
            difference is also written into ``net.buffer.gradients`` and
            printed per layer and view.
    """
    # ######## calculate gradients ##########
    net.provide_external_data({"default": input_data})
    net.forward_pass(training_pass=True)
    net.backward_pass()
    gradient_calc = net.buffer.gradients

    # ######## estimate gradients ##########
    def f(x):
        # Evaluate the total loss with the parameter buffer set to x.
        net.buffer.parameters[:] = x
        net.forward_pass()
        return net.get_loss_values()["total_loss"]

    initial_weights = net.buffer.parameters.copy()
    try:
        gradient_approx = approx_fprime(initial_weights, f, 1e-6)
    finally:
        # f() overwrites the parameter buffer on every evaluation, so write
        # the original weights back instead of leaving the last probe there.
        net.buffer.parameters[:] = initial_weights

    # ######## compare them #############
    nr_sequences = input_data.shape[1]
    diff = gradient_approx - gradient_calc
    mse = np.sum(diff ** 2) / nr_sequences
    # "not <" mirrors the assertion below exactly, so a NaN error (which
    # fails the assertion) also produces the diagnostic output.
    if not mse < 1e-4:
        # Hijack the network gradient buffer for the view
        net.buffer.gradients[:] = diff
        for layer_name in net.layers:
            if not net.buffer[layer_name]:
                continue
            print("============= Layer: {} =============".format(layer_name))
            for view_name in net.buffer[layer_name].gradients.keys():
                print("------------- {} -------------".format(view_name))
                print(net.buffer[layer_name].gradients[view_name])

    assert mse < 1e-4, "gradient check failed, mse = {}".format(mse)
예제 #2
0
def test_deltas_finite_differences(net, input_data):
    """Check the deltas at the network input against finite differences.

    One training forward/backward pass is run on ``input_data`` and the
    flattened ``net.buffer.Input.output_deltas.default`` is compared with
    the estimate that ``approx_fprime`` produces for the "total_loss" as a
    function of the (flattened) input data.

    Args:
        net: Network under test. It has to offer ``provide_external_data``,
            ``forward_pass``, ``backward_pass``, ``get_loss_values`` and a
            ``buffer`` exposing ``Input.output_deltas.default``.
        input_data: Array passed to the network as its "default" external
            data. ``input_data.shape[1]`` is taken as the number of
            sequences when normalising the squared error.

    Raises:
        AssertionError: If the summed squared difference divided by the
            number of sequences is not below 1e-4. In that case the
            difference is printed slice by slice along the first axis.
    """
    # ######## calculate deltas ##########
    net.provide_external_data({"default": input_data})
    net.forward_pass(training_pass=True)
    net.backward_pass()
    delta_calc = net.buffer.Input.output_deltas.default.flatten()

    # ######## estimate deltas ##########
    def f(x):
        # Evaluate the total loss with x (reshaped) as the external input.
        net.provide_external_data({"default": x.reshape(input_data.shape)})
        net.forward_pass()
        return net.get_loss_values()["total_loss"]

    try:
        # flatten() already returns a copy, so input_data is never modified.
        delta_approx = approx_fprime(input_data.flatten(), f, 1e-5)
    finally:
        # f() replaces the network's external data on every evaluation, so
        # hand the unperturbed input back once the estimate is done.
        net.provide_external_data({"default": input_data})

    # ######## compare them #############
    nr_sequences = input_data.shape[1]
    diff = delta_approx - delta_calc
    mse = np.sum(diff ** 2) / nr_sequences
    # "not <" mirrors the assertion below exactly, so a NaN error (which
    # fails the assertion) also produces the diagnostic output.
    if not mse < 1e-4:
        for t, diff_t in enumerate(diff.reshape(input_data.shape)):
            print("======== t=%d =========" % t)
            print(diff_t)

    assert mse < 1e-4, "delta check failed, mse = {}".format(mse)
예제 #3
0
def test_gradient_finite_differences(net, input_data):
    """Compare ``net.buffer.gradients`` with a finite-difference estimate.

    After one training forward/backward pass on ``input_data``, the
    gradients found in ``net.buffer.gradients`` are checked against
    ``approx_fprime`` applied to the 'total_loss' as a function of
    ``net.buffer.parameters``.

    Args:
        net: Network under test; must provide ``provide_external_data``,
            ``forward_pass``, ``backward_pass``, ``get_loss_values``,
            ``layers`` and ``buffer`` (with ``parameters``/``gradients``).
        input_data: Array given to the network as 'default' external data;
            ``input_data.shape[1]`` is used as the number of sequences for
            normalising the squared error.

    Raises:
        AssertionError: If the summed squared difference per sequence is not
            below 1e-4. Before failing, the difference is copied into
            ``net.buffer.gradients`` and printed for every layer and view.
    """
    # ######## calculate gradients ##########
    net.provide_external_data({'default': input_data})
    net.forward_pass(training_pass=True)
    net.backward_pass()
    gradient_calc = net.buffer.gradients

    # ######## estimate gradients ##########
    def f(x):
        # Total loss with the parameter buffer set to x.
        net.buffer.parameters[:] = x
        net.forward_pass()
        return net.get_loss_values()['total_loss']

    initial_weights = net.buffer.parameters.copy()
    try:
        gradient_approx = approx_fprime(initial_weights, f, 1e-6)
    finally:
        # Each call of f() overwrites the parameter buffer; restore the
        # original weights so the network is not left at a probe point.
        net.buffer.parameters[:] = initial_weights

    # ######## compare them #############
    nr_sequences = input_data.shape[1]
    diff = gradient_approx - gradient_calc
    mse = np.sum(diff ** 2) / nr_sequences
    # Use the exact negation of the assertion so that NaN, which fails the
    # assertion, also triggers the diagnostics.
    if not mse < 1e-4:
        # Hijack the network gradient buffer for the view
        net.buffer.gradients[:] = diff
        for layer_name in net.layers:
            if not net.buffer[layer_name]:
                continue
            print("============= Layer: {} =============".format(layer_name))
            for view_name in net.buffer[layer_name].gradients.keys():
                print("------------- {} -------------".format(view_name))
                print(net.buffer[layer_name].gradients[view_name])

    assert mse < 1e-4, "gradient check failed, mse = {}".format(mse)
예제 #4
0
def test_deltas_finite_differences(net, input_data):
    """Compare the input deltas with a finite-difference estimate.

    After one training forward/backward pass on ``input_data``, the
    flattened ``net.buffer.Input.output_deltas.default`` is checked against
    ``approx_fprime`` applied to the 'total_loss' as a function of the
    flattened input data.

    Args:
        net: Network under test; must provide ``provide_external_data``,
            ``forward_pass``, ``backward_pass``, ``get_loss_values`` and a
            ``buffer`` exposing ``Input.output_deltas.default``.
        input_data: Array given to the network as 'default' external data;
            ``input_data.shape[1]`` is used as the number of sequences for
            normalising the squared error.

    Raises:
        AssertionError: If the summed squared difference per sequence is not
            below 1e-4. Before failing, the difference is printed one slice
            at a time along the first axis.
    """
    # ######## calculate deltas ##########
    net.provide_external_data({'default': input_data})
    net.forward_pass(training_pass=True)
    net.backward_pass()
    delta_calc = net.buffer.Input.output_deltas.default.flatten()

    # ######## estimate deltas ##########
    def f(x):
        # Total loss with x (reshaped) supplied as the external input.
        net.provide_external_data({'default': x.reshape(input_data.shape)})
        net.forward_pass()
        return net.get_loss_values()['total_loss']

    try:
        # flatten() returns a copy, so no extra copy() is needed here.
        delta_approx = approx_fprime(input_data.flatten(), f, 1e-5)
    finally:
        # Each call of f() replaces the network's external data; give the
        # unperturbed input back afterwards.
        net.provide_external_data({'default': input_data})

    # ######## compare them #############
    nr_sequences = input_data.shape[1]
    diff = delta_approx - delta_calc
    mse = np.sum(diff ** 2) / nr_sequences
    # Use the exact negation of the assertion so that NaN, which fails the
    # assertion, also triggers the diagnostics.
    if not mse < 1e-4:
        for t, diff_t in enumerate(diff.reshape(input_data.shape)):
            print("======== t=%d =========" % t)
            print(diff_t)

    assert mse < 1e-4, "delta check failed, mse = {}".format(mse)
예제 #5
0
def test_elementwise_act_func_gradients():
    """Check the activation derivatives of HANDLER by finite differences.

    For the sigmoid, tanh and rel pairs, the derivative function is called
    with all-ones output deltas on random input of shape (3, 2, 4). The
    resulting input deltas are compared (rtol = atol = 1e-4) with what
    ``approx_fprime`` estimates for the sum of the forward outputs. For a
    mismatching pair both gradients and their difference are printed before
    the assertion fails.
    """
    shape = (3, 2, 4)
    activation_pairs = (
        (HANDLER.sigmoid, HANDLER.sigmoid_deriv),
        (HANDLER.tanh, HANDLER.tanh_deriv),
        (HANDLER.rel, HANDLER.rel_deriv),
    )

    for forward, backward in activation_pairs:
        inputs = HANDLER.create_from_numpy(np.random.randn(*shape))
        outputs = HANDLER.zeros(shape)
        doutputs = HANDLER.ones(shape)
        dinputs = HANDLER.zeros(shape)

        # Input deltas as computed by the handler for all-ones output deltas.
        forward(inputs, outputs)
        backward(inputs, outputs, doutputs, dinputs)
        analytic = HANDLER.get_numpy_copy(dinputs)

        n_elements = inputs.size
        start = HANDLER.get_numpy_copy(inputs).reshape((n_elements,))

        def summed_output(flat_values):
            # The reshape presumably shares memory with ``inputs``; otherwise
            # the forward call below would not see the new values.
            flat_view = inputs.reshape((n_elements,))
            HANDLER.set_from_numpy(flat_view, flat_values)
            HANDLER.fill(outputs, 0.0)
            forward(inputs, outputs)
            return HANDLER.get_numpy_copy(outputs).sum()

        numeric = approx_fprime(start, summed_output, 1e-5)
        numeric = numeric.reshape(analytic.shape)

        matches = np.allclose(numeric, analytic, rtol=1e-4, atol=1e-4)
        if not matches:
            report = (
                ("-- Approximated Gradient ----", numeric),
                ("---- Calculated Gradient ----", analytic),
                ("------------- Difference ----", numeric - analytic),
            )
            print("-----------------------------")
            print("Testing", forward.__name__)
            for heading, values in report:
                print(heading)
                print(values)
        assert matches
예제 #6
0
def test_elementwise_act_func_gradients():
    """Verify HANDLER's activation derivatives numerically.

    The sigmoid, tanh and rel functions are each paired with their
    ``*_deriv`` counterpart. On random input of shape (3, 2, 4) the
    derivative function is fed all-ones output deltas, and the input deltas
    it produces must agree (rtol = atol = 1e-4) with the ``approx_fprime``
    estimate for the sum of the forward outputs. On disagreement both
    gradients and their difference are printed and the assertion fails.
    """
    # Look up all forward/derivative pairs up front, in declaration order.
    candidates = [
        (getattr(HANDLER, act_name), getattr(HANDLER, act_name + "_deriv"))
        for act_name in ("sigmoid", "tanh", "rel")
    ]
    dims = (3, 2, 4)

    for act_fwd, act_bwd in candidates:
        random_values = np.random.randn(*dims)
        inputs = HANDLER.create_from_numpy(random_values)
        outputs = HANDLER.zeros(dims)
        doutputs = HANDLER.ones(dims)
        dinputs = HANDLER.zeros(dims)

        act_fwd(inputs, outputs)
        act_bwd(inputs, outputs, doutputs, dinputs)
        computed = HANDLER.get_numpy_copy(dinputs)

        total = inputs.size
        flat_start = HANDLER.get_numpy_copy(inputs).reshape((total, ))

        def objective(point):
            # Load ``point`` into the input buffer via a flat reshape of it
            # (presumably a view), clear the outputs and run the forward
            # function again; the scalar objective is the output sum.
            HANDLER.set_from_numpy(inputs.reshape((total, )), point)
            HANDLER.fill(outputs, 0.)
            act_fwd(inputs, outputs)
            result = HANDLER.get_numpy_copy(outputs)
            return result.sum()

        estimated = approx_fprime(flat_start, objective, 1e-5)
        estimated = estimated.reshape(computed.shape)

        agree = np.allclose(estimated, computed, rtol=1e-4, atol=1e-4)
        if agree:
            continue

        print("-----------------------------")
        print("Testing", act_fwd.__name__)
        print('-- Approximated Gradient ----')
        print(estimated)
        print('---- Calculated Gradient ----')
        print(computed)
        print('------------- Difference ----')
        print(estimated - computed)
        assert agree