Example No. 1
def sanity_layers(data):
    """
    Check implementation of the forward and backward pass for all activations.
    """

    # Set the seed to reproduce results.
    np.random.seed(42)

    # Pseudo-input.
    random_input = np.random.randn(1, 50)

    # Get the activations.
    act_sigmoid = neuralnet.Activation('sigmoid')
    act_tanh = neuralnet.Activation('tanh')
    act_ReLU = neuralnet.Activation('ReLU')
    act_leakyReLU = neuralnet.Activation('leakyReLU')

    # Get the outputs for forward-pass.
    out_sigmoid = act_sigmoid(random_input)
    out_tanh = act_tanh(random_input)
    out_ReLU = act_ReLU(random_input)
    out_leakyReLU = act_leakyReLU(random_input)

    # Compute the errors.
    err_sigmoid = np.sum(np.abs(data['out_sigmoid'] - out_sigmoid))
    err_tanh = np.sum(np.abs(data['out_tanh'] - out_tanh))
    err_ReLU = np.sum(np.abs(data['out_ReLU'] - out_ReLU))
    err_leakyReLU = np.sum(np.abs(data['out_leakyReLU'] - out_leakyReLU))

    # Check the errors.
    check_error(err_sigmoid, "Sigmoid Forward Pass")
    check_error(err_tanh, "Tanh Forward Pass")
    check_error(err_ReLU, "ReLU Forward Pass")
    check_error(err_leakyReLU, "leakyReLU Forward Pass")

    print(20 * "-", "\n")

    # Compute the gradients.
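    # Passing an upstream delta of 1.0 means each backward call effectively
    # returns the activation's own derivative at its cached input.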
    grad_sigmoid = act_sigmoid.backward(1.0, 1)
    grad_tanh = act_tanh.backward(1.0, 1)
    grad_ReLU = act_ReLU.backward(1.0, 1)
    grad_leakyReLU = act_leakyReLU.backward(1.0, 1)

    # Compute the errors.
    err_sigmoid_grad = np.sum(np.abs(data['grad_sigmoid'] - grad_sigmoid))
    err_tanh_grad = np.sum(np.abs(data['grad_tanh'] - grad_tanh))
    err_ReLU_grad = np.sum(np.abs(data['grad_ReLU'] - grad_ReLU))
    err_leakyReLU_grad = np.sum(np.abs(data['grad_leakyReLU'] -
                                       grad_leakyReLU))

    # Check the errors.
    check_error(err_sigmoid_grad, "Sigmoid Gradient")
    check_error(err_tanh_grad, "Tanh Gradient")
    check_error(err_ReLU_grad, "ReLU Gradient")
    check_error(err_leakyReLU_grad, "leakyReLU Gradient")

    print(20 * "-", "\n")
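Each of these examples calls a check_error helper that is not shown in the snippets. A minimal sketch, assuming it simply reports whether the accumulated absolute error falls below a small tolerance (the signature and threshold here are assumptions):

def check_error(error, name, tol=1e-6):
    # Hypothetical helper: treat the summed absolute error as a pass/fail test.
    status = "PASSED" if error < tol else "FAILED"
    print("{}: error = {:.3e} ... {}".format(name, error, status))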
Example No. 2

def main():
    # make_pickle()
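    # encoding='latin1' lets Python 3 unpickle a file that was written by Python 2.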
    benchmark_data = pickle.load(open('validate_data.pkl', 'rb'),
                                 encoding='latin1')

    config = {}
    # layer_specs: the first entry is the input size, the last is the output
    # size, and the entries in between are the hidden-layer sizes.
    config['layer_specs'] = [784, 100, 100, 10]
    # Activation for the hidden layers: 'sigmoid', 'tanh' or 'ReLU'.
    config['activation'] = 'sigmoid'
    # Number of training samples per batch passed to the network.
    config['batch_size'] = 1000
    # Number of epochs to train the model.
    config['epochs'] = 50
    # Whether to use early stopping.
    config['early_stop'] = True
    # Number of consecutive epochs of increasing validation loss counted as overfitting.
    config['early_stop_epoch'] = 5
    # L2 regularization constant.
    config['L2_penalty'] = 0
    # Whether momentum is applied, and the momentum constant 'gamma'.
    config['momentum'] = False
    config['momentum_gamma'] = 0.9

    np.random.seed(42)
    x = np.random.randn(1, 100)
    act_sigmoid = neuralnet.Activation('sigmoid')
    act_tanh = neuralnet.Activation('tanh')
    act_ReLU = neuralnet.Activation('ReLU')

    out_sigmoid = act_sigmoid.forward_pass(x)
    err_sigmoid = np.sum(np.abs(benchmark_data['out_sigmoid'] - out_sigmoid))
    check_error(err_sigmoid, "Sigmoid Forward Pass")

    out_tanh = act_tanh.forward_pass(x)
    err_tanh = np.sum(np.abs(benchmark_data['out_tanh'] - out_tanh))
    check_error(err_tanh, "Tanh Forward Pass")

    out_ReLU = act_ReLU.forward_pass(x)
    err_ReLU = np.sum(np.abs(benchmark_data['out_ReLU'] - out_ReLU))
    check_error(err_ReLU, "ReLU Forward Pass")

    print("**************")

    grad_sigmoid = act_sigmoid.backward_pass(1.0)
    err_sigmoid_grad = np.sum(
        np.abs(benchmark_data['grad_sigmoid'] - grad_sigmoid))
    check_error(err_sigmoid_grad, "Sigmoid Gradient")

    grad_tanh = act_tanh.backward_pass(1.0)
    err_tanh_grad = np.sum(np.abs(benchmark_data['grad_tanh'] - grad_tanh))
    check_error(err_tanh_grad, "Tanh Gradient")

    grad_ReLU = act_ReLU.backward_pass(1.0)
    err_ReLU_grad = np.sum(np.abs(benchmark_data['grad_ReLU'] - grad_ReLU))
    check_error(err_ReLU_grad, "ReLU Gradient")

    np.random.seed(42)
    x_image = np.random.randn(1, 784)

    nnet = neuralnet.Neuralnetwork(config)
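    # One forward/backward pass on a single image, with a one-hot target for class 0.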
    nnet.forward_pass(x_image,
                      targets=np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
    nnet.backward_pass()

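    # Compare every fully-connected layer's cached input, parameters,
    # and parameter gradients against the benchmark network.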
    layer_no = 0
    for layer_idx, layer in enumerate(nnet.layers):
        if isinstance(layer, neuralnet.Layer):
            layer_no += 1
            error_x = np.sum(
                np.abs(benchmark_data['nnet'].layers[layer_idx].x - layer.x))
            error_w = np.sum(
                np.abs(benchmark_data['nnet'].layers[layer_idx].w - layer.w))
            error_b = np.sum(
                np.abs(benchmark_data['nnet'].layers[layer_idx].b - layer.b))
            error_d_w = np.sum(
                np.abs(benchmark_data['nnet'].layers[layer_idx].d_w -
                       layer.d_w))
            error_d_b = np.sum(
                np.abs(benchmark_data['nnet'].layers[layer_idx].d_b -
                       layer.d_b))

            check_error(error_x, "Layer{} Input".format(layer_no))
            check_error(error_w, "Layer{} Weights".format(layer_no))
            check_error(error_b, "Layer{} Biases".format(layer_no))
            check_error(error_d_w, "Layer{} Weight Gradient".format(layer_no))
            check_error(error_d_b, "Layer{} Bias Gradient".format(layer_no))
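The snippet defines main() but does not show how it is called; presumably the script ends with the standard entry-point guard (an assumption, since it is not part of the excerpt):

if __name__ == '__main__':
    main()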
Example No. 3

def main():
    # make_pickle()
    benchmark_data = pickle.load(open('validate_data.pkl', 'rb'),
                                 encoding='latin1')
    config = {}
    # layer_specs: input size, hidden-layer size, output size.
    config['layer_specs'] = [784, 50, 10]
    # Activation for the hidden layer: 'sigmoid', 'tanh' or 'ReLU'.
    config['activation'] = 'tanh'
    input_size = config['layer_specs'][0]
    hidden_size = config['layer_specs'][1]
    output_size = config['layer_specs'][2]

    np.random.seed(42)
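    # w1 and w2 are fully random; b1 and b2 broadcast a single random scalar
    # across every unit in their layer.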
    w1 = np.random.randn(input_size, hidden_size)  # Weight matrix 1
    b1 = np.random.randn(1, 1).dot(
        np.ones((1, hidden_size)).astype(np.float32))  # Bias vector 1
    w2 = np.random.randn(hidden_size, output_size)  # Weight matrix 2
    b2 = np.random.randn(1, 1).dot(
        np.ones((1, output_size)).astype(np.float32))  # Bias vector 2

    X_test, y_test = neuralnet.load_data('MNIST_test.pkl')  # test data
    y_test_onehot = [[1 if i == y else 0 for i in range(10)] for y in y_test]
    act = neuralnet.Activation('tanh')
    epsilon = 0.01  # perturbation size for the numerical gradient check
    x_index, y_index = 0, 3
    # op selects which parameter is perturbed:
    # 0: input-to-hidden weight (w1); 1: hidden-to-output weight (w2);
    # 2: hidden-layer bias (b1); 3: output-layer bias (b2)
    op = 0

    # ops[op] gives the +/- epsilon offsets applied to (w1, w2, b1, b2) below.
    ops = [[epsilon, 0, 0, 0], [0, epsilon, 0, 0], [0, 0, epsilon, 0],
           [0, 0, 0, epsilon]]

    w1[x_index][y_index] += ops[op][0]  # increase the weight by epsilon
    w2[x_index][y_index] += ops[op][1]
    for b in b1:
        b += ops[op][2]
    for b in b2:
        b += ops[op][3]
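    # Forward pass and loss with the +epsilon parameters.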
    input_h = np.dot(X_test[0], w1) + b1
    output_h = act.sigmoid(input_h)
    input_o = np.dot(output_h, w2) + b2
    output_o = act.sigmoid(input_o)
    loss1 = loss_func(output_o, y_test_onehot[0])

    w1[x_index][y_index] -= 2 * ops[op][0]  # decrease the weight by 2 epsilon
    w2[x_index][y_index] -= 2 * ops[op][1]
    for b in b1:
        b -= 2 * ops[op][2]
    for b in b2:
        b -= 2 * ops[op][3]
    input_h = np.dot(X_test[0], w1) + b1
    output_h = act.sigmoid(input_h)
    input_o = np.dot(output_h, w2) + b2
    output_o = act.sigmoid(input_o)
    loss2 = loss_func(output_o, y_test_onehot[0])
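    # Central-difference estimate: (L(w + epsilon) - L(w - epsilon)) / (2 * epsilon).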
    diff1 = (loss1 - loss2) / (2 * epsilon)

    w1[x_index][y_index] += ops[op][0]  # recover the weight
    w2[x_index][y_index] += ops[op][1]
    for b in b1:
        b += ops[op][2]
    for b in b2:
        b += ops[op][3]
    input_h = np.dot(X_test[0], w1) + b1
    output_h = act.sigmoid(input_h)
    input_o = np.dot(output_h, w2) + b2
    output_o = act.sigmoid(input_o)

    # Analytical gradient (from the backprop equations) for the perturbed parameter.
    # op == 0: input-to-hidden weight w1[x_index][y_index]
    if op == 0:
        ans = 0
        for l in range(10):
            ans += (w2[y_index][l] * (output_o[0][l] - y_test_onehot[0][l]) *
                    output_o[0][l] * (1 - output_o[0][l]))
        ans *= (output_h[0][y_index] * (1 - output_h[0][y_index]) *
                X_test[0][x_index])
        diff2 = ans
    # op == 1: hidden-to-output weight w2[x_index][y_index]
    elif op == 1:
        diff2 = (output_o[0][y_index] -
                 y_test_onehot[0][y_index]) * output_o[0][y_index] * (
                     1 - output_o[0][y_index]) * output_h[0][x_index]
    # op == 2: hidden-layer bias b1 (shared scalar across all hidden units)
    elif op == 2:
        diff2 = 0
        for i in range(50):
            ans = 0
            for l in range(10):
                ans += (w2[i][l] * (output_o[0][l] - y_test_onehot[0][l]) *
                        output_o[0][l] * (1 - output_o[0][l]))
            ans *= output_h[0][i] * (1 - output_h[0][i])
            diff2 += ans
    # op == 3: output-layer bias b2
    else:
        diff2 = 0
        for i in range(10):
            diff2 += (output_o[0][i] - y_test_onehot[0][i]
                      ) * output_o[0][i] * (1 - output_o[0][i])

    print("numerical gradient:", abs(diff1), "analytical gradient:", abs(diff2))
    print("difference:", abs(diff1 - diff2))