Example #1
def test_sgd_update():
    net = nn.layers.LinearLayer(100, 10)
    learning_rate = 1
    optimizer = sgd_optimizer.SGDOptimizer(net.parameters(), learning_rate)

    data = np.random.random((20, 100)).astype(np.float32) * 2 - 1

    initial_weight = net.weight.data.copy()
    initial_bias = net.bias.data.copy()

    torch_net = TorchNet()
    # torch stores Linear weights as (out_features, in_features), hence the transpose
    with torch.no_grad():
        torch_net.layer.weight[:] = utils.from_numpy(net.weight.data.T)
        torch_net.layer.bias[:] = utils.from_numpy(net.bias.data)
    torch_optimizer = torch.optim.SGD(torch_net.parameters(), learning_rate)

    optimizer.zero_grad()
    out = net(data)
    loss = out.sum()
    # d(sum)/d(out) is all ones
    net.backward(np.ones_like(out))

    torch_optimizer.zero_grad()
    torch_out = torch_net(utils.from_numpy(data))
    assert np.allclose(out,
                       utils.to_numpy(torch_out.clone().detach()),
                       atol=0.001)

    torch_loss = torch_out.sum()
    assert np.allclose(loss, torch_loss.item(), atol=0.001)
    torch_loss.backward()

    assert np.allclose(net.weight.grad.T,
                       utils.to_numpy(torch_net.layer.weight.grad))
    assert np.allclose(net.bias.grad,
                       utils.to_numpy(torch_net.layer.bias.grad))

    optimizer.step()
    torch_optimizer.step()

    assert np.allclose(net.weight.data.T,
                       utils.to_numpy(torch_net.layer.weight))
    assert np.allclose(net.bias.data, utils.to_numpy(torch_net.layer.bias))

    assert not np.allclose(net.weight.data, initial_weight)
    assert not np.allclose(net.bias.data, initial_bias)
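For reference, the update these assertions exercise is plain SGD, p <- p - lr * grad. A minimal numpy sketch under that assumption (Param and sgd_step are illustrative stand-ins, not the tested SGDOptimizer):

import numpy as np

class Param:
    # stand-in for the parameter objects the optimizer iterates over
    def __init__(self, data):
        self.data = data
        self.grad = np.zeros_like(data)

def sgd_step(params, lr):
    # vanilla SGD: p <- p - lr * dL/dp
    for p in params:
        p.data -= lr * p.grad

w = Param(np.ones((3, 2), dtype=np.float32))
w.grad[:] = 0.5
sgd_step([w], lr=1.0)
assert np.allclose(w.data, 0.5)  # 1 - 1.0 * 0.5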
Example #2
def _test_linear_backward(input_shape, out_channels):
    in_channels = input_shape[1]
    input = np.random.random(input_shape).astype(np.float32) * 20
    layer = LinearLayer(in_channels, out_channels)

    torch_layer = nn.Linear(in_channels, out_channels, bias=True)
    with torch.no_grad():
        torch_layer.weight[:] = torch.from_numpy(layer.weight.data).transpose(0, 1)
        torch_layer.bias[:] = torch.from_numpy(layer.bias.data)

    output = layer.forward(input)
    out_grad = layer.backward(np.ones_like(output))

    torch_input = utils.from_numpy(input).requires_grad_(True)
    torch_out = torch_layer(torch_input)
    torch_out.sum().backward()

    torch_out_grad = utils.to_numpy(torch_input.grad)
    # flush near-zero entries so sign noise around 0 cannot trip the comparison
    out_grad[np.abs(out_grad) < 1e-4] = 0
    torch_out_grad[np.abs(torch_out_grad) < 1e-4] = 0
    assert np.allclose(out_grad, torch_out_grad, atol=TOLERANCE)

    w_grad = layer.weight.grad
    w_grad[np.abs(w_grad) < 1e-4] = 0
    torch_w_grad = utils.to_numpy(torch_layer.weight.grad.transpose(0, 1))
    torch_w_grad[np.abs(torch_w_grad) < 1e-4] = 0
    assert np.allclose(w_grad, torch_w_grad, atol=TOLERANCE)

    b_grad = layer.bias.grad
    b_grad[np.abs(b_grad) < 1e-4] = 0
    torch_b_grad = utils.to_numpy(torch_layer.bias.grad)
    torch_b_grad[np.abs(torch_b_grad) < 1e-4] = 0
    assert np.allclose(b_grad, torch_b_grad, atol=TOLERANCE)
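The gradients the asserts above compare follow from y = xW + b, with the layer storing W as (in_features, out_features); that layout is why the torch weight is transposed when copied in. A hedged numpy sketch of the same backward pass (linear_backward is an illustrative helper, not the layer's actual method):

import numpy as np

def linear_backward(x, W, grad_out):
    # forward was y = x @ W + b
    grad_x = grad_out @ W.T        # dL/dx, shape (N, in_features)
    grad_W = x.T @ grad_out        # dL/dW, shape (in_features, out_features)
    grad_b = grad_out.sum(axis=0)  # dL/db, shape (out_features,)
    return grad_x, grad_W, grad_b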
Example #3
def _test_backward(input_shape, reduction, axis):

    layer = SoftmaxCrossEntropyLossLayer(reduction=reduction)
    data = np.random.random(input_shape) * 2 - 1
    labels_shape = list(data.shape)
    labels_shape.pop(axis)
    labels = np.random.randint(0, data.shape[axis], labels_shape)
    loss = layer(data, labels, axis=axis)
    if axis == 1:
        torch_input = utils.from_numpy(data).requires_grad_(True)
    else:
        torch_input = utils.from_numpy(np.moveaxis(data, axis,
                                                   1)).requires_grad_(True)
    pytorch_loss = F.cross_entropy(torch_input,
                                   utils.from_numpy(labels),
                                   reduction=reduction)
    if len(pytorch_loss.shape) > 0:
        pytorch_loss.sum().backward()
    else:
        pytorch_loss.backward()

    assert np.allclose(loss, utils.to_numpy(pytorch_loss))

    grad = layer.backward()
    torch_grad = utils.to_numpy(torch_input.grad)
    if axis != 1:
        torch_grad = np.moveaxis(torch_grad, 1, axis)

    assert np.allclose(grad, torch_grad, atol=0.001)
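The gradient being checked is the classic softmax-cross-entropy result: softmax(x) minus the one-hot target, scaled by 1/N under mean reduction. A minimal 2-D sketch under that assumption (softmax_ce_backward is illustrative, not the layer under test):

import numpy as np

def softmax_ce_backward(logits, labels, reduction="mean"):
    # logits: (N, C); labels: (N,) integer class ids
    shifted = logits - logits.max(axis=1, keepdims=True)  # numerical stability
    probs = np.exp(shifted)
    probs /= probs.sum(axis=1, keepdims=True)
    probs[np.arange(len(labels)), labels] -= 1.0  # softmax(x) - one_hot(y)
    if reduction == "mean":
        probs /= len(labels)
    return probs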
Example #4
def _test_relu_forward(input_shape, out_channels):
    input = np.random.random(input_shape).astype(np.float32) * 20
    original_input = input.copy()
    layer = ReLULayer()

    torch_layer = nn.ReLU()

    output = layer.forward(input)

    torch_data = utils.from_numpy(input)
    torch_out = utils.to_numpy(torch_layer(torch_data))
    output[np.abs(output) < 1e-4] = 0
    torch_out[np.abs(torch_out) < 1e-4] = 0

    assert np.all(input == original_input)
    assert output.shape == torch_out.shape
    assert np.allclose(output, torch_out, atol=TOLERANCE)
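ReLU's forward pass is just elementwise max(x, 0); the assumed numpy equivalent of what the layer computes:

import numpy as np

def relu_forward(x):
    # returns a new array; the assert above checks the input is not modified in place
    return np.maximum(x, 0)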
Example #5
def _test_max_pool_backward(input_shape, kernel_size, stride):
    np.random.seed(0)
    torch.manual_seed(0)
    padding = (kernel_size - 1) // 2
    input = np.random.random(input_shape).astype(np.float32) * 20
    layer = MaxPoolLayer(kernel_size, stride)

    torch_layer = nn.MaxPool2d(kernel_size, stride, padding)

    output = layer.forward(input)
    # upstream gradient: d(2 * mean(output)) / d(output) = 2 / output.size
    out_grad = layer.backward(2 * np.ones_like(output) / output.size)

    torch_input = utils.from_numpy(input).requires_grad_(True)
    torch_out = torch_layer(torch_input)
    (2 * torch_out.mean()).backward()

    torch_out_grad = utils.to_numpy(torch_input.grad)
    utils.assert_close(out_grad, torch_out_grad, atol=TOLERANCE)
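Max-pool backward routes each output gradient back to the location of the maximum inside its window. A sketch for the simplest case, assuming stride == kernel_size and no padding (unlike the padded torch layer above), with ties splitting the gradient evenly:

import numpy as np

def maxpool2d_backward(x, grad_out, k):
    n, c, h, w = x.shape
    xr = x.reshape(n, c, h // k, k, w // k, k)
    mx = xr.max(axis=(3, 5), keepdims=True)
    mask = (xr == mx).astype(x.dtype)
    mask /= mask.sum(axis=(3, 5), keepdims=True)  # split gradient over ties
    g = grad_out.reshape(n, c, h // k, 1, w // k, 1)
    return (mask * g).reshape(x.shape)

Note that PyTorch routes the gradient to a single argmax rather than splitting over ties; with random float inputs exact ties are vanishingly rare, so the two conventions agree in practice.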
Example #6
def _test_forward_overflow(input_shape, reduction, axis):
    layer = SoftmaxCrossEntropyLossLayer(reduction=reduction)
    # huge logits: a naive softmax would overflow without the max-subtraction trick
    data = np.random.random(input_shape) * 10000 - 1
    labels_shape = list(data.shape)
    labels_shape.pop(axis)
    labels = np.random.randint(0, data.shape[axis], labels_shape)
    loss = layer(data, labels, axis=axis)
    if axis == 1:
        pytorch_loss = F.cross_entropy(utils.from_numpy(data),
                                       utils.from_numpy(labels),
                                       reduction=reduction)
    else:
        pytorch_loss = F.cross_entropy(utils.from_numpy(data.swapaxes(1, axis)),
                                       utils.from_numpy(labels),
                                       reduction=reduction)
    pytorch_loss = utils.to_numpy(pytorch_loss)

    assert np.allclose(loss, pytorch_loss, atol=0.001)
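This test only passes if the layer applies the usual log-sum-exp shift: subtracting the per-row maximum before exponentiating leaves the result unchanged but keeps exp in range. A hedged sketch of that trick:

import numpy as np

def stable_log_softmax(logits, axis=1):
    # exp(shifted) <= 1, so no overflow; the shift cancels in the normalization
    shifted = logits - logits.max(axis=axis, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))

Cross-entropy is then the negated log-softmax picked out at each label index.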
Example #7
def _test_max_pool_forward(input_shape, kernel_size, stride):
    np.random.seed(0)
    torch.manual_seed(0)
    padding = (kernel_size - 1) // 2
    input = np.random.random(input_shape).astype(np.float32) * 20
    original_input = input.copy()
    layer = MaxPoolLayer(kernel_size, stride)

    torch_layer = nn.MaxPool2d(kernel_size, stride, padding)
    output = layer.forward(input)

    torch_data = utils.from_numpy(input)
    torch_out = utils.to_numpy(torch_layer(torch_data))
    output[np.abs(output) < 1e-4] = 0
    torch_out[np.abs(torch_out) < 1e-4] = 0

    assert np.all(input == original_input)
    assert output.shape == torch_out.shape
    utils.assert_close(output, torch_out, atol=TOLERANCE)
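For the non-overlapping case (stride == kernel_size, no padding), max-pool forward reduces to a reshape plus a windowed max; a minimal sketch under those assumptions:

import numpy as np

def maxpool2d_forward(x, k):
    n, c, h, w = x.shape  # assumes h and w are divisible by k
    return x.reshape(n, c, h // k, k, w // k, k).max(axis=(3, 5))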
Example #8
def _test_linear_forward(input_shape, out_channels):
    in_channels = input_shape[1]
    input = np.random.random(input_shape).astype(np.float32) * 20
    original_input = input.copy()
    layer = LinearLayer(in_channels, out_channels)

    torch_layer = nn.Linear(in_channels, out_channels, bias=True)
    with torch.no_grad():
        torch_layer.weight[:] = torch.from_numpy(layer.weight.data).transpose(0, 1)
        torch_layer.bias[:] = torch.from_numpy(layer.bias.data)

    output = layer.forward(input)

    torch_data = utils.from_numpy(input)
    torch_out = utils.to_numpy(torch_layer(torch_data))
    output[np.abs(output) < 1e-4] = 0
    torch_out[np.abs(torch_out) < 1e-4] = 0

    assert np.all(input == original_input)
    assert output.shape == torch_out.shape
    assert np.allclose(output, torch_out, atol=TOLERANCE)
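The forward pass mirrored here is y = xW + b; as in the backward test, the transpose when seeding torch_layer implies the layer stores its weight as (in_features, out_features). A one-line sketch under that assumption:

import numpy as np

def linear_forward(x, W, b):
    # x: (N, in_features), W: (in_features, out_features), b: (out_features,)
    return x @ W + b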
Example #9
def _test_relu_backward(input_shape, out_channels):
    input = np.random.random(input_shape).astype(np.float32) * 20
    layer = ReLULayer()

    torch_layer = nn.ReLU()

    output = layer.forward(input)
    out_grad = layer.backward(np.ones_like(output))

    torch_input = utils.from_numpy(input).requires_grad_(True)
    torch_out = torch_layer(torch_input)
    torch_out.sum().backward()

    torch_out_grad = utils.to_numpy(torch_input.grad)
    out_grad[np.abs(out_grad) < 1e-4] = 0
    torch_out_grad[np.abs(torch_out_grad) < 1e-4] = 0
    assert np.allclose(out_grad, torch_out_grad, atol=TOLERANCE)
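ReLU's backward pass just masks the upstream gradient where the input was non-positive; the assumed numpy equivalent:

import numpy as np

def relu_backward(x, grad_out):
    # dReLU/dx is 1 where x > 0, else 0
    return grad_out * (x > 0)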