Code example #1
def _test_backward(input_shape, reduction, axis):
    layer = SoftmaxCrossEntropyLossLayer(reduction=reduction)
    data = np.random.random(input_shape) * 2 - 1
    labels_shape = list(data.shape)
    labels_shape.pop(axis)
    labels = np.random.randint(0, data.shape[axis],
                               labels_shape).astype(np.int64)
    loss = layer(data, labels, axis=axis)
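    # F.cross_entropy expects the class axis at dim 1, so move it there first.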
    if axis == 1:
        torch_input = utils.from_numpy(data).requires_grad_(True)
    else:
        torch_input = utils.from_numpy(
            np.moveaxis(data, axis, 1)).requires_grad_(True)
    pytorch_loss = F.cross_entropy(torch_input,
                                   utils.from_numpy(labels),
                                   reduction=reduction)
    if len(pytorch_loss.shape) > 0:
        pytorch_loss.sum().backward()
    else:
        pytorch_loss.backward()

    utils.assert_close(loss, utils.to_numpy(pytorch_loss))

    grad = layer.backward()
    torch_grad = utils.to_numpy(torch_input.grad)
    if axis != 1:
        torch_grad = np.moveaxis(torch_grad, 1, axis)

    utils.assert_close(grad, torch_grad, atol=0.001)
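
These tests lean on a shared utils module that is not shown here. A minimal sketch of what its interop helpers presumably look like, assuming plain NumPy/PyTorch bridging (the project's actual implementations may differ):

import numpy as np
import torch


def from_numpy(array):
    # Bridge a NumPy array into a PyTorch tensor.
    return torch.from_numpy(np.ascontiguousarray(array))


def to_numpy(tensor):
    return tensor.detach().cpu().numpy()


def assert_close(actual, expected, atol=1e-5):
    # Accept either a NumPy array or a tensor for the expected value.
    if isinstance(expected, torch.Tensor):
        expected = to_numpy(expected)
    np.testing.assert_allclose(actual, expected, atol=atol)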
Code example #2
def _test_conv_forward(input_shape, out_channels, kernel_size, stride):
    np.random.seed(0)
    torch.manual_seed(0)
    in_channels = input_shape[1]
    padding = (kernel_size - 1) // 2
    input = np.random.random(input_shape).astype(np.float32) * 20
    original_input = input.copy()
    layer = ConvLayer(in_channels, out_channels, kernel_size, stride)

    torch_layer = nn.Conv2d(in_channels,
                            out_channels,
                            kernel_size,
                            stride,
                            padding,
                            bias=True)
    utils.assign_conv_layer_weights(layer, torch_layer)

    output = layer.forward(input)

    torch_data = utils.from_numpy(input)
    torch_out = torch_layer(torch_data)

    assert np.all(input == original_input)
    assert output.shape == torch_out.shape
    utils.assert_close(output, torch_out, atol=TOLERANCE)
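
assign_conv_layer_weights is assumed to copy the freshly initialized nn.Conv2d parameters into the custom layer so both paths compute with identical weights. A hedged sketch; the weight/bias attribute names on ConvLayer are assumptions, not confirmed by the tests:

def assign_conv_layer_weights(layer, torch_layer):
    # Attribute names on the custom NumPy layer are hypothetical.
    layer.weight = torch_layer.weight.detach().numpy().copy()
    layer.bias = torch_layer.bias.detach().numpy().copy()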
Code example #3
def _test_conv_backward(input_shape, out_channels, kernel_size, stride):
    np.random.seed(0)
    torch.manual_seed(0)
    in_channels = input_shape[1]
    padding = (kernel_size - 1) // 2
    input = np.random.random(input_shape).astype(np.float32) * 20
    layer = ConvLayer(in_channels, out_channels, kernel_size, stride)

    torch_layer = nn.Conv2d(in_channels,
                            out_channels,
                            kernel_size,
                            stride,
                            padding,
                            bias=True)
    utils.assign_conv_layer_weights(layer, torch_layer)

    output = layer.forward(input)
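    # Upstream gradient of 2 * mean(output): d(2 * mean)/d(output) = 2 / output.size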
    out_grad = layer.backward(2 * np.ones_like(output) / output.size)

    torch_input = utils.from_numpy(input).requires_grad_(True)
    torch_out = torch_layer(torch_input)
    (2 * torch_out.mean()).backward()

    utils.assert_close(out_grad, torch_input.grad, atol=TOLERANCE)
    utils.check_conv_grad_match(layer, torch_layer)
Code example #4
def test_reduce_catted_sequences(data, batch_sizes, in_dim, hidden_dim,
                                 device):
    sequences = [[
        torch.randn((token_size, in_dim), requires_grad=True, device=device)
        for token_size in data.draw(
            token_size_lists(max_token_size=TINY_TOKEN_SIZE,
                             max_batch_size=TINY_BATCH_SIZE))
    ] for _ in batch_sizes]
    inputs = [token for sequence in sequences for token in sequence]
    catted_sequences = [
        cat_sequence(sequence, device=device) for sequence in sequences
    ]
    packed_sequences = [
        pack_sequence(sequence, device=device) for sequence in sequences
    ]

    rnn = nn.LSTM(
        input_size=in_dim,
        hidden_size=hidden_dim,
        bidirectional=True,
        bias=True,
    ).to(device=device)

    reduction_pack = reduce_catted_sequences(catted_sequences, device=device)
    _, (actual, _) = rnn(reduction_pack)
    actual = rearrange(actual, 'd n x -> n (d x)')

    expected = []
    for pack in packed_sequences:
        _, (t, _) = rnn(pack)
        expected.append(rearrange(t, 'd n x -> n (d x)'))
    expected = pack_sequence(expected).data

    assert_close(actual, expected, check_stride=False)
    assert_grad_close(actual, expected, inputs=inputs)
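
The rearrange pattern 'd n x -> n (d x)' concatenates the forward and backward final hidden states of the bidirectional LSTM per batch element. A quick shape check with einops:

import torch
from einops import rearrange

h = torch.randn(2, 3, 5)        # (directions, batch, hidden)
merged = rearrange(h, 'd n x -> n (d x)')
assert merged.shape == (3, 10)  # both directions side by side per element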
Code example #5
def test_pad_sequence(data, token_sizes, dim, batch_first, device):
    inputs = [
        torch.randn((token_size, dim), device=device, requires_grad=True)
        for token_size in token_sizes
    ]

    actual = rua.pad_sequence(inputs, batch_first=batch_first)
    expected = tgt.pad_sequence(inputs, batch_first=batch_first)

    assert_close(actual, expected)
    assert_grad_close(actual, expected, inputs=inputs)
Code example #6
def test_tree_reduce_catted_sequence(data, token_sizes, dim, device):
    inputs = [
        torch.randn((token_size, dim), device=device, requires_grad=True)
        for token_size in token_sizes
    ]

    expected = pad_sequence(inputs, device=device).sum(dim=0)

    catted_sequence, token_sizes = cat_sequence(inputs, device=device)
    indices = tree_reduce_catted_indices(token_sizes=token_sizes)
    actual = tree_reduce_sequence(torch.add)(catted_sequence.data, indices)

    assert_close(actual, expected)
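
torchrua's tree_reduce_* functions precompute index plans for the reduction, but the underlying idea is an ordinary pairwise tree reduction: combine elements in log-depth rounds rather than in one sequential chain. An illustrative sketch of that idea on a plain Python list (not torchrua's API):

def tree_reduce(op, items):
    # Reduce pairwise until one element remains: about log2(n) rounds
    # instead of a left-to-right chain of n - 1 dependent operations.
    while len(items) > 1:
        paired = [op(a, b) for a, b in zip(items[0::2], items[1::2])]
        if len(items) % 2 == 1:
            paired.append(items[-1])
        items = paired
    return items[0]

assert tree_reduce(lambda a, b: a + b, [1, 2, 3, 4, 5]) == 15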
Code example #7
def _test_backward_approx(layer, data_shape):
    h = 1e-4
    data = np.random.random(data_shape) * 10 - 5
    data[np.abs(data) < h] = 1
    output1 = layer.forward(data + h)
    output2 = layer.forward(data - h)

    output = layer.forward(data)
    previous_partial_gradient = np.ones_like(output)

    output_gradient = layer.backward(previous_partial_gradient)

    utils.assert_close((output1 - output2) / (2 * h), output_gradient)
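
This helper validates an elementwise layer's backward pass against the central-difference estimate f'(x) ≈ (f(x+h) - f(x-h)) / (2h); setting inputs within h of zero to 1 keeps the estimate away from non-differentiable kinks such as ReLU's at 0. A self-contained version of the same check, with a toy ReLU layer standing in for `layer`:

import numpy as np

class ReLULayer:
    # Toy elementwise layer standing in for the layers under test.
    def forward(self, x):
        self.mask = x > 0
        return np.maximum(x, 0)

    def backward(self, grad):
        return grad * self.mask

h = 1e-4
data = np.random.random((4, 8)) * 10 - 5
data[np.abs(data) < h] = 1        # keep inputs away from the kink at 0
layer = ReLULayer()
numeric = (layer.forward(data + h) - layer.forward(data - h)) / (2 * h)
layer.forward(data)               # cache state for backward
analytic = layer.backward(np.ones_like(data))
np.testing.assert_allclose(numeric, analytic, atol=1e-6)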
Code example #8
File: test_catting.py Project: speedcell4/torchrua
def test_cat_packed_sequence(data, token_sizes, dim, device):
    inputs = [
        torch.randn((token_size, dim), device=device, requires_grad=True)
        for token_size in token_sizes
    ]
    packed_sequence = tgt.pack_sequence(inputs, enforce_sorted=False)

    actual_data, actual_token_sizes = rua.cat_sequence(inputs, device=device)
    expected_data, expected_token_sizes = rua.cat_packed_sequence(
        packed_sequence, device=device)

    assert_close(actual_data, expected_data)
    assert_equal(actual_token_sizes, expected_token_sizes)
    assert_grad_close(actual_data, expected_data, inputs=inputs)
Code example #9
File: test_indexing.py Project: speedcell4/torchrua
def test_select_last(data, token_sizes, dim, unsort, device):
    inputs = [
        torch.randn((token_size, dim), device=device, requires_grad=True)
        for token_size in token_sizes
    ]
    packed_sequence = pack_sequence(inputs, enforce_sorted=False)

    actual = select_last(sequence=packed_sequence, unsort=unsort)
    if not unsort:
        actual = actual[packed_sequence.unsorted_indices]

    expected = torch.stack([sequence[-1] for sequence in inputs], dim=0)

    assert_close(actual, expected)
    assert_grad_close(actual, expected, inputs=inputs)
Code example #10
File: test_catting.py Project: speedcell4/torchrua
def test_cat_padded_sequence(data, token_sizes, dim, batch_first, device):
    inputs = [
        torch.randn((token_size, dim), device=device, requires_grad=True)
        for token_size in token_sizes
    ]
    padded_sequence = tgt.pad_sequence(inputs, batch_first=batch_first)
    token_sizes = torch.tensor(token_sizes, device=device)

    actual_data, actual_token_sizes = rua.cat_sequence(inputs, device=device)
    expected_data, expected_token_sizes = rua.cat_padded_sequence(
        padded_sequence, token_sizes, batch_first=batch_first, device=device)

    assert_close(actual_data, expected_data)
    assert_equal(actual_token_sizes, expected_token_sizes)
    assert_grad_close(actual_data, expected_data, inputs=inputs)
Code example #11
def test_pad_packed_sequence(data, token_sizes, dim, batch_first, device):
    inputs = [
        torch.randn((token_size, dim), device=device, requires_grad=True)
        for token_size in token_sizes
    ]
    packed_sequence = tgt.pack_sequence(inputs, enforce_sorted=False)
    expected_token_sizes = torch.tensor(token_sizes,
                                        device=torch.device('cpu'))

    expected = tgt.pad_sequence(inputs, batch_first=batch_first)
    actual, actual_token_sizes = rua.pad_packed_sequence(
        packed_sequence, batch_first=batch_first)

    assert_close(actual, expected)
    assert_grad_close(actual, expected, inputs=inputs)
    assert_equal(actual_token_sizes, expected_token_sizes)
Code example #12
def _test_linear_backward(input_shape, out_channels):
    in_channels = input_shape[1]
    input = np.random.random(input_shape).astype(np.float32) * 20
    layer = LinearLayer(in_channels, out_channels)

    torch_layer = nn.Linear(in_channels, out_channels, bias=True)
    utils.assign_linear_layer_weights(layer, torch_layer)

    output = layer.forward(input)
    out_grad = layer.backward(np.ones_like(output) * 2)

    torch_input = utils.from_numpy(input).requires_grad_(True)
    torch_out = torch_layer(torch_input)
    (2 * torch_out).sum().backward()

    utils.assert_close(out_grad, torch_input.grad, atol=TOLERANCE)
    utils.check_linear_grad_match(layer, torch_layer, tolerance=TOLERANCE)
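
For a linear layer y = x W^T + b with upstream gradient g, the backward pass being tested presumably computes grad_input = g @ W, grad_W = g^T @ x, and grad_b = g.sum(0). A small NumPy-vs-PyTorch check of those identities (a sketch, not the project's LinearLayer):

import numpy as np
import torch

x = np.random.random((3, 4)).astype(np.float32)
w = np.random.random((2, 4)).astype(np.float32)   # (out, in), as in nn.Linear
b = np.zeros(2, dtype=np.float32)
g = np.full((3, 2), 2, dtype=np.float32)          # matches backward(ones * 2)

tx = torch.from_numpy(x).requires_grad_(True)
tw = torch.from_numpy(w).requires_grad_(True)
tb = torch.from_numpy(b).requires_grad_(True)
(torch.from_numpy(g) * (tx @ tw.T + tb)).sum().backward()

np.testing.assert_allclose(g @ w, tx.grad.numpy(), atol=1e-5)     # grad_input
np.testing.assert_allclose(g.T @ x, tw.grad.numpy(), atol=1e-5)   # grad_W
np.testing.assert_allclose(g.sum(0), tb.grad.numpy(), atol=1e-5)  # grad_b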
Code example #13
def _test_linear_forward(input_shape, out_channels):
    in_channels = input_shape[1]
    input = np.random.random(input_shape).astype(np.float32) * 20
    original_input = input.copy()
    layer = LinearLayer(in_channels, out_channels)

    torch_layer = nn.Linear(in_channels, out_channels, bias=True)
    utils.assign_linear_layer_weights(layer, torch_layer)

    output = layer.forward(input)

    torch_data = utils.from_numpy(input)
    torch_out = torch_layer(torch_data)

    assert np.all(input == original_input)
    assert output.shape == torch_out.shape
    utils.assert_close(output, torch_out, atol=TOLERANCE)
Code example #14
def test_tree_reduce_packed_sequence(data, token_sizes, dim, device):
    inputs = [
        torch.randn((token_size, dim), device=device, requires_grad=True)
        for token_size in token_sizes
    ]

    expected = pad_sequence(inputs, device=device).sum(dim=0)

    packed_sequence = pack_sequence(inputs, device=device)
    indices = tree_reduce_packed_indices(
        batch_sizes=packed_sequence.batch_sizes)

    actual = tree_reduce_sequence(torch.add)(packed_sequence.data, indices)
    actual = actual[packed_sequence.unsorted_indices]

    assert_close(actual, expected)
    assert_grad_close(actual, expected, inputs=inputs)
Code example #15
def _test_max_pool_backward(input_shape, kernel_size, stride):
    np.random.seed(0)
    torch.manual_seed(0)
    padding = (kernel_size - 1) // 2
    input = np.random.random(input_shape).astype(np.float32) * 20
    layer = MaxPoolLayer(kernel_size, stride)

    torch_layer = nn.MaxPool2d(kernel_size, stride, padding)

    output = layer.forward(input)
    out_grad = layer.backward(2 * np.ones_like(output) / output.size)

    torch_input = utils.from_numpy(input).requires_grad_(True)
    torch_out = torch_layer(torch_input)
    (2 * torch_out.mean()).backward()

    torch_out_grad = utils.to_numpy(torch_input.grad)
    utils.assert_close(out_grad, torch_out_grad, atol=TOLERANCE)
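
Max pooling's backward pass routes each upstream gradient entry to the argmax position within its window and leaves every other position zero, which is the behavior the comparison above exercises. A one-window PyTorch illustration:

import torch
import torch.nn as nn

x = torch.tensor([[[[1., 3.], [2., 0.]]]], requires_grad=True)
out = nn.MaxPool2d(kernel_size=2)(x)   # picks the 3
out.backward(torch.ones_like(out))
print(x.grad)   # 1 at the max position, 0 elsewhere:
# tensor([[[[0., 1.],
#           [0., 0.]]]])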
Code example #16
def test_tree_reduce_padded_sequence(data, token_sizes, dim, batch_first,
                                     device):
    inputs = [
        torch.randn((token_size, dim), device=device, requires_grad=True)
        for token_size in token_sizes
    ]

    expected = pad_sequence(inputs, device=device).sum(dim=0)

    padded_sequence = pad_sequence(inputs,
                                   device=device,
                                   batch_first=batch_first)
    token_sizes = torch.tensor(token_sizes, device=device)
    indices = tree_reduce_padded_indices(token_sizes=token_sizes,
                                         batch_first=batch_first)
    actual = tree_reduce_sequence(torch.add)(padded_sequence.data, indices)

    assert_close(actual, expected)
Code example #17
File: test_slicing.py Project: speedcell4/torchrua
def test_chunk_packed_sequence(batch_size, token_sizes, embedding_dim, dim, batch_first, device):
    expected_sequences = sequences = [
        pack_sequence([
            torch.randn((token_size, embedding_dim), device=device, requires_grad=True)
            for token_size in token_sizes
        ], enforce_sorted=False)
        for _ in range(batch_size)
    ]
    actual_sequences = chunk_packed_sequence(
        sequence=stack_packed_sequences(sequences=sequences, dim=dim),
        chunks=len(sequences), dim=dim,
    )

    for actual_sequence, expected_sequence in zip(actual_sequences, expected_sequences):
        actual, actual_token_sizes = pad_packed_sequence(actual_sequence, batch_first=batch_first)
        expected, expected_token_sizes = pad_packed_sequence(expected_sequence, batch_first=batch_first)

        assert_close(actual, expected)
        assert_equal(actual_token_sizes, expected_token_sizes)
Code example #18
def _test_max_pool_forward(input_shape, kernel_size, stride):
    np.random.seed(0)
    torch.manual_seed(0)
    padding = (kernel_size - 1) // 2
    input = np.random.random(input_shape).astype(np.float32) * 20
    original_input = input.copy()
    layer = MaxPoolLayer(kernel_size, stride)

    torch_layer = nn.MaxPool2d(kernel_size, stride, padding)
    output = layer.forward(input)

    torch_data = utils.from_numpy(input)
    torch_out = utils.to_numpy(torch_layer(torch_data))
    output[np.abs(output) < 1e-4] = 0
    torch_out[np.abs(torch_out) < 1e-4] = 0

    assert np.all(input == original_input)
    assert output.shape == torch_out.shape
    utils.assert_close(output, torch_out, atol=TOLERANCE)
Code example #19
def _test_forward(input_shape, reduction, axis):
    layer = SoftmaxCrossEntropyLossLayer(reduction=reduction)
    data = np.random.random(input_shape) * 2 - 1
    labels_shape = list(data.shape)
    labels_shape.pop(axis)
    labels = np.random.randint(0, data.shape[axis],
                               labels_shape).astype(np.int64)
    loss = layer(data, labels, axis=axis)
    if axis == 1:
        pytorch_loss = F.cross_entropy(utils.from_numpy(data),
                                       utils.from_numpy(labels),
                                       reduction=reduction)
    else:
        pytorch_loss = F.cross_entropy(
            utils.from_numpy(data.swapaxes(1, axis)),
            utils.from_numpy(labels),
            reduction=reduction)
    pytorch_loss = utils.to_numpy(pytorch_loss)

    utils.assert_close(loss, pytorch_loss, atol=0.001)
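
The reference value is standard softmax cross entropy: a numerically stable log-softmax over the class axis followed by the negative log-probability of the true class. A NumPy sketch of the reduction='mean' case with the class axis at dim 1 (not the project's layer):

import numpy as np

def softmax_cross_entropy(logits, labels):
    # Stable log-softmax: subtract the per-row max before exponentiating.
    shifted = logits - logits.max(axis=1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -log_probs[np.arange(len(labels)), labels].mean()

# e.g. softmax_cross_entropy(np.random.randn(4, 10), np.array([1, 0, 3, 9]))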
Code example #20
def test_networks():
    np.random.seed(0)
    torch.manual_seed(0)
    data = np.random.random((100, 1, 28, 28)).astype(np.float32) * 10 - 5
    labels = np.random.randint(0, 10, 100).astype(np.int64)

    net = MNISTResNetwork()
    torch_net = TorchMNISTResNetwork()
    utils.assign_conv_layer_weights(net.layers[0], torch_net.layers[0])
    utils.assign_conv_layer_weights(net.layers[3], torch_net.layers[3])
    utils.assign_conv_layer_weights(net.layers[4].conv_layers[0],
                                    torch_net.layers[4].conv1)
    utils.assign_conv_layer_weights(net.layers[4].conv_layers[2],
                                    torch_net.layers[4].conv2)
    utils.assign_conv_layer_weights(net.layers[5].conv_layers[0],
                                    torch_net.layers[5].conv1)
    utils.assign_conv_layer_weights(net.layers[5].conv_layers[2],
                                    torch_net.layers[5].conv2)
    utils.assign_linear_layer_weights(net.layers[9], torch_net.layers[9])
    utils.assign_linear_layer_weights(net.layers[11], torch_net.layers[11])
    utils.assign_linear_layer_weights(net.layers[13], torch_net.layers[13])

    forward = net(data)

    data_torch = utils.from_numpy(data).requires_grad_(True)
    forward_torch = torch_net(data_torch)

    utils.assert_close(forward, forward_torch)

    loss = net.loss(forward, labels)
    torch_loss = torch_net.loss(forward_torch, utils.from_numpy(labels))

    utils.assert_close(loss, torch_loss)

    out_grad = net.backward()
    torch_loss.backward()

    utils.assert_close(out_grad, data_torch.grad, atol=0.01)

    tolerance = 1e-4
    utils.check_linear_grad_match(net.layers[13],
                                  torch_net.layers[13],
                                  tolerance=tolerance)
    utils.check_linear_grad_match(net.layers[11],
                                  torch_net.layers[11],
                                  tolerance=tolerance)
    utils.check_linear_grad_match(net.layers[9],
                                  torch_net.layers[9],
                                  tolerance=tolerance)
    utils.check_conv_grad_match(net.layers[5].conv_layers[2],
                                torch_net.layers[5].conv2,
                                tolerance=tolerance)
    utils.check_conv_grad_match(net.layers[5].conv_layers[0],
                                torch_net.layers[5].conv1,
                                tolerance=tolerance)
    utils.check_conv_grad_match(net.layers[4].conv_layers[2],
                                torch_net.layers[4].conv2,
                                tolerance=tolerance)
    utils.check_conv_grad_match(net.layers[4].conv_layers[0],
                                torch_net.layers[4].conv1,
                                tolerance=tolerance)
    utils.check_conv_grad_match(net.layers[3],
                                torch_net.layers[3],
                                tolerance=tolerance)
    utils.check_conv_grad_match(net.layers[0],
                                torch_net.layers[0],
                                tolerance=tolerance)