import torch

# ConvBlock, PaddedTensor, and padded_cost_function are assumed to be imported
# from the library under test; the exact module paths are not shown in this snippet.
def test_backprop(dtype, device, kwargs):
    # Note: this only checks that the gradient w.r.t. all layers is different from zero.
    m = ConvBlock(**kwargs).to(device, dtype=dtype).train()
    # Create the batch input and batch sizes with the requested dtype and device
    x = torch.randn(2,
                    kwargs["in_channels"],
                    17,
                    19,
                    device=device,
                    dtype=dtype)
    xs = torch.tensor([[13, 19], [17, 13]], device=device)

    # Check model for normal tensor inputs
    m.zero_grad()
    cost = m(x).sum()
    cost.backward()
    for n, p in m.named_parameters():
        assert p.grad is not None, f"Parameter {n} does not have a gradient"
        sp = torch.abs(p.grad).sum()
        assert not torch.allclose(
            sp, torch.tensor(0, dtype=dtype, device=device)
        ), f"Gradients for parameter {n} are close to 0 ({sp:g})"

    # Check model for padded tensor inputs
    m.zero_grad()
    cost = padded_cost_function(m(PaddedTensor(x, xs)))
    cost.backward()
    for n, p in m.named_parameters():
        assert p.grad is not None, f"Parameter {n} does not have a gradient"
        sp = torch.abs(p.grad).sum()
        assert not torch.allclose(
            sp, torch.tensor(0, dtype=dtype, device=device)
        ), f"Gradients for parameter {n} are close to 0 ({sp:g})"
    def test_output_size_dilation(self):
        # Note: padding should be added automatically to have the same output size
        m = ConvBlock(4, 5, dilation=3)
        x = torch.randn(1, 4, 11, 13)
        y = m(PaddedTensor(x, torch.tensor([[11, 13]])))
        self.assertEqual([[11, 13]], y.sizes.tolist())
        self.assertEqual([11, 13], list(y.data.size())[2:])

    def test_output_size_padded_tensor(self):
        m = ConvBlock(4, 5, kernel_size=3, stride=1, dilation=1, poolsize=2)
        x = torch.randn(3, 4, 11, 13)
        y = m(PaddedTensor(x, torch.tensor([[11, 13], [10, 12], [3, 2]])))
        self.assertEqual(
            [[11 // 2, 13 // 2], [10 // 2, 12 // 2], [3 // 2, 2 // 2]],
            y.sizes.tolist())
Example #4
def get_conv_output_size(
    size: Param2d,
    cnn_kernel_size: Sequence[ParamNd],
    cnn_stride: Sequence[ParamNd],
    cnn_dilation: Sequence[ParamNd],
    cnn_poolsize: Sequence[ParamNd],
) -> Tuple[Union[torch.LongTensor, int], Union[torch.LongTensor, int]]:
    size_h, size_w = size
    for ks, st, di, ps in zip(
        cnn_kernel_size, cnn_stride, cnn_dilation, cnn_poolsize
    ):
        size_h = ConvBlock.get_output_size(
            size_h, kernel_size=ks[0], dilation=di[0], stride=st[0], poolsize=ps[0]
        )
        size_w = ConvBlock.get_output_size(
            size_w, kernel_size=ks[1], dilation=di[1], stride=st[1], poolsize=ps[1]
        )
    return size_h, size_w
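
# Sketch of the per-dimension arithmetic that ConvBlock.get_output_size
# presumably performs (an assumption inferred from the output-size tests above,
# not the library's actual implementation): "same"-style padding keeps the
# convolution output at roughly ceil(size / stride), and max pooling then
# divides it by poolsize.
def _output_size_sketch(size, kernel_size=3, stride=1, dilation=1, poolsize=0):
    padding = dilation * (kernel_size - 1) // 2
    size = (size + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1
    if poolsize and poolsize > 1:
        size = (size - poolsize) // poolsize + 1
    return size

# Consistency check against test_output_size_stride: height 11 with stride=2 -> 6.
assert _output_size_sketch(11, kernel_size=3, stride=2) == 11 // 2 + 1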
    def test_masking(self):
        m = ConvBlock(1, 1, activation=None, use_masks=True)
        # Zero all parameters, then make the 3x3 convolution an identity by
        # setting only the kernel's center weight to 1, so the block's output
        # should equal its (masked) input.
        for name, param in m.named_parameters():
            param.data.zero_()
            if name == "conv.weight":
                param.data[:, :, 1, 1] = 1

        x = torch.randn(3, 1, 11, 13)
        y = m(PaddedTensor(x, torch.tensor([[11, 13], [10, 12], [3, 2]]))).data

        # Check sample 1
        torch.testing.assert_allclose(x[0, :, :, :], y[0, :, :, :])
        # Check sample 2
        torch.testing.assert_allclose(x[1, :, :10, :12], y[1, :, :10, :12])
        torch.testing.assert_allclose(torch.zeros(1, 1, 13), y[1, :, 10:, :])
        torch.testing.assert_allclose(torch.zeros(1, 11, 1), y[1, :, :, 12:])
        # Check sample 3
        torch.testing.assert_allclose(x[2, :, :3, :2], y[2, :, :3, :2])
        torch.testing.assert_allclose(torch.zeros(1, 8, 13), y[2, :, 3:, :])
        torch.testing.assert_allclose(torch.zeros(1, 11, 11), y[2, :, :, 2:])
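
    # The assertions above suggest that use_masks=True zeroes every activation
    # beyond a sample's true height/width, so padded pixels cannot leak into
    # later layers. A rough, hypothetical equivalent of that masking step for a
    # batch y of shape (N, C, H, W) with per-sample heights/widths:
    #   rows = torch.arange(H).view(1, 1, H, 1)
    #   cols = torch.arange(W).view(1, 1, 1, W)
    #   mask = (rows < heights.view(N, 1, 1, 1)) & (cols < widths.view(N, 1, 1, 1))
    #   y = y * mask.to(y.dtype)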
    def test_output_size_stride(self):
        m = ConvBlock(4, 5, stride=2)
        x = torch.randn(1, 4, 11, 13)
        y = m(PaddedTensor(x, torch.tensor([[11, 13]])))
        self.assertEqual([[11 // 2 + 1, 13 // 2 + 1]], y.sizes.tolist())
        self.assertEqual([11 // 2 + 1, 13 // 2 + 1], list(y.data.size())[2:])

    def test_output_size_no_pool(self):
        m = ConvBlock(4, 5, poolsize=0)
        x = torch.randn(1, 4, 11, 13)
        y = m(PaddedTensor(x, torch.tensor([[11, 13]])))
        self.assertEqual([[11, 13]], y.sizes.tolist())
        self.assertEqual([11, 13], list(y.data.size())[2:])

    def test_output_size(self):
        m = ConvBlock(4, 5, kernel_size=3, stride=1, dilation=1, poolsize=2)
        x = torch.randn(3, 4, 11, 13)
        y = m(x)
        self.assertEqual((3, 5, 11 // 2, 13 // 2), tuple(y.size()))
Example #9
    def __init__(
        self,
        num_input_channels: int,
        num_output_labels: int,
        cnn_num_features: Sequence[int],
        cnn_kernel_size: Sequence[Param2d],
        cnn_stride: Sequence[Param2d],
        cnn_dilation: Sequence[Param2d],
        cnn_activation: Sequence[Type[nn.Module]],
        cnn_poolsize: Sequence[Param2d],
        cnn_dropout: Sequence[float],
        cnn_batchnorm: Sequence[bool],
        image_sequencer: str,
        rnn_units: int,
        rnn_layers: int,
        rnn_dropout: float,
        lin_dropout: float,
        rnn_type: Union[nn.LSTM, nn.GRU, nn.RNN] = nn.LSTM,
        inplace: bool = False,
        vertical_text: bool = False,
        use_masks: bool = False,
    ) -> None:
        super().__init__()
        self._rnn_dropout = rnn_dropout
        self._lin_dropout = lin_dropout

        # Add convolutional blocks, in a VGG style.
        conv_blocks = []
        ni = num_input_channels
        for i, nh, ks, st, di, f, ps, dr, bn in zip(
            count(),
            cnn_num_features,
            cnn_kernel_size,
            cnn_stride,
            cnn_dilation,
            cnn_activation,
            cnn_poolsize,
            cnn_dropout,
            cnn_batchnorm,
        ):
            conv_blocks.append(
                ConvBlock(
                    in_channels=ni,
                    out_channels=nh,
                    kernel_size=ks,
                    stride=st,
                    dilation=di,
                    activation=f,
                    poolsize=ps,
                    dropout=dr,
                    batchnorm=bn,
                    inplace=inplace,
                    use_masks=use_masks,
                )
            )
            ni = nh
        self.conv = nn.Sequential(*conv_blocks)
        # Add sequencer module to convert an image into a sequence
        self.sequencer = ImagePoolingSequencer(
            sequencer=image_sequencer, columnwise=not vertical_text
        )
        # Add bidirectional rnn
        self.rnn = rnn_type(
            ni * self.sequencer.fix_size,
            rnn_units,
            rnn_layers,
            dropout=rnn_dropout,
            bidirectional=True,
            batch_first=False,
        )
        self.rnn.flatten_parameters()
        # Add final linear layer
        self.linear = nn.Linear(2 * rnn_units, num_output_labels)
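
# Construction sketch for the enclosing CRNN model (its class name is not shown
# in this snippet, so "Model" below is a placeholder, and every hyperparameter
# value is illustrative): two conv blocks in VGG style, an image sequencer, and
# a 3-layer bidirectional LSTM feeding the final linear classifier.
#
# model = Model(
#     num_input_channels=1,
#     num_output_labels=80,
#     cnn_num_features=[16, 32],
#     cnn_kernel_size=[3, 3],
#     cnn_stride=[1, 1],
#     cnn_dilation=[1, 1],
#     cnn_activation=[nn.LeakyReLU, nn.LeakyReLU],
#     cnn_poolsize=[2, 2],
#     cnn_dropout=[0.0, 0.0],
#     cnn_batchnorm=[False, True],
#     image_sequencer="maxpool-8",  # spec string; exact format depends on ImagePoolingSequencer
#     rnn_units=256,
#     rnn_layers=3,
#     rnn_dropout=0.5,
#     lin_dropout=0.5,
# )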