Example #1
0
 def test_output_size_dilation(self):
     """A dilated ConvBlock is expected to keep the input's spatial size."""
     height, width = 11, 13
     block = ConvBlock(4, 5, dilation=3)
     images = torch.randn(1, 4, height, width)
     batch = PaddedTensor(images, torch.tensor([[height, width]]))
     out = block(batch)
     # The block should pad automatically, so both the reported sizes and
     # the actual tensor dimensions must match the input.
     self.assertEqual([[height, width]], out.sizes.tolist())
     self.assertEqual([height, width], list(out.data.size()[2:]))
Example #2
0
 def test_output_size_padded_tensor(self):
     """Per-sample sizes of a PaddedTensor are halved by a poolsize of 2."""
     block = ConvBlock(4, 5, kernel_size=3, stride=1, dilation=1, poolsize=2)
     images = torch.randn(3, 4, 11, 13)
     sizes = [[11, 13], [10, 12], [3, 2]]
     out = block(PaddedTensor(images, torch.tensor(sizes)))
     # Each original (height, width) is expected to be floor-divided by 2.
     expected = [[h // 2, w // 2] for h, w in sizes]
     self.assertEqual(expected, out.sizes.tolist())
Example #3
0
 def get_conv_output_size(
         size,  # type: Tuple[int, int]
         cnn_kernel_size,  # type: Sequence[Union[int, Tuple[int, int]]]
         cnn_stride,  # type: Sequence[Union[int, Tuple[int, int]]]
         cnn_dilation,  # type: Sequence[Union[int, Tuple[int, int]]]
         cnn_poolsize,  # type: Sequence[Union[int, Tuple[int, int]]]
 ):
     """Compute the (height, width) produced by a stack of conv blocks.

     The size is propagated layer by layer through
     ``ConvBlock.get_output_size``, independently for the height and the
     width.

     Args:
         size: initial ``(height, width)`` pair.
         cnn_kernel_size: per-layer kernel size.
         cnn_stride: per-layer stride.
         cnn_dilation: per-layer dilation.
         cnn_poolsize: per-layer pooling size.

     Each per-layer entry may be a single int (applied to both dimensions)
     or an indexable ``(height, width)`` pair. The sequences are zipped, so
     the number of layers processed is the length of the shortest one.

     Returns:
         The ``(height, width)`` pair obtained after the last layer.
     """

     def as_pair(value):
         # A bare int applies to both height and width; anything else is
         # assumed to already be an indexable (height, width) pair.
         return (value, value) if isinstance(value, int) else value

     size_h, size_w = size
     for ks, st, di, ps in zip(cnn_kernel_size, cnn_stride, cnn_dilation,
                               cnn_poolsize):
         ks, st, di, ps = as_pair(ks), as_pair(st), as_pair(di), as_pair(ps)
         size_h = ConvBlock.get_output_size(size_h,
                                            kernel_size=ks[0],
                                            dilation=di[0],
                                            stride=st[0],
                                            poolsize=ps[0])
         size_w = ConvBlock.get_output_size(size_w,
                                            kernel_size=ks[1],
                                            dilation=di[1],
                                            stride=st[1],
                                            poolsize=ps[1])
     return size_h, size_w
Example #4
0
    def test_masking(self):
        """With use_masks=True, outputs outside each sample's size are zero.

        The block is turned into an identity operation so that the output
        can be compared directly with the input inside the valid region.
        """
        m = ConvBlock(1, 1, activation=None, use_masks=True)
        # Reset parameters so that the operation does nothing: zero them all
        # and set the [1, 1] tap of the conv kernel to 1 (the centre of a
        # 3x3 kernel -- presumably ConvBlock's default kernel size).
        # This runs under no_grad because an in-place write to a view of a
        # leaf parameter that requires grad raises a RuntimeError otherwise.
        with torch.no_grad():
            for name, param in m.named_parameters():
                param.zero_()
                if name == "conv.weight":
                    param[:, :, 1, 1] = 1

        x = torch.randn(3, 1, 11, 13)
        y = m(PaddedTensor(x, torch.tensor([[11, 13], [10, 12], [3, 2]]))).data

        # Check sample 1 (no padding: the whole image must pass through)
        torch.testing.assert_allclose(x[0, :, :, :], y[0, :, :, :])
        # Check sample 2 (valid region 10x12, the rest must be zero)
        torch.testing.assert_allclose(x[1, :, :10, :12], y[1, :, :10, :12])
        torch.testing.assert_allclose(torch.zeros(1, 1, 13), y[1, :, 10:, :])
        torch.testing.assert_allclose(torch.zeros(1, 11, 1), y[1, :, :, 12:])
        # Check sample 3 (valid region 3x2, the rest must be zero)
        torch.testing.assert_allclose(x[2, :, :3, :2], y[2, :, :3, :2])
        torch.testing.assert_allclose(torch.zeros(1, 8, 13), y[2, :, 3:, :])
        torch.testing.assert_allclose(torch.zeros(1, 11, 11), y[2, :, :, 2:])
Example #5
0
    def __init__(
            self,
            num_input_channels,  # type: int
            num_output_labels,  # type: int
            cnn_num_features,  # type: Sequence[int]
            cnn_kernel_size,  # type: Sequence[Union[int, Tuple[int, int]]]
            cnn_stride,  # type: Sequence[Union[int, Tuple[int, int]]]
            cnn_dilation,  # type: Sequence[Union[int, Tuple[int, int]]]
            cnn_activation,  # type: Sequence[nn.Module]
            cnn_poolsize,  # type: Sequence[Union[int, Tuple[int, int]]]
            cnn_dropout,  # type: Sequence[float]
            cnn_batchnorm,  # type: Sequence[bool]
            image_sequencer,  # type: str
            rnn_units,  # type: int
            rnn_layers,  # type: int
            rnn_dropout,  # type: float
            lin_dropout,  # type: float
            rnn_type=nn.LSTM,  # type: Union[nn.LSTM, nn.GRU, nn.RNN]
            inplace=False,  # type: bool
            vertical_text=False,  # type: bool
            use_masks=False,  # type: bool
    ):
        # type: (...) -> None
        """Build the conv stack, the image sequencer, the RNN and the linear layer.

        Args:
            num_input_channels: number of channels fed to the first ConvBlock.
            num_output_labels: output size of the final linear layer.
            cnn_num_features: output channels of each ConvBlock.
            cnn_kernel_size: ``kernel_size`` of each ConvBlock.
            cnn_stride: ``stride`` of each ConvBlock.
            cnn_dilation: ``dilation`` of each ConvBlock.
            cnn_activation: ``activation`` of each ConvBlock.
            cnn_poolsize: ``poolsize`` of each ConvBlock.
            cnn_dropout: ``dropout`` of each ConvBlock.
            cnn_batchnorm: ``batchnorm`` flag of each ConvBlock.
            image_sequencer: sequencer spec passed to ImagePoolingSequencer.
            rnn_units: hidden size of the recurrent layers.
            rnn_layers: number of recurrent layers.
            rnn_dropout: dropout passed to the RNN; also stored on the
                instance as ``_rnn_dropout``.
            lin_dropout: stored on the instance as ``_lin_dropout``; not
                otherwise used in the constructor.
            rnn_type: recurrent module class to instantiate (it is called
                as a constructor, so pass the class, not an instance).
            inplace: forwarded to every ConvBlock.
            vertical_text: if True, the sequencer is built with
                ``columnwise=False``.
            use_masks: forwarded to every ConvBlock.

        The ``cnn_*`` sequences are zipped together, so the number of
        ConvBlocks created equals the length of the shortest one.
        """
        super(LaiaCRNN, self).__init__()
        self._rnn_dropout = rnn_dropout
        self._lin_dropout = lin_dropout

        # Add convolutional blocks, in a VGG style.
        conv_blocks = []
        ni = num_input_channels
        for nh, ks, st, di, f, ps, dr, bn in zip(
                cnn_num_features,
                cnn_kernel_size,
                cnn_stride,
                cnn_dilation,
                cnn_activation,
                cnn_poolsize,
                cnn_dropout,
                cnn_batchnorm,
        ):
            conv_blocks.append(
                ConvBlock(
                    in_channels=ni,
                    out_channels=nh,
                    kernel_size=ks,
                    stride=st,
                    dilation=di,
                    activation=f,
                    poolsize=ps,
                    dropout=dr,
                    batchnorm=bn,
                    inplace=inplace,
                    use_masks=use_masks,
                ))
            # The output channels of this block feed the next one.
            ni = nh
        self.conv = nn.Sequential(*conv_blocks)
        # Add sequencer module to convert an image into a sequence
        self.sequencer = ImagePoolingSequencer(sequencer=image_sequencer,
                                               columnwise=not vertical_text)
        # Add bidirectional rnn. Its input size is the number of channels of
        # the last conv block times the sequencer's fixed size.
        self.rnn = rnn_type(
            ni * self.sequencer.fix_size,
            rnn_units,
            rnn_layers,
            dropout=rnn_dropout,
            bidirectional=True,
            batch_first=False,
        )
        self.rnn.flatten_parameters()
        # Add final linear layer (2x units because the rnn is bidirectional)
        self.linear = nn.Linear(2 * rnn_units, num_output_labels)
Example #6
0
 def test_output_size_stride(self):
     """A stride of 2 is expected to give ``n // 2 + 1`` along each dimension."""
     block = ConvBlock(4, 5, stride=2)
     height, width = 11, 13
     images = torch.randn(1, 4, height, width)
     out = block(PaddedTensor(images, torch.tensor([[height, width]])))
     expected = [height // 2 + 1, width // 2 + 1]
     # Both the reported sizes and the real tensor dimensions must agree.
     self.assertEqual([expected], out.sizes.tolist())
     self.assertEqual(expected, list(out.data.size()[2:]))
Example #7
0
 def test_output_size_no_pool(self):
     """With ``poolsize=0`` the spatial size is expected to stay unchanged."""
     block = ConvBlock(4, 5, poolsize=0)
     height, width = 11, 13
     images = torch.randn(1, 4, height, width)
     out = block(PaddedTensor(images, torch.tensor([[height, width]])))
     # Check the reported per-sample sizes and the actual tensor dimensions.
     self.assertEqual([[height, width]], out.sizes.tolist())
     self.assertEqual([height, width], list(out.data.size()[2:]))
Example #8
0
 def test_output_size(self):
     """A plain tensor input yields (batch, out_channels, H // 2, W // 2)."""
     block = ConvBlock(4, 5, kernel_size=3, stride=1, dilation=1, poolsize=2)
     out = block(torch.randn(3, 4, 11, 13))
     expected_shape = (3, 5, 11 // 2, 13 // 2)
     self.assertEqual(expected_shape, tuple(out.size()))