def test_output_size_dilation(self):
    """A dilated ConvBlock must report and produce the input's spatial size."""
    # Note: the block is expected to add padding automatically, so that the
    # output keeps the same height and width as the input.
    block = ConvBlock(4, 5, dilation=3)
    images = torch.randn(1, 4, 11, 13)
    batch = PaddedTensor(images, torch.tensor([[11, 13]]))
    out = block(batch)
    # Both the bookkeeping sizes and the actual tensor must agree.
    self.assertEqual([[11, 13]], out.sizes.tolist())
    spatial_dims = list(out.data.size())[2:]
    self.assertEqual([11, 13], spatial_dims)
def test_output_size_padded_tensor(self):
    """Per-sample sizes of a PaddedTensor must be halved by a 2x pooling."""
    block = ConvBlock(4, 5, kernel_size=3, stride=1, dilation=1, poolsize=2)
    images = torch.randn(3, 4, 11, 13)
    sample_sizes = [[11, 13], [10, 12], [3, 2]]
    out = block(PaddedTensor(images, torch.tensor(sample_sizes)))
    # Every sample's height and width is integer-divided by the pool size.
    expected = [[height // 2, width // 2] for height, width in sample_sizes]
    self.assertEqual(expected, out.sizes.tolist())
def get_conv_output_size(
    size,  # type: Tuple[int, int]
    cnn_kernel_size,  # type: Sequence[Union[int, Tuple[int, int]]]
    cnn_stride,  # type: Sequence[Union[int, Tuple[int, int]]]
    cnn_dilation,  # type: Sequence[Union[int, Tuple[int, int]]]
    cnn_poolsize,  # type: Sequence[Union[int, Tuple[int, int]]]
):
    """Compute the (height, width) produced by a stack of conv blocks.

    The input size is pushed through ``ConvBlock.get_output_size`` once per
    layer and per dimension, using that layer's kernel size, dilation,
    stride and pool size.

    Args:
        size: ``(height, width)`` of the input.
        cnn_kernel_size: per-layer kernel size, an int or a
            ``(height, width)`` pair.
        cnn_stride: per-layer stride, an int or a ``(height, width)`` pair.
        cnn_dilation: per-layer dilation, an int or a ``(height, width)``
            pair.
        cnn_poolsize: per-layer pooling size, an int or a
            ``(height, width)`` pair.

    Returns:
        The ``(height, width)`` pair after the last layer. Layers are paired
        with ``zip``, so the shortest of the four sequences determines how
        many layers are applied.
    """

    def as_pair(value):
        # A plain int applies to both dimensions. Anything else is indexed
        # exactly as before, so existing callers that pass pairs see the
        # same behaviour (including the same errors for bad values).
        if isinstance(value, int):
            return value, value
        return value[0], value[1]

    size_h, size_w = size
    for ks, st, di, ps in zip(cnn_kernel_size, cnn_stride, cnn_dilation, cnn_poolsize):
        ks_h, ks_w = as_pair(ks)
        st_h, st_w = as_pair(st)
        di_h, di_w = as_pair(di)
        ps_h, ps_w = as_pair(ps)
        size_h = ConvBlock.get_output_size(
            size_h, kernel_size=ks_h, dilation=di_h, stride=st_h, poolsize=ps_h
        )
        size_w = ConvBlock.get_output_size(
            size_w, kernel_size=ks_w, dilation=di_w, stride=st_w, poolsize=ps_w
        )
    return size_h, size_w
def test_masking(self):
    """Check that ``use_masks=True`` zeroes the padded area of each sample.

    The block is turned into an identity mapping, so inside each sample's
    valid region the output must equal the input, and everywhere outside it
    the output must be exactly zero.
    """
    m = ConvBlock(1, 1, activation=None, use_masks=True)
    # Reset parameters so that the operation does nothing: every parameter
    # is zeroed and only kernel position [1, 1] is set to 1 (the centre of
    # a 3x3 kernel -- presumably ConvBlock's default size; confirm).
    for name, param in m.named_parameters():
        param.data.zero_()
        if name == "conv.weight":
            # Write through ``.data``, like the zeroing above: assigning
            # in place to a view of a leaf parameter that requires grad is
            # rejected by autograd with a RuntimeError.
            param.data[:, :, 1, 1] = 1
    x = torch.randn(3, 1, 11, 13)
    y = m(PaddedTensor(x, torch.tensor([[11, 13], [10, 12], [3, 2]]))).data
    # Check sample 1: it fills the whole 11x13 canvas, nothing is masked.
    torch.testing.assert_allclose(x[0, :, :, :], y[0, :, :, :])
    # Check sample 2: valid region is 10x12, the last row/column are zero.
    torch.testing.assert_allclose(x[1, :, :10, :12], y[1, :, :10, :12])
    torch.testing.assert_allclose(torch.zeros(1, 1, 13), y[1, :, 10:, :])
    torch.testing.assert_allclose(torch.zeros(1, 11, 1), y[1, :, :, 12:])
    # Check sample 3: valid region is 3x2, everything else is zero.
    torch.testing.assert_allclose(x[2, :, :3, :2], y[2, :, :3, :2])
    torch.testing.assert_allclose(torch.zeros(1, 8, 13), y[2, :, 3:, :])
    torch.testing.assert_allclose(torch.zeros(1, 11, 11), y[2, :, :, 2:])
def __init__(
    self,
    num_input_channels,  # type: int
    num_output_labels,  # type: int
    cnn_num_features,  # type: Sequence[int]
    cnn_kernel_size,  # type: Sequence[Union[int, Tuple[int, int]]]
    cnn_stride,  # type: Sequence[Union[int, Tuple[int, int]]]
    cnn_dilation,  # type: Sequence[Union[int, Tuple[int, int]]]
    cnn_activation,  # type: Sequence[nn.Module]
    cnn_poolsize,  # type: Sequence[Union[int, Tuple[int, int]]]
    cnn_dropout,  # type: Sequence[float]
    cnn_batchnorm,  # type: Sequence[bool]
    image_sequencer,  # type: str
    rnn_units,  # type: int
    rnn_layers,  # type: int
    rnn_dropout,  # type: float
    lin_dropout,  # type: float
    rnn_type=nn.LSTM,  # type: Union[nn.LSTM, nn.GRU, nn.RNN]
    inplace=False,  # type: bool
    vertical_text=False,  # type: bool
    use_masks=False,  # type: bool
):
    # type: (...) -> None
    """Build the convolutional, sequencer, recurrent and linear stages.

    Args:
        num_input_channels: number of channels fed to the first conv block.
        num_output_labels: output size of the final linear layer.
        cnn_num_features: output channels of each conv block.
        cnn_kernel_size: kernel size of each conv block.
        cnn_stride: stride of each conv block.
        cnn_dilation: dilation of each conv block.
        cnn_activation: activation passed to each conv block.
        cnn_poolsize: pooling size of each conv block.
        cnn_dropout: dropout passed to each conv block.
        cnn_batchnorm: whether each conv block uses batch normalization.
        image_sequencer: sequencer specification forwarded to
            ``ImagePoolingSequencer``.
        rnn_units: hidden size of the recurrent layers.
        rnn_layers: number of recurrent layers.
        rnn_dropout: dropout given to the recurrent module; also stored in
            ``self._rnn_dropout``.
        lin_dropout: stored in ``self._lin_dropout``; not otherwise used in
            this constructor.
        rnn_type: callable used to construct the recurrent module (e.g.
            ``nn.LSTM``); it is invoked with ``bidirectional=True`` and
            ``batch_first=False``.
        inplace: forwarded to every ``ConvBlock``.
        vertical_text: when true the sequencer is built with
            ``columnwise=False``.
        use_masks: forwarded to every ``ConvBlock``.

    The per-layer ``cnn_*`` sequences are combined with ``zip``, so the
    shortest one determines how many conv blocks are created.
    """
    super(LaiaCRNN, self).__init__()
    self._rnn_dropout = rnn_dropout
    self._lin_dropout = lin_dropout
    # Add convolutional blocks, in a VGG style. Each block's output channels
    # become the next block's input channels.
    conv_blocks = []
    ni = num_input_channels
    for nh, ks, st, di, f, ps, dr, bn in zip(
        cnn_num_features,
        cnn_kernel_size,
        cnn_stride,
        cnn_dilation,
        cnn_activation,
        cnn_poolsize,
        cnn_dropout,
        cnn_batchnorm,
    ):
        conv_blocks.append(
            ConvBlock(
                in_channels=ni,
                out_channels=nh,
                kernel_size=ks,
                stride=st,
                dilation=di,
                activation=f,
                poolsize=ps,
                dropout=dr,
                batchnorm=bn,
                inplace=inplace,
                use_masks=use_masks,
            )
        )
        ni = nh
    self.conv = nn.Sequential(*conv_blocks)
    # Add sequencer module to convert an image into a sequence
    self.sequencer = ImagePoolingSequencer(
        sequencer=image_sequencer, columnwise=not vertical_text
    )
    # Add bidirectional rnn. Its input size is the channel count of the last
    # conv block times the sequencer's fixed size.
    self.rnn = rnn_type(
        ni * self.sequencer.fix_size,
        rnn_units,
        rnn_layers,
        dropout=rnn_dropout,
        bidirectional=True,
        batch_first=False,
    )
    self.rnn.flatten_parameters()
    # Add final linear layer; the factor 2 accounts for the two directions
    # of the bidirectional rnn.
    self.linear = nn.Linear(2 * rnn_units, num_output_labels)
def test_output_size_stride(self):
    """With ``stride=2`` each spatial dimension must become ``n // 2 + 1``."""
    block = ConvBlock(4, 5, stride=2)
    images = torch.randn(1, 4, 11, 13)
    out = block(PaddedTensor(images, torch.tensor([[11, 13]])))
    expected_hw = [11 // 2 + 1, 13 // 2 + 1]
    # The reported sizes and the real tensor shape must both match.
    self.assertEqual([expected_hw], out.sizes.tolist())
    self.assertEqual(expected_hw, list(out.data.size())[2:])
def test_output_size_no_pool(self):
    """With ``poolsize=0`` the output must keep the input's spatial size."""
    block = ConvBlock(4, 5, poolsize=0)
    images = torch.randn(1, 4, 11, 13)
    sizes = torch.tensor([[11, 13]])
    out = block(PaddedTensor(images, sizes))
    self.assertEqual([[11, 13]], out.sizes.tolist())
    height_width = list(out.data.size())[2:]
    self.assertEqual([11, 13], height_width)
def test_output_size(self):
    """A plain tensor input must yield a (batch, channels, H//2, W//2) output."""
    block = ConvBlock(4, 5, kernel_size=3, stride=1, dilation=1, poolsize=2)
    images = torch.randn(3, 4, 11, 13)
    out = block(images)
    # Batch size is kept, channels become 5, and the 2x pooling halves the
    # height and width (integer division).
    expected_shape = (3, 5, 11 // 2, 13 // 2)
    self.assertEqual(expected_shape, tuple(out.size()))