def test_backprop(dtype, device, kwargs):
    """Check that every ConvBlock parameter receives a non-zero gradient.

    Note: this only checks that the gradient w.r.t. all layers is different
    from zero; the gradient values themselves are not verified.

    Args:
        dtype: dtype used for the module and for the random input.
        device: device on which the module and the inputs are placed.
        kwargs: keyword arguments forwarded to ``ConvBlock``; the
            ``"in_channels"`` entry is also used to shape the input.
    """

    def assert_nonzero_grads(module):
        # Shared by both input kinds: each parameter must own a gradient
        # whose absolute sum is not (close to) zero.
        for n, p in module.named_parameters():
            assert p.grad is not None, f"Parameter {n} does not have a gradient"
            sp = torch.abs(p.grad).sum()
            # Build the zero reference on the gradient's own device so the
            # comparison never mixes devices.
            zero = torch.tensor(0, dtype=dtype, device=sp.device)
            assert not torch.allclose(
                sp, zero
            ), f"Gradients for parameter {n} are close to 0 ({sp:g})"

    m = ConvBlock(**kwargs).to(device, dtype=dtype).train()
    # Batch input and the sizes tensor handed to PaddedTensor alongside it
    x = torch.randn(2, kwargs["in_channels"], 17, 19, device=device, dtype=dtype)
    xs = torch.tensor([[13, 19], [17, 13]], device=device)

    # Check model for normal tensor inputs
    m.zero_grad()
    cost = m(x).sum()
    cost.backward()
    assert_nonzero_grads(m)

    # Check model for padded tensor inputs
    m.zero_grad()
    cost = padded_cost_function(m(PaddedTensor(x, xs)))
    cost.backward()
    assert_nonzero_grads(m)
def test_output_size_dilation(self):
    """A dilated block (dilation=3) must keep the 11x13 spatial size."""
    # Note: padding should be added automatically to have the same output size
    height, width = 11, 13
    block = ConvBlock(4, 5, dilation=3)
    images = torch.randn(1, 4, height, width)
    out = block(PaddedTensor(images, torch.tensor([[height, width]])))
    # Both the reported per-sample sizes and the raw data must be unchanged.
    self.assertEqual([[height, width]], out.sizes.tolist())
    self.assertEqual([height, width], list(out.data.shape[2:]))
def test_output_size_padded_tensor(self):
    """With poolsize=2 every per-sample size is floor-divided by two."""
    block = ConvBlock(4, 5, kernel_size=3, stride=1, dilation=1, poolsize=2)
    images = torch.randn(3, 4, 11, 13)
    sizes = [[11, 13], [10, 12], [3, 2]]
    out = block(PaddedTensor(images, torch.tensor(sizes)))
    # Expected sizes: each original (height, width) halved with floor division.
    expected = [[h // 2, w // 2] for h, w in sizes]
    self.assertEqual(expected, out.sizes.tolist())
def get_conv_output_size(
    size: Param2d,
    cnn_kernel_size: Sequence[ParamNd],
    cnn_stride: Sequence[ParamNd],
    cnn_dilation: Sequence[ParamNd],
    cnn_poolsize: Sequence[ParamNd],
) -> Tuple[Union[torch.LongTensor, int], Union[torch.LongTensor, int]]:
    """Propagate a (height, width) size through a stack of conv layers.

    For every layer, ``ConvBlock.get_output_size`` is applied independently to
    the height (using entry ``[0]`` of each per-layer parameter) and to the
    width (using entry ``[1]``).

    Args:
        size: pair ``(height, width)``; it is unpacked into two values.
        cnn_kernel_size: per-layer kernel sizes, each indexable at 0 and 1.
        cnn_stride: per-layer strides, each indexable at 0 and 1.
        cnn_dilation: per-layer dilations, each indexable at 0 and 1.
        cnn_poolsize: per-layer pooling sizes, each indexable at 0 and 1.

    Returns:
        The pair ``(height, width)`` after the last layer. With no layers the
        input pair is returned unchanged.

    Note:
        The per-layer sequences are combined with ``zip``, so iteration stops
        at the shortest one.
    """
    size_h, size_w = size
    for ks, st, di, ps in zip(
        cnn_kernel_size, cnn_stride, cnn_dilation, cnn_poolsize
    ):
        # Index 0 holds the vertical parameters, index 1 the horizontal ones.
        size_h = ConvBlock.get_output_size(
            size_h, kernel_size=ks[0], dilation=di[0], stride=st[0], poolsize=ps[0]
        )
        size_w = ConvBlock.get_output_size(
            size_w, kernel_size=ks[1], dilation=di[1], stride=st[1], poolsize=ps[1]
        )
    return size_h, size_w
def test_masking(self):
    """Check that, with ``use_masks=True``, padded regions come out as zeros.

    All parameters are zeroed and only entry ``[1, 1]`` of every
    ``conv.weight`` kernel is set to 1, so that the block is expected to copy
    its input (presumably ``[1, 1]`` is the kernel centre of the default
    kernel size -- confirm against ``ConvBlock``). The valid region of every
    sample must then equal the input and everything outside it must be zero.
    """
    m = ConvBlock(1, 1, activation=None, use_masks=True)
    # Reset parameters so that the operation does nothing.
    # The writes run under no_grad because autograd rejects in-place
    # modification of (a view of) a leaf tensor that requires grad.
    with torch.no_grad():
        for name, param in m.named_parameters():
            param.zero_()
            if name == "conv.weight":
                param[:, :, 1, 1] = 1
    x = torch.randn(3, 1, 11, 13)
    y = m(PaddedTensor(x, torch.tensor([[11, 13], [10, 12], [3, 2]]))).data
    # Check sample 1 (size 11x13: nothing is padded)
    torch.testing.assert_allclose(x[0, :, :, :], y[0, :, :, :])
    # Check sample 2 (size 10x12: last row and last column are padding)
    torch.testing.assert_allclose(x[1, :, :10, :12], y[1, :, :10, :12])
    torch.testing.assert_allclose(torch.zeros(1, 1, 13), y[1, :, 10:, :])
    torch.testing.assert_allclose(torch.zeros(1, 11, 1), y[1, :, :, 12:])
    # Check sample 3 (size 3x2: rows 3.. and columns 2.. are padding)
    torch.testing.assert_allclose(x[2, :, :3, :2], y[2, :, :3, :2])
    torch.testing.assert_allclose(torch.zeros(1, 8, 13), y[2, :, 3:, :])
    torch.testing.assert_allclose(torch.zeros(1, 11, 11), y[2, :, :, 2:])
def test_output_size_stride(self):
    """With stride=2 an 11x13 input must yield (11 // 2 + 1) x (13 // 2 + 1)."""
    block = ConvBlock(4, 5, stride=2)
    images = torch.randn(1, 4, 11, 13)
    out = block(PaddedTensor(images, torch.tensor([[11, 13]])))
    expected = [11 // 2 + 1, 13 // 2 + 1]
    # The reported sizes and the actual data dimensions must agree.
    self.assertEqual([expected], out.sizes.tolist())
    self.assertEqual(expected, list(out.data.shape[2:]))
def test_output_size_no_pool(self):
    """With poolsize=0 the 11x13 spatial size must be preserved."""
    height, width = 11, 13
    block = ConvBlock(4, 5, poolsize=0)
    images = torch.randn(1, 4, height, width)
    out = block(PaddedTensor(images, torch.tensor([[height, width]])))
    # Neither the per-sample sizes nor the data dimensions may shrink.
    self.assertEqual([[height, width]], out.sizes.tolist())
    self.assertEqual([height, width], list(out.data.shape[2:]))
def test_output_size(self):
    """A plain tensor input pooled by 2 must come out as (3, 5, 11 // 2, 13 // 2)."""
    block = ConvBlock(4, 5, kernel_size=3, stride=1, dilation=1, poolsize=2)
    images = torch.randn(3, 4, 11, 13)
    out = block(images)
    expected_shape = (3, 5, 11 // 2, 13 // 2)
    self.assertEqual(expected_shape, tuple(out.shape))
def __init__(
    self,
    num_input_channels: int,
    num_output_labels: int,
    cnn_num_features: Sequence[int],
    cnn_kernel_size: Sequence[Param2d],
    cnn_stride: Sequence[Param2d],
    cnn_dilation: Sequence[Param2d],
    cnn_activation: Sequence[Type[nn.Module]],
    cnn_poolsize: Sequence[Param2d],
    cnn_dropout: Sequence[float],
    cnn_batchnorm: Sequence[bool],
    image_sequencer: str,
    rnn_units: int,
    rnn_layers: int,
    rnn_dropout: float,
    lin_dropout: float,
    rnn_type: Type[Union[nn.LSTM, nn.GRU, nn.RNN]] = nn.LSTM,
    inplace: bool = False,
    vertical_text: bool = False,
    use_masks: bool = False,
) -> None:
    """Build the conv stack, image sequencer, bidirectional RNN and linear layer.

    Args:
        num_input_channels: channels of the input fed to the first conv block.
        num_output_labels: output features of the final linear layer.
        cnn_num_features: output channels of each conv block.
        cnn_kernel_size: kernel size of each conv block.
        cnn_stride: stride of each conv block.
        cnn_dilation: dilation of each conv block.
        cnn_activation: activation passed to each conv block.
        cnn_poolsize: pooling size of each conv block.
        cnn_dropout: dropout of each conv block.
        cnn_batchnorm: whether each conv block uses batch normalization.
        image_sequencer: sequencer spec passed to ``ImagePoolingSequencer``.
        rnn_units: hidden size of the recurrent layers (per direction).
        rnn_layers: number of recurrent layers.
        rnn_dropout: dropout passed to the recurrent module; also stored in
            ``self._rnn_dropout``.
        lin_dropout: stored in ``self._lin_dropout``; it is not used in this
            constructor (presumably applied before the linear layer in the
            forward pass -- confirm there).
        rnn_type: recurrent module *class* to instantiate.
        inplace: forwarded to every conv block.
        vertical_text: when true, the sequencer is built with
            ``columnwise=False``.
        use_masks: forwarded to every conv block.

    Note:
        The ``cnn_*`` sequences are combined with ``zip``, so one conv block
        is created per position up to the length of the shortest sequence.
    """
    super().__init__()
    self._rnn_dropout = rnn_dropout
    self._lin_dropout = lin_dropout
    # Add convolutional blocks, in a VGG style.
    conv_blocks = []
    ni = num_input_channels
    for nh, ks, st, di, f, ps, dr, bn in zip(
        cnn_num_features,
        cnn_kernel_size,
        cnn_stride,
        cnn_dilation,
        cnn_activation,
        cnn_poolsize,
        cnn_dropout,
        cnn_batchnorm,
    ):
        conv_blocks.append(
            ConvBlock(
                in_channels=ni,
                out_channels=nh,
                kernel_size=ks,
                stride=st,
                dilation=di,
                activation=f,
                poolsize=ps,
                dropout=dr,
                batchnorm=bn,
                inplace=inplace,
                use_masks=use_masks,
            )
        )
        # The next block consumes what this one produces.
        ni = nh
    self.conv = nn.Sequential(*conv_blocks)
    # Add sequencer module to convert an image into a sequence
    self.sequencer = ImagePoolingSequencer(
        sequencer=image_sequencer, columnwise=not vertical_text
    )
    # Add bidirectional rnn; its input size is the channel count of the last
    # conv block times the sequencer's fixed size.
    self.rnn = rnn_type(
        ni * self.sequencer.fix_size,
        rnn_units,
        rnn_layers,
        dropout=rnn_dropout,
        bidirectional=True,
        batch_first=False,
    )
    self.rnn.flatten_parameters()
    # Add final linear layer (2 * rnn_units: both directions are concatenated)
    self.linear = nn.Linear(2 * rnn_units, num_output_labels)