コード例 #1
0
        def forward(self, x):
            """Run sliding-window conv features through the LSTM and classify each step.

            The input gets a singleton dim inserted at index 1, is cut into
            windows by ``slide_window`` and each window is passed through
            ``self.conv1`` / ``self.conv2`` independently. The resulting
            sequence of 128-d feature vectors is fed to ``self.lstm`` and
            ``self.linear``; log-softmax is taken over the last dim.

            Args:
                x: input tensor. NOTE(review): presumably a batch of images
                    shaped (B, H, W) without a channel axis -- confirm
                    against the caller.

            Returns:
                A tuple ``(logsoftmax, input_lengths)``:
                ``logsoftmax`` holds log-probabilities, time-major
                (T, B, classes) according to the original author's note;
                ``input_lengths`` is an int64 tensor of shape (B,) where
                every entry equals T, the number of windows.
            """
            # Insert a size-1 dim at index 1 (unpacked as C below; presumably
            # the channel axis expected by the conv layers).
            x = x.unsqueeze(1)
            patches = slide_window(x, window_width, window_stride)
            B, C, H, Window_W, T = patches.shape

            # Bring the window axis T to the front: (T, B, C, H, Window_W).
            patches = patches.permute(4, 0, 1, 2, 3)
            # PyTorch's way of TimeDistributed: merge dims T and B so every
            # window goes through the conv stack as an independent sample.
            # reshape() copies only when the permuted tensor is not viewable,
            # which is what .contiguous().view() did.
            conv_o1 = self.conv1(
                patches.reshape(T * B, C, H, Window_W)
            )  # original author's note: (T*B, C, H/2-2, W/2-2)
            # Flatten each window's feature map, project it, then split the
            # merged dim back into (T, B) with 128 features per step.
            flat_features = conv_o1.view(T * B, -1)
            conv_out = self.conv2(flat_features).view(T, B, 128)

            # Final hidden/cell states are not needed here.
            lstm_out, _ = self.lstm(conv_out)  # lstm_out: (T, B, 128)
            out_linear = self.linear(lstm_out)  # nn.Linear() allows 3D tensor
            # logsoftmax should be in shape (T, B, classes) to be consistent
            # with ctc_decode
            logsoftmax = nn.functional.log_softmax(out_linear, dim=2)
            # Every sample in the batch yields the same number of windows, so
            # all lengths are T. Build the int64 tensor directly rather than
            # going through a float32 tensor and casting.
            input_lengths = torch.full((B,), T, dtype=torch.long)

            return logsoftmax, input_lengths
コード例 #2
0
 def slide_window_bound(
     image, window_width=window_width, window_stride=window_stride
 ):
     """Call ``slide_window`` with width/stride defaults bound at definition.

     The defaults capture the values of ``window_width`` and
     ``window_stride`` from the enclosing scope at the time this function
     is defined; callers may still override either one.

     Args:
         image: passed through unchanged as the first argument of
             ``slide_window``.
         window_width: second argument forwarded to ``slide_window``.
         window_stride: third argument forwarded to ``slide_window``.

     Returns:
         Whatever ``slide_window`` returns for these arguments.
     """
     windows = slide_window(image, window_width, window_stride)
     return windows