Example 1
def expected_seq_size(
    seq_size: int,  # input sequence size
    padding: str,  # conv1d padding: 'same' or 'valid'
    kernel_size: int,  # conv1d kernel size
    stride: int,  # conv1d stride
    dilation: int,  # conv1d dilation rate
    pool_size: Optional[int],  # pooling layer kernel size
    pool_padding: str,  # pooling layer padding: 'same' or 'valid'
    pool_stride: int,  # pooling layer stride
) -> int:
    # output shape for the convolutional layer
    output_seq_size = get_img_output_shape(
        img_height=0,  # img_height set to zero for 1D structure
        img_width=seq_size,  # img_width equates to sequence size
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
    )
    if pool_size is not None:
        # pooling layer present, adjust expected output shape for pooling layer
        output_seq_size = get_img_output_shape(
            img_height=0,  # img_height set to zero for 1D structure
            img_width=output_seq_size[1],  # img_width equates to sequence size
            kernel_size=pool_size,
            stride=pool_stride,
            padding=pool_padding,
            dilation=1,  # the pooling layer only supports unit dilation
        )
    return output_seq_size[1]
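
A minimal call might look like the following; the numbers are illustrative and assume get_img_output_shape follows PyTorch's usual shape conventions ('same' keeps each dimension at ceil(size / stride), 'valid' applies no padding).

# Hypothetical usage: a length-128 sequence through a 'same'-padded,
# stride-2 convolution, then a 2-wide, stride-2 'valid' max pool.
out_len = expected_seq_size(
    seq_size=128,
    padding="same",
    kernel_size=3,
    stride=2,
    dilation=1,
    pool_size=2,
    pool_padding="valid",
    pool_stride=2,
)
assert out_len == 32  # 128 -> 64 after the conv, 64 -> 32 after the pool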
Example 2
    def __init__(
        self,
        img_height: int,
        img_width: int,
        in_channels: int,
        out_channels=256,
        kernel_size=3,
        stride=1,
        dilation=1,
        groups=1,
        use_bias=False,
    ):
        super().__init__()

        self.layers = torch.nn.ModuleList()
        self._input_shape = (in_channels, img_height, img_width)

        padding = "same"
        if stride > 1:
            padding = (kernel_size - 1) // 2

        self.layers.append(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
                groups=groups,
                bias=use_bias,
            ))
        img_height, img_width = get_img_output_shape(
            img_height=img_height,
            img_width=img_width,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
        )

        for layer in self.layers:
            logger.debug(f"   {layer._get_name()}")

        self._output_shape = (out_channels, img_height, img_width)
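
All of these examples defer the size arithmetic to get_img_output_shape, whose body is not shown. A plausible sketch, assuming square kernels and PyTorch's Conv2d output-size formula (the real helper may differ in detail):

import math
from typing import Tuple, Union

def img_output_shape_sketch(
    img_height: int,
    img_width: int,
    kernel_size: int,
    stride: int,
    padding: Union[int, str],
    dilation: int,
) -> Tuple[int, int]:
    if padding == "same":
        # 'same' keeps each spatial dimension at ceil(size / stride)
        return math.ceil(img_height / stride), math.ceil(img_width / stride)
    pad = 0 if padding == "valid" else padding  # 'valid' means no padding

    def one_dim(size: int) -> int:
        # standard PyTorch convolution output-size formula
        return (size + 2 * pad - dilation * (kernel_size - 1) - 1) // stride + 1

    return one_dim(img_height), one_dim(img_width)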
Example 3
    def __init__(
        self,
        img_height: int,
        img_width: int,
        in_channels: int,
        out_channels: int = 256,
        kernel_size: Union[int, Tuple[int]] = 3,
        stride: Union[int, Tuple[int]] = 1,
        padding: Union[int, Tuple[int], str] = "valid",
        dilation: Union[int, Tuple[int]] = 1,
        groups: int = 1,
        use_bias: bool = True,
        padding_mode: str = "zeros",
        norm: Optional[str] = None,
        norm_params: Optional[Dict[str, Any]] = None,
        activation: str = "relu",
        dropout: float = 0,
        pool_function: int = "max",
        pool_kernel_size: Union[int, Tuple[int]] = None,
        pool_stride: Optional[int] = None,
        pool_padding: Union[int, Tuple[int]] = 0,
        pool_dilation: Union[int, Tuple[int]] = 1,
    ):
        super().__init__()

        self.layers = torch.nn.ModuleList()

        self._input_shape = (in_channels, img_height, img_width)
        # Default the pool stride to the pool kernel size, matching
        # nn.MaxPool2d's own default behavior.
        pool_stride = pool_stride or pool_kernel_size

        self.layers.append(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
                groups=groups,
                bias=use_bias,
                padding_mode=padding_mode,
            ))
        out_height, out_width = get_img_output_shape(
            img_height=img_height,
            img_width=img_width,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
        )

        if norm and norm_params is None:
            norm_params = {}
        if norm == "batch":
            # Batch norm over channels
            self.layers.append(
                nn.BatchNorm2d(num_features=out_channels, **norm_params))
        elif norm == "layer":
            # Layer norm over image height and width
            self.layers.append(
                nn.LayerNorm(normalized_shape=(out_height, out_width),
                             **norm_params))

        self.layers.append(get_activation(activation))

        if dropout > 0:
            self.layers.append(nn.Dropout(dropout))

        if pool_kernel_size is not None:
            # nn.AvgPool2d has no dilation argument, so dilation is only
            # forwarded to the max-pooling variant.
            pool = partial(nn.MaxPool2d, dilation=pool_dilation)
            if pool_function in {"average", "avg", "mean"}:
                pool = nn.AvgPool2d
            self.layers.append(
                pool(kernel_size=pool_kernel_size,
                     stride=pool_stride,
                     padding=pool_padding))
            out_height, out_width = get_img_output_shape(
                img_height=out_height,
                img_width=out_width,
                kernel_size=pool_kernel_size,
                stride=pool_stride,
                padding=pool_padding,
                dilation=pool_dilation,
            )

        for layer in self.layers:
            logger.debug(f"   {layer._get_name()}")

        self._output_shape = (out_channels, out_height, out_width)
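
Assuming the enclosing class is named Conv2DLayer (the snippet does not show it) and that an output_shape property exposes _output_shape, as the usage in Example 4 suggests, a hypothetical instantiation:

# Hypothetical usage; the class name and property are assumptions.
layer = Conv2DLayer(
    img_height=32,
    img_width=32,
    in_channels=3,
    out_channels=64,
    kernel_size=3,
    padding="same",
    norm="batch",
    pool_kernel_size=2,  # pool_stride defaults to pool_kernel_size
)
# The 'same' conv keeps 32x32; the 2x2, stride-2 pool halves it.
assert layer.output_shape == (64, 16, 16)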
Example 4
    def __init__(
        self,
        img_height: int,
        img_width: int,
        first_in_channels: int,
        out_channels: int,
        resnet_size: int = 34,
        kernel_size: Union[int, Tuple[int]] = 7,
        conv_stride: Union[int, Tuple[int]] = 2,
        first_pool_kernel_size: Union[int, Tuple[int]] = 3,
        first_pool_stride: Union[int, Tuple[int]] = 2,
        block_sizes: Optional[List[int]] = None,
        block_strides: Optional[List[Union[int, Tuple[int]]]] = None,
        batch_norm_momentum: float = 0.1,
        batch_norm_epsilon: float = 0.001,
    ):
        """Creates a model obtaining an image representation.

        Implements ResNet v2:
        Identity Mappings in Deep Residual Networks
        https://arxiv.org/pdf/1603.05027.pdf
        by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.

        Args:
          resnet_size: A single integer for the size of the ResNet model. Whether
            regular or bottleneck blocks are used is inferred from this value.
          out_channels: The number of filters to use for the first block layer
            of the model. This number is then doubled for each subsequent block
            layer.
          kernel_size: The kernel size to use for convolution.
          conv_stride: stride size for the initial convolutional layer
          first_pool_kernel_size: Pool size to be used for the first pooling layer.
            If None, the first pooling layer is skipped.
          first_pool_stride: stride size for the first pooling layer. Not used
            if first_pool_kernel_size is None.
          block_sizes: A list containing n values, where n is the number of sets of
            block layers desired. Each value should be the number of blocks in the
            i-th set.
          block_strides: List of integers representing the desired stride size for
            each of the sets of block layers. Should be same length as block_sizes.
        Raises:
          ValueError: if an invalid resnet_size is selected.
        """
        super().__init__()

        self._input_shape = (first_in_channels, img_height, img_width)

        is_bottleneck = self.get_is_bottleneck(resnet_size, block_sizes)
        block_class = self.get_block_fn(is_bottleneck)
        block_sizes, block_strides = self.get_blocks(resnet_size, block_sizes,
                                                     block_strides)

        self.layers = torch.nn.ModuleList()
        self.layers.append(
            Conv2DLayerFixedPadding(
                img_height=img_height,
                img_width=img_width,
                in_channels=first_in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=conv_stride,
            ))
        in_channels, img_height, img_width = self.layers[-1].output_shape
        self.layers.append(
            nn.BatchNorm2d(num_features=out_channels,
                           eps=batch_norm_epsilon,
                           momentum=batch_norm_momentum))
        self.layers.append(get_activation("relu"))

        if first_pool_kernel_size:
            self.layers.append(
                nn.MaxPool2d(kernel_size=first_pool_kernel_size,
                             stride=first_pool_stride,
                             padding=1))
            img_height, img_width = get_img_output_shape(
                img_height=img_height,
                img_width=img_width,
                kernel_size=first_pool_kernel_size,
                stride=first_pool_stride,
                padding=1,
                dilation=1,
            )

        for i, num_blocks in enumerate(block_sizes):
            self.layers.append(
                ResNetBlockLayer(
                    img_height=img_height,
                    img_width=img_width,
                    first_in_channels=in_channels,
                    out_channels=out_channels,
                    is_bottleneck=is_bottleneck,
                    block_fn=block_class,
                    num_blocks=num_blocks,
                    stride=block_strides[i],
                    batch_norm_momentum=batch_norm_momentum,
                    batch_norm_epsilon=batch_norm_epsilon,
                ))
            out_channels *= 2
            in_channels, img_height, img_width = self.layers[-1].output_shape

        for layer in self.layers:
            logger.debug(f"   {layer._get_name()}")

        self._output_shape = (in_channels, img_height, img_width)
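
As a usage sketch (the enclosing class name is not shown above; ResNet is assumed, as is an output_shape property over _output_shape), building a ResNet-34 encoder over 224x224 RGB input:

# Hypothetical usage; 'ResNet' is an assumed class name, and the
# block_sizes/block_strides defaults come from get_blocks (not shown).
encoder = ResNet(
    img_height=224,
    img_width=224,
    first_in_channels=3,
    out_channels=64,
    resnet_size=34,
)
# The stride-2 stem conv and stride-2 first pool take 224 down to 56;
# each strided block set then halves the spatial size again while the
# channel count doubles per block set.
channels, height, width = encoder.output_shape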