def expected_seq_size(
    seq_size: int,  # input sequence size
    padding: str,  # conv1d padding: 'same' or 'valid'
    kernel_size: int,  # conv1d kernel size
    stride: int,  # conv1d stride
    dilation: int,  # conv1d dilation rate
    pool_size: Union[None, int],  # pooling layer kernel size
    pool_padding: str,  # pooling layer padding: 'same' or 'valid'
    pool_stride: int,  # pooling layer stride
) -> int:
    # output shape for the convolutional layer
    output_seq_size = get_img_output_shape(
        img_height=0,  # img_height set to zero for 1D structure
        img_width=seq_size,  # img_width equates to sequence size
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
    )
    if pool_size is not None:
        # pooling layer present, adjust expected output shape for pooling layer
        output_seq_size = get_img_output_shape(
            img_height=0,  # img_height set to zero for 1D structure
            img_width=output_seq_size[1],  # img_width equates to sequence size
            kernel_size=pool_size,
            stride=pool_stride,
            padding=pool_padding,
            dilation=1,  # pooling layers only support unit dilation
        )
    return output_seq_size[1]
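# Usage sketch (illustrative, not part of the original module): assuming
# get_img_output_shape follows the standard convolution arithmetic, a 'same'-padded
# conv1d with stride 1 leaves the sequence length unchanged, and a 'valid' max pool
# with kernel 2 and stride 2 then halves it:
#
#     expected_seq_size(
#         seq_size=128, padding="same", kernel_size=5, stride=1, dilation=1,
#         pool_size=2, pool_padding="valid", pool_stride=2,
#     )  # expected to return 64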
def __init__(
    self,
    img_height: int,
    img_width: int,
    in_channels: int,
    out_channels=256,
    kernel_size=3,
    stride=1,
    dilation=1,
    groups=1,
    use_bias=False,
):
    super().__init__()
    self.layers = torch.nn.ModuleList()
    self._input_shape = (in_channels, img_height, img_width)

    padding = "same"
    if stride > 1:
        padding = (kernel_size - 1) // 2

    self.layers.append(
        nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=use_bias,
        )
    )
    img_height, img_width = get_img_output_shape(
        img_height=img_height,
        img_width=img_width,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
    )

    for layer in self.layers:
        logger.debug(f" {layer._get_name()}")

    self._output_shape = (out_channels, img_height, img_width)
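# Usage sketch (illustrative, not part of the original module; this __init__ is assumed
# to belong to Conv2DLayerFixedPadding, whose signature matches the call in the ResNet
# stack below): nn.Conv2d only accepts padding='same' for stride 1, so for stride > 1
# the layer falls back to an explicit padding of (kernel_size - 1) // 2. With the
# defaults (out_channels=256, kernel_size=3) and stride=2, the standard conv arithmetic
# gives floor((224 + 2 - 2 - 1) / 2 + 1) = 112 per spatial dimension:
#
#     layer = Conv2DLayerFixedPadding(img_height=224, img_width=224, in_channels=3, stride=2)
#     layer.output_shape  # expected to be (256, 112, 112)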
def __init__(
    self,
    img_height: int,
    img_width: int,
    in_channels: int,
    out_channels: int = 256,
    kernel_size: Union[int, Tuple[int]] = 3,
    stride: Union[int, Tuple[int]] = 1,
    padding: Union[int, Tuple[int], str] = "valid",
    dilation: Union[int, Tuple[int]] = 1,
    groups: int = 1,
    use_bias: bool = True,
    padding_mode: str = "zeros",
    norm: Optional[str] = None,
    norm_params: Optional[Dict[str, Any]] = None,
    activation: str = "relu",
    dropout: float = 0,
    pool_function: str = "max",
    pool_kernel_size: Optional[Union[int, Tuple[int]]] = None,
    pool_stride: Optional[int] = None,
    pool_padding: Union[int, Tuple[int]] = 0,
    pool_dilation: Union[int, Tuple[int]] = 1,
):
    super().__init__()
    self.layers = torch.nn.ModuleList()
    self._input_shape = (in_channels, img_height, img_width)

    pool_stride = pool_stride or pool_kernel_size

    self.layers.append(
        nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=use_bias,
            padding_mode=padding_mode,
        )
    )
    out_height, out_width = get_img_output_shape(img_height, img_width, kernel_size, stride, padding, dilation)

    if norm and norm_params is None:
        norm_params = {}
    if norm == "batch":
        # Batch norm over channels
        self.layers.append(nn.BatchNorm2d(num_features=out_channels, **norm_params))
    elif norm == "layer":
        # Layer norm over image height and width
        self.layers.append(nn.LayerNorm(normalized_shape=(out_height, out_width), **norm_params))

    self.layers.append(get_activation(activation))

    if dropout > 0:
        self.layers.append(nn.Dropout(dropout))

    if pool_kernel_size is not None:
        pool = partial(nn.MaxPool2d, dilation=pool_dilation)
        if pool_function in {"average", "avg", "mean"}:
            pool = nn.AvgPool2d
        self.layers.append(pool(kernel_size=pool_kernel_size, stride=pool_stride, padding=pool_padding))
        out_height, out_width = get_img_output_shape(
            img_height=out_height,
            img_width=out_width,
            kernel_size=pool_kernel_size,
            stride=pool_stride,
            padding=pool_padding,
            dilation=pool_dilation,
        )

    for layer in self.layers:
        logger.debug(f" {layer._get_name()}")

    self._output_shape = (out_channels, out_height, out_width)
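# Usage sketch (illustrative; the enclosing class name is not shown in this excerpt,
# Conv2DLayer is assumed here): a valid-padded 3x3 conv on a 3x224x224 image followed by
# batch norm, the default ReLU, and a 2x2 max pool (pool_stride defaults to the pool
# kernel size) should, under the standard conv arithmetic, yield (256, 111, 111):
#
#     layer = Conv2DLayer(
#         img_height=224, img_width=224, in_channels=3,
#         norm="batch", pool_kernel_size=2,
#     )
#     layer.output_shape  # expected to be (256, 111, 111)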
def __init__(
    self,
    img_height: int,
    img_width: int,
    first_in_channels: int,
    out_channels: int,
    resnet_size: int = 34,
    kernel_size: Union[int, Tuple[int]] = 7,
    conv_stride: Union[int, Tuple[int]] = 2,
    first_pool_kernel_size: Union[int, Tuple[int]] = 3,
    first_pool_stride: Union[int, Tuple[int]] = 2,
    block_sizes: Optional[List[int]] = None,
    block_strides: Optional[List[Union[int, Tuple[int]]]] = None,
    batch_norm_momentum: float = 0.1,
    batch_norm_epsilon: float = 0.001,
):
    """Creates a model obtaining an image representation.

    Implements ResNet v2:
    Identity Mappings in Deep Residual Networks
    https://arxiv.org/pdf/1603.05027.pdf
    by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.

    Args:
        resnet_size: A single integer for the size of the ResNet model.
        is_bottleneck: Use regular blocks or bottleneck blocks.
        out_channels: The number of filters to use for the first block layer of the
            model. This number is then doubled for each subsequent block layer.
        kernel_size: The kernel size to use for convolution.
        conv_stride: Stride size for the initial convolutional layer.
        first_pool_kernel_size: Pool size to be used for the first pooling layer.
            If None, the first pooling layer is skipped.
        first_pool_stride: Stride size for the first pooling layer. Not used if
            first_pool_kernel_size is None.
        block_sizes: A list containing n values, where n is the number of sets of
            block layers desired. Each value should be the number of blocks in the
            i-th set.
        block_strides: List of integers representing the desired stride size for
            each of the sets of block layers. Should be the same length as block_sizes.

    Raises:
        ValueError: if invalid version is selected.
    """
    super().__init__()
    self._input_shape = (first_in_channels, img_height, img_width)

    is_bottleneck = self.get_is_bottleneck(resnet_size, block_sizes)
    block_class = self.get_block_fn(is_bottleneck)
    block_sizes, block_strides = self.get_blocks(resnet_size, block_sizes, block_strides)

    self.layers = torch.nn.ModuleList()
    self.layers.append(
        Conv2DLayerFixedPadding(
            img_height=img_height,
            img_width=img_width,
            in_channels=first_in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=conv_stride,
        )
    )
    in_channels, img_height, img_width = self.layers[-1].output_shape

    self.layers.append(
        nn.BatchNorm2d(num_features=out_channels, eps=batch_norm_epsilon, momentum=batch_norm_momentum)
    )
    self.layers.append(get_activation("relu"))

    if first_pool_kernel_size:
        self.layers.append(
            nn.MaxPool2d(kernel_size=first_pool_kernel_size, stride=first_pool_stride, padding=1)
        )
        img_height, img_width = get_img_output_shape(
            img_height=img_height,
            img_width=img_width,
            kernel_size=first_pool_kernel_size,
            stride=first_pool_stride,
            padding=1,
            dilation=1,
        )

    for i, num_blocks in enumerate(block_sizes):
        self.layers.append(
            ResNetBlockLayer(
                img_height=img_height,
                img_width=img_width,
                first_in_channels=in_channels,
                out_channels=out_channels,
                is_bottleneck=is_bottleneck,
                block_fn=block_class,
                num_blocks=num_blocks,
                stride=block_strides[i],
                batch_norm_momentum=batch_norm_momentum,
                batch_norm_epsilon=batch_norm_epsilon,
            )
        )
        out_channels *= 2
        in_channels, img_height, img_width = self.layers[-1].output_shape

    for layer in self.layers:
        logger.debug(f" {layer._get_name()}")

    self._output_shape = (in_channels, img_height, img_width)
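# Usage note (illustrative, not part of the original module): assuming get_blocks maps
# resnet_size=34 to the canonical ResNet-34 block_sizes [3, 4, 6, 3], the loop above
# doubles out_channels after each block set, so starting from out_channels=64 the four
# ResNetBlockLayer stages are built with 64, 128, 256, and 512 output channels, and
# _output_shape reports the channels, height, and width produced by the last stage.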