def __init__(
    self,
    num_input_channel: int,
    base_block_name: str,
    num_blocks_in_conv_layer: list,
    *,
    num_classes: Optional[int] = None,
) -> None:
    assert (num_input_channel > 0), name_with_msg(
        self, "`num_input_channel` should be specified and greater than 0")
    assert (base_block_name in ResNetConfig.available_base_blocks()), name_with_msg(
        self,
        f"`base_block_name` ({base_block_name}) should be one of the blocks listed below: \n{ResNetConfig.available_base_blocks()}"
    )
    assert (len(num_blocks_in_conv_layer) == 4), name_with_msg(
        self,
        "The length of `num_blocks_in_conv_layer` must be equal to 4 for conv_2 to conv_5"
    )
    if num_classes is not None:
        assert (num_classes > 0), name_with_msg(
            self, "`num_classes` should be specified and greater than 0")

    self.num_input_channel = num_input_channel
    self.base_block_name = base_block_name

    # make code more readable
    conv_keys = [f"conv_{idx}" for idx in range(2, 5 + 1)]
    self.num_blocks_in_conv_layer = dict(zip(conv_keys, num_blocks_in_conv_layer))
    self.num_classes = num_classes
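# Hedged, standalone sketch: how `num_blocks_in_conv_layer` is zipped onto the
# conv_2..conv_5 stage names above. The block counts are the familiar ResNet-50
# ones and are used here purely as an illustration.
num_blocks_in_conv_layer = [3, 4, 6, 3]
conv_keys = [f"conv_{idx}" for idx in range(2, 5 + 1)]
print(dict(zip(conv_keys, num_blocks_in_conv_layer)))
# {'conv_2': 3, 'conv_3': 4, 'conv_4': 6, 'conv_5': 3}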
def __init__(self, image_size, image_channel, patch_size, use_patch_and_flat=True):
    super().__init__()

    assert image_size is not None, name_with_msg(
        self, "Please specify input images' size")
    assert patch_size is not None, name_with_msg(
        self, "Please specify patches' size")

    self.patch_size = patch_size
    self.patch_dim = (patch_size**2) * image_channel
    self.num_patches = (image_size // patch_size)**2

    assert ((self.num_patches**0.5) * patch_size == image_size), name_with_msg(
        self, "Image size must be divisible by the patch size")

    if use_patch_and_flat:
        self.patch_and_flat = Rearrange(
            "b c (h p) (w q) -> b (h w) (p q c)",
            p=self.patch_size,
            q=self.patch_size)
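# Hedged, standalone sketch of the patch-and-flatten rearrangement used above.
# Assumes `einops` is available (the module above already relies on `Rearrange`);
# the image/patch sizes are illustrative only.
import torch
from einops.layers.torch import Rearrange

patch_size = 16
patch_and_flat = Rearrange("b c (h p) (w q) -> b (h w) (p q c)", p=patch_size, q=patch_size)
images = torch.randn(2, 3, 224, 224)      # (batch, channel, height, width)
patches = patch_and_flat(images)
print(patches.shape)                      # torch.Size([2, 196, 768]): 14*14 patches, 16*16*3 dims each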
def __init__(
    self,
    image_size: int,
    image_channel: int,
    patch_size: int,
    num_layers_in_stages: List[int],
    num_channels: List[int],
    expand_scales: List[int],
    kernel_size_on_heads: Dict[int, int],
    heads: Optional[int] = None,
) -> None:
    super().__init__(image_size, image_channel, patch_size, use_patch_and_flat=False)

    self.image_channel = image_channel
    self.num_stages = len(num_layers_in_stages)
    self.num_layers_in_stages = num_layers_in_stages
    self.num_channels = num_channels
    self.expand_scales = expand_scales
    self.patch_sizes = [self.patch_size, *((2, ) * (self.num_stages - 1))]

    if heads is not None:
        assert (heads == sum(kernel_size_on_heads.values())), name_with_msg(
            self,
            f"`heads` ({heads}) should be equal to the sum of the values of `kernel_size_on_heads` ({sum(kernel_size_on_heads.values())})"
        )

    self.heads = heads or sum(kernel_size_on_heads.values())
    self.kernel_size_on_heads = kernel_size_on_heads
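# Hedged, standalone sketch: the per-stage patch sizes built above. Only the first
# stage uses the configured `patch_size`; every later stage downsamples by 2.
# Values are illustrative.
patch_size, num_stages = 4, 4
patch_sizes = [patch_size, *((2, ) * (num_stages - 1))]
print(patch_sizes)   # [4, 2, 2, 2]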
def __init__(
    self,
    dim: int,
    kernel_size_on_heads: Dict[int, int],
    heads: Optional[int] = None,
    head_dim: Optional[int] = None,
    use_cls: bool = True,
    use_bias: bool = True,
    conv_relative_position_encoder: Optional[nn.Module] = None,
    attention_dropout: float = 0.,
    ff_dropout: float = 0.,
) -> None:
    super().__init__()

    assert (heads is not None or head_dim is not None), name_with_msg(
        self,
        f"Either `heads` ({heads}) or `head_dim` ({head_dim}) must be specified"
    )
    self.heads = heads if heads is not None else dim // head_dim
    head_dim = head_dim if head_dim is not None else dim // heads
    assert (head_dim * self.heads == dim), name_with_msg(
        self,
        f"Head dimension ({head_dim}) times the number of heads ({self.heads}) must be equal to the embedding dimension ({dim})"
    )

    self.relative_position_encoder = ConvolutionalRelativePositionEncoding(
        dim,
        heads,
        head_dim,
        kernel_size_on_heads=kernel_size_on_heads,
        use_cls=use_cls
    ) if conv_relative_position_encoder is None else conv_relative_position_encoder

    self.QKV = nn.Linear(dim, 3 * dim, bias=use_bias)
    self.out_linear = nn.Linear(dim, dim)

    self.attention_dropout = nn.Dropout(attention_dropout)
    self.out_dropout = nn.Dropout(ff_dropout)

    self.scale = head_dim**(-0.5)
    self.use_cls = use_cls
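# Hedged, standalone sketch of the heads / head_dim bookkeeping used above: either
# argument may be omitted and is derived from `dim`, and the attention scale is
# head_dim ** -0.5. Numbers are illustrative.
dim, heads = 512, 8
head_dim = dim // heads
assert head_dim * heads == dim
scale = head_dim**(-0.5)
print(head_dim, scale)   # 64 0.125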
def _get_relative_indices(self, height: int, width: int) -> torch.Tensor:
    height, width = int(height), int(width)
    ticks_y, ticks_x = torch.arange(height), torch.arange(width)
    grid_y, grid_x = torch.meshgrid(ticks_y, ticks_x)
    out = torch.empty(height * width, height * width).fill_(float("nan"))

    for idx_y in range(height):
        for idx_x in range(width):
            rel_indices_y = grid_y - idx_y + height
            rel_indices_x = grid_x - idx_x + width
            flatten_indices = (rel_indices_y * width + rel_indices_x).flatten()
            out[idx_y * width + idx_x] = flatten_indices

    assert (not out.isnan().any()), name_with_msg(
        self, "`relative_indices` have blank indices")
    assert ((out >= 0).all()), name_with_msg(
        self, "`relative_indices` have negative indices")

    return out.to(torch.long)
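# Hedged sketch: the same relative-index construction written as a free function so
# it can be sanity-checked on a tiny 2x2 grid. `indexing="ij"` is made explicit
# (assumes PyTorch >= 1.10); it matches the default row-major behaviour used above.
import torch

def relative_indices(height: int, width: int) -> torch.Tensor:
    ticks_y, ticks_x = torch.arange(height), torch.arange(width)
    grid_y, grid_x = torch.meshgrid(ticks_y, ticks_x, indexing="ij")
    out = torch.empty(height * width, height * width, dtype=torch.long)
    for idx_y in range(height):
        for idx_x in range(width):
            rel_y = grid_y - idx_y + height   # shift so indices stay non-negative
            rel_x = grid_x - idx_x + width
            out[idx_y * width + idx_x] = (rel_y * width + rel_x).flatten()
    return out

print(relative_indices(2, 2))   # a (4, 4) table of non-negative relative offsets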
def __init__(self, image_height, image_width, num_blocks_in_layers,
             num_channels_in_layers, block_type_in_layers,
             expand_scale_in_layers):
    super().__init__()

    assert (len(num_blocks_in_layers) == 5), name_with_msg(
        self, "The length of `num_blocks_in_layers` must be 5")

    if isinstance(num_channels_in_layers, list):
        assert (len(num_channels_in_layers) == 5), name_with_msg(
            self, "The length of `num_channels_in_layers` must be 5")
    else:
        begin_channel = int(num_channels_in_layers)
        num_channels_in_layers = [
            int(begin_channel // (2**layer_idx)) for layer_idx in range(0, 5)
        ]

    # We ignore `S0` here, so the length of the below lists should be 4
    assert (len(block_type_in_layers) == 4), name_with_msg(
        self, "The length of `block_type_in_layers` must be 4")

    if isinstance(expand_scale_in_layers, list):
        assert (len(expand_scale_in_layers) == 4), name_with_msg(
            self, "The length of `expand_scale_in_layers` must be 4")
    else:
        expand_scale = int(expand_scale_in_layers)
        expand_scale_in_layers = [expand_scale for _ in range(4)]

    self.height_in_layers = [
        int(image_height / (2**layer_idx)) for layer_idx in range(1, 6)
    ]
    self.width_in_layers = [
        int(image_width / (2**layer_idx)) for layer_idx in range(1, 6)
    ]
    self.num_blocks_in_layers = num_blocks_in_layers
    self.num_channels_in_layers = num_channels_in_layers
    self.block_type_in_layers = block_type_in_layers
    self.expand_scale_in_layers = expand_scale_in_layers
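# Hedged, standalone sketch: the per-layer feature-map sizes computed above,
# spelled out for a 224x224 input (each of the 5 layers halves the resolution of
# the previous one). The input size is illustrative.
image_height = 224
height_in_layers = [int(image_height / (2**layer_idx)) for layer_idx in range(1, 6)]
print(height_in_layers)   # [112, 56, 28, 14, 7]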
def __init__(
    self,
    dim: int,
    heads: Optional[int],
    head_dim: Optional[int],
    kernel_size_on_heads: Dict[int, int] = {
        3: 2,
        5: 3,
        7: 3
    },  # From: https://github.com/mlpc-ucsd/CoaT/blob/main/src/models/coat.py#L358
    use_cls: bool = True,
) -> None:
    super().__init__()

    head_list = list(kernel_size_on_heads.values())
    if heads is None and head_dim is None:
        if any(h is None or h <= 0 for h in head_list):
            raise ValueError(
                "Please specify exact numbers (integers greater than 0) of heads for each kernel size when `heads` and `head_dim` are None."
            )
        self.heads = sum(head_list)
    else:
        self.heads = heads or dim // head_dim

    self.head_dim = head_dim or dim // self.heads
    assert (dim // self.heads == self.head_dim), name_with_msg(
        self,
        f"`dim` ({dim}) can't be divided by `heads` ({self.heads}). Please check `heads`, `head_dim`, or `kernel_size_on_heads`."
    )

    self.depth_wise_conv_list = nn.ModuleList([
        nn.Conv2d(
            self.head_dim * num_heads,
            self.head_dim * num_heads,
            kernel_size=kernel_size,
            stride=1,
            padding=kernel_size // 2,
            groups=self.head_dim * num_heads,
        ) for kernel_size, num_heads in kernel_size_on_heads.items()
    ])
    self.split_list = [
        num_heads * self.head_dim
        for num_heads in kernel_size_on_heads.values()
    ]
    self.use_cls = use_cls
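# Hedged, standalone sketch: how `kernel_size_on_heads` translates into per-group
# channel split sizes for the depth-wise convolutions above. The kernel-size map is
# the default from the snippet; `head_dim` is illustrative.
kernel_size_on_heads = {3: 2, 5: 3, 7: 3}
head_dim = 32
split_list = [num_heads * head_dim for num_heads in kernel_size_on_heads.values()]
print(split_list)                          # [64, 96, 96] channels per kernel-size group
print(sum(kernel_size_on_heads.values()))  # 8 heads in total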
def __init__(
    self,
    depth_scale: float,
    width_scale: float,
    resolution: int,
    up_sampling_mode: Optional[str] = None,  # Check out: https://pytorch.org/docs/stable/generated/torch.nn.Upsample.html?highlight=up%20sample#torch.nn.Upsample
    return_feature_maps: bool = False,
    num_layers: Optional[List[int]] = None,
    channels: Optional[List[int]] = None,
    kernel_sizes: Optional[List[int]] = None,
    strides: Optional[List[int]] = None,
    expand_scales: Optional[List[Optional[int]]] = None,
    se_scales: Optional[List[Optional[int]]] = None,
    se_scale: Optional[float] = 0.25,
) -> None:
    super().__init__()

    # Table 1. from the official paper (all stages)
    self.num_layers = self.scale_and_round_layers(
        num_layers if num_layers is not None else [1, 1, 2, 2, 3, 3, 4, 1, 1],
        depth_scale)
    self.channels = self.scale_and_round_channels(
        channels if channels is not None else [32, 16, 24, 40, 80, 112, 192, 320, 1280],
        width_scale)
    self.kernel_sizes = kernel_sizes if kernel_sizes is not None else [
        3, 3, 3, 5, 3, 5, 5, 3, 1
    ]
    self.strides = strides if strides is not None else [
        1, 2, 1, 2, 2, 2, 1, 2, 1
    ]
    self.expand_scales = expand_scales if expand_scales is not None else [
        None, 1, 6, 6, 6, 6, 6, 6, None
    ]

    assert (se_scales is not None or se_scale is not None), name_with_msg(
        self, "Either `se_scales` or `se_scale` should be specified")
    self.se_scales = se_scales if se_scales is not None else [
        None, *((se_scale, ) * 7), None
    ]

    self.resolution = resolution
    # From: https://github.com/tensorflow/tpu/blob/3679ca6b979349dde6da7156be2528428b000c7c/models/official/efficientnet/preprocessing.py#L88
    # The default for resizing is `bicubic`
    self.up_sampling_mode = up_sampling_mode
    self.return_feature_maps = return_feature_maps
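# Hedged sketch: the `scale_and_round_*` helpers are not shown in this section, so
# the functions below only illustrate the standard EfficientNet recipe (repeats are
# ceil-scaled by `depth_scale`; channels are width-scaled and rounded to a multiple
# of 8, never dropping more than 10%). Treat this as an assumption, not the
# library's exact implementation.
from math import ceil

def scale_layers(num_layers, depth_scale):
    return [ceil(depth_scale * n) for n in num_layers]

def scale_channels(channels, width_scale, divisor=8):
    scaled = []
    for c in channels:
        c = c * width_scale
        new_c = max(divisor, int(c + divisor / 2) // divisor * divisor)
        if new_c < 0.9 * c:          # do not round down by more than 10%
            new_c += divisor
        scaled.append(new_c)
    return scaled

print(scale_layers([1, 2, 2, 3], depth_scale=1.2))     # [2, 3, 3, 4]
print(scale_channels([32, 16, 24], width_scale=1.1))   # [32, 16, 24]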
def __init__(self, input_channel, channel_in_between,
             num_res_blocks_in_between, vit_input_size, **kwargs):
    super().__init__()

    assert len(channel_in_between) >= 1, name_with_msg(
        self, "Please specify the number of channels for at least 1 layer.")

    channel_in_between = [input_channel] + channel_in_between
    self.layers = nn.ModuleList([
        TransUNetEncoderConvBlock(channel_in_between[idx],
                                  channel_in_between[idx + 1],
                                  num_res_blocks_in_between[idx])
        for idx in range(len(channel_in_between) - 1)
    ])
    self.vit = TransUNetViT(image_size=vit_input_size,
                            image_channel=channel_in_between[-1],
                            **kwargs)
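# Hedged, standalone sketch: how `channel_in_between` is turned into (in, out)
# channel pairs for the encoder blocks above. Values are illustrative.
input_channel = 3
channel_in_between = [64, 128, 256]
channel_in_between = [input_channel] + channel_in_between
pairs = [(channel_in_between[idx], channel_in_between[idx + 1])
         for idx in range(len(channel_in_between) - 1)]
print(pairs)   # [(3, 64), (64, 128), (128, 256)]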
def forward(self, *args: torch.Tensor,
            sizes: Tuple[Tuple[int, int], ...]) -> List[torch.Tensor]:
    num_inputs = len(args)
    assert (num_inputs == len(self.serial_block_list)), name_with_msg(
        self,
        f"The number of inputs ({num_inputs}) should be aligned with the number of feature maps ({len(self.serial_block_list)})"
    )

    # args = [conv_position_encoder(x, H, W) for x, (H, W), conv_position_encoder in zip(args, sizes, self.conv_position_encoder)]
    # args = [norm(x) for x, norm in zip(args, self.norm_0)]
    args = [
        conv_attn_module(x, H, W) for x, (H, W), conv_attn_module in zip(
            args, sizes, self.conv_attn_module)
    ]

    for idx in range(num_inputs):
        args[idx] = torch.stack(
            [self.interpolate(x, size=sizes[idx]) for x in args],
            dim=0).sum(dim=0)

    args = [
        x + path_dropout(x)
        for x, path_dropout in zip(args, self.path_dropout_0)
    ]

    # args = [norm(x) for x, norm in zip(args, self.norm_1)]
    args = [ff_block(x) for x, ff_block in zip(args, self.ff_block)]
    args = [
        x + path_dropout(x)
        for x, path_dropout in zip(args, self.path_dropout_1)
    ]

    return args
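# Hedged sketch of the cross-scale fusion step above, reduced to plain
# `F.interpolate` + sum on image-shaped tensors. This assumes `self.interpolate`
# resizes each feature map to the target spatial size; the exact handling of token
# sequences and class tokens is not shown in this section, so shapes here are
# illustrative only.
import torch
import torch.nn.functional as F

maps = [torch.randn(1, 64, s, s) for s in (28, 14, 7)]
target = (14, 14)
fused = torch.stack(
    [F.interpolate(m, size=target, mode="bilinear", align_corners=False) for m in maps],
    dim=0).sum(dim=0)
print(fused.shape)   # torch.Size([1, 64, 14, 14])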
def __init__(
    self,
    in_channel: int,
    out_channel: Optional[int] = None,
    dimension: int = 2,
) -> None:
    super().__init__()

    self.in_channel = in_channel
    self.out_channel = out_channel if out_channel is not None else in_channel

    assert ((0 < dimension) and (dimension < 4)), name_with_msg(
        self, "`dimension` must be larger than 0 and smaller than 4")
    self.dimension = dimension

    if self.dimension == 1:
        self.conv = nn.Conv1d
    elif self.dimension == 2:
        self.conv = nn.Conv2d
    else:
        self.conv = nn.Conv3d
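# Hedged, standalone sketch: the `dimension` argument simply selects between
# nn.Conv1d / nn.Conv2d / nn.Conv3d, mirroring the branch above. Values are
# illustrative.
import torch
from torch import nn

conv_by_dim = {1: nn.Conv1d, 2: nn.Conv2d, 3: nn.Conv3d}
conv = conv_by_dim[2](in_channels=16, out_channels=32, kernel_size=3, padding=1)
print(conv(torch.randn(1, 16, 8, 8)).shape)   # torch.Size([1, 32, 8, 8])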
def __init__(
    self,
    image_channel: int,
    patch_size: int,
    dim: int,
) -> None:
    super().__init__()

    assert (log2(patch_size) == int(log2(patch_size))), name_with_msg(
        self, f"`patch_size` ({patch_size}) must be a power of 2")

    base_dimension_scale = 1 / (patch_size // 2)
    num_layers = int(log2(patch_size))

    self.proj = nn.Sequential(*[
        nn.Conv2d(int(dim * base_dimension_scale * 2**((idx // 2) - 1))
                  if idx != 0 else image_channel,
                  int(dim * base_dimension_scale * 2**(idx // 2)),
                  kernel_size=3,
                  stride=2,
                  padding=1) if idx % 2 == 0 else nn.GELU()
        for idx in range(num_layers * 2 - 1)
    ])
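# Hedged, standalone sketch: why the power-of-2 check matters. Each stride-2
# convolution halves the spatial size, so log2(patch_size) convolutions reduce one
# patch to a single token. The patch size is illustrative.
from math import log2

patch_size = 8
assert log2(patch_size) == int(log2(patch_size)), "`patch_size` must be a power of 2"
num_convs = int(log2(patch_size))
print(num_convs)   # 3 stride-2 convolutions: 8 -> 4 -> 2 -> 1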
def __init__(
    self,
    in_channel: int,
    out_channel: Optional[int] = None,
    expand_channel: Optional[int] = None,
    expand_scale: Optional[int] = None,
    kernel_size: int = 3,
    stride: int = 1,
    padding: int = 1,
    norm_layer_name: str = "BatchNorm2d",
    act_fnc_name: str = "SiLU",
    se_scale: Optional[float] = None,
    se_act_fnc_name: str = "SiLU",
    dimension: int = 2,
    path_dropout: float = 0.,
    expansion_head_type: Literal["pixel_depth", "fused"] = "pixel_depth",
    **kwargs  # For example: `eps` and `elementwise_affine` for `nn.LayerNorm`
):
    super().__init__(in_channel, out_channel, dimension=dimension)

    assert (expand_channel is not None or expand_scale is not None), name_with_msg(
        self,
        "Either `expand_channel` or `expand_scale` should be specified")
    expand_channel = expand_channel if expand_channel is not None else in_channel * expand_scale

    assert (
        isinstance(expansion_head_type, str)
        and expansion_head_type in ["pixel_depth", "fused"]
    ), name_with_msg(
        self,
        f"The specified `expansion_head_type` - {expansion_head_type} ({type(expansion_head_type)}) doesn't exist.\n"
        "Please choose from here: ['pixel_depth', 'fused']")

    # Expansion Head
    if expansion_head_type == "pixel_depth":
        pixel_wise_conv_0 = nn.Sequential(
            self.conv(self.in_channel,
                      expand_channel,
                      kernel_size=1,
                      bias=False),
            get_attr_if_exists(nn, norm_layer_name)(expand_channel, **kwargs),
            get_attr_if_exists(nn, act_fnc_name)())

        depth_wise_conv = nn.Sequential(
            self.conv(expand_channel,
                      expand_channel,
                      kernel_size,
                      stride=stride,
                      padding=padding,
                      groups=expand_channel,
                      bias=False),
            get_attr_if_exists(nn, norm_layer_name)(expand_channel, **kwargs),
            get_attr_if_exists(nn, act_fnc_name)())

        self.expansion_head = nn.Sequential(pixel_wise_conv_0, depth_wise_conv)
    else:
        self.expansion_head = nn.Sequential(
            nn.Conv2d(self.in_channel,
                      expand_channel,
                      kernel_size,
                      stride=stride,
                      padding=padding,
                      bias=False),
            get_attr_if_exists(nn, norm_layer_name)(expand_channel, **kwargs),
            get_attr_if_exists(nn, act_fnc_name)())

    # Squeeze-and-Excitation block (kept as None when `se_scale` isn't given)
    self.se_block = None
    if se_scale is not None:
        bottleneck_channel = int(expand_channel * se_scale)
        self.se_block = SEConvXd(
            expand_channel,
            bottleneck_channel,
            se_act_fnc_name=se_act_fnc_name,
        )

    # Pixel-wise projection back to `out_channel`
    self.pixel_wise_conv_1 = nn.Sequential(
        self.conv(
            expand_channel,
            self.out_channel,
            kernel_size=1,
            bias=False,
        ),
        get_attr_if_exists(nn, norm_layer_name)(self.out_channel, **kwargs))

    # From: https://github.com/tensorflow/tpu/blob/3679ca6b979349dde6da7156be2528428b000c7c/models/official/efficientnet/utils.py#L276
    # It's a batch-wise dropout
    self.path_dropout = PathDropout(path_dropout)
    self.skip = self.in_channel == self.out_channel and stride == 1
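# Hedged, standalone sketch of the "pixel_depth" expansion head in isolation: a 1x1
# (pixel-wise) convolution expands the channels, followed by a depth-wise 3x3
# convolution (groups == channels). Channel counts and input shapes are
# illustrative only.
import torch
from torch import nn

in_channel, expand_scale = 16, 6
expand_channel = in_channel * expand_scale
expansion_head = nn.Sequential(
    nn.Conv2d(in_channel, expand_channel, kernel_size=1, bias=False),
    nn.BatchNorm2d(expand_channel),
    nn.SiLU(),
    nn.Conv2d(expand_channel, expand_channel, kernel_size=3, stride=1, padding=1,
              groups=expand_channel, bias=False),
    nn.BatchNorm2d(expand_channel),
    nn.SiLU(),
)
print(expansion_head(torch.randn(1, in_channel, 32, 32)).shape)   # torch.Size([1, 96, 32, 32])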