예제 #1
0
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_chans=3,
                 in_dim=48,
                 stride=4):
        """Set up the convolutional projection and the unfold op.

        Args:
            img_size: Input image size; passed through ``to_2tuple``
                (presumably expands a scalar to an (h, w) pair -- confirm
                against the helper's definition).
            patch_size: Patch size; also passed through ``to_2tuple``.
            in_chans: Number of input channels given to the projection conv.
            in_dim: Number of output channels of the projection conv.
            stride: Stride of the projection conv; also the divisor used to
                derive ``new_patch_size`` from ``patch_size``.
        """
        super().__init__()
        img_hw = to_2tuple(img_size)
        patch_hw = to_2tuple(patch_size)

        self.img_size = img_hw
        self.in_dim = in_dim

        # grid_size is required when the positional embedding is resized.
        rows = img_hw[0] // patch_hw[0]
        cols = img_hw[1] // patch_hw[1]
        self.grid_size = (rows, cols)
        self.num_patches = rows * cols

        # Per-axis patch extent after the strided projection, rounded up.
        sub_patch = [math.ceil(side / stride) for side in patch_hw]
        self.new_patch_size = sub_patch

        # 7x7 conv with padding 3, stepping by ``stride``.
        self.proj = nn.Conv2d(
            in_chans, in_dim, kernel_size=7, padding=3, stride=stride)
        # Kernel equals stride, so the extracted windows do not overlap.
        self.unfold = nn.Unfold(kernel_size=sub_patch, stride=sub_patch)
예제 #2
0
 def __init__(self,
              patch_size=16,
              stride=16,
              padding=0,
              in_chans=3,
              embed_dim=768,
              norm_layer=None):
     """Create the convolutional projection and the optional norm layer.

     Args:
         patch_size: Kernel size of the projection conv (via ``to_2tuple``).
         stride: Stride of the projection conv (via ``to_2tuple``).
         padding: Padding of the projection conv (via ``to_2tuple``).
         in_chans: Number of input channels.
         embed_dim: Number of output channels of the projection.
         norm_layer: Optional callable invoked as ``norm_layer(embed_dim)``;
             when falsy, ``nn.Identity()`` is used instead.
     """
     super().__init__()
     kernel_hw = to_2tuple(patch_size)
     stride_hw = to_2tuple(stride)
     pad_hw = to_2tuple(padding)

     self.proj = nn.Conv2d(
         in_chans,
         embed_dim,
         kernel_size=kernel_hw,
         stride=stride_hw,
         padding=pad_hw,
     )

     # Fall back to a no-op module when no norm layer is supplied.
     if norm_layer:
         self.norm = norm_layer(embed_dim)
     else:
         self.norm = nn.Identity()
    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768,
                 norm_layer=None, exist_overlap: bool = False, slide_step=None):
        """Build the patch projection, with or without overlapping patches.

        Args:
            img_size: Input image size; passed through ``to_2tuple``
                (presumably expands a scalar to an (h, w) pair -- confirm
                against the helper's definition).
            patch_size: Patch size, used as the projection kernel size; also
                passed through ``to_2tuple``.
            in_chans: Number of input channels.
            embed_dim: Number of output channels of the projection.
            norm_layer: Optional callable invoked as ``norm_layer(embed_dim)``;
                when falsy, ``nn.Identity()`` is used instead.
            exist_overlap: When True the projection steps by ``slide_step``
                instead of ``patch_size``.
            slide_step: Integer stride applied on both axes when
                ``exist_overlap`` is True. Required in that case.

        Raises:
            ValueError: If ``exist_overlap`` is True and ``slide_step`` is None.
        """
        super().__init__()
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)
        self.img_size = img_size
        # Set in both modes so the instance exposes the same attributes
        # regardless of ``exist_overlap``.
        self.patch_size = patch_size

        if exist_overlap:
            if slide_step is None:
                # Fail early with a clear message rather than an opaque
                # TypeError from ``// None`` below.
                raise ValueError(
                    "slide_step must be provided when exist_overlap is True")
            stride = (slide_step, slide_step)
            # Window count per axis for an unpadded conv:
            # floor((size - kernel) / stride) + 1.
            self.grid_size = (
                (img_size[0] - patch_size[0]) // slide_step + 1,
                (img_size[1] - patch_size[1]) // slide_step + 1,
            )
        else:
            stride = patch_size
            self.grid_size = (img_size[0] // patch_size[0],
                              img_size[1] // patch_size[1])

        self.num_patches = self.grid_size[0] * self.grid_size[1]
        self.proj = nn.Conv2d(in_channels=in_chans,
                              out_channels=embed_dim,
                              kernel_size=patch_size,
                              stride=stride)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()