def __init__(self, opts: dict = DefaultEncoderOpts):
    super().__init__()
    self._opts = opts
    self.cur_in_ch = 64
    self.block_sizes = get_block_sizes(opts["resnet_size"])
    self.block_type = get_block_type(opts["resnet_size"])
    self.conv1 = nn.Conv2d(
        opts["input_channels"],
        out_channels=64,
        kernel_size=opts["conv_kernel_size"],
        stride=(2, 2),
        padding=get_padding(opts["conv_kernel_size"]),
        bias=False,
    )
    self.bn1 = nn.BatchNorm2d(self.cur_in_ch)
    self.relu1 = nn.ReLU(inplace=True)
    if opts["max_pool"] == 1:
        # Max pooling halves the resolution, so the first residual layer keeps stride (1, 1)
        self.max_pool = nn.MaxPool2d(
            kernel_size=3, stride=2, padding=get_padding(3)
        )
        stride1 = (1, 1)
    elif opts["max_pool"] == 0:
        # No max pooling: downsample with stride (2, 2) in the first residual layer instead
        self.max_pool = None
        stride1 = (2, 2)
    elif opts["max_pool"] == 2:
        # No max pooling and no extra downsampling in the first residual layer
        self.max_pool = None
        stride1 = (1, 1)
    else:
        raise ValueError("Unsupported max_pool option: {}".format(opts["max_pool"]))
    self.layer1 = self.make_layer(self.block_type, 64, self.block_sizes[0], stride1)
    self.layer2 = self.make_layer(self.block_type, 128, self.block_sizes[1], (2, 2))
    self.layer3 = self.make_layer(self.block_type, 256, self.block_sizes[2], (2, 2))
    self.layer4 = self.make_layer(self.block_type, 512, self.block_sizes[3], (2, 2))
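# NOTE: get_padding, get_block_sizes and get_block_type are defined elsewhere in the
# repository and are not part of this excerpt. The sketch below only illustrates the
# behaviour the code above appears to assume (standard ResNet stage sizes, "same"-style
# padding of kernel_size // 2); the actual implementations may differ, in particular for
# tuple arguments.
def get_padding(kernel_size):
    # "same"-style padding, e.g. get_padding(3) == 1, matching the inline comments below
    if isinstance(kernel_size, tuple):
        return tuple(k // 2 for k in kernel_size)
    return kernel_size // 2

def get_block_sizes(resnet_size):
    # Number of blocks per residual stage; ResNet-18 is [2, 2, 2, 2]
    return {18: [2, 2, 2, 2], 34: [3, 4, 6, 3], 50: [3, 4, 6, 3],
            101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[resnet_size]

def get_block_type(resnet_size):
    # ResNet-18/34 use BasicBlock, the deeper variants use Bottleneck (expansion == 4)
    return BasicBlock if resnet_size in (18, 34) else Bottleneck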
def __init__(self, in_ch, out_ch, stride=1, downsample=None, upsample=None, mid_ch=None):
    super().__init__()
    # mid_ch is only needed for the upsampling case in the decoder
    if mid_ch is None:
        mid_ch = out_ch
    if downsample is not None and upsample is not None:
        raise ValueError("Either downsample or upsample has to be None")
    self.shortcut = None
    if upsample is not None or downsample is not None:
        assert (
            downsample is None or upsample is None
        ), "Can only downsample (encoder) or upsample (decoder) using the shortcut"
        self.shortcut = downsample if downsample is not None else upsample
    self.conv1 = nn.Conv2d(in_ch, mid_ch, kernel_size=1, bias=False)
    self.bn1 = nn.BatchNorm2d(mid_ch)
    self.relu1 = nn.ReLU(inplace=True)
    if upsample is not None:
        # Transposed convolution upsamples in the decoder
        self.conv2 = nn.ConvTranspose2d(
            in_channels=mid_ch,
            out_channels=mid_ch,
            kernel_size=3,
            stride=stride,
            padding=get_padding(3),
            output_padding=get_padding(stride),
            bias=False,
        )
    else:
        # Use a normal convolution for downsampling (or when no resampling is needed)
        self.conv2 = nn.Conv2d(
            in_channels=mid_ch,
            out_channels=mid_ch,
            kernel_size=3,
            stride=stride,
            padding=get_padding(3),
            bias=False,
        )
    self.bn2 = nn.BatchNorm2d(mid_ch)
    self.relu2 = nn.ReLU(inplace=True)  # For a better graph
    self.conv3 = nn.Conv2d(
        in_channels=mid_ch,
        out_channels=out_ch * self.expansion,
        kernel_size=1,
        stride=1,
        padding=get_padding(1),
        bias=False,
    )
    self.bn3 = nn.BatchNorm2d(out_ch * self.expansion)
    self.relu3 = nn.ReLU(inplace=True)
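# The Bottleneck forward pass is not part of this excerpt. Below is only a minimal sketch
# of the standard residual pattern implied by the modules defined above (1x1 -> 3x3 or
# transposed 3x3 -> 1x1, shortcut added before the final ReLU); the repository's actual
# forward() may differ, e.g. in how it records intermediate outputs.
def forward(self, x):
    identity = x if self.shortcut is None else self.shortcut(x)
    out = self.relu1(self.bn1(self.conv1(x)))
    out = self.relu2(self.bn2(self.conv2(out)))
    out = self.bn3(self.conv3(out))
    out += identity  # residual connection
    return self.relu3(out)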
def make_layer(self, block, out_ch, size, stride=1, shortcut="downsample"):
    """
    block: the block class (BasicBlock or Bottleneck)
    Makes a residual layer. The first block can also upsample instead of downsample.
    """
    layers = []
    stride_mean = stride
    if isinstance(stride, tuple):
        stride_mean = sum(stride) / len(stride)
    # "upsample" case: the output channels only apply to the last block
    if shortcut == "upsample" and (
        stride_mean > 1 or self.cur_in_ch != out_ch * block.expansion
    ):
        shortcut = nn.ConvTranspose2d(
            in_channels=self.cur_in_ch,
            out_channels=out_ch * block.expansion,
            kernel_size=3,
            stride=stride,
            padding=get_padding(3),
            output_padding=get_padding(stride),
            bias=False,
        )
        layers.append(
            block(
                in_ch=self.cur_in_ch,
                out_ch=out_ch,
                mid_ch=self.cur_in_ch // block.expansion,
                stride=stride,
                upsample=shortcut,
            )
        )
    elif shortcut == "downsample" and (
        stride_mean > 1 or self.cur_in_ch != out_ch * block.expansion
    ):
        shortcut = nn.Sequential(
            nn.Conv2d(
                in_channels=self.cur_in_ch,
                out_channels=out_ch * block.expansion,
                kernel_size=1,
                stride=stride,
                bias=False,
            ),
            nn.BatchNorm2d(out_ch * block.expansion),
        )
        layers.append(block(self.cur_in_ch, out_ch, stride, downsample=shortcut))
    else:
        layers.append(block(self.cur_in_ch, out_ch))
    self.cur_in_ch = out_ch * block.expansion
    for _ in range(1, size):
        layers.append(block(self.cur_in_ch, out_ch))
    return nn.Sequential(*layers)
def make_layer(self, block, out_ch, size, stride=1, shortcut="downsample"):
    layers = []
    stride_mean = stride
    if isinstance(stride, tuple):
        stride_mean = sum(stride) / len(stride)
    if shortcut == "upsample" and (
        stride_mean > 1 or self.cur_in_ch != out_ch * block.expansion
    ):
        shortcut = nn.ConvTranspose2d(
            in_channels=self.cur_in_ch,
            out_channels=out_ch * block.expansion,
            kernel_size=3,
            stride=stride,
            padding=get_padding(3),
            output_padding=get_padding(stride),
            bias=False,
        )
        layers.append(
            block(
                in_ch=self.cur_in_ch,
                out_ch=out_ch,
                mid_ch=self.cur_in_ch // block.expansion,
                stride=stride,
                upsample=shortcut,
            )
        )
    elif shortcut == "downsample" and (
        stride_mean > 1 or self.cur_in_ch != out_ch * block.expansion
    ):
        shortcut = nn.Sequential(
            nn.Conv2d(
                in_channels=self.cur_in_ch,
                out_channels=out_ch * block.expansion,
                kernel_size=1,
                stride=stride,
                bias=False,
            ),
            nn.BatchNorm2d(out_ch * block.expansion),
        )
        layers.append(block(self.cur_in_ch, out_ch, stride, downsample=shortcut))
    else:
        layers.append(block(self.cur_in_ch, out_ch))
    self.cur_in_ch = out_ch * block.expansion
    for _ in range(1, size):
        layers.append(block(self.cur_in_ch, out_ch))
    return nn.Sequential(*layers)
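# Worked example of the downsampling path (assumption: BasicBlock.expansion == 1, as in a
# standard ResNet-18). Starting from cur_in_ch == 64, the encoder's
# make_layer(BasicBlock, 128, 2, (2, 2)) roughly builds the equivalent of
#
#     nn.Sequential(
#         BasicBlock(64, 128, (2, 2),
#                    downsample=nn.Sequential(nn.Conv2d(64, 128, 1, stride=(2, 2), bias=False),
#                                             nn.BatchNorm2d(128))),
#         BasicBlock(128, 128),
#     )
#
# i.e. only the first block of a stage changes resolution and channel count; the repeated
# blocks keep both.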
def __init__(self, in_ch, out_ch, stride=1, downsample=None, upsample=None, mid_ch=None):
    super().__init__()
    # stride=2 only applies to the first block of a stage, i.e. when moving into a new stage
    # mid_ch is used for the upsampling case in the decoder
    if mid_ch is None:
        mid_ch = out_ch
    if downsample is not None and upsample is not None:
        raise ValueError("Either downsample or upsample has to be None")
    if upsample is None:
        # Encoder: shortcut is nn.Conv2d() + nn.BatchNorm2d()
        self.shortcut = downsample
        self.conv1 = nn.Conv2d(
            in_ch,
            mid_ch,
            kernel_size=3,
            stride=stride,
            padding=get_padding(3),
            bias=False,
        )
    else:
        # Decoder: shortcut is an nn.ConvTranspose2d() instantiated in make_layer(); no BatchNorm2d
        self.shortcut = upsample
        self.conv1 = nn.ConvTranspose2d(
            in_ch,                               # e.g. 512
            mid_ch,                              # e.g. 512
            kernel_size=3,
            stride=stride,                       # e.g. (2, 2)
            padding=get_padding(3),              # 3 // 2 = 1
            output_padding=get_padding(stride),  # 2 // 2 = 1
            bias=False,
        )
    self.bn1 = nn.BatchNorm2d(mid_ch)
    self.relu1 = nn.ReLU(inplace=True)
    self.conv2 = nn.Conv2d(
        mid_ch, out_ch, kernel_size=3, stride=1, padding=get_padding(3), bias=False
    )
    self.bn2 = nn.BatchNorm2d(out_ch)
    self.relu2 = nn.ReLU(inplace=True)
def __init__(self, in_ch, out_ch, stride=1, downsample=None, upsample=None, mid_ch=None):
    super().__init__()
    # This is for the upsampling case in the decoder
    if mid_ch is None:
        mid_ch = out_ch
    if downsample is not None and upsample is not None:
        raise ValueError("Either downsample or upsample has to be None")
    if upsample is None:
        self.shortcut = downsample
        self.conv1 = nn.Conv2d(
            in_ch,
            mid_ch,
            kernel_size=3,
            stride=stride,
            padding=get_padding(3),
            bias=False,
        )
    else:
        self.shortcut = upsample
        self.conv1 = nn.ConvTranspose2d(
            in_ch,
            mid_ch,
            kernel_size=3,
            stride=stride,
            padding=get_padding(3),
            output_padding=get_padding(stride),
            bias=False,
        )
    self.bn1 = nn.BatchNorm2d(mid_ch)
    self.relu1 = nn.ReLU(inplace=True)
    self.conv2 = nn.Conv2d(
        mid_ch, out_ch, kernel_size=3, stride=1, padding=get_padding(3), bias=False
    )
    self.bn2 = nn.BatchNorm2d(out_ch)
    self.relu2 = nn.ReLU(inplace=True)  # For a better graph
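# As with the Bottleneck, the BasicBlock's forward() is not included in this excerpt; this
# is only a sketch of the conventional two-convolution residual pattern the modules above
# suggest, with the shortcut added before the final ReLU. The actual method may differ.
def forward(self, x):
    identity = x if self.shortcut is None else self.shortcut(x)
    out = self.relu1(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    out += identity  # residual connection
    return self.relu2(out)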
def __init__(self, opts: dict = DefaultDecoderOpts):
    super().__init__()
    self._opts = opts
    # self._layer_output = dict()
    self.cur_in_ch = opts["input_channels"]
    self.block_sizes = get_block_sizes(opts["resnet_size"])
    self.block_type = get_block_type(opts["resnet_size"])  # BasicBlock or Bottleneck
    self.hidden_layer = nn.Conv2d(
        4,
        out_channels=512,
        kernel_size=1,
        stride=(1, 1),
        padding=get_padding(1),
        bias=False,
    )
    # Upsample the ResNet layers with transposed convolutions.
    # block_sizes for ResNet-18 is [2, 2, 2, 2].
    # Option 1 (used in killer whale): [256, 128, 64, 64]
    # self.layer1 = self.make_layer(
    #     self.block_type, 256, self.block_sizes[3], (2, 2), "upsample"
    # )  # 512 -> 256
    # self.layer2 = self.make_layer(
    #     self.block_type, 128, self.block_sizes[2], (2, 2), "upsample"
    # )  # 256 -> 128
    # self.layer3 = self.make_layer(
    #     self.block_type, 64, self.block_sizes[1], (2, 2), "upsample"
    # )  # 128 -> 64
    # Option 2: [512, 256, 128, 64]
    self.layer1 = self.make_layer(
        self.block_type, 512, self.block_sizes[3], (2, 2), "upsample"  # stride = (2,2) --> padding = (0,0)
    )  # 512 -> 512
    self.layer2 = self.make_layer(
        self.block_type, 256, self.block_sizes[2], (2, 2), "upsample"
    )  # 512 -> 256
    self.layer3 = self.make_layer(
        self.block_type, 128, self.block_sizes[1], (2, 2), "upsample"
    )  # 256 -> 128; what to do when max_pool != 1? (see TODO below)
    if DefaultEncoderOpts["max_pool"] == 1:
        self.layer4 = self.make_layer(
            self.block_type, 64, self.block_sizes[0], (2, 2), "upsample"
        )  # 64 -> 64; use stride 2 instead of nn.MaxUnpool2d()
    self.conv_out = nn.ConvTranspose2d(
        in_channels=64 * self.block_type.expansion,
        out_channels=opts["output_channels"],
        kernel_size=opts["conv_kernel_size"],
        padding=get_padding(opts["conv_kernel_size"]),
        output_padding=get_padding(opts["output_stride"]),
        stride=opts["output_stride"],
        bias=False,
    )
    # TODO: also support matching encoder/decoder shapes when the encoder uses no max pooling,
    # by using stride 2 in the last conv_out layer and stride (1, 1) in the fourth residual layer
    # (the current variant is preferred because transposed convolutions with stride 2 in the
    # last layer introduce artifacts).
    if opts["output_activation"].lower() == "sigmoid":
        self.activation_out = nn.Sigmoid()
    elif opts["output_activation"].lower() == "relu":
        self.activation_out = nn.ReLU(inplace=True)
    elif opts["output_activation"].lower() == "tanh":
        self.activation_out = nn.Tanh()
    elif opts["output_activation"].lower() == "none":
        self.activation_out = lambda x: x
    else:
        raise NotImplementedError(
            "Unsupported output activation: {}".format(opts["output_activation"])
        )
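# Illustrative example only: DefaultEncoderOpts / DefaultDecoderOpts are defined elsewhere
# in the repository. The keys below are exactly the ones read by the two __init__ methods;
# the values are plausible placeholders (assumptions, not the repository's actual defaults).
EXAMPLE_ENCODER_OPTS = {
    "input_channels": 1,    # e.g. a single-channel spectrogram
    "conv_kernel_size": 7,
    "resnet_size": 18,
    "max_pool": 1,          # 1: conv stride 2 + max pool, 0: stride 2 in layer1, 2: neither
}
EXAMPLE_DECODER_OPTS = {
    "input_channels": 512,
    "output_channels": 1,
    "conv_kernel_size": 7,
    "resnet_size": 18,
    "output_stride": 2,     # conv_out undoes the encoder's stride-2 input convolution
    "output_activation": "sigmoid",
}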
def make_layer(self, block, out_ch, size, stride=1, shortcut="downsample"):
    """
    block: the block class (BasicBlock or Bottleneck)
    out_ch: number of filters coming out of the block (times block.expansion for Bottleneck)
    size: number of times the block is repeated in this layer
    shortcut: 'downsample' in the encoder, 'upsample' in the decoder
    For ResNet-18 the block is BasicBlock and size = 2 for each layer.
    """
    layers = []
    stride_mean = stride
    if isinstance(stride, tuple):
        stride_mean = sum(stride) / len(stride)
    if shortcut == "upsample" and (
        stride_mean > 1 or self.cur_in_ch != out_ch * block.expansion
    ):
        # The instantiated shortcut is passed into the first block appended below
        shortcut = nn.ConvTranspose2d(
            in_channels=self.cur_in_ch,
            out_channels=out_ch * block.expansion,
            kernel_size=3,
            stride=stride,
            padding=get_padding(3),
            output_padding=get_padding(stride),
            bias=False,
        )
        # Treat BasicBlock and Bottleneck differently to match the encoder's number of filters
        if block == BasicBlock:
            layers.append(
                block(
                    in_ch=self.cur_in_ch,
                    out_ch=out_ch,
                    stride=stride,
                    upsample=shortcut,
                )
            )
        else:
            layers.append(
                block(
                    in_ch=self.cur_in_ch,
                    out_ch=out_ch,
                    mid_ch=self.cur_in_ch // block.expansion,
                    stride=stride,
                    upsample=shortcut,
                )
            )
    elif shortcut == "downsample" and (
        stride_mean > 1 or self.cur_in_ch != out_ch * block.expansion
    ):
        # Checking the stride alone is not enough to decide whether a shortcut is needed:
        # e.g. the first Bottleneck block of ResNet-50 has stride 1, yet the number of
        # channels quadruples at its output.
        # The shortcut/identity is a 1x1 nn.Conv2d followed by BatchNorm.
        shortcut = nn.Sequential(
            nn.Conv2d(
                in_channels=self.cur_in_ch,
                out_channels=out_ch * block.expansion,
                kernel_size=1,
                stride=stride,
                bias=False,
            ),
            nn.BatchNorm2d(out_ch * block.expansion),
        )
        layers.append(block(self.cur_in_ch, out_ch, stride, downsample=shortcut))
    else:
        layers.append(block(self.cur_in_ch, out_ch))
    self.cur_in_ch = out_ch * block.expansion
    # Blocks 2..size of this layer; the first block was handled above
    for _ in range(1, size):
        layers.append(block(self.cur_in_ch, out_ch))
    return nn.Sequential(*layers)
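# Worked example of the upsampling path (assumption: BasicBlock.expansion == 1, as in a
# standard ResNet-18). For the decoder's layer1, with cur_in_ch == 512,
# make_layer(BasicBlock, 512, 2, (2, 2), "upsample") roughly builds the equivalent of
#
#     nn.Sequential(
#         BasicBlock(512, 512, stride=(2, 2),
#                    upsample=nn.ConvTranspose2d(512, 512, 3, stride=(2, 2),
#                                                padding=1, output_padding=1, bias=False)),
#         BasicBlock(512, 512),
#     )
#
# which matches the "512 -> 512" comment in the decoder's __init__: only the first block
# doubles the spatial resolution; the repeated block keeps shape and channels.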