Example #1
    def __init__(self, sa_type, block, layers, kernels, num_classes):
        super(SAN, self).__init__()
        c = 64
        self.conv_in, self.bn_in = conv1x1(3, c), nn.BatchNorm2d(c)
        self.conv0, self.bn0 = conv1x1(c, c), nn.BatchNorm2d(c)
        self.layer0 = make_layer(sa_type, block, c, layers[0], kernels[0])

        c *= 4
        self.conv1, self.bn1 = conv1x1(c // 4, c), nn.BatchNorm2d(c)
        self.layer1 = make_layer(sa_type, block, c, layers[1], kernels[1])

        c *= 2
        self.conv2, self.bn2 = conv1x1(c // 2, c), nn.BatchNorm2d(c)
        self.layer2 = make_layer(sa_type, block, c, layers[2], kernels[2])

        c *= 2
        self.conv3, self.bn3 = conv1x1(c // 2, c), nn.BatchNorm2d(c)
        self.layer3 = make_layer(sa_type, block, c, layers[3], kernels[3])

        c *= 2
        self.conv4, self.bn4 = conv1x1(c // 2, c), nn.BatchNorm2d(c)
        self.layer4 = make_layer(sa_type, block, c, layers[4], kernels[4])

        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(c, num_classes)

        init_weights(self)
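For context, a hedged usage sketch of this constructor (the values are borrowed from the SAN paper's SAN10 configuration and the Bottleneck block from Example #6; the actual factory function is not shown in this example):

# Illustrative only: five-stage SAN10-style configuration, not taken from this snippet.
model = SAN(sa_type=0, block=Bottleneck, layers=(2, 1, 2, 4, 1),
            kernels=[3, 7, 7, 7, 7], num_classes=1000)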
Example #2
def _shortcut(inplanes: int, outplanes: int,
              stride: int) -> Optional[nn.Sequential]:
    shortcut = None
    if stride != 1 or inplanes != outplanes:
        shortcut = nn.Sequential(
            torchres.conv1x1(inplanes, outplanes, stride),
            nn.BatchNorm2d(outplanes),
        )
        # initialize only when a projection shortcut is actually created (shortcut may be None)
        init_weights(shortcut)
    return shortcut
Example #3
    def __init__(self, inplanes: int, outplanes: int) -> None:
        transition = [
            nn.BatchNorm2d(inplanes),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(inplanes, outplanes, kernel_size=1, stride=1, bias=False)
        ]
        init_weights(transition[0])
        init_weights(transition[3])
        super().__init__(*transition)
Example #4
def _resnetStem():
    # stem for preactivation version of resnet, no bn->relu
    outplanes = 64
    conv1 = nn.Conv2d(3,
                      outplanes,
                      kernel_size=7,
                      stride=2,
                      padding=3,
                      bias=False)
    maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    init_weights(conv1)
    return nn.Sequential(conv1, maxpool)
Example #5
def _make_backbone(layer_nums: Iterable[int], layers: list[int],
                   layer_types: list[str], widths: list[int],
                   sa_type: Optional[int],
                   added_nl_blocks: Optional[list[int]], nl_mode: str,
                   in_widths: list[int], last_layer_num: int) -> nn.Sequential:
    backbone = []

    for i, stage_type, blocks, added_nl, _inplanes, _outplanes in zip(
            layer_nums, layer_types, layers, added_nl_blocks, in_widths,
            widths):
        layer = []
        if stage_type == 'san':
            layer.append(san.TransitionLayer(_inplanes, _outplanes))
            kernel_size = 3 if i == 0 else 7
            if added_nl > 0:
                layer.append(
                    SanNLLayer(blocks,
                               _outplanes,
                               added_nl,
                               sa_type,
                               nl_mode,
                               kernel_size=kernel_size))
            else:
                layer.append(
                    SANLayer(sa_type,
                             _outplanes,
                             blocks,
                             kernel_size=kernel_size))
        elif stage_type == 'res':
            stride = 1 if i == 0 else 2  # the first resnet-type stage does not reduce resolution
            if added_nl > 0:
                layer.append(
                    ResNetNLLayer(blocks,
                                  _inplanes,
                                  _outplanes,
                                  added_nl,
                                  nl_mode,
                                  stride=stride))
            else:
                layer.append(
                    ResNetLayer(blocks, _inplanes, _outplanes, stride=stride))
        elif stage_type == 'nl':
            layer.append(san.TransitionLayer(_inplanes, _outplanes))
            layer.append(nl.NLLayer(blocks, _outplanes, mode=nl_mode))
        else:
            raise ValueError('invalid stage_type argument: "' + stage_type + '"')

        if i == last_layer_num:  # bn->relu at end of backbone
            layer.extend([nn.BatchNorm2d(widths[-1]), nn.ReLU(inplace=True)])
            init_weights(layer[-2])
        backbone.append(nn.Sequential(*layer))
    return nn.Sequential(*backbone)
Example #6
    def __init__(self,
                 sa_type,
                 in_planes,
                 rel_planes,
                 mid_planes,
                 out_planes,
                 share_planes=8,
                 kernel_size=7,
                 stride=1):
        super(Bottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.sam = SAM(sa_type, in_planes, rel_planes, mid_planes,
                       share_planes, kernel_size, stride)
        self.bn2 = nn.BatchNorm2d(mid_planes)
        self.conv = nn.Conv2d(mid_planes, out_planes, kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.stride = stride
        init_weights(self)
Example #7
    def __init__(
            self,
            inplanes: int,
            outplanes: int,
            stride: int = 1,
            downsample: Optional[nn.Module] = None,
            groups: int = 1,
            base_width: int = 64,
            dilation: int = 1,
            norm_layer: Optional[Callable[..., nn.Module]] = None) -> None:
        """Initializes a resnet bottleneck block with full preactivation. Based on torchvision postactivation implementation.

        This implementation places the stride for downsampling at 3x3 convolution(self.c
        while original implementation places the stride at the first 1x1 convolution(sel
        according to "Deep residual learning for image recognition"https://arxiv.org/abs
        This is based off of ResNet V1.5 (postactivation) and improves accuracy accordin
        https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.

        Args:
            inplanes (int): input channels.
            outplanes (int): output channels. Must be divisible by self.expansion factor (4).
            stride (int): stride for middle convolution. This dictates spatial resolution downsampling.
            downsample (Optional[nn.Module]): module responsible for downsampling. Out dimensions must match with block output.
            groups (int): groups for middle convolution
            dilation (int): dilation for middle convolution
            norm_layer (Optional[Callable[..., nn.Module]]): batch normalization layer to be used. Defaults to torch.nn.BatchNorm2d
        """
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        assert outplanes % self.expansion == 0
        planes = outplanes // self.expansion
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.bn1 = norm_layer(inplanes)
        self.conv1 = conv1x1(inplanes, width)
        self.bn2 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn3 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        init_weights(self)
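A minimal, hypothetical usage sketch tying this block to the `_shortcut` helper from Example #2 (the channel and stride values are illustrative, not taken from the source):

# Assumed values: project 256 -> 512 channels while halving spatial resolution.
downsample = _shortcut(inplanes=256, outplanes=512, stride=2)
block = Bottleneck(inplanes=256, outplanes=512, stride=2, downsample=downsample)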
Example #8
    def __init__(self,
                 in_channels: int,
                 inter_channels: Optional[int] = None,
                 g_channels: Optional[int] = None,
                 mode: str = 'dot',
                 sub_sample: bool = True) -> None:
        """Implementation of Non-Local Block. Includes subsampling trick.
        Unlike original non-local block, we use a bottleneck design similar to that of SANet, and allow separate channel reduction for g function,
        instead of using the same for theta, phi and g. Also, block follows a preactivation scheme, rather than a post activation one.
        In total, one batchnorm layer and two ReLU activations are added (over the version with batchnorm), which were not in original design.
        These changes are intended to make design and parameter count closer to SAN block for comparison.
        1x1 convolutions are initialized with kaiming normal.

        args:
            in_channels: input channel size
            inter_channels: channel size inside the block for theta and phi. Reduced to 1/8 of the input channel size if not specified
            g_channel: channel size inside the block for g function. Reduced to 1/4 of the input channel size if not specified
            mode: currently supports only dot product
            sub_sample: whether to use sub sampling trick described in paper.
        """
        super(NLBlock, self).__init__()

        if mode != 'dot':
            raise NotImplementedError()
        self.mode = mode

        self.sub_sample = sub_sample

        self.in_channels = in_channels
        self.inter_channels = inter_channels
        self.g_channels = g_channels

        # by default, channel size inside block is reduced by factor of 8 for theta and phi
        if self.inter_channels is None:
            self.inter_channels = in_channels // 8
            if self.inter_channels == 0:
                self.inter_channels = 1
        # by default, channel size inside block is reduced by factor of 4 for g
        if self.g_channels is None:
            self.g_channels = in_channels // 4
            if self.g_channels == 0:
                self.g_channels = 1

        self.bn = nn.BatchNorm2d(self.in_channels)
        self.relu = nn.ReLU(inplace=True)

        self.g = nn.Conv2d(in_channels=self.in_channels,
                           out_channels=self.g_channels,
                           kernel_size=1)
        self.theta = nn.Conv2d(in_channels=self.in_channels,
                               out_channels=self.inter_channels,
                               kernel_size=1)
        self.phi = nn.Conv2d(in_channels=self.in_channels,
                             out_channels=self.inter_channels,
                             kernel_size=1)

        self.W_z = nn.Sequential(
            nn.BatchNorm2d(self.g_channels), nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=self.g_channels,
                      out_channels=self.in_channels,
                      kernel_size=1))

        max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
        if self.sub_sample:
            self.g = nn.Sequential(self.g, max_pool_layer)
            self.phi = nn.Sequential(self.phi, max_pool_layer)

        # Since we're doing preactivation, initializing the last batchnorm weights to zero can no longer
        # make the initial state of the non-local block an identity mapping, as in section 4.1 of the paper.
        # We opt for initializing the whole block the same way we do the resnet and san blocks.
        init_weights(self)
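With the defaults above, a block created as in the sketch below (an illustrative example, not from the source) runs theta and phi at in_channels // 8 channels and g at in_channels // 4 channels internally:

# 256 input channels -> theta/phi at 32 channels, g at 64 channels (assumed example values).
nl_block = NLBlock(in_channels=256, mode='dot', sub_sample=True)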
Example #9
def _sanStem():
    outplanes = 64
    conv_in = nn.Conv2d(3, outplanes, kernel_size=1, bias=False)
    init_weights(conv_in)
    return conv_in
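None of the examples show `init_weights` itself. Going only by the note in Example #8 that 1x1 convolutions are initialized with Kaiming normal, a plausible minimal sketch (an assumption, not the repository's actual helper) is:

import torch.nn as nn

def init_weights(module: nn.Module) -> None:
    # ASSUMED sketch: Kaiming-normal init for convolutions (per the note in Example #8)
    # and constant init for batchnorm affine parameters; the real helper may differ.
    for m in module.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            if m.bias is not None:
                nn.init.constant_(m.bias, 0.0)
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1.0)
            nn.init.constant_(m.bias, 0.0)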