Example #1
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        """
        SOD Base Head.
        """
        super().__init__()
        # fmt: off
        self.instance_in_features = cfg.MODEL.SOD.INSTANCE_IN_FEATURES
        self.num_in_channels = cfg.MODEL.SOD.INSTANCE_IN_CHANNELS  # = fpn.
        self.num_channels = cfg.MODEL.SOD.BASE_CHANNELS
        self.num_conv = cfg.MODEL.SOD.NUM_BASE_CONVS
        self.norm = cfg.MODEL.SOD.NORM
        self.with_coord = cfg.MODEL.SOD.WITH_COORD
        self.num_levels = len(input_shape)
        assert self.num_levels == len(self.instance_in_features), \
            "Input shape should match the features."
        # fmt: on

        head_configs = {
            "base": (self.num_conv, self.with_coord, False),  # leave for DCN.
        }

        in_channels = [s.channels for s in input_shape]
        assert len(set(in_channels)) == 1, \
            "Each level must have the same channel count!"
        in_channels = in_channels[0]
        assert in_channels == self.num_in_channels, \
            "In channels should equal the tower in channels!"

        for head in head_configs:
            tower = []
            num_convs, use_coord, use_deformable = head_configs[head]
            for i in range(num_convs):
                # with coord or not.
                if i == 0:
                    if use_coord:
                        chn = self.num_in_channels + 2
                    else:
                        chn = self.num_in_channels
                else:
                    chn = self.num_channels
                # use deformable conv or not.
                if use_deformable and i == num_convs - 1:
                    raise NotImplementedError
                else:
                    conv_func = nn.Conv2d
                tower.append(
                    conv_func(chn,
                              self.num_channels,
                              kernel_size=3,
                              stride=1,
                              padding=1,
                              bias=self.norm is None))
                if self.norm == "GN":
                    tower.append(nn.GroupNorm(32, self.num_channels))
                tower.append(nn.ReLU(inplace=True))
            self.add_module('{}_tower'.format(head), nn.Sequential(*tower))

        # init.
        for l in self.base_tower:
            if isinstance(l, nn.Conv2d):
                nn.init.normal_(l.weight, std=0.01)
                if l.bias is not None:
                    nn.init.constant_(l.bias, 0)
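
The tower-building pattern above — a 3x3 convolution whose bias is dropped when a norm layer follows, then GroupNorm with 32 groups and a ReLU — recurs throughout these examples. A minimal self-contained sketch of the same block (the hyperparameters here are illustrative, not taken from the config):

import torch
from torch import nn

def make_tower(in_channels, channels, num_convs, norm="GN"):
    # Conv3x3 (+ optional GroupNorm) + ReLU, repeated num_convs times.
    layers, chn = [], in_channels
    for _ in range(num_convs):
        layers.append(nn.Conv2d(chn, channels, kernel_size=3, stride=1,
                                padding=1, bias=norm is None))
        if norm == "GN":
            layers.append(nn.GroupNorm(32, channels))  # 32 groups, as above
        layers.append(nn.ReLU(inplace=True))
        chn = channels
    return nn.Sequential(*layers)

tower = make_tower(256, 256, num_convs=4)
out = tower(torch.randn(2, 256, 64, 64))  # shape preserved: (2, 256, 64, 64)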
Example #2
 def gn_helper(planes):
     return nn.GroupNorm(group_norm, planes)
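
For reference, nn.GroupNorm takes (num_groups, num_channels), and num_channels must be divisible by num_groups; a quick standalone check:

import torch
from torch import nn

gn = nn.GroupNorm(8, 64)            # 64 channels split into 8 groups of 8
y = gn(torch.randn(4, 64, 16, 16))  # shape unchanged: (4, 64, 16, 16)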
Example #3
    def __init__(self,
                 in_dim,
                 cout,
                 nf=64,
                 activation=nn.Tanh,
                 requires_grad=True):
        super(FaceModelNet, self).__init__()
        prenet = [nn.Linear(in_dim, nf), nn.ReLU(inplace=True)]
        self.prenet = nn.Sequential(*prenet)

        network = [
            nn.ConvTranspose2d(nf,
                               nf * 8,
                               kernel_size=4,
                               stride=1,
                               padding=0,
                               bias=False),  # 1x1 -> 4x4
            nn.ReLU(inplace=True),
            nn.Conv2d(nf * 8,
                      nf * 8,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=False),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(nf * 8,
                               nf * 4,
                               kernel_size=4,
                               stride=2,
                               padding=1,
                               bias=False),  # 4x4 -> 8x8
            nn.GroupNorm(16 * 4, nf * 4),
            nn.ReLU(inplace=True),
            nn.Conv2d(nf * 4,
                      nf * 4,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=False),
            nn.GroupNorm(16 * 4, nf * 4),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(nf * 4,
                               nf * 2,
                               kernel_size=4,
                               stride=2,
                               padding=1,
                               bias=False),  # 8x8 -> 16x16
            nn.GroupNorm(16 * 2, nf * 2),
            nn.ReLU(inplace=True),
            nn.Conv2d(nf * 2,
                      nf * 2,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=False),
            nn.GroupNorm(16 * 2, nf * 2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(nf * 2,
                               nf,
                               kernel_size=4,
                               stride=2,
                               padding=1,
                               bias=False),  # 16x16 -> 32x32
            nn.GroupNorm(16, nf),
            nn.ReLU(inplace=True),
            nn.Conv2d(nf, nf, kernel_size=3, stride=1, padding=1, bias=False),
            nn.GroupNorm(16, nf),
            nn.ReLU(inplace=True),
            nn.Upsample(scale_factor=2, mode='nearest'),  # 32x32 -> 64x64
            nn.Conv2d(nf, nf, kernel_size=3, stride=1, padding=1, bias=False),
            nn.GroupNorm(16, nf),
            nn.ReLU(inplace=True),
            nn.Conv2d(nf, nf, kernel_size=5, stride=1, padding=2, bias=False),
            nn.GroupNorm(16, nf),
            nn.ReLU(inplace=True),
            nn.Conv2d(nf, cout, kernel_size=5, stride=1, padding=2, bias=False)
        ]
        if activation is not None:
            network += [activation()]
        self.network = nn.Sequential(*network)

        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False
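
forward() is not part of the snippet; presumably the prenet output is reshaped to a 1x1 feature map before entering the deconvolutional stack. A usage sketch under that assumption:

import torch

model = FaceModelNet(in_dim=128, cout=3)
z = torch.randn(4, 128)
x = model.prenet(z).view(-1, 64, 1, 1)  # assumed reshape; nf defaults to 64
img = model.network(x)                  # (4, 3, 64, 64), through the final Tanh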
Example #4
    def __init__(self, num_classes, num_queries, num_feature_levels):
        """Initializes the model.

        Args:
            num_classes (int): number of object classes
            num_queries (int): number of object queries
            num_feature_levels (int): number of feature levels
        """
        super().__init__()

        # create ResNet50 backbone
        position_embedding = PositionEmbeddingSine(HIDDEN_DIM // 2,
                                                   normalize=True)
        backbone = Joniner(Backbone(), position_embedding)

        # create deformable transformer
        transformer = DeformableTransformer(HIDDEN_DIM, NHEADS, ENC_LAYERS,
                                            DEC_LAYERS, DIM_FEEDFORWARD,
                                            DROPOUT, True, NUM_FEATURE_LEVELS,
                                            DEC_N_POINTS, ENC_N_POINTS)

        self.num_queries = num_queries
        self.transformer = transformer
        hidden_dim = transformer.d_model
        self.class_embed = nn.Linear(hidden_dim, num_classes)
        self.bbox_embed = MLP(hidden_dim, hidden_dim, 4, 3)
        # Number of backbone feature levels used; if more than the backbone
        # provides, the extra levels are built below with strided convolutions.
        self.num_feature_levels = num_feature_levels
        self.query_embed = nn.Embedding(num_queries, hidden_dim * 2)

        num_backbone_outs = len(backbone.strides)
        input_proj_list = []
        for i in range(num_backbone_outs):
            in_channels = backbone.num_channels[i]
            input_proj_list.append(
                nn.Sequential(
                    nn.Conv2d(in_channels, hidden_dim, kernel_size=1),
                    nn.GroupNorm(32, hidden_dim),
                ))  # project each stage's output to the same channel width
        # in_channels starts as the channel count of the backbone's last output layer.
        for _ in range(num_feature_levels - num_backbone_outs):
            input_proj_list.append(
                nn.Sequential(
                    nn.Conv2d(in_channels,
                              hidden_dim,
                              kernel_size=3,
                              stride=2,
                              padding=1),
                    nn.GroupNorm(32, hidden_dim),
                ))
            in_channels = hidden_dim  # each further pyramid level is built from the previous one with a single conv
        self.input_proj = nn.ModuleList(input_proj_list)
        self.backbone = backbone

        prior_prob = 0.01
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        self.class_embed.bias.data = torch.ones(num_classes) * bias_value
        nn.init.constant_(self.bbox_embed.layers[-1].weight.data, 0)
        nn.init.constant_(self.bbox_embed.layers[-1].bias.data, 0)
        for proj in self.input_proj:
            nn.init.xavier_uniform_(proj[0].weight, gain=1)
            nn.init.constant_(proj[0].bias, 0)

        num_pred = transformer.decoder.num_layers
        nn.init.constant_(self.bbox_embed.layers[-1].bias.data[2:], -2.0)
        self.class_embed = nn.ModuleList(
            [self.class_embed for _ in range(num_pred)])
        self.bbox_embed = nn.ModuleList(
            [self.bbox_embed for _ in range(num_pred)])
        self.transformer.decoder.bbox_embed = None
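
The two loops above first project each backbone stage to hidden_dim with a 1x1 conv plus GroupNorm, then synthesize any extra pyramid levels with strided 3x3 convs that chain off the previous level. The same logic in isolation (channel counts are illustrative, e.g. ResNet50 C3-C5):

import torch
from torch import nn

backbone_channels = [512, 1024, 2048]
hidden_dim, num_feature_levels = 256, 4

projs = [nn.Sequential(nn.Conv2d(c, hidden_dim, kernel_size=1),
                       nn.GroupNorm(32, hidden_dim))
         for c in backbone_channels]
in_channels = backbone_channels[-1]
for _ in range(num_feature_levels - len(backbone_channels)):
    projs.append(nn.Sequential(
        nn.Conv2d(in_channels, hidden_dim, kernel_size=3, stride=2, padding=1),
        nn.GroupNorm(32, hidden_dim)))
    in_channels = hidden_dim          # further extra levels chain off hidden_dim
input_proj = nn.ModuleList(projs)     # 4 levels: 3 from the backbone + 1 extra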
Example #5
    def __init__(self,
                 in_nfeat=3,
                 num_stack=4,
                 norm_type='group',
                 hg_down='ave_pool',
                 num_hourglass=2,
                 hourglass_dim=256):
        super(HGFilter, self).__init__()

        self.num_modules = num_stack
        self.norm_type = norm_type
        self.hg_down = hg_down
        self.num_hourglass = num_hourglass
        self.hourglass_dim = hourglass_dim

        # Base part
        self.conv1 = nn.Conv2d(in_nfeat,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3)

        if self.norm_type == 'batch':
            self.bn1 = nn.BatchNorm2d(64)
        elif self.norm_type == 'group':
            self.bn1 = nn.GroupNorm(32, 64)

        if self.hg_down == 'conv64':
            self.conv2 = ConvBlock(64, 64, self.norm_type)
            self.down_conv2 = nn.Conv2d(64,
                                        128,
                                        kernel_size=3,
                                        stride=2,
                                        padding=1)
        elif self.hg_down == 'conv128':
            self.conv2 = ConvBlock(64, 128, self.norm_type)
            self.down_conv2 = nn.Conv2d(128,
                                        128,
                                        kernel_size=3,
                                        stride=2,
                                        padding=1)
        elif self.hg_down == 'ave_pool':
            self.conv2 = ConvBlock(64, 128, self.norm_type)
        else:
            raise NameError('Unknown Fan Filter setting!')

        self.conv3 = ConvBlock(128, 128, self.norm_type)
        self.conv4 = ConvBlock(128, 256, self.norm_type)

        # Stacking part
        for hg_module in range(self.num_modules):
            self.add_module(
                'm' + str(hg_module),
                HourGlass(1, self.num_hourglass, 256, self.norm_type))

            self.add_module('top_m_' + str(hg_module),
                            ConvBlock(256, 256, self.norm_type))
            self.add_module(
                'conv_last' + str(hg_module),
                nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0))
            if self.norm_type == 'batch':
                self.add_module('bn_end' + str(hg_module), nn.BatchNorm2d(256))
            elif self.norm_type == 'group':
                self.add_module('bn_end' + str(hg_module),
                                nn.GroupNorm(32, 256))

            self.add_module(
                'l' + str(hg_module),
                nn.Conv2d(256,
                          self.hourglass_dim,
                          kernel_size=1,
                          stride=1,
                          padding=0))

            if hg_module < self.num_modules - 1:
                self.add_module(
                    'bl' + str(hg_module),
                    nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0))
                self.add_module(
                    'al' + str(hg_module),
                    nn.Conv2d(self.hourglass_dim,
                              256,
                              kernel_size=1,
                              stride=1,
                              padding=0))
Example #6
def Norm(planes):
    return nn.GroupNorm(32, planes)
Example #7
def custom_cnn(input_channels, specification, input_name='input',
               output_name='output', default_nonlin='relu', batch_norm=False):
    """
    Creates a CNN for the given number of input channels, with an architecture
    defined as a comma-separated string of layer definitions. Supported layer
    definitions are (with variables in <>, and optional parts in []):
    - pad1d:<method>@<size>
    - pad2d:<method>@<size>
    - crop1d:<size>
    - crop2d:<size>
    - conv1d:<channels>@<size>[s<stride>][p<pad>][d<dilation>][g<groups>]
    - conv2d:<channels>@<size0>x<size1>[s<stride>][p<pad>][d<dilation>][g<groups>]
    - pool1d:<method>@<size>[s<stride>][p<pad>][d<dilation>]
    - pool2d:<method>@<size0>x<size1>[s<stride>][p<pad>][d<dilation>]
    - globalpool1d:<method>
    - globalpool2d:<method>
    - globallmepool:<alpha>[t<trainable>][c<channelwise>][e<exponentiated>]
    - bn1d
    - bn2d
    - groupnorm:<groups>
    - dropout:<drop_probability>
    - relu
    - lrelu
    - sigm
    - swish
    - mish
    - bipol:<nonlin>
    - shift:<amount>
    - bypass (does nothing)
    - squeeze:<dim>
    - cat[layers1|layers2|...] (apply stacks to same input, then concat)
    - add[layers1|layers2|...] (apply stacks to same input, then add)
    - shake[layers1|layers2|...] (apply stacks to same input, then shake-shake)
    If there is a batch normalization one or two layers after a convolution,
    the convolution will not have a bias term.
    """
    def read_layers(s):
        """
        Yields all layer definitions (as separated by , | [ or ]) as tuples
        of the definition string and the following delimiter.
        """
        pos = 0
        for match in re.finditer(r'[,|[\]]', s):
            yield s[pos:match.start()], s[match.start():match.end()]
            pos = match.end()
        yield s[pos:], None


    def read_size(s, t=int, expect_remainder=True):
        """
        Read and parse a size (e.g., 1, 1x1, 1x1x1) at the beginning of `s`,
        with elements of type `t`. If `expect_remainder`, returns the
        remainder, otherwise tries to parse the complete `s` as a size.
        """
        if expect_remainder:
            # yes, we could use a precompiled regular expression...
            p = next((i for i, c in enumerate(s) if c not in '0123456789x'),
                        len(s))
            remainder = s[p:]
            s = s[:p]
        size = tuple(map(t, s.split('x')))
        if len(size) == 1:
            size = size[0]
        if expect_remainder:
            return size, remainder
        else:
            return size


    def size_string(size):
        """
        Convert a size integer or tuple back into its string form.
        """
        try:
            return 'x'.join(map(str, size))
        except TypeError:
            return str(size)


    def read_extra_sizes(s, prefixes, t=int):
        """
        Read and parse any extra size definitions prefixed by any of the
        allowed prefixes, and returns them as a dictionary. If `prefixes` is
        a dictionary, the prefixes (keys) will be translated to the expanded
        names (values) in the returned dictionary. Values will be converted
        from strings to `t`.
        """
        if not isinstance(prefixes, dict):
            prefixes = {prefix: prefix for prefix in prefixes}
        result = {}
        while s:
            for prefix, return_key in prefixes.items():
                if s.startswith(prefix):
                    size, s = read_size(s[len(prefix):], t)
                    result[return_key] = size
                    break
            else:
                raise ValueError("unrecognized part in layer definition: "
                                 "%r" % s)
        return result


    stack = []
    layers = []
    if input_name:
        layers = [PickDictKey(input_name)]
    # track receptive field for the full network
    receptive_field = ReceptiveField()
    # split specification string into definition, delimiter tuples
    specification = list(read_layers(specification))
    # iterate over it (in a way that allows us to expand macro definitions)
    while specification:
        layer_def, delim = specification.pop(0)
        layer_def = layer_def.split(':')
        kind = layer_def[0]
        if kind in ('pad1d', 'pad2d'):
            method, size = layer_def[1].split('@')
            size = read_size(size, expect_remainder=False)
            cls = {'reflectpad1d': nn.ReflectionPad1d,
                   'reflectpad2d': nn.ReflectionPad2d}[method + kind]
            layers.append(cls(size))
            receptive_field *= ReceptiveField(padding=size)
        elif kind in ('crop1d', 'crop2d'):
            size = int(layer_def[1])
            dimensionality = int(kind[-2])
            layers.append(Crop(dimensionality, size))
            receptive_field *= ReceptiveField(padding=-size)
        elif kind in ('conv1d', 'conv2d'):
            channels, remainder = layer_def[1].split('@')
            channels = int(channels)
            size, remainder = read_size(remainder)
            params = dict(stride=1, padding=0, dilation=1, groups=1)
            params.update(read_extra_sizes(
                    remainder, dict(s='stride', p='padding', d='dilation',
                                    g='groups')))
            cls = {'conv1d': nn.Conv1d, 'conv2d': nn.Conv2d}[kind]
            layers.append(cls(input_channels, channels, size, **params))
            input_channels = channels
            # effective kernel size:
            size = (np.array(size) - 1) * params['dilation'] + 1
            receptive_field *= ReceptiveField(size, params['stride'],
                                              params['padding'])
        elif kind in ('pool1d', 'pool2d'):
            method, size = layer_def[1].split('@')
            size, remainder = read_size(size)
            params = dict(stride=None, padding=0, dilation=1)
            params.update(read_extra_sizes(
                    remainder, dict(s='stride', p='padding', d='dilation')))
            cls = {'maxpool1d': nn.MaxPool1d, 'meanpool1d': nn.AvgPool1d,
                   'maxpool2d': nn.MaxPool2d, 'meanpool2d': nn.AvgPool2d}[method + kind]
            layers.append(cls(size, **params))
            # effective kernel size:
            size = (np.array(size) - 1) * params['dilation'] + 1
            if params['stride'] is None:
                params['stride'] = size
            receptive_field *= ReceptiveField(size, params['stride'],
                                              params['padding'])
        elif kind in ('globalpool1d', 'globalpool2d'):
            method = layer_def[1]
            cls = {'maxglobalpool1d': nn.AdaptiveMaxPool1d,
                   'meanglobalpool1d': nn.AdaptiveAvgPool1d,
                   'maxglobalpool2d': nn.AdaptiveMaxPool2d,
                   'meanglobalpool2d': nn.AdaptiveAvgPool2d}[method + kind]
            layers.append(cls(output_size=1))
            # we do not adjust the receptive field; it spans the whole input
        elif kind == 'globallmepool':
            alpha, remainder = read_size(layer_def[1], float)
            params = read_extra_sizes(
                remainder, dict(t='trainable', c='per_channel', e='exp'),
                t=lambda s: bool(int(s)))
            layers.append(SpatialLogMeanExp(alpha, in_channels=input_channels,
                                            keepdim=True, **params))
            # we do not adjust the receptive field; it spans the whole input
        elif kind == 'bn1d':
            if len(layers) >= 1 and hasattr(layers[-1], 'bias'):
                layers[-1].register_parameter('bias', None)
            elif len(layers) >= 2 and hasattr(layers[-2], 'bias'):
                layers[-2].register_parameter('bias', None)
            layers.append(nn.BatchNorm1d(input_channels))
        elif kind == 'bn2d':
            if len(layers) >= 1 and hasattr(layers[-1], 'bias'):
                layers[-1].register_parameter('bias', None)
            elif len(layers) >= 2 and hasattr(layers[-2], 'bias'):
                layers[-2].register_parameter('bias', None)
            layers.append(nn.BatchNorm2d(input_channels))
        elif kind == 'groupnorm':
            groups = int(layer_def[1])
            layers.append(nn.GroupNorm(groups, input_channels))
        elif kind == 'dropout':
            p = float(layer_def[1])
            layers.append(nn.Dropout(p))
        elif kind == 'squeeze':
            dim = int(layer_def[1])
            layers.append(Squeeze(dim))
        elif kind == 'shift':
            amount = float(layer_def[1])
            layers.append(Shift(amount))
        elif kind == 'bypass':
            layers.append(nn.Identity())
        elif kind == 'cat':
            stack.append((layers, input_channels, receptive_field))
            stack.append((Cat(), input_channels, receptive_field))
            layers = []
            receptive_field = ReceptiveField()
        elif kind == 'add':
            stack.append((layers, input_channels, receptive_field))
            stack.append((Add(), input_channels, receptive_field))
            layers = []
            receptive_field = ReceptiveField()
        elif kind == 'mul':
            stack.append((layers, input_channels, receptive_field))
            stack.append((Mul(), input_channels, receptive_field))
            layers = []
            receptive_field = ReceptiveField()
        elif kind == 'shake':
            stack.append((layers, input_channels, receptive_field))
            stack.append((ShakeShake(), input_channels, receptive_field))
            layers = []
            receptive_field = ReceptiveField()
        elif kind == '':
            pass
        elif kind == 'mbconv2d':
            # mobile inverted bottleneck convolution layer from MobileNetV2
            channels, remainder = layer_def[1].split('@')
            channels = int(channels)
            size, remainder = read_size(remainder)
            params = dict(stride=1, dilation=1, groups=1, expansion=1,
                          size=size, channels=channels)
            params.update(read_extra_sizes(
                    remainder, dict(s="stride", d="dilation", g="groups",
                                    e="expansion")))
            hidden_channels = int(input_channels * params['expansion'])
            # define layers
            macro = []
            # 1x1 channel expansion
            if hidden_channels != input_channels:
                macro.append('conv2d:%d@1x1g%d' %
                             (hidden_channels, params['groups']))
                if batch_norm:
                    macro.append('bn2d')
                macro.append(default_nonlin)
            # channelwise convolution
            macro.append('conv2d:%d@%ss%sd%sg%d' %
                         (hidden_channels, size_string(size),
                          size_string(params['stride']),
                          size_string(params['dilation']),
                          hidden_channels))
            if batch_norm:
                macro.append('bn2d')
            macro.append(default_nonlin)
            # linear projection
            macro.append('conv2d:%d@1x1g%d' % (channels, params['groups']))
            # residual shortcut, if applicable
            macro = ','.join(macro)
            if params['stride'] == 1 and channels == input_channels:
                crop = ((np.array(size) - 1) * params['dilation'] + 1) // 2
                macro = 'add[%s|%s]' % ('crop2d:%d' % crop[0], macro)
            # push to beginning of remaining layer specifications
            specification[:0] = read_layers(macro)
        elif kind == 'bipol':
            layers.append(nonlinearity('bipol:' + layer_def[1]))
        else:
            try:
                layers.append(nonlinearity(kind))
            except KeyError:
                raise ValueError('Unknown layer type "%s"' % kind)
        if delim is not None and delim in '|]':
            if isinstance(layers, list):
                layers = nn.Sequential(*layers) if len(layers) > 1 else layers[0]
            layers.receptive_field = receptive_field
            layers.out_channels = input_channels
            # append layers to Cat() or Add()
            stack[-1][0].append(layers)
            if delim == '|':
                # reset input_channels to match input of Cat() or Add()
                input_channels = stack[-1][1]
                # we expect another set of layers
                layers = []
                receptive_field = ReceptiveField()
            elif delim == ']':
                # take the Cat() or Add() from the stack
                layers, _, receptive_field = stack.pop()
                # append it to what we were building before
                stack[-1][0].append(layers)
                # and continue there
                if isinstance(layers, Cat):
                    input_channels = sum(path.out_channels for path in layers)
                receptive_field *= sum(path.receptive_field for path in layers)
                layers, _, _ = stack.pop()
    if stack:
        raise ValueError('There seems to be a missing "]" bracket.')
    if output_name:
        layers.append(PutDictKey(output_name))
    if isinstance(layers, list):
        layers = nn.Sequential(*layers)
    layers.receptive_field = receptive_field
    layers.out_channels = input_channels
    return layers
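
As a concrete illustration of the specification syntax, the string 'conv2d:64@3x3p1,groupnorm:8,relu' should expand to roughly the following plain-PyTorch stack, assuming 3 input channels (a sketch; the real function also wraps input/output dict keys and tracks the receptive field):

import torch
from torch import nn

equivalent = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=(3, 3), stride=1, padding=1),
    nn.GroupNorm(8, 64),
    nn.ReLU(),
)
y = equivalent(torch.randn(1, 3, 32, 32))  # (1, 64, 32, 32)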
Example #8
    def __init__(self, config):
        super(Segtran2d, self).__init__(config)
        self.config = config
        self.device = config.device
        self.trans_in_dim = config.trans_in_dim
        self.trans_out_dim = config.trans_out_dim
        self.num_translayers = config.num_translayers
        self.bb_feat_upsize = config.bb_feat_upsize
        self.G = config.G
        self.use_global_bias = config.use_global_bias
        if not self.use_global_bias:
            self.voxel_fusion = SegtranFusionEncoder(config, 'Fusion')
            self.vfeat_bias = None
            self.vfeat_bias_norm_layer = nn.Identity()
        else:
            self.vfeat_bias = Parameter(torch.randn(1, 1, self.trans_out_dim))
            self.vfeat_bias_norm_layer = nn.LayerNorm(self.trans_out_dim,
                                                      elementwise_affine=True)

        self.backbone_type = config.backbone_type
        self.use_pretrained = config.use_pretrained
        self.pos_embed_every_layer = config.pos_embed_every_layer
        if self.backbone_type.startswith('resnet'):
            self.backbone = resnet.__dict__[self.backbone_type](
                pretrained=self.use_pretrained,
                do_pool1=not self.bb_feat_upsize)
            print("%s created" % self.backbone_type)
        elif self.backbone_type.startswith('resibn'):
            mat = re.search(r"resibn(\d+)", self.backbone_type)
            backbone_type = 'resnet{}_ibn_a'.format(mat.group(1))
            self.backbone = resnet_ibn.__dict__[backbone_type](
                pretrained=self.use_pretrained,
                do_pool1=not self.bb_feat_upsize)
            print("%s created" % backbone_type)
        elif self.backbone_type.startswith('eff'):
            backbone_type = self.backbone_type.replace("eff", "efficientnet")
            stem_stride = 1 if self.bb_feat_upsize else 2
            advprop = True
            if self.use_pretrained:
                self.backbone = EfficientNet.from_pretrained(
                    backbone_type,
                    advprop=advprop,
                    ignore_missing_keys=True,
                    stem_stride=stem_stride)
            else:
                self.backbone = EfficientNet.from_name(backbone_type,
                                                       stem_stride=stem_stride)
            print("{} created (stem_stride={}, advprop={})".format(
                backbone_type, stem_stride, advprop))

        self.in_fpn_use_bn = config.in_fpn_use_bn
        self.in_fpn_layers = config.in_fpn_layers
        self.in_fpn_scheme = config.in_fpn_scheme

        # FPN output resolution is determined by the smallest number (lowest layer).
        pool_stride = 2**np.min(self.in_fpn_layers)
        if not self.bb_feat_upsize:
            pool_stride *= 2
        self.mask_pool = nn.AvgPool2d((pool_stride, pool_stride))

        self.bb_feat_dims = config.bb_feat_dims
        self.in_fpn23_conv = nn.Conv2d(self.bb_feat_dims[2],
                                       self.bb_feat_dims[3], 1)
        self.in_fpn34_conv = nn.Conv2d(self.bb_feat_dims[3],
                                       self.bb_feat_dims[4], 1)
        # Default in_fpn_layers: 34. last_in_fpn_layer_idx: 4.
        last_in_fpn_layer_idx = self.in_fpn_layers[-1]
        if self.bb_feat_dims[last_in_fpn_layer_idx] != self.trans_in_dim:
            self.in_fpn_bridgeconv = nn.Conv2d(
                self.bb_feat_dims[last_in_fpn_layer_idx], self.trans_in_dim, 1)
        else:
            self.in_fpn_bridgeconv = nn.Identity()

        # in_bn4b/in_gn4b normalizes in_fpn43_conv(layer 4 features),
        # so the feature dim = dim of layer 3.
        # in_bn3b/in_gn3b normalizes in_fpn32_conv(layer 3 features),
        # so the feature dim = dim of layer 2.
        if self.in_fpn_use_bn:
            self.in_bn3b = nn.BatchNorm2d(self.bb_feat_dims[3])
            self.in_bn4b = nn.BatchNorm2d(self.bb_feat_dims[4])
            self.in_fpn_norms = [None, None, None, self.in_bn3b, self.in_bn4b]
        else:
            self.in_gn3b = nn.GroupNorm(self.G, self.bb_feat_dims[3])
            self.in_gn4b = nn.GroupNorm(self.G, self.bb_feat_dims[4])
            self.in_fpn_norms = [None, None, None, self.in_gn3b, self.in_gn4b]

        self.in_fpn_convs = [
            None, None, self.in_fpn23_conv, self.in_fpn34_conv
        ]

        self.num_classes = config.num_classes
        self.num_modalities = config.num_modalities
        if self.num_modalities > 0:
            self.mod_fuse_conv = nn.Conv2d(self.num_modalities, 1, 1)

        self.out_fpn_use_bn = config.out_fpn_use_bn
        self.out_fpn_layers = config.out_fpn_layers
        self.out_fpn_scheme = config.out_fpn_scheme
        self.out_fpn_do_dropout = config.out_fpn_do_dropout
        self.posttrans_use_bn = config.posttrans_use_bn

        if self.out_fpn_layers != self.in_fpn_layers:
            self.do_out_fpn = True

            self.out_fpn12_conv = nn.Conv2d(self.bb_feat_dims[1],
                                            self.bb_feat_dims[2], 1)
            self.out_fpn23_conv = nn.Conv2d(self.bb_feat_dims[2],
                                            self.bb_feat_dims[3], 1)
            self.out_fpn34_conv = nn.Conv2d(self.bb_feat_dims[3],
                                            self.bb_feat_dims[4], 1)
            # Default in_fpn_layers: 34, out_fpn_layers: 1234. last_out_fpn_layer_idx: 3.
            last_out_fpn_layer_idx = self.out_fpn_layers[-len(self.in_fpn_layers)]
            if self.bb_feat_dims[last_out_fpn_layer_idx] != self.trans_out_dim:
                self.out_fpn_bridgeconv = nn.Conv2d(
                    self.bb_feat_dims[last_out_fpn_layer_idx],
                    self.trans_out_dim, 1)
            else:
                self.out_fpn_bridgeconv = nn.Identity()

            # out_bn3b/out_gn3b normalizes out_fpn23_conv(layer 3 features),
            # so the feature dim = dim of layer 2.
            # out_bn2b/out_gn2b normalizes out_fpn12_conv(layer 2 features),
            # so the feature dim = dim of layer 1.
            if self.out_fpn_use_bn:
                self.out_bn2b = nn.BatchNorm2d(self.bb_feat_dims[2])
                self.out_bn3b = nn.BatchNorm2d(self.bb_feat_dims[3])
                self.out_bn4b = nn.BatchNorm2d(self.bb_feat_dims[4])
                self.out_fpn_norms = [
                    None, None, self.out_bn2b, self.out_bn3b, self.out_bn4b
                ]
            else:
                self.out_gn2b = nn.GroupNorm(self.G, self.bb_feat_dims[2])
                self.out_gn3b = nn.GroupNorm(self.G, self.bb_feat_dims[3])
                self.out_gn4b = nn.GroupNorm(self.G, self.bb_feat_dims[4])
                self.out_fpn_norms = [
                    None, None, self.out_gn2b, self.out_gn3b, self.out_gn4b
                ]

            self.out_fpn_convs = [
                None, self.out_fpn12_conv, self.out_fpn23_conv,
                self.out_fpn34_conv
            ]
            self.out_conv = nn.Conv2d(self.trans_out_dim, self.num_classes, 1)
            self.out_fpn_dropout = nn.Dropout(config.hidden_dropout_prob)
        # out_fpn_layers = in_fpn_layers, no need to do fpn at the output end.
        # Output class scores directly.
        else:
            self.do_out_fpn = False
            if '2' in self.in_fpn_layers:
                # Output resolution is 1/4 of input already. No need to do upsampling here.
                self.out_conv = nn.Conv2d(config.trans_out_dim,
                                          self.num_classes, 1)
            else:
                # Output resolution is 1/8 of input. Do upsampling to make resolution x 2
                self.out_conv = nn.ConvTranspose2d(config.trans_out_dim,
                                                   self.num_classes, 2, 2)

        self.apply(self.init_weights)
        # tie_qk() has to be executed after weight initialization.
        self.apply(self.tie_qk)
        self.apply(self.add_identity_bias)
        # Initialize mod_fuse_conv weights and bias.
        # Set all modalities to have equal weights.
        if self.num_modalities > 0:
            self.mod_fuse_conv.weight.data.fill_(1 / self.num_modalities)
            self.mod_fuse_conv.bias.data.zero_()

        self.scales_printed = False
        self.translayer_dims = config.translayer_dims
        if not self.use_global_bias:
            self.num_vis_layers = 1 + 2 * self.num_translayers
        else:
            self.num_vis_layers = 1
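
For the default in_fpn_layers of "34" noted in the comments above, the mask_pool stride works out as follows:

import numpy as np

in_fpn_layers = [3, 4]                     # default per the comments above
pool_stride = 2 ** np.min(in_fpn_layers)   # 8, the stride of the lowest FPN layer
# doubled to 16 when bb_feat_upsize is off, matching the stem's extra pooling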
Example #9
    def __init__(self,
                 backbone,
                 transformer,
                 num_classes,
                 num_queries,
                 num_feature_levels,
                 aux_loss=True,
                 with_box_refine=False,
                 two_stage=False):
        """ Initializes the model.
        Parameters:
            backbone: torch module of the backbone to be used. See backbone.py
            transformer: torch module of the transformer architecture. See transformer.py
            num_classes: number of object classes
            num_queries: number of object queries, ie detection slot. This is the maximal number of objects
                         DETR can detect in a single image. For COCO, we recommend 100 queries.
            aux_loss: True if auxiliary decoding losses (loss at each decoder layer) are to be used.
            with_box_refine: iterative bounding box refinement
            two_stage: two-stage Deformable DETR
        """
        super().__init__()
        self.num_queries = num_queries
        self.transformer = transformer
        hidden_dim = transformer.d_model
        self.class_embed = nn.Linear(hidden_dim, num_classes)
        self.bbox_embed = MLP(hidden_dim, hidden_dim, 4, 3)
        self.num_feature_levels = num_feature_levels
        if not two_stage:
            self.query_embed = nn.Embedding(num_queries, hidden_dim * 2)
        if num_feature_levels > 1:
            num_backbone_outs = len(backbone.strides)
            input_proj_list = []
            for i in range(num_backbone_outs):
                in_channels = backbone.num_channels[i]
                input_proj_list.append(
                    nn.Sequential(
                        nn.Conv2d(in_channels, hidden_dim, kernel_size=1),
                        nn.GroupNorm(32, hidden_dim),
                    ))
            for _ in range(num_feature_levels - num_backbone_outs):
                input_proj_list.append(
                    nn.Sequential(
                        nn.Conv2d(in_channels,
                                  hidden_dim,
                                  kernel_size=3,
                                  stride=2,
                                  padding=1),
                        nn.GroupNorm(32, hidden_dim),
                    ))
                in_channels = hidden_dim
            self.input_proj = nn.ModuleList(input_proj_list)
        else:
            self.input_proj = nn.ModuleList([
                nn.Sequential(
                    nn.Conv2d(backbone.num_channels[0],
                              hidden_dim,
                              kernel_size=1),
                    nn.GroupNorm(32, hidden_dim),
                )
            ])
        self.backbone = backbone
        self.aux_loss = aux_loss
        self.with_box_refine = with_box_refine
        self.two_stage = two_stage

        prior_prob = 0.01
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        self.class_embed.bias.data = torch.ones(num_classes) * bias_value
        nn.init.constant_(self.bbox_embed.layers[-1].weight.data, 0)
        nn.init.constant_(self.bbox_embed.layers[-1].bias.data, 0)
        for proj in self.input_proj:
            nn.init.xavier_uniform_(proj[0].weight, gain=1)
            nn.init.constant_(proj[0].bias, 0)

        # if two-stage, the last class_embed and bbox_embed is for region proposal generation
        num_pred = (transformer.decoder.num_layers +
                    1) if two_stage else transformer.decoder.num_layers
        if with_box_refine:
            self.class_embed = _get_clones(self.class_embed, num_pred)
            self.bbox_embed = _get_clones(self.bbox_embed, num_pred)
            nn.init.constant_(self.bbox_embed[0].layers[-1].bias.data[2:],
                              -2.0)
            # hack implementation for iterative bounding box refinement
            self.transformer.decoder.bbox_embed = self.bbox_embed
        else:
            nn.init.constant_(self.bbox_embed.layers[-1].bias.data[2:], -2.0)
            self.class_embed = nn.ModuleList(
                [self.class_embed for _ in range(num_pred)])
            self.bbox_embed = nn.ModuleList(
                [self.bbox_embed for _ in range(num_pred)])
            self.transformer.decoder.bbox_embed = None
        if two_stage:
            # hack implementation for two-stage
            self.transformer.decoder.class_embed = self.class_embed
            for box_embed in self.bbox_embed:
                nn.init.constant_(box_embed.layers[-1].bias.data[2:], 0.0)
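
The classification bias initialization above is the standard focal-loss prior: every class logit starts at the inverse sigmoid of prior_prob, so initial foreground scores sit near 1%:

import math

prior_prob = 0.01
bias_value = -math.log((1 - prior_prob) / prior_prob)  # ≈ -4.595
# sigmoid(-4.595) ≈ 0.01, so initial class probabilities match the prior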
Example #10
    def __init__(self,
                 input_dim,
                 output_dim,
                 kernel_size,
                 stride,
                 padding=0,
                 norm='none',
                 activation='relu',
                 pad_type='zero'):
        super(Conv2dBlock, self).__init__()
        self.use_bias = True
        # initialize padding
        if pad_type == 'reflect':
            self.pad = nn.ReflectionPad2d(padding)
        elif pad_type == 'replicate':
            self.pad = nn.ReplicationPad2d(padding)
        elif pad_type == 'zero':
            self.pad = nn.ZeroPad2d(padding)
        else:
            assert 0, "Unsupported padding type: {}".format(pad_type)

        # initialize normalization
        norm_dim = output_dim
        if norm == 'bn':
            self.norm = nn.BatchNorm2d(norm_dim)
        elif norm == 'gn':
            self.norm = nn.GroupNorm(2, norm_dim)
        elif norm == 'in':
            #self.norm = nn.InstanceNorm2d(norm_dim, track_running_stats=True)
            self.norm = nn.InstanceNorm2d(norm_dim)
        elif norm == 'ln':
            self.norm = LayerNorm(norm_dim)
        elif norm == 'adain':
            self.norm = AdaptiveInstanceNorm2d(norm_dim)
        elif norm == 'none':
            self.norm = None
        else:
            assert 0, "Unsupported normalization: {}".format(norm)

        # initialize activation
        if activation == 'relu':
            self.activation = nn.ReLU(inplace=True)
        elif activation == 'lrelu':
            self.activation = nn.LeakyReLU(0.2, inplace=True)
        elif activation == 'prelu':
            self.activation = nn.PReLU()
        elif activation == 'selu':
            self.activation = nn.SELU(inplace=True)
        elif activation == 'tanh':
            self.activation = nn.Tanh()
        elif activation == 'none':
            self.activation = None
        else:
            assert 0, "Unsupported activation: {}".format(activation)

        # initialize convolution
        self.conv = nn.Conv2d(input_dim,
                              output_dim,
                              kernel_size,
                              stride,
                              bias=self.use_bias)
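
The block stores pad, conv, norm and activation but omits forward(); in MUNIT-style Conv2dBlocks these are typically applied in that order (an assumption here, since forward() is not shown):

    def forward(self, x):
        x = self.conv(self.pad(x))  # padding applied explicitly, so the conv itself is unpadded
        if self.norm:
            x = self.norm(x)
        if self.activation:
            x = self.activation(x)
        return x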
Example #11
    def _get_conv_bn_layer(
        self,
        in_channels,
        out_channels,
        kernel_size=11,
        stride=1,
        dilation=1,
        padding=0,
        bias=False,
        groups=1,
        heads=-1,
        separable=False,
        normalization="batch",
        norm_groups=1,
    ):
        if norm_groups == -1:
            norm_groups = out_channels

        if separable:
            layers = [
                self._get_conv(
                    in_channels,
                    in_channels,
                    kernel_size,
                    stride=stride,
                    dilation=dilation,
                    padding=padding,
                    bias=bias,
                    groups=in_channels,
                    heads=heads,
                ),
                self._get_conv(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=1,
                    dilation=1,
                    padding=0,
                    bias=bias,
                    groups=groups,
                ),
            ]
        else:
            layers = [
                self._get_conv(
                    in_channels,
                    out_channels,
                    kernel_size,
                    stride=stride,
                    dilation=dilation,
                    padding=padding,
                    bias=bias,
                    groups=groups,
                )
            ]

        if normalization == "group":
            layers.append(nn.GroupNorm(num_groups=norm_groups, num_channels=out_channels))
        elif normalization == "instance":
            layers.append(nn.GroupNorm(num_groups=out_channels, num_channels=out_channels))
        elif normalization == "layer":
            layers.append(nn.GroupNorm(num_groups=1, num_channels=out_channels))
        elif normalization == "batch":
            layers.append(nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.1))
        else:
            raise ValueError(
                f"Normalization method ({normalization}) does not match" f" one of [batch, layer, group, instance]."
            )

        if groups > 1:
            layers.append(GroupShuffle(groups, out_channels))
        return layers
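
Note how three of the branches reuse nn.GroupNorm: num_groups=1 gives layer-style normalization over all channels, while num_groups=out_channels normalizes each channel independently, matching InstanceNorm. A quick equivalence check:

import torch
from torch import nn

x = torch.randn(2, 32, 100)
as_instance = nn.GroupNorm(num_groups=32, num_channels=32, affine=False)
reference = nn.InstanceNorm1d(32, affine=False)
print(torch.allclose(as_instance(x), reference(x), atol=1e-5))  # True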
Example #12
 def __init__(self, layers, num_groups=32):
     super().__init__(layers,
                      norm_layer=lambda x: nn.GroupNorm(num_groups, x))
Example #13
def groupNorm(num_channels, eps=1e-5, momentum=0.1, affine=True):
    return nn.GroupNorm(min(32, num_channels),
                        num_channels,
                        eps=eps,
                        affine=affine)
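
One caveat with the min(32, num_channels) heuristic: nn.GroupNorm requires num_channels to be divisible by num_groups, so the helper works for e.g. 64 or 16 channels but raises for 48 (the unused momentum argument is presumably kept for signature parity with BatchNorm):

groupNorm(64)   # ok: 32 groups of 2 channels
groupNorm(16)   # ok: min(32, 16) = 16 groups of 1 channel
groupNorm(48)   # ValueError: 48 is not divisible by 32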
Example #14
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        """
        SOD Instance Head.
        """
        super().__init__()
        # fmt: off
        self.num_classes = cfg.MODEL.SOD.NUM_CLASSES  # without background.
        self.num_kernels = cfg.MODEL.SOD.NUM_KERNELS
        self.instance_in_features = cfg.MODEL.SOD.INSTANCE_IN_FEATURES
        self.num_in_channels = cfg.MODEL.SOD.INSTANCE_IN_CHANNELS  # = fpn.
        self.num_channels = cfg.MODEL.SOD.INSTANCE_CHANNELS
        self.num_grids = cfg.MODEL.SOD.NUM_GRIDS
        self.strides = cfg.MODEL.SOD.FPN_INSTANCE_STRIDES
        self.fc_dim = cfg.MODEL.SOD.FC_DIM
        self.with_coord = cfg.MODEL.SOD.WITH_COORD
        self.type_att = cfg.MODEL.SOD.TYPE_ATTENTION
        self.norm = cfg.MODEL.SOD.NORM
        self.center_symmetry = cfg.MODEL.SOD.CENTER_SYMMETRY
        self.pe_on = cfg.MODEL.SOD.PE_ON  # use positional encoding or not.

        self.use_base = cfg.MODEL.SOD.USE_BASE  # use dense2sparse or not.
        self.num_conv_before = cfg.MODEL.SOD.NUM_INSTANCE_CONVS_BEFORE
        self.num_conv_after = cfg.MODEL.SOD.NUM_INSTANCE_CONVS_AFTER
        self.rescale_first = cfg.MODEL.SOD.RESCALE_FIRST
        self.max_pool = cfg.MODEL.SOD.MAX_POOL

        # Convolutions to use in the towers
        self.num_levels = len(self.instance_in_features)
        assert self.num_levels == len(self.strides), \
            "Strides should match the features."
        assert len(set(self.num_grids)) == 1, \
            "The grid size should be the same across stages."
        # fmt: on
        if self.pe_on:
            num_ins = torch.tensor(self.num_grids).pow(2).sum()
            self.ins_embed = nn.Embedding(num_ins, self.num_in_channels)

        in_channels = [s.channels for s in input_shape]
        assert len(set(in_channels)) == 1, \
            "Each level must have the same channel count!"
        in_channels = in_channels[0]
        assert in_channels == self.num_in_channels, \
            "In channels should equal the tower in channels!"

        head_configs = {
            "ins_before":
            (self.num_conv_before, self.with_coord, False),  # leave for DCN.
            "ins_after": (self.num_conv_after, self.with_coord, False)
        }

        # shared conv.
        for head in head_configs:
            tower = []
            num_convs, use_coord, use_deformable = head_configs[head]
            for i in range(num_convs):
                # with coord or not.
                if i == 0:
                    if use_coord:
                        chn = self.num_in_channels + 2
                    else:
                        chn = self.num_in_channels
                else:
                    chn = self.num_channels
                # use deformable conv or not.
                if use_deformable and i == num_convs - 1:
                    raise NotImplementedError
                else:
                    conv_func = nn.Conv2d
                tower.append(
                    conv_func(chn,
                              self.num_channels,
                              kernel_size=3,
                              stride=1,
                              padding=1,
                              bias=self.norm is None))
                if self.norm == "GN":
                    tower.append(nn.GroupNorm(32, self.num_channels))
                tower.append(nn.ReLU(inplace=True))
            self.add_module('{}_tower'.format(head), nn.Sequential(*tower))

        # att conv.
        if self.use_base:
            self.base_att = nn.Conv2d(self.num_channels,
                                      self.num_kernels,
                                      kernel_size=1,
                                      stride=1,
                                      padding=0)
            self.ins_att = nn.Conv2d(self.num_channels,
                                     self.num_kernels,
                                     kernel_size=1,
                                     stride=1,
                                     padding=0)

        # individual fc.
        cls_tower = []
        bbox_tower = []
        self._output_size = self.num_channels
        for k, fc_dim in enumerate(self.fc_dim):
            cls_tower.append(nn.Linear(self._output_size, fc_dim))
            cls_tower.append(nn.ReLU(inplace=True))
            bbox_tower.append(nn.Linear(self._output_size, fc_dim))
            bbox_tower.append(nn.ReLU(inplace=True))
            self._output_size = fc_dim
        self.add_module('cls_tower', nn.Sequential(*cls_tower))
        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))

        # pred layer.
        self.cls_pred = nn.Linear(self._output_size, self.num_classes + 1)
        self.bbox_pred = nn.Linear(self._output_size, 4)

        # init.
        conv_modules = [self.ins_before_tower, self.ins_after_tower]
        if self.use_base:
            conv_modules += [self.base_att, self.ins_att]
        for modules in conv_modules:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    nn.init.normal_(l.weight, std=0.01)
                    if l.bias is not None:
                        nn.init.constant_(l.bias, 0)

        for modules in [self.cls_tower, self.bbox_tower]:
            for l in modules.modules():
                if isinstance(l, nn.Linear):
                    weight_init.c2_xavier_fill(l)

        nn.init.normal_(self.cls_pred.weight, std=0.01)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        for l in [self.cls_pred, self.bbox_pred]:
            if l.bias is not None:
                nn.init.constant_(l.bias, 0)
        # initialize the bias for scale.
        prior_prob = cfg.MODEL.SOD.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        nn.init.constant_(self.bbox_pred.bias[2:], bias_value)
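
With PE_ON, the embedding table above is sized by the total number of grid cells across all levels (the asserts additionally require every level to use the same grid size):

import torch

num_grids = [36, 36, 36, 36, 36]                 # illustrative, equal per level
num_ins = torch.tensor(num_grids).pow(2).sum()   # 5 * 36**2 = 6480 position slots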
Example #15
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        """
        Arguments:
            in_channels (int): number of channels of the input feature
        """
        super(FCOSRepPointsHead, self).__init__()
        # TODO: Implement the sigmoid version first.
        # fmt: off
        in_channels = input_shape[0].channels
        num_classes = cfg.MODEL.FCOS.NUM_CLASSES
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
        self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS
        self.centerness_on_reg = cfg.MODEL.FCOS.CENTERNESS_ON_REG
        self.use_dcn_in_tower = cfg.MODEL.FCOS.USE_DCN_IN_TOWER
        self.use_dcn_v2 = cfg.MODEL.FCOS.USE_DCN_V2
        # fmt: on

        cls_tower = []
        bbox_tower = []
        for i in range(cfg.MODEL.FCOS.NUM_CONVS):
            use_dcn = False
            use_v2 = True
            if self.use_dcn_in_tower and i == cfg.MODEL.FCOS.NUM_CONVS - 1:
                conv_func = DFConv2d
                bias = False
                use_dcn = True
                if not self.use_dcn_v2:
                    use_v2 = False
            else:
                conv_func = nn.Conv2d
                bias = True

            if use_dcn and not use_v2:
                cls_tower.append(
                    conv_func(in_channels,
                              in_channels,
                              with_modulated_dcn=False,
                              kernel_size=3,
                              stride=1,
                              padding=1,
                              bias=bias))
            else:
                cls_tower.append(
                    conv_func(in_channels,
                              in_channels,
                              kernel_size=3,
                              stride=1,
                              padding=1,
                              bias=bias))
            cls_tower.append(nn.GroupNorm(32, in_channels))
            cls_tower.append(nn.ReLU())

            if use_dcn and not use_v2:
                bbox_tower.append(
                    conv_func(in_channels,
                              in_channels,
                              with_modulated_dcn=False,
                              kernel_size=3,
                              stride=1,
                              padding=1,
                              bias=bias))
            else:
                bbox_tower.append(
                    conv_func(in_channels,
                              in_channels,
                              kernel_size=3,
                              stride=1,
                              padding=1,
                              bias=bias))
            bbox_tower.append(nn.GroupNorm(32, in_channels))
            bbox_tower.append(nn.ReLU())

        self.add_module('cls_tower', nn.Sequential(*cls_tower))
        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))

        # rep part
        self.point_feat_channels = in_channels
        self.num_points = 9
        self.dcn_kernel = int(np.sqrt(self.num_points))
        self.dcn_pad = int((self.dcn_kernel - 1) / 2)
        self.cls_out_channels = num_classes
        self.gradient_mul = 0.1
        dcn_base = np.arange(-self.dcn_pad,
                             self.dcn_pad + 1).astype(np.float64)
        dcn_base_y = np.repeat(dcn_base, self.dcn_kernel)
        dcn_base_x = np.tile(dcn_base, self.dcn_kernel)
        dcn_base_offset = np.stack([dcn_base_y, dcn_base_x], axis=1).reshape(
            (-1))
        dcn_base_offset = torch.tensor(dcn_base_offset,
                                       dtype=torch.float32).view(1, -1, 1, 1)
        self.register_buffer("dcn_base_offset", dcn_base_offset)

        self.deform_cls_conv = DeformConv(self.point_feat_channels,
                                          self.point_feat_channels,
                                          self.dcn_kernel, 1, self.dcn_pad)
        self.deform_reg_conv = DeformConv(self.point_feat_channels,
                                          self.point_feat_channels,
                                          self.dcn_kernel, 1, self.dcn_pad)

        points_out_dim = 2 * self.num_points
        self.offsets_init = nn.Sequential(
            nn.Conv2d(self.point_feat_channels, self.point_feat_channels, 3, 1,
                      1), nn.ReLU(inplace=True),
            nn.Conv2d(self.point_feat_channels, points_out_dim, 1, 1, 0))

        self.offsets_refine = nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(self.point_feat_channels, points_out_dim, 1, 1, 0))
        self.logits = nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(self.point_feat_channels, self.cls_out_channels, 1, 1,
                      0))
        #        self.cls_logits = nn.Conv2d(in_channels, num_classes, kernel_size=3, stride=1, padding=1)
        #        self.bbox_pred = nn.Conv2d(in_channels, 4, kernel_size=3, stride=1, padding=1)
        self.centerness = nn.Conv2d(in_channels,
                                    1,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)

        # initialization
        for modules in [
                self.cls_tower,
                self.bbox_tower,
                #                        self.cls_logits, self.bbox_pred,
                self.offsets_init,
                self.offsets_refine,
                self.deform_cls_conv,
                self.deform_reg_conv,
                self.centerness
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # initialize the bias for focal loss
        prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        #        torch.nn.init.constant_(self.cls_logits.bias, bias_value)
        for module in self.logits.modules():
            if hasattr(module, 'bias') and module.bias is not None:
                torch.nn.init.constant_(module.bias, bias_value)

        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
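
The dcn_base_offset buffer above enumerates the (y, x) offsets of a standard 3x3 kernel, later combined with the predicted points to drive the DeformConv layers. In isolation:

import numpy as np

dcn_base = np.arange(-1, 2).astype(np.float64)    # dcn_pad = 1 for a 3x3 kernel
ys, xs = np.repeat(dcn_base, 3), np.tile(dcn_base, 3)
print(np.stack([ys, xs], axis=1))                 # (-1,-1), (-1,0), ..., (1,1)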
Example #16
def gnnorm2d(num_channels, num_groups=32):
    if num_groups > 0:
        return nn.GroupNorm(num_groups, num_channels)
    else:
        return nn.BatchNorm2d(num_channels)
Example #17
 def get_gn(num_channels):
     return nn.GroupNorm(32, num_channels)
Example #18
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 padding_mode='zeros',
                 norm='BN',
                 groups_size=16,
                 conv_last=False):
        super(ConvNorm, self).__init__()

        if norm not in [
                None, 'BN', 'IN', 'GN', 'LN', 'WN', 'SN', 'MSN', 'MSNTReLU',
                'WNTReLU'
        ]:
            raise ValueError(
                "Undefined norm value. Must be one of "
                "[None,'BN', 'IN', 'GN', 'LN', 'WN', 'SN','MSN', 'MSNTReLU', 'WNTReLU']"
            )
        layers = []
        if norm in ['MSN', 'MSNTReLU']:
            conv2d = MeanSpectralNormConv2d(in_channels, out_channels,
                                            kernel_size, stride, padding,
                                            dilation, groups, bias,
                                            padding_mode)
            layers += [conv2d]
        elif norm == 'SN':
            conv2d = SpectralNormConv2d(in_channels, out_channels, kernel_size,
                                        stride, padding, dilation, groups,
                                        bias, padding_mode)
            layers += [conv2d]
        elif norm in ['WN', 'WNTReLU']:
            conv2d = MeanWeightNormConv2d(in_channels, out_channels,
                                          kernel_size, stride, padding,
                                          dilation, groups, bias, padding_mode)
            layers += [conv2d]
        elif norm == 'IN':
            conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride,
                               padding, dilation, groups, bias, padding_mode)
            layers += [conv2d, nn.InstanceNorm2d(out_channels)]
        elif norm == 'GN':
            conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride,
                               padding, dilation, groups, bias, padding_mode)
            layers += [conv2d, nn.GroupNorm(groups_size, out_channels)]
        elif norm == 'LN':
            conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride,
                               padding, dilation, groups, bias, padding_mode)
            # NOTE: nn.LayerNorm(out_channels) normalizes over the trailing
            # dimension only, so on NCHW feature maps this runs only when the
            # width happens to equal out_channels.
            layers += [conv2d, nn.LayerNorm(out_channels)]
        elif norm == 'BN':
            conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride,
                               padding, dilation, groups, bias, padding_mode)
            layers += [conv2d, nn.BatchNorm2d(out_channels)]
        else:
            conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride,
                               padding, dilation, groups, bias, padding_mode)
            layers += [conv2d]
        """
            conv_last is a flag to change the order of operations from
                Conv2D+ BN to BN+Con2D
            This is frequently used in DenseNet & ResNet architectures.
            So to change the order, we simply rotate the array by 1 to the 
            left and change the num_features to the in_channels size
        """
        if conv_last and norm not in [
                None, 'MSN', 'SN', 'WN', 'WNTReLU', 'MSNTReLU'
        ]:
            layers = layers[1:] + layers[:1]
            # Reinitialize the batchnorm layer or its variants
            layers[0].__init__(in_channels)

        self.layers = nn.Sequential(*layers)
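
A usage sketch for ConvNorm, assuming the (not shown) forward simply applies
self.layers; the names and shapes below are illustrative only.

import torch

m = ConvNorm(16, 32, kernel_size=3, padding=1, norm='GN', groups_size=8)
x = torch.randn(2, 16, 28, 28)
y = m.layers(x)    # Conv2d(16, 32, 3, padding=1) -> GroupNorm(8, 32)
print(y.shape)     # torch.Size([2, 32, 28, 28])

# With conv_last=True, the norm layer moves in front of the conv and is
# rebuilt with num_features = in_channels:
m2 = ConvNorm(16, 32, kernel_size=3, padding=1, norm='BN', conv_last=True)
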
Ejemplo n.º 19
0
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__()
        # fmt: off
        in_channels = input_shape[0].channels
        num_classes = cfg.MODEL.FCOS.NUM_CLASSES
        num_convs = cfg.MODEL.FCOS.NUM_CONVS
        prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
        self.centerness_on_reg = cfg.MODEL.FCOS.CENTERNESS_ON_REG
        self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS
        # fmt: on

        cls_subnet = []
        bbox_subnet = []
        for _ in range(num_convs):
            cls_subnet.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1))
            cls_subnet.append(nn.GroupNorm(32, in_channels))
            cls_subnet.append(nn.ReLU())
            bbox_subnet.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1))
            bbox_subnet.append(nn.GroupNorm(32, in_channels))
            bbox_subnet.append(nn.ReLU())

        self.cls_subnet = nn.Sequential(*cls_subnet)
        self.bbox_subnet = nn.Sequential(*bbox_subnet)

        self.cls_score = nn.Conv2d(in_channels,
                                   num_classes,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        self.bbox_pred = nn.Conv2d(in_channels,
                                   4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        self.centerness = nn.Conv2d(in_channels,
                                    1,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)

        self.add_module("border_cls_subnet", BorderBranch(in_channels, 256))
        self.add_module("border_bbox_subnet", BorderBranch(in_channels, 128))

        self.border_cls_score = nn.Conv2d(in_channels,
                                          num_classes,
                                          kernel_size=1,
                                          stride=1)
        self.border_bbox_pred = nn.Conv2d(in_channels,
                                          4,
                                          kernel_size=1,
                                          stride=1)

        # Initialization
        for modules in [
                self.cls_subnet, self.bbox_subnet, self.cls_score,
                self.bbox_pred, self.centerness, self.border_cls_subnet,
                self.border_bbox_subnet, self.border_cls_score,
                self.border_bbox_pred
        ]:
            for layer in modules.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
                    torch.nn.init.constant_(layer.bias, 0)
                if isinstance(layer, nn.GroupNorm):
                    torch.nn.init.constant_(layer.weight, 1)
                    torch.nn.init.constant_(layer.bias, 0)

        # Use prior in model initialization to improve stability
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_score.bias, bias_value)
        torch.nn.init.constant_(self.border_cls_score.bias, bias_value)

        self.scales = nn.ModuleList(
            [Scale(init_value=1.0) for _ in range(len(self.fpn_strides))])
Ejemplo n.º 20
0
    def __init__(self, cfg, in_channels):
        """
        Arguments:
            in_channels (int): number of channels of the input feature
        """
        super(FCOSHead, self).__init__()
        # TODO: Implement the sigmoid version first.
        num_classes = cfg.MODEL.NUM_CLASSES - 1
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
        self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS
        self.centerness_on_reg = cfg.MODEL.FCOS.CENTERNESS_ON_REG
        self.use_dcn_in_tower = cfg.MODEL.FCOS.USE_DCN_IN_TOWER

        cls_tower = []
        bbox_tower = []
        for i in range(cfg.MODEL.FCOS.NUM_CONVS):
            if self.use_dcn_in_tower and \
                    i == cfg.MODEL.FCOS.NUM_CONVS - 1:
                conv_func = DFConv2d
            else:
                conv_func = nn.Conv2d

            cls_tower.append(
                conv_func(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=True))
            cls_tower.append(nn.GroupNorm(32, in_channels))
            cls_tower.append(nn.ReLU(True))
            bbox_tower.append(
                conv_func(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=True))
            bbox_tower.append(nn.GroupNorm(32, in_channels))
            bbox_tower.append(nn.ReLU(True))

        self.add_module('cls_tower', nn.Sequential(*cls_tower))
        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))
        self.cls_logits = nn.Conv2d(in_channels,
                                    num_classes,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
        self.bbox_pred = nn.Conv2d(in_channels,
                                   4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        self.centerness = nn.Conv2d(in_channels,
                                    1,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)

        # initialization
        for modules in [
                self.cls_tower, self.bbox_tower, self.cls_logits,
                self.bbox_pred, self.centerness
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # initialize the bias for focal loss
        prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)

        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(4)])
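
The bias initialization above follows the focal-loss prior from RetinaNet:
the classification bias is chosen so that every location initially predicts
foreground with probability prior_prob. A quick numerical check:

import math
import torch

prior_prob = 0.01
bias_value = -math.log((1 - prior_prob) / prior_prob)
print(torch.sigmoid(torch.tensor(bias_value)))  # tensor(0.0100)
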
Ejemplo n.º 21
0
def norm(dim):
    return nn.GroupNorm(min(32, dim), dim)
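
Note that min(32, dim) only guards the case dim < 32; nn.GroupNorm still
requires num_channels to be divisible by num_groups, so this helper assumes
dim is either below 32 or a multiple of 32:

norm(16)  # nn.GroupNorm(16, 16), one channel per group
norm(64)  # nn.GroupNorm(32, 64)
# norm(48) raises ValueError: num_channels must be divisible by num_groups
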
Ejemplo n.º 22
0
    def __init__(self, cfg, in_channels):
        super(ATSSHead, self).__init__()
        self.cfg = cfg
        num_classes = cfg.MODEL.ATSS.NUM_CLASSES
        num_anchors = len(cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS[0]) * len(
            cfg.MODEL.ANCHOR_GENERATOR.SIZES[0])

        head_configs = {
            "cls": (cfg.MODEL.ATSS.NUM_CONVS, False),
            "bbox":
            (cfg.MODEL.ATSS.NUM_CONVS, cfg.MODEL.ATSS.USE_DCN_IN_TOWER),
        }
        norm = None if cfg.MODEL.ATSS.NORM == "none" else cfg.MODEL.ATSS.NORM

        for head in head_configs:
            tower = []
            num_convs, use_deformable = head_configs[head]
            if use_deformable:
                conv_func = DFConv2d
            else:
                conv_func = nn.Conv2d
            for i in range(num_convs):
                tower.append(
                    conv_func(in_channels,
                              in_channels,
                              kernel_size=3,
                              stride=1,
                              padding=1,
                              bias=True))
                if norm == "GN":
                    tower.append(nn.GroupNorm(32, in_channels))
                elif norm is not None:
                    tower.append(get_norm(norm, in_channels))

                tower.append(nn.ReLU())
            self.add_module('{}_tower'.format(head), nn.Sequential(*tower))

        self.cls_logits = nn.Conv2d(in_channels,
                                    num_anchors * num_classes,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
        self.bbox_pred = nn.Conv2d(in_channels,
                                   num_anchors * 4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)

        # initialization
        for modules in [
                self.cls_tower,
                self.bbox_tower,
                self.cls_logits,
                self.bbox_pred,
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # initialize the bias for focal loss
        prior_prob = cfg.MODEL.ATSS.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)
        if self.cfg.MODEL.ATSS.REGRESSION_TYPE == 'POINT':
            assert num_anchors == 1, "regressing from a point only supports num_anchors == 1"
            torch.nn.init.constant_(self.bbox_pred.bias, 4)

        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
Ejemplo n.º 23
0
def BatchNorm2d(num_features):
    return nn.GroupNorm(num_channels=num_features, num_groups=32)
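
Shadowing the BatchNorm2d name like this lets existing model code pick up
GroupNorm without edits. A hedged usage sketch: recent torchvision ResNets
accept a norm_layer callable with exactly this signature, and all of their
channel counts are multiples of 32.

import torchvision

model = torchvision.models.resnet50(norm_layer=BatchNorm2d)
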
Ejemplo n.º 24
0
def create_conv(in_channels,
                out_channels,
                kernel_size,
                order,
                num_groups,
                padding=1):
    """
    Create a list of modules with together constitute a single conv layer with non-linearity
    and optional batchnorm/groupnorm.

    Args:
        in_channels (int): number of input channels
        out_channels (int): number of output channels
        order (string): order of things, e.g.
            'cr' -> conv + ReLU
            'crg' -> conv + ReLU + groupnorm
            'cl' -> conv + LeakyReLU
            'ce' -> conv + ELU
        num_groups (int): number of groups for the GroupNorm
        padding (int): add zero-padding to the input

    Return:
        list of tuple (name, module)
    """
    assert 'c' in order, "Conv layer MUST be present"
    assert order[
        0] not in 'rle', 'Non-linearity cannot be the first operation in the layer'

    modules = []
    for i, char in enumerate(order):
        if char == 'r':
            modules.append(('ReLU', nn.ReLU(inplace=True)))
        elif char == 'l':
            modules.append(
                ('LeakyReLU', nn.LeakyReLU(negative_slope=0.1, inplace=True)))
        elif char == 'e':
            modules.append(('ELU', nn.ELU(inplace=True)))
        elif char == 'c':
            # add learnable bias only in the absence of batchnorm/groupnorm
            bias = not ('g' in order or 'b' in order)
            modules.append(('conv',
                            conv3d(in_channels,
                                   out_channels,
                                   kernel_size,
                                   bias,
                                   padding=padding)))
        elif char == 'g':
            is_before_conv = i < order.index('c')
            assert not is_before_conv, 'GroupNorm MUST go after the Conv3d'
            # number of groups must be less or equal the number of channels
            if out_channels < num_groups:
                num_groups = out_channels
            modules.append(('groupnorm',
                            nn.GroupNorm(num_groups=num_groups,
                                         num_channels=out_channels)))
        elif char == 'b':
            is_before_conv = i < order.index('c')
            if is_before_conv:
                modules.append(('batchnorm', nn.BatchNorm3d(in_channels)))
            else:
                modules.append(('batchnorm', nn.BatchNorm3d(out_channels)))
        else:
            raise ValueError(
                f"Unsupported layer type '{char}'. MUST be one of ['b', 'g', 'r', 'l', 'e', 'c']"
            )

    return modules
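
Because create_conv returns (name, module) tuples, it slots directly into an
OrderedDict-backed nn.Sequential. A sketch, assuming the conv3d helper used
above is available in the same scope:

from collections import OrderedDict
import torch.nn as nn

# 'crg': conv (bias disabled because 'g' is present) + ReLU + GroupNorm
layers = create_conv(16, 32, kernel_size=3, order='crg', num_groups=8)
block = nn.Sequential(OrderedDict(layers))
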
Ejemplo n.º 25
0
    def __init__(self, conv_body_func, fpn_level_info, P2only=False):
        super().__init__()
        self.fpn_level_info = fpn_level_info
        self.P2only = P2only

        self.dim_out = fpn_dim = cfg.FPN.DIM
        min_level, max_level = get_min_max_levels()
        self.num_backbone_stages = len(
            fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
        fpn_dim_lateral = fpn_level_info.dims
        self.spatial_scale = []  # a list of scales for FPN outputs

        #
        # Step 1: recursively build down starting from the coarsest backbone level
        #
        # For the coarsest backbone level: a 1x1 conv only seeds the recursion
        if cfg.FPN.USE_GN:
            self.conv_top = nn.Sequential(
                nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0, bias=False),
                nn.GroupNorm(net_utils.get_group_gn(fpn_dim),
                             fpn_dim,
                             eps=cfg.GROUP_NORM.EPSILON))
        else:
            self.conv_top = nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0)
        self.topdown_lateral_modules = nn.ModuleList()
        self.posthoc_modules = nn.ModuleList()

        # For other levels add top-down and lateral connections
        for i in range(self.num_backbone_stages - 1):
            self.topdown_lateral_modules.append(
                topdown_lateral_module(fpn_dim, fpn_dim_lateral[i + 1]))

        # Post-hoc scale-specific 3x3 convs
        for i in range(self.num_backbone_stages):
            if cfg.FPN.USE_GN:
                self.posthoc_modules.append(
                    nn.Sequential(
                        nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=False),
                        nn.GroupNorm(net_utils.get_group_gn(fpn_dim),
                                     fpn_dim,
                                     eps=cfg.GROUP_NORM.EPSILON)))
            else:
                self.posthoc_modules.append(
                    nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1))

            self.spatial_scale.append(fpn_level_info.spatial_scales[i])

        #
        # Step 2: build up starting from the coarsest backbone level
        #
        # Check if we need the P6 feature map
        if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
            # Original FPN P6 level implementation from our CVPR'17 FPN paper
            # Use max pooling to simulate stride 2 subsampling
            self.maxpool_p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0)
            self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

        # Coarser FPN levels introduced for RetinaNet
        if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
            self.extra_pyramid_modules = nn.ModuleList()
            dim_in = fpn_level_info.dims[0]
            for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
                self.extra_pyramid_modules.append(
                    nn.Conv2d(dim_in, fpn_dim, 3, 2, 1))
                dim_in = fpn_dim
                self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

        if self.P2only:
            # use only the finest level
            self.spatial_scale = self.spatial_scale[-1]

        self._init_weights()

        # Deliberately add conv_body after _init_weights.
        # conv_body has its own _init_weights function
        self.conv_body = conv_body_func()  # e.g. ResNet
Ejemplo n.º 26
0
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        """
        Arguments:
            in_channels (int): number of channels of the input feature
        """
        super().__init__()
        # TODO: Implement the sigmoid version first.
        self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
        head_configs = {
            "cls":
            (cfg.MODEL.FCOS.NUM_CLS_CONVS, cfg.MODEL.FCOS.USE_DEFORMABLE),
            "bbox":
            (cfg.MODEL.FCOS.NUM_BOX_CONVS, cfg.MODEL.FCOS.USE_DEFORMABLE),
            "share": (cfg.MODEL.FCOS.NUM_SHARE_CONVS, False)
        }
        norm = None if cfg.MODEL.FCOS.NORM == "none" else cfg.MODEL.FCOS.NORM
        self.num_levels = len(input_shape)

        in_channels = [s.channels for s in input_shape]
        assert len(
            set(in_channels)) == 1, "Each level must have the same channel!"
        in_channels = in_channels[0]

        for head in head_configs:
            tower = []
            num_convs, use_deformable = head_configs[head]
            for i in range(num_convs):
                if use_deformable and i == num_convs - 1:
                    conv_func = DFConv2d
                else:
                    conv_func = nn.Conv2d
                tower.append(
                    conv_func(in_channels,
                              in_channels,
                              kernel_size=3,
                              stride=1,
                              padding=1,
                              bias=True))
                if norm == "GN":
                    tower.append(nn.GroupNorm(32, in_channels))
                elif norm == "NaiveGN":
                    tower.append(NaiveGroupNorm(32, in_channels))
                elif norm == "BN":
                    tower.append(
                        ModuleListDial([
                            nn.BatchNorm2d(in_channels)
                            for _ in range(self.num_levels)
                        ]))
                elif norm == "SyncBN":
                    tower.append(
                        ModuleListDial([
                            NaiveSyncBatchNorm(in_channels)
                            for _ in range(self.num_levels)
                        ]))
                tower.append(nn.ReLU())
            self.add_module('{}_tower'.format(head), nn.Sequential(*tower))

        self.cls_logits = nn.Conv2d(in_channels,
                                    self.num_classes,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
        self.bbox_pred = nn.Conv2d(in_channels,
                                   4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        self.ctrness = nn.Conv2d(in_channels,
                                 1,
                                 kernel_size=3,
                                 stride=1,
                                 padding=1)

        if cfg.MODEL.FCOS.USE_SCALE:
            self.scales = nn.ModuleList(
                [Scale(init_value=1.0) for _ in range(self.num_levels)])
        else:
            self.scales = None

        for modules in [
                self.cls_tower, self.bbox_tower, self.share_tower,
                self.cls_logits, self.bbox_pred, self.ctrness
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # initialize the bias for focal loss
        prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)
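
ModuleListDial is not defined in this snippet. A minimal sketch consistent
with how it is used here (one BatchNorm per FPN level, advanced on every
call) could look like the following; this is an assumption about the helper,
not its actual definition.

import torch.nn as nn

class ModuleListDial(nn.ModuleList):
    """Cycle through the children: call i is dispatched to module i % len."""

    def __init__(self, modules=None):
        super().__init__(modules)
        self.cur_position = 0

    def forward(self, x):
        result = self[self.cur_position](x)
        self.cur_position = (self.cur_position + 1) % len(self)
        return result
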
Ejemplo n.º 27
0
    def __init__(self, cin, cout, zdim=128, nf=64):
        super(ConfNet, self).__init__()
        ## downsampling
        network = [
            nn.Conv2d(cin, nf, kernel_size=4, stride=2, padding=1,
                      bias=False),  # 64x64 -> 32x32
            nn.GroupNorm(16, nf),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(nf,
                      nf * 2,
                      kernel_size=4,
                      stride=2,
                      padding=1,
                      bias=False),  # 32x32 -> 16x16
            nn.GroupNorm(16 * 2, nf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(nf * 2,
                      nf * 4,
                      kernel_size=4,
                      stride=2,
                      padding=1,
                      bias=False),  # 16x16 -> 8x8
            nn.GroupNorm(16 * 4, nf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(nf * 4,
                      nf * 8,
                      kernel_size=4,
                      stride=2,
                      padding=1,
                      bias=False),  # 8x8 -> 4x4
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(nf * 8,
                      zdim,
                      kernel_size=4,
                      stride=1,
                      padding=0,
                      bias=False),  # 4x4 -> 1x1
            nn.ReLU(inplace=True)
        ]
        ## upsampling
        network += [
            nn.ConvTranspose2d(zdim,
                               nf * 8,
                               kernel_size=4,
                               padding=0,
                               bias=False),  # 1x1 -> 4x4
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(nf * 8,
                               nf * 4,
                               kernel_size=4,
                               stride=2,
                               padding=1,
                               bias=False),  # 4x4 -> 8x8
            nn.GroupNorm(16 * 4, nf * 4),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(nf * 4,
                               nf * 2,
                               kernel_size=4,
                               stride=2,
                               padding=1,
                               bias=False),  # 8x8 -> 16x16
            nn.GroupNorm(16 * 2, nf * 2),
            nn.ReLU(inplace=True)
        ]
        self.network = nn.Sequential(*network)

        out_net1 = [
            nn.ConvTranspose2d(nf * 2,
                               nf,
                               kernel_size=4,
                               stride=2,
                               padding=1,
                               bias=False),  # 16x16 -> 32x32
            nn.GroupNorm(16, nf),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(nf,
                               nf,
                               kernel_size=4,
                               stride=2,
                               padding=1,
                               bias=False),  # 32x32 -> 64x64
            nn.GroupNorm(16, nf),
            nn.ReLU(inplace=True),
            nn.Conv2d(nf, 2, kernel_size=5, stride=1, padding=2,
                      bias=False),  # 64x64
            nn.Softplus()
        ]
        self.out_net1 = nn.Sequential(*out_net1)

        out_net2 = [
            nn.Conv2d(nf * 2,
                      2,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=False),  # 16x16
            nn.Softplus()
        ]
        self.out_net2 = nn.Sequential(*out_net2)
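
ConfNet's forward is not shown, but the inline shape comments pin down the
sub-network outputs; a quick check on a 64x64 input (my sketch):

import torch

net = ConfNet(cin=3, cout=2)
feat = net.network(torch.randn(1, 3, 64, 64))  # (1, 128, 16, 16) for nf=64
conf_full = net.out_net1(feat)                 # (1, 2, 64, 64)
conf_small = net.out_net2(feat)                # (1, 2, 16, 16)
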
Ejemplo n.º 28
0
 def gn_helper(planes):
     return nn.GroupNorm(8, planes)
Ejemplo n.º 29
0
 def __init__(self, cin, cout, zdim=128, nf=64, activation=nn.Tanh):
     super(EDDeconv, self).__init__()
     ## downsampling
     network = [
         nn.Conv2d(cin, nf, kernel_size=4, stride=2, padding=1,
                   bias=False),  # 64x64 -> 32x32
         nn.GroupNorm(16, nf),
         nn.LeakyReLU(0.2, inplace=True),
         nn.Conv2d(nf,
                   nf * 2,
                   kernel_size=4,
                   stride=2,
                   padding=1,
                   bias=False),  # 32x32 -> 16x16
         nn.GroupNorm(16 * 2, nf * 2),
         nn.LeakyReLU(0.2, inplace=True),
         nn.Conv2d(nf * 2,
                   nf * 4,
                   kernel_size=4,
                   stride=2,
                   padding=1,
                   bias=False),  # 16x16 -> 8x8
         nn.GroupNorm(16 * 4, nf * 4),
         nn.LeakyReLU(0.2, inplace=True),
         nn.Conv2d(nf * 4,
                   nf * 8,
                   kernel_size=4,
                   stride=2,
                   padding=1,
                   bias=False),  # 8x8 -> 4x4
         nn.LeakyReLU(0.2, inplace=True),
         nn.Conv2d(nf * 8,
                   zdim,
                   kernel_size=4,
                   stride=1,
                   padding=0,
                   bias=False),  # 4x4 -> 1x1
         nn.ReLU(inplace=True)
     ]
     ## upsampling
     network += [
         nn.ConvTranspose2d(zdim,
                            nf * 8,
                            kernel_size=4,
                            stride=1,
                            padding=0,
                            bias=False),  # 1x1 -> 4x4
         nn.ReLU(inplace=True),
         nn.Conv2d(nf * 8,
                   nf * 8,
                   kernel_size=3,
                   stride=1,
                   padding=1,
                   bias=False),
         nn.ReLU(inplace=True),
         nn.ConvTranspose2d(nf * 8,
                            nf * 4,
                            kernel_size=4,
                            stride=2,
                            padding=1,
                            bias=False),  # 4x4 -> 8x8
         nn.GroupNorm(16 * 4, nf * 4),
         nn.ReLU(inplace=True),
         nn.Conv2d(nf * 4,
                   nf * 4,
                   kernel_size=3,
                   stride=1,
                   padding=1,
                   bias=False),
         nn.GroupNorm(16 * 4, nf * 4),
         nn.ReLU(inplace=True),
         nn.ConvTranspose2d(nf * 4,
                            nf * 2,
                            kernel_size=4,
                            stride=2,
                            padding=1,
                            bias=False),  # 8x8 -> 16x16
         nn.GroupNorm(16 * 2, nf * 2),
         nn.ReLU(inplace=True),
         nn.Conv2d(nf * 2,
                   nf * 2,
                   kernel_size=3,
                   stride=1,
                   padding=1,
                   bias=False),
         nn.GroupNorm(16 * 2, nf * 2),
         nn.ReLU(inplace=True),
         nn.ConvTranspose2d(nf * 2,
                            nf,
                            kernel_size=4,
                            stride=2,
                            padding=1,
                            bias=False),  # 16x16 -> 32x32
         nn.GroupNorm(16, nf),
         nn.ReLU(inplace=True),
         nn.Conv2d(nf, nf, kernel_size=3, stride=1, padding=1, bias=False),
         nn.GroupNorm(16, nf),
         nn.ReLU(inplace=True),
         nn.Upsample(scale_factor=2, mode='nearest'),  # 32x32 -> 64x64
         nn.Conv2d(nf, nf, kernel_size=3, stride=1, padding=1, bias=False),
         nn.GroupNorm(16, nf),
         nn.ReLU(inplace=True),
         nn.Conv2d(nf, nf, kernel_size=5, stride=1, padding=2, bias=False),
         nn.GroupNorm(16, nf),
         nn.ReLU(inplace=True),
         nn.Conv2d(nf, cout, kernel_size=5, stride=1, padding=2, bias=False)
     ]
     if activation is not None:
         network += [activation()]
     self.network = nn.Sequential(*network)
Ejemplo n.º 30
0
 def test_groupnorm(self):
     self._check_one_layer(nn.GroupNorm(4, 16), torch.randn(64, 16, 10))
     self._check_one_layer(nn.GroupNorm(4, 16), torch.randn(64, 16, 10, 9))
     self._check_one_layer(nn.GroupNorm(4, 16),
                           torch.randn(64, 16, 10, 9, 8))