コード例 #1
0
ファイル: pointnet2_partseg.py プロジェクト: diviswen/PMP-Net
    def __init__(self,
                 mlp: List[int],
                 n_points=None,
                 radius=None,
                 n_samples=None,
                 bn=True,
                 use_xyz=True):
        """Create the sampler, grouper and MLP stack of this module.

        Args:
            mlp: Channel widths forwarded to ``self.build_mlps``.
            n_points: Value handed to ``FurthestPointSampler``. When it is
                ``None`` no sampler is created and ``GroupAll`` is used.
            radius: Forwarded to ``BallQueryGrouper``.
            n_samples: Forwarded to ``BallQueryGrouper``.
            bn: Accepted by the signature but not read in this constructor.
            use_xyz: Forwarded to the grouper and to ``self.build_mlps``.
        """
        super().__init__()

        self.n_points = n_points

        self.groupers = nn.ModuleList()
        if n_points is None:
            # No sampling requested: a single grouper over everything.
            self.groupers.append(GroupAll(use_xyz))
        else:
            self.sampler = FurthestPointSampler(n_points)
            ball_grouper = BallQueryGrouper(radius, n_samples, use_xyz)
            self.groupers.append(ball_grouper)

        shared_mlp = self.build_mlps(mlp, use_xyz)
        self.mlps = nn.ModuleList()
        self.mlps.append(shared_mlp)
コード例 #2
0
ファイル: pointconv_utils.py プロジェクト: diviswen/PMP-Net
    def __init__(self, npoint, nsample, in_channel, mlp, bandwidth, group_all):
        """Create the conv/BN stack and auxiliary nets of the layer.

        Args:
            npoint: Stored as ``self.npoint``.
            nsample: Stored as ``self.nsample``.
            in_channel: Input width of the first 1x1 convolution.
            mlp: Output widths of the successive convolutions; the last
                entry also sizes ``self.linear`` and ``self.bn_linear``.
            bandwidth: Stored as ``self.bandwidth``.
            group_all: Stored as ``self.group_all``.
        """
        super(PointConvDensitySetAbstraction, self).__init__()
        self.npoint = npoint
        self.nsample = nsample
        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()
        # Pair each layer's input width with its output width:
        # (in_channel, mlp[0]), (mlp[0], mlp[1]), ...
        widths = [in_channel] + list(mlp)
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            self.mlp_convs.append(nn.Conv(c_in, c_out, 1))
            self.mlp_bns.append(nn.BatchNorm(c_out))

        self.weightnet = WeightNet(3, 16)
        self.densitynet = DensityNet()

        final_width = mlp[-1]
        self.linear = nn.Linear(16 * final_width, final_width)
        self.bn_linear = nn.BatchNorm1d(final_width)
        self.group_all = group_all
        self.bandwidth = bandwidth
        self.relu = nn.ReLU()
コード例 #3
0
    def __init__(self, cfg, in_channels):
        """Build the shared tower and the three prediction convolutions.

        Arguments:
            cfg: configuration object; the ``cfg.MODEL.FCOS`` fields
                NUM_CLASSES, RESIDUAL_CONNECTION, NUM_CONVS, DENSE_POINTS
                and PRIOR_PROB are read here.
            in_channels (int): number of channels of the input feature
        """
        super(FCOSSharedHead, self).__init__()
        fcos_cfg = cfg.MODEL.FCOS
        # TODO: Implement the sigmoid version first.
        num_classes = fcos_cfg.NUM_CLASSES - 1
        self.identity = fcos_cfg.RESIDUAL_CONNECTION

        # Tower of NUM_CONVS x (3x3 conv -> GroupNorm(32) -> ReLU).
        tower_layers = []
        for _ in range(fcos_cfg.NUM_CONVS):
            tower_layers.extend([
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1),
                nn.GroupNorm(32, in_channels),
                nn.ReLU(),
            ])
        self.shared_tower = nn.Sequential(*tower_layers)
        self.dense_points = fcos_cfg.DENSE_POINTS

        def make_head(out_channels):
            # Every prediction head is a 3x3, stride-1, padding-1 conv.
            return nn.Conv(in_channels,
                           out_channels,
                           kernel_size=3,
                           stride=1,
                           padding=1)

        self.cls_logits = make_head(num_classes * self.dense_points)
        self.bbox_pred = make_head(4 * self.dense_points)
        self.centerness = make_head(1 * self.dense_points)

        # initialization
        to_init = (self.shared_tower, self.cls_logits, self.bbox_pred,
                   self.centerness)
        for branch in to_init:
            for layer in branch.modules():
                if not isinstance(layer, nn.Conv):
                    continue
                nn.init.gauss_(layer.weight, std=0.01)
                nn.init.constant_(layer.bias, 0)

        # initialize the bias for focal loss
        prior_prob = fcos_cfg.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        nn.init.constant_(self.cls_logits.bias, bias_value)

        self.scales = nn.ModuleList(*[Scale(init_value=1.0) for _ in range(5)])
コード例 #4
0
    def _compile(self, C, op_names, indices, concat, reduction):
        """Instantiate one operation per (name, index) pair.

        Args:
            C: First argument passed to every ``OPS`` factory.
            op_names: Keys into ``OPS``; must be as long as ``indices``.
            indices: One index per operation; stored as ``self._indices``.
            concat: Stored as ``self._concat``; its length becomes
                ``self.multiplier``.
            reduction: When truthy, operations whose index is below 2 are
                built with stride 2; all others use stride 1.
        """
        assert len(op_names) == len(indices)
        self._steps = len(op_names) // 2
        self._concat = concat
        self.multiplier = len(concat)

        self._ops = nn.ModuleList()
        for op_name, src_index in zip(op_names, indices):
            if reduction and src_index < 2:
                op_stride = 2
            else:
                op_stride = 1
            self._ops.append(OPS[op_name](C, op_stride, True))
        self._indices = indices
コード例 #5
0
    def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, baseWidth=26, scale = 4, stype='normal'):
        """Build the layers of the Bottle2neck block.

        Args:
            inplanes: input channel dimensionality
            planes: output channel dimensionality (multiplied by
                ``self.expansion`` for the last 1x1 conv)
            stride: conv stride. Replaces pooling layer.
            dilation: dilation of the 3x3 convs; also used as their padding.
            downsample: None when stride = 1
            baseWidth: basic width of conv3x3
            scale: number of scale; asserted to be greater than 1.
            stype: 'normal': normal set. 'stage': first block of a new
                stage (additionally creates ``self.pool``).
        """
        super(Bottle2neck, self).__init__()

        width = int(math.floor(planes * (baseWidth / 64.0)))
        wide = width * scale
        self.conv1 = nn.Conv(inplanes, wide, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm(wide)
        assert scale > 1, 'Res2Net degenerates to ResNet when scales = 1.'
        self.nums = 1 if scale == 1 else scale - 1
        if stype == 'stage':
            self.pool = nn.Pool(kernel_size=3, stride=stride, padding=1, op='mean')
        self.convs = nn.ModuleList()
        self.bns = nn.ModuleList()
        for _ in range(self.nums):
            branch_conv = nn.Conv(width, width, kernel_size=3, stride=stride,
                                  dilation=dilation, padding=dilation,
                                  bias=False)
            self.convs.append(branch_conv)
            self.bns.append(nn.BatchNorm(width))

        expanded = planes * self.expansion
        self.conv3 = nn.Conv(wide, expanded, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm(expanded)

        self.relu = nn.ReLU()
        self.downsample = downsample
        self.stype = stype
        self.scale = scale
        self.width = width
        self.stride = stride
        self.dilation = dilation
コード例 #6
0
    def __init__(self, light_mode='surface',
                 intensity_ambient=0.5, color_ambient=[1,1,1],
                 intensity_directionals=0.5, color_directionals=[1,1,1],
                 directions=[0,1,0]):
        """Create one ambient light and a list holding one directional light.

        Args:
            light_mode: Either 'surface' or 'vertex'.
            intensity_ambient: Forwarded to ``AmbientLighting``.
            color_ambient: Forwarded to ``AmbientLighting``.
            intensity_directionals: Forwarded to ``DirectionalLighting``.
            color_directionals: Forwarded to ``DirectionalLighting``.
            directions: Forwarded to ``DirectionalLighting``.

        Raises:
            ValueError: If ``light_mode`` is not one of the two modes.
        """
        super(Lighting, self).__init__()

        supported_modes = ('surface', 'vertex')
        if light_mode not in supported_modes:
            raise ValueError('Lighting mode only support surface and vertex')

        self.light_mode = light_mode
        self.ambient = AmbientLighting(intensity_ambient, color_ambient)
        directional = DirectionalLighting(intensity_directionals,
                                          color_directionals,
                                          directions)
        self.directionals = nn.ModuleList([directional])
コード例 #7
0
    def __init__(self,
                 D=8,
                 W=256,
                 input_ch=3,
                 input_ch_views=3,
                 output_ch=4,
                 skips=[4],
                 use_viewdirs=False):
        """Create the linear layers of the NeRF network.

        Args:
            D: Total number of layers in ``self.pts_linears``.
            W: Width of the hidden layers.
            input_ch: Width of the first layer's input; also added to the
                input width of the layers that follow a skip index.
            input_ch_views: Added to ``W`` as the input width of
                ``self.views_linears``.
            output_ch: Output width of ``self.output_linear`` (only created
                when ``use_viewdirs`` is false).
            skips: Indices ``i`` (in ``range(D - 1)``) whose layer takes
                ``W + input_ch`` inputs instead of ``W``.
            use_viewdirs: Selects the feature/alpha/rgb heads instead of the
                single output layer.
        """
        super(NeRF, self).__init__()
        self.D = D
        self.W = W
        self.input_ch = input_ch
        self.input_ch_views = input_ch_views
        self.skips = skips
        self.use_viewdirs = use_viewdirs

        point_layers = [nn.Linear(input_ch, W)]
        for i in range(D - 1):
            if i in self.skips:
                in_width = W + input_ch
            else:
                in_width = W
            point_layers.append(nn.Linear(in_width, W))
        self.pts_linears = nn.ModuleList(point_layers)

        # Single view layer, following the official code release
        # (https://github.com/bmild/nerf/blob/master/run_nerf_helpers.py#L104-L105).
        # The paper's variant would append D // 2 further (W//2 -> W//2)
        # layers to this list.
        view_layer = nn.Linear(input_ch_views + W, W // 2)
        self.views_linears = nn.ModuleList([view_layer])

        if not use_viewdirs:
            self.output_linear = nn.Linear(W, output_ch)
        else:
            self.feature_linear = nn.Linear(W, W)
            self.alpha_linear = nn.Linear(W, 1)
            self.rgb_linear = nn.Linear(W // 2, 3)
コード例 #8
0
ファイル: hrfpn.py プロジェクト: li-xl/detectron.jittor
    def __init__(self,
                 in_channels,
                 out_channels,
                 normalize=None,
                 pooling='AVG',
                 share_conv=False,
                 conv_stride=1,
                 num_level=5,
                 with_checkpoint=False):
        """Create the reduction conv and the per-level (or shared) FPN convs.

        Args:
            in_channels: List of input channel counts; their sum is the
                input width of the 1x1 reduction conv.
            out_channels: Output width of every convolution built here.
            normalize: Only checked against ``None`` to set
                ``self.with_bias``.
            pooling: 'MAX' stores 'maximum' in ``self.pooling``; any other
                value stores 'mean'.
            share_conv: When truthy a single 3x3 conv is created, otherwise
                one 3x3 conv per level.
            conv_stride: Stride of the 3x3 conv(s).
            num_level: Number of per-level convs when not sharing.
            with_checkpoint: Stored as ``self.with_checkpoint``.
        """
        super(HRFPN, self).__init__()
        assert isinstance(in_channels, list)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.with_bias = normalize is None
        self.share_conv = share_conv
        self.num_level = num_level
        reduce_1x1 = nn.Conv(in_channels=sum(in_channels),
                             out_channels=out_channels,
                             kernel_size=1)
        self.reduction_conv = nn.Sequential(reduce_1x1, )

        def make_fpn_conv():
            # 3x3 conv that keeps the channel count.
            return nn.Conv(in_channels=out_channels,
                           out_channels=out_channels,
                           kernel_size=3,
                           stride=conv_stride,
                           padding=1)

        if not self.share_conv:
            self.fpn_conv = nn.ModuleList()
            for _ in range(self.num_level):
                self.fpn_conv.append(make_fpn_conv())
        else:
            self.fpn_conv = make_fpn_conv()

        self.pooling = 'maximum' if pooling == 'MAX' else 'mean'
        self.with_checkpoint = with_checkpoint
コード例 #9
0
    def build_model(self):
        """Create the three PointnetModule stages and the classifier head.

        Reads ``self.use_xyz`` and ``self.n_classes``; writes
        ``self.pointnet_modules`` and ``self.fc_layer``.
        """
        # One entry per stage; the last stage passes no sampling arguments,
        # so PointnetModule's own defaults apply to it.
        stage_kwargs = (
            dict(n_points=512, radius=0.2, n_samples=64,
                 mlp=[3, 64, 64, 128]),
            dict(n_points=128, radius=0.4, n_samples=64,
                 mlp=[128, 128, 128, 256]),
            dict(mlp=[256, 256, 512, 1024]),
        )

        self.pointnet_modules = nn.ModuleList()
        for kwargs in stage_kwargs:
            stage = PointnetModule(use_xyz=self.use_xyz, **kwargs)
            self.pointnet_modules.append(stage)

        head_layers = [
            nn.Linear(1024, 512, bias=False),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 256, bias=False),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, self.n_classes),
        ]
        self.fc_layer = nn.Sequential(*head_layers)
コード例 #10
0
    def _make_transition_layer(self, num_channels_pre_layer,
                               num_channels_cur_layer):
        """Build the per-branch transition modules between two stages.

        For a branch that already exists in the previous stage, the entry is
        a stride-1 conv-BN-ReLU when the channel count changes and ``None``
        otherwise. For each new branch, the entry is a chain of stride-2
        conv-BN-ReLU units fed from the last previous branch; only the final
        unit switches to the new branch's channel count.

        Args:
            num_channels_pre_layer: Channel counts of the previous branches.
            num_channels_cur_layer: Channel counts of the current branches.

        Returns:
            ``nn.ModuleList`` with one entry per current branch.
        """
        n_pre = len(num_channels_pre_layer)

        def conv_bn_relu(c_in, c_out, stride):
            # 3x3 conv (padding 1, no bias) -> BatchNorm -> ReLU.
            return nn.Sequential(
                nn.Conv(c_in, c_out, 3, stride, 1, bias=False),
                BatchNorm2d(c_out, momentum=BN_MOMENTUM),
                nn.ReLU())

        transitions = []
        for branch, cur_channels in enumerate(num_channels_cur_layer):
            if branch < n_pre:
                pre_channels = num_channels_pre_layer[branch]
                if cur_channels == pre_channels:
                    transitions.append(None)
                else:
                    transitions.append(
                        conv_bn_relu(pre_channels, cur_channels, 1))
                continue

            # New branch: downsample (branch + 1 - n_pre) times.
            n_down = branch + 1 - n_pre
            source_channels = num_channels_pre_layer[-1]
            chain = []
            for step in range(n_down):
                is_last = step == n_down - 1
                target = cur_channels if is_last else source_channels
                chain.append(conv_bn_relu(source_channels, target, 2))
            transitions.append(nn.Sequential(*chain))

        return nn.ModuleList(transitions)
コード例 #11
0
    def __init__(self, cfg, n_class=1000, input_size=224, width_mult=1.):
        """Build the MobileNetV2 feature layers.

        Args:
            cfg: Configuration; ``cfg.MODEL.BACKBONE.FREEZE_CONV_BODY_AT``
                is passed to ``self._freeze_backbone``.
            n_class: Accepted by the signature but not read here.
            input_size: Must be a multiple of 32.
            width_mult: Multiplier applied to every channel count.
        """
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        # Rows are (t, c, n, s): expand ratio, base output channels,
        # number of repeats, stride of the first repeat.
        stage_settings = [
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        # building first layer
        assert input_size % 32 == 0
        input_channel = int(32 * width_mult)
        self.return_features_indices = [3, 6, 13, 17]
        self.return_features_num_channels = []
        self.features = nn.ModuleList(conv_bn(3, input_channel, 2))
        # building inverted residual blocks
        for t, c, n, s in stage_settings:
            output_channel = int(c * width_mult)
            for repeat in range(n):
                # Only the first block of a stage uses the stage stride.
                stride = s if repeat == 0 else 1
                self.features.append(
                    block(input_channel, output_channel, stride,
                          expand_ratio=t))
                input_channel = output_channel
                last_index = len(self.features) - 1
                if last_index in self.return_features_indices:
                    self.return_features_num_channels.append(output_channel)

        self._initialize_weights()
        self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_CONV_BODY_AT)
コード例 #12
0
ファイル: backbone.py プロジェクト: li-xl/Yolact.jittor
    def __init__(self, layers=[1, 2, 8, 8, 4], block=DarkNetBlock):
        """Build the pre-convolution and the five DarkNet stages.

        Args:
            layers: Per-stage third argument to ``self._make_layer``;
                entries 0 through 4 are used.
            block: Block class handed to ``self._make_layer``.
        """
        super().__init__()

        # These will be populated by _make_layer
        self.num_base_layers = len(layers)
        self.layers = nn.ModuleList()
        self.channels = []

        self._preconv = darknetconvlayer(3, 32, kernel_size=3, padding=1)
        self.in_channels = 32

        stage_widths = (32, 64, 128, 256, 512)
        for stage, stage_width in enumerate(stage_widths):
            self._make_layer(block, stage_width, layers[stage])

        # This contains every module that should be initialized by loading in pretrained weights.
        # Any extra layers added onto this that won't be initialized by init_backbone will not be
        # in this list. That way, Yolact::init_weights knows which backbone weights to initialize
        # with xavier, and which ones to leave alone.
        self.backbone_modules = [
            module for module in self.modules()
            if isinstance(module, nn.Conv)
        ]
コード例 #13
0
    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        """Create one convolution per kernel size, splitting ``c2`` among them.

        Args:
            c1: Input channels of every convolution.
            c2: Total output channels, divided over the groups; also the
                width of the batch norm.
            k: Kernel sizes, one convolution ("group") per entry.
            s: Stride of every convolution.
            equal_ch: If true, give each group (almost) the same number of
                output channels; otherwise solve for channel counts that
                give each group the same number of weights.
        """
        super(MixConv2d, self).__init__()
        groups = len(k)
        if equal_ch:  # equal c_ per group
            # Assign each of the c2 output slots a group index by flooring
            # evenly spaced values in [0, groups). NumPy arrays expose
            # ``astype`` (there is no ``as_type``), so the split is computed
            # in NumPy and yields plain Python ints.
            i = np.floor(
                np.linspace(0, groups - 1E-6,
                            c2).astype(np.float32))  # c2 indices
            c_ = [int((i == g).sum())
                  for g in range(groups)]  # intermediate channels
        else:  # equal weight.numel() per group
            b = [c2] + [0] * groups
            a = np.eye(groups + 1, groups, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k)**2
            a[0] = 1
            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round(
            )  # solve for equal weight indices, ax = b

        self.m = nn.ModuleList([
            nn.Conv(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False)
            for g in range(groups)
        ])
        self.bn = nn.BatchNorm(c2)
        self.act = nn.LeakyReLU(0.1)
コード例 #14
0
ファイル: vovnet.py プロジェクト: li-xl/detectron.jittor
    def __init__(self,
                 in_ch,
                 stage_ch,
                 concat_ch,
                 layer_per_block,
                 module_name,
                 SE=False,
                 identity=False,
                 dcn_config={}):
        """Build the 3x3 conv layers, the 1x1 aggregation conv and the eSE.

        Args:
            in_ch: Input channels of the first 3x3 layer.
            stage_ch: Output channels of every 3x3 layer.
            concat_ch: Output channels of the aggregation conv and width
                passed to ``eSEModule``.
            layer_per_block: Number of loop iterations / 3x3 layers.
            module_name: Forwarded to ``conv3x3`` / ``conv1x1``.
            SE: Accepted by the signature but not read here.
            identity: Stored as ``self.identity``.
            dcn_config: Mapping read with ``.get``; when its
                "stage_with_dcn" entry is truthy no 3x3 layer is appended
                (the deformable-conv construction is commented out).
        """
        super(_OSA_module, self).__init__()

        self.identity = identity
        self.layers = nn.ModuleList()
        use_dcn = dcn_config.get("stage_with_dcn", False)
        layer_in = in_ch
        for idx in range(layer_per_block):
            if not use_dcn:
                named_layers = OrderedDict(
                    conv3x3(layer_in, stage_ch, module_name, idx))
                self.layers.append(nn.Sequential(named_layers))
            else:
                # Deformable path: options are read but the DFConv3x3 layer
                # itself is disabled, so nothing is appended here.
                deformable_groups = dcn_config.get("deformable_groups", 1)
                with_modulated_dcn = dcn_config.get("with_modulated_dcn",
                                                    False)
            layer_in = stage_ch

        # feature aggregation
        aggregated = in_ch + layer_per_block * stage_ch
        concat_layers = OrderedDict(
            conv1x1(aggregated, concat_ch, module_name, 'concat'))
        self.concat = nn.Sequential(concat_layers)

        self.ese = eSEModule(concat_ch)
コード例 #15
0
    def __init__(self,
                 dlatent_size=512,
                 num_channels=3,
                 resolution=1024,
                 fmap_base=8192,
                 fmap_decay=1.0,
                 fmap_max=512,
                 use_styles=True,
                 const_input_layer=True,
                 use_noise=True,
                 nonlinearity='lrelu',
                 use_wscale=True,
                 use_pixel_norm=False,
                 use_instance_norm=True,
                 blur_filter=None,
                 structure='linear',
                 **kwargs):
        """
        Synthesis network used in the StyleGAN paper.

        Builds one input block, one synthesis block per resolution from
        8x8 up to ``resolution``, and one ToRGB converter per resolution
        (including the initial one).

        :param dlatent_size: Disentangled latent (W) dimensionality.
        :param num_channels: Number of output color channels.
        :param resolution: Output resolution. Must be a power of two and at least 4.
        :param fmap_base: Overall multiplier for the number of feature maps.
        :param fmap_decay: log2 feature map reduction when doubling the resolution.
        :param fmap_max: Maximum number of feature maps in any layer.
        :param use_styles: Enable style inputs?
        :param const_input_layer: First layer is a learned constant?
        :param use_noise: Enable noise inputs?
        :param nonlinearity: Activation function: 'relu', 'lrelu'
        :param use_wscale: Enable equalized learning rate?
        :param use_pixel_norm: Enable pixel_wise feature vector normalization?
        :param use_instance_norm: Enable instance normalization?
        :param blur_filter: Low-pass filter to apply when resampling activations. None = no filtering.
        :param structure: 'fixed' = no progressive growing, 'linear' = human-readable
        :param kwargs: Ignore unrecognized keyword args.

        Note: ``randomize_noise`` (True = randomize noise inputs every time,
        False = read noise inputs from variables) is not a parameter of
        this constructor.
        """

        super().__init__()

        # if blur_filter is None:
        #     blur_filter = [1, 2, 1]

        def nf(stage):
            # Feature-map count for a stage: fmap_base halved fmap_decay
            # times per stage, capped at fmap_max.
            return min(int(fmap_base / (2.0**(stage * fmap_decay))), fmap_max)

        self.structure = structure

        resolution_log2 = int(np.log2(resolution))
        # Reject resolutions that are not an exact power of two or are below 4.
        assert resolution == 2**resolution_log2 and resolution >= 4
        self.depth = resolution_log2 - 1

        self.num_layers = resolution_log2 * 2 - 2
        self.num_styles = self.num_layers if use_styles else 1

        # Map the activation name to (activation module, gain); an unknown
        # name raises KeyError.
        act, gain = {
            'relu': (nn.ReLU(), np.sqrt(2)),
            'lrelu': (nn.LeakyReLU(scale=0.2), np.sqrt(2))
        }[nonlinearity]

        # Early layers.
        self.init_block = InputBlock(nf(1), dlatent_size, const_input_layer,
                                     gain, use_wscale, use_noise,
                                     use_pixel_norm, use_instance_norm,
                                     use_styles, act)
        # create the ToRGB layers for various outputs
        rgb_converters = [
            EqualizedConv2d(nf(1),
                            num_channels,
                            1,
                            gain=1,
                            use_wscale=use_wscale)
        ]

        # Building blocks for remaining layers.
        blocks = []
        for res in range(3, resolution_log2 + 1):
            last_channels = nf(res - 2)
            channels = nf(res - 1)
            # name = '{s}x{s}'.format(s=2 ** res)
            blocks.append(
                GSynthesisBlock(last_channels, channels, blur_filter,
                                dlatent_size, gain, use_wscale, use_noise,
                                use_pixel_norm, use_instance_norm, use_styles,
                                act))
            # One ToRGB converter per block, sized to that block's output.
            rgb_converters.append(
                EqualizedConv2d(channels,
                                num_channels,
                                1,
                                gain=1,
                                use_wscale=use_wscale))

        self.blocks = nn.ModuleList(blocks)
        self.to_rgb = nn.ModuleList(rgb_converters)

        # register the temporary upsampler
        # self.temporaryUpsampler = lambda x: interpolate(x, scale_factor=2)
        # Nearest-neighbour 2x upsampling.
        self.temporaryUpsampler = lambda x: nn.interpolate(
            x, scale_factor=2, mode='nearest')
コード例 #16
0
    def __init__(self,
                 resolution,
                 num_channels=3,
                 fmap_base=8192,
                 fmap_decay=1.0,
                 fmap_max=512,
                 nonlinearity='lrelu',
                 use_wscale=True,
                 mbstd_group_size=4,
                 mbstd_num_features=1,
                 blur_filter=None,
                 structure='linear',
                 **kwargs):
        """
        Discriminator used in the StyleGAN paper.

        Builds one discriminator block and one fromRGB converter per
        resolution from ``resolution`` down to 8x8, followed by a final
        block and its own fromRGB converter.

        :param resolution: Input resolution. Overridden based on dataset.
            Must be a power of two and at least 4.
        :param num_channels: Number of input color channels. Overridden based on dataset.
        :param fmap_base: Overall multiplier for the number of feature maps.
        :param fmap_decay: log2 feature map reduction when doubling the resolution.
        :param fmap_max: Maximum number of feature maps in any layer.
        :param nonlinearity: Activation function: 'relu', 'lrelu'
        :param use_wscale: Enable equalized learning rate?
        :param mbstd_group_size: Group size for the mini_batch standard deviation layer, 0 = disable.
        :param mbstd_num_features: Number of features for the mini_batch standard deviation layer.
        :param blur_filter: Low-pass filter to apply when resampling activations. None = no filtering.
        :param structure: 'fixed' = no progressive growing, 'linear' = human-readable
        :param kwargs: Ignore unrecognized keyword args.

        Note: there is no ``label_size`` parameter (dimensionality of the
        labels) in this constructor.
        """
        super(Discriminator, self).__init__()

        def nf(stage):
            # Feature-map count for a stage: fmap_base halved fmap_decay
            # times per stage, capped at fmap_max.
            return min(int(fmap_base / (2.0**(stage * fmap_decay))), fmap_max)

        self.mbstd_num_features = mbstd_num_features
        self.mbstd_group_size = mbstd_group_size
        self.structure = structure
        # if blur_filter is None:
        #     blur_filter = [1, 2, 1]

        resolution_log2 = int(np.log2(resolution))
        # Reject resolutions that are not an exact power of two or are below 4.
        assert resolution == 2**resolution_log2 and resolution >= 4
        self.depth = resolution_log2 - 1

        # Map the activation name to (activation module, gain); an unknown
        # name raises KeyError.
        act, gain = {
            'relu': (nn.ReLU(), np.sqrt(2)),
            'lrelu': (nn.LeakyReLU(scale=0.2), np.sqrt(2))
        }[nonlinearity]

        # create the remaining layers
        blocks = []
        from_rgb = []
        # Iterate from the highest resolution down to log2 == 3.
        for res in range(resolution_log2, 2, -1):
            # name = '{s}x{s}'.format(s=2 ** res)
            blocks.append(
                DiscriminatorBlock(nf(res - 1),
                                   nf(res - 2),
                                   gain=gain,
                                   use_wscale=use_wscale,
                                   activation_layer=act,
                                   blur_kernel=blur_filter))
            # create the fromRGB layers for various inputs:
            from_rgb.append(
                EqualizedConv2d(num_channels,
                                nf(res - 1),
                                kernel_size=1,
                                gain=gain,
                                use_wscale=use_wscale))
        self.blocks = nn.ModuleList(blocks)

        # Building the final block.
        self.final_block = DiscriminatorTop(self.mbstd_group_size,
                                            self.mbstd_num_features,
                                            in_channels=nf(2),
                                            intermediate_channels=nf(2),
                                            gain=gain,
                                            use_wscale=use_wscale,
                                            activation_layer=act)
        # fromRGB converter matching the final block's input width.
        from_rgb.append(
            EqualizedConv2d(num_channels,
                            nf(2),
                            kernel_size=1,
                            gain=gain,
                            use_wscale=use_wscale))
        self.from_rgb = nn.ModuleList(from_rgb)

        # register the temporary downSampler
        # self.temporaryDownsampler = nn.AvgPool2d(2)
        self.temporaryDownsampler = nn.Pool(kernel_size=2, op='mean')
コード例 #17
0
    def __init__(self, cfg, in_channels):
        """Build the classification/box towers and the prediction convs.

        Arguments:
            cfg: configuration object; the ``cfg.MODEL.FCOS`` fields
                NUM_CLASSES, FPN_STRIDES, NORM_REG_TARGETS,
                CENTERNESS_ON_REG, USE_DCN_IN_TOWER, NUM_CONVS,
                DENSE_POINTS and PRIOR_PROB are read here.
            in_channels (int): number of channels of the input feature
        """
        super(FCOSHead, self).__init__()
        fcos_cfg = cfg.MODEL.FCOS
        # TODO: Implement the sigmoid version first.
        num_classes = fcos_cfg.NUM_CLASSES - 1
        self.fpn_strides = fcos_cfg.FPN_STRIDES
        self.norm_reg_targets = fcos_cfg.NORM_REG_TARGETS
        self.centerness_on_reg = fcos_cfg.CENTERNESS_ON_REG
        self.use_dcn_in_tower = fcos_cfg.USE_DCN_IN_TOWER

        def conv3x3(out_channels):
            # 3x3, stride-1, padding-1 conv fed with in_channels.
            return nn.Conv(in_channels,
                           out_channels,
                           kernel_size=3,
                           stride=1,
                           padding=1)

        # Both towers are NUM_CONVS x (conv -> GroupNorm(32) -> ReLU); the
        # layers are created interleaved, classification first.
        cls_layers, bbox_layers = [], []
        for _ in range(fcos_cfg.NUM_CONVS):
            for tower in (cls_layers, bbox_layers):
                tower.append(conv3x3(in_channels))
                tower.append(nn.GroupNorm(32, in_channels))
                tower.append(nn.ReLU())

        self.cls_tower = nn.Sequential(*cls_layers)
        self.bbox_tower = nn.Sequential(*bbox_layers)
        self.dense_points = fcos_cfg.DENSE_POINTS
        self.cls_logits = conv3x3(num_classes * self.dense_points)
        self.bbox_pred = conv3x3(4 * self.dense_points)
        self.centerness = conv3x3(1 * self.dense_points)

        # initialization
        to_init = (self.cls_tower, self.bbox_tower, self.cls_logits,
                   self.bbox_pred, self.centerness)
        for branch in to_init:
            for layer in branch.modules():
                if not isinstance(layer, nn.Conv):
                    continue
                nn.init.gauss_(layer.weight, std=0.01)
                nn.init.constant_(layer.bias, 0)

        # initialize the bias for focal loss
        prior_prob = fcos_cfg.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        nn.init.constant_(self.cls_logits.bias, bias_value)
        self.cfg = cfg
        self.scales = nn.ModuleList(*[Scale(init_value=1.0) for _ in range(5)])
コード例 #18
0
    def __init__(self):
        """Assemble the model from the module-level ``cfg`` object.

        Builds the backbone, the optional proto net, mask-IoU net and FPN,
        one prediction module per selected layer, the optional auxiliary
        loss layers and the ``Detect`` post-processor.

        Side effects on ``cfg``: sets ``cfg.mask_dim`` (for the direct and
        lincomb mask types) and ``cfg.num_heads``.
        """
        super().__init__()

        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()

        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
            cfg.mask_dim = cfg.mask_size**2
        elif cfg.mask_type == mask_type.lincomb:
            if cfg.mask_proto_use_grid:
                self.grid = jt.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.shape[0]
            else:
                self.num_grids = 0

            self.proto_src = cfg.mask_proto_src

            # Input width of the proto net: 3 when there is no proto source,
            # the FPN feature count when an FPN is configured, otherwise the
            # channel count of the selected backbone layer.
            if self.proto_src is None: in_channels = 3
            elif cfg.fpn is not None: in_channels = cfg.fpn.num_features
            else: in_channels = self.backbone.channels[self.proto_src]
            in_channels += self.num_grids

            # The include_last_relu=false here is because we might want to change it to another function
            self.proto_net, cfg.mask_dim = make_net(in_channels,
                                                    cfg.mask_proto_net,
                                                    include_last_relu=False)

            if cfg.mask_proto_bias:
                cfg.mask_dim += 1

        self.selected_layers = cfg.backbone.selected_layers
        src_channels = self.backbone.channels

        if cfg.use_maskiou:
            self.maskiou_net = FastMaskIoUNet()

        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN([src_channels[i] for i in self.selected_layers])
            # With an FPN the selected layers become 0..N-1, where N also
            # counts the extra downsample levels, each with num_features
            # channels.
            self.selected_layers = list(
                range(len(self.selected_layers) + cfg.fpn.num_downsample))
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

        self.prediction_layers = nn.ModuleList()
        cfg.num_heads = len(self.selected_layers)

        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]

            pred = PredictionModule(
                src_channels[layer_idx],
                src_channels[layer_idx],
                aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
                scales=cfg.backbone.pred_scales[idx],
                parent=parent,
                index=idx)
            self.prediction_layers.append(pred)

        # Extra parameters for the extra losses
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1],
                                                cfg.num_classes - 1)

        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv(src_channels[0],
                                             cfg.num_classes - 1,
                                             kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=cfg.nms_top_k,
                             conf_thresh=cfg.nms_conf_thresh,
                             nms_thresh=cfg.nms_thresh)
コード例 #19
0
    def __init__(self, nets, extra_params):
        """Keep a list of sub-networks together with their extra parameters.

        Args:
            nets: Sub-networks; wrapped in an ``nn.ModuleList`` and stored
                as ``self.nets``.
            extra_params: Stored unchanged as ``self.extra_params``.
        """
        super().__init__()

        wrapped_nets = nn.ModuleList(nets)
        self.nets, self.extra_params = wrapped_nets, extra_params
コード例 #20
0
ファイル: backbone.py プロジェクト: li-xl/Yolact.jittor
    def __init__(self,
                 layers,
                 dcn_layers=None,
                 dcn_interval=1,
                 atrous_layers=None,
                 block=Bottleneck,
                 norm_layer=nn.BatchNorm):
        """Build the input stem and four stages of the backbone.

        The stem is conv -> norm -> ReLU -> max-pool; each stage is created
        by ``self._make_layer``, which also fills ``self.layers`` and
        ``self.channels``.

        Args:
            layers: Per-stage counts; entries 0-3 are passed to
                ``_make_layer``. ``len(layers)`` is kept as
                ``self.num_base_layers``.
            dcn_layers: Per-stage values forwarded as ``dcn_layers`` to
                ``_make_layer``. ``None`` (default) means ``[0, 0, 0, 0]``.
            dcn_interval: Forwarded unchanged to every ``_make_layer`` call.
            atrous_layers: Stored as ``self.atrous_layers``. ``None``
                (default) means a fresh empty list.
            block: Block class handed to ``_make_layer``.
            norm_layer: Callable used to build the stem's normalisation
                layer (called with the channel count) and kept as
                ``self.norm_layer``.
        """
        super().__init__()

        # Default lists are created per call: a list literal in the signature
        # is evaluated once and would be shared by every instance (and
        # ``atrous_layers`` is stored on ``self`` below).
        if dcn_layers is None:
            dcn_layers = [0, 0, 0, 0]
        if atrous_layers is None:
            atrous_layers = []

        # These will be populated by _make_layer
        self.num_base_layers = len(layers)
        self.layers = nn.ModuleList()
        self.channels = []
        self.norm_layer = norm_layer
        self.dilation = 1
        self.atrous_layers = atrous_layers

        # From torchvision.models.resnet.Resnet
        self.inplanes = 64

        self.conv1 = nn.Conv(3,
                             64,
                             kernel_size=7,
                             stride=2,
                             padding=3,
                             bias=False)
        self.bn1 = norm_layer(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.Pool(kernel_size=3,
                               stride=2,
                               padding=1,
                               op='maximum')

        # (planes, extra kwargs) per stage. The first stage deliberately does
        # not pass ``stride`` so that ``_make_layer``'s own default applies.
        stage_specs = (
            (64, {}),
            (128, {'stride': 2}),
            (256, {'stride': 2}),
            (512, {'stride': 2}),
        )
        for stage_idx, (planes, stride_kwargs) in enumerate(stage_specs):
            self._make_layer(block,
                             planes,
                             layers[stage_idx],
                             dcn_layers=dcn_layers[stage_idx],
                             dcn_interval=dcn_interval,
                             **stride_kwargs)

        # This contains every module that should be initialized by loading in pretrained weights.
        # Any extra layers added onto this that won't be initialized by init_backbone will not be
        # in this list. That way, Yolact::init_weights knows which backbone weights to initialize
        # with xavier, and which ones to leave alone.
        self.backbone_modules = [
            m for m in self.modules() if isinstance(m, nn.Conv)
        ]
コード例 #21
0
    def __init__(self, cfg, in_channels):
        """
        Build the classification, box, centerness and embedding branches.

        Arguments:
            cfg: config object; this method reads ``cfg.MODEL.EMBED_MASK.*``
                and ``cfg.MODEL.FCOS.NUM_CONVS``
            in_channels (int): number of channels of the input feature
        """
        super(EmbedMaskHead, self).__init__()
        # TODO: Implement the sigmoid version first.
        self.fpn_strides = cfg.MODEL.EMBED_MASK.FPN_STRIDES
        self.norm_reg_targets = cfg.MODEL.EMBED_MASK.NORM_REG_TARGETS
        self.centerness_on_reg = cfg.MODEL.EMBED_MASK.CENTERNESS_ON_REG
        self.use_dcn_in_tower = cfg.MODEL.EMBED_MASK.USE_DCN_IN_TOWER

        num_classes = cfg.MODEL.EMBED_MASK.NUM_CLASSES - 1
        embed_dim = cfg.MODEL.EMBED_MASK.EMBED_DIM
        prior_margin = cfg.MODEL.EMBED_MASK.PRIOR_MARGIN
        self.init_sigma_bias = math.log(-math.log(0.5) / (prior_margin**2))

        def conv3x3(out_channels, **extra):
            # 3x3 / stride 1 / padding 1 conv fed by ``in_channels``; ``extra``
            # carries any keyword (e.g. ``bias``) the call site sets explicitly.
            return nn.Conv(in_channels,
                           out_channels,
                           kernel_size=3,
                           stride=1,
                           padding=1,
                           **extra)

        def init_convs(*roots):
            # Gaussian(std=0.01) weights and zero bias for every nn.Conv found
            # under each root, visited in the order given.
            for root in roots:
                for layer in root.modules():
                    if isinstance(layer, nn.Conv):
                        nn.init.gauss_(layer.weight, std=0.01)
                        nn.init.constant_(layer.bias, 0)

        cls_tower = []
        bbox_tower = []
        mask_tower = []
        for _ in range(cfg.MODEL.FCOS.NUM_CONVS):
            # The deformable-conv choice (DFConv2d) for the last tower conv is
            # disabled here, so every layer uses a regular convolution even
            # when ``use_dcn_in_tower`` is set. Binding ``conv_func`` on every
            # iteration avoids relying on a value left over from the previous
            # one, which was unbound when NUM_CONVS == 1.
            conv_func = nn.Conv

            # Same construction order as before: cls, bbox, mask per level.
            for tower in (cls_tower, bbox_tower, mask_tower):
                tower.append(
                    conv_func(in_channels,
                              in_channels,
                              kernel_size=3,
                              stride=1,
                              padding=1,
                              bias=True))
                tower.append(nn.GroupNorm(32, in_channels))
                tower.append(nn.ReLU())

        self.cls_tower = nn.Sequential(*cls_tower)
        self.bbox_tower = nn.Sequential(*bbox_tower)
        self.cls_logits = conv3x3(num_classes)
        self.bbox_pred = conv3x3(4)
        self.centerness = conv3x3(1)
        # initialization
        init_convs(self.cls_tower, self.bbox_tower, self.cls_logits,
                   self.bbox_pred, self.centerness)

        # initialize the bias for focal loss
        prior_prob = cfg.MODEL.EMBED_MASK.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        nn.init.constant_(self.cls_logits.bias, bias_value)

        self.scales = nn.ModuleList(*[Scale(init_value=1.0) for _ in range(5)])

        ########### Mask Predictions ############
        # proposal embedding
        self.proposal_spatial_embed_pred = conv3x3(2, bias=True)
        self.proposal_other_embed_pred = conv3x3(embed_dim - 2, bias=True)
        init_convs(self.proposal_spatial_embed_pred,
                   self.proposal_other_embed_pred)
        # proposal margin
        self.proposal_margin_pred = conv3x3(1, bias=True)
        nn.init.gauss_(self.proposal_margin_pred.weight, std=0.01)
        # The margin bias starts at the value derived from PRIOR_MARGIN above.
        nn.init.constant_(self.proposal_margin_pred.bias, self.init_sigma_bias)

        # pixel embedding
        self.mask_tower = nn.Sequential(*mask_tower)
        self.pixel_spatial_embed_pred = conv3x3(2, bias=True)
        self.pixel_other_embed_pred = conv3x3(embed_dim - 2, bias=True)
        init_convs(self.mask_tower, self.pixel_spatial_embed_pred,
                   self.pixel_other_embed_pred)

        self.position_scale = Scale(init_value=1.0)