Example #1
 def __init__(self, ch_in, ch_out=128, num_classes=80, conv_num=2):
     super(HMHead, self).__init__()
     head_conv = nn.Sequential()
     for i in range(conv_num):
         name = 'conv.{}'.format(i)
         head_conv.add_sublayer(
             name,
             nn.Conv2D(in_channels=ch_in if i == 0 else ch_out,
                       out_channels=ch_out,
                       kernel_size=3,
                       padding=1,
                       weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
                       bias_attr=ParamAttr(learning_rate=2.,
                                           regularizer=L2Decay(0.))))
         head_conv.add_sublayer(name + '.act', nn.ReLU())
     self.feat = self.add_sublayer('hm_feat', head_conv)
     # prior-probability bias init: sigmoid(bias) starts at roughly 0.01 for the heatmap logits
     bias_init = float(-np.log((1 - 0.01) / 0.01))
     self.head = self.add_sublayer(
         'hm_head',
         nn.Conv2D(in_channels=ch_out,
                   out_channels=num_classes,
                   kernel_size=1,
                   weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
                   bias_attr=ParamAttr(learning_rate=2.,
                                       regularizer=L2Decay(0.),
                                       initializer=Constant(bias_init))))
Example #2
 def __init__(self, reg_topk=4, reg_channels=64, add_mean=True):
     super(DGQP, self).__init__()
     self.reg_topk = reg_topk
     self.reg_channels = reg_channels
     self.add_mean = add_mean
     self.total_dim = reg_topk
     if add_mean:
         self.total_dim += 1
     self.reg_conv1 = self.add_sublayer(
         'dgqp_reg_conv1',
         nn.Conv2D(
             in_channels=4 * self.total_dim,
             out_channels=self.reg_channels,
             kernel_size=1,
             weight_attr=ParamAttr(initializer=Normal(
                 mean=0., std=0.01)),
             bias_attr=ParamAttr(initializer=Constant(value=0))))
     self.reg_conv2 = self.add_sublayer(
         'dgqp_reg_conv2',
         nn.Conv2D(
             in_channels=self.reg_channels,
             out_channels=1,
             kernel_size=1,
             weight_attr=ParamAttr(initializer=Normal(
                 mean=0., std=0.01)),
             bias_attr=ParamAttr(initializer=Constant(value=0))))
Example #3
    def __init__(self,
                 head,
                 in_channel,
                 roi_extractor=RoIAlign().__dict__,
                 bbox_assigner='BboxAssigner',
                 with_pool=False,
                 num_classes=80,
                 bbox_weight=[10., 10., 5., 5.]):
        super(BBoxHead, self).__init__()
        self.head = head
        self.roi_extractor = roi_extractor
        if isinstance(roi_extractor, dict):
            self.roi_extractor = RoIAlign(**roi_extractor)
        self.bbox_assigner = bbox_assigner

        self.with_pool = with_pool
        self.num_classes = num_classes
        self.bbox_weight = bbox_weight

        self.bbox_score = nn.Linear(
            in_channel,
            self.num_classes + 1,
            weight_attr=paddle.ParamAttr(initializer=Normal(
                mean=0.0, std=0.01)))

        self.bbox_delta = nn.Linear(
            in_channel,
            4 * self.num_classes,
            weight_attr=paddle.ParamAttr(initializer=Normal(
                mean=0.0, std=0.001)))
        self.assigned_label = None
        self.assigned_rois = None
Example #4
    def __init__(self,
                 ch_in: int,
                 ch_out: int,
                 filter_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 padding: int = 0,
                 act: str = 'leakly',
                 is_test: bool = False):
        super(ConvBNLayer, self).__init__()

        self.conv = nn.Conv2d(
            ch_in,
            ch_out,
            filter_size,
            padding=padding,
            stride=stride,
            groups=groups,
            weight_attr=paddle.ParamAttr(initializer=Normal(0., 0.02)),
            bias_attr=False)

        self.batch_norm = nn.BatchNorm(num_channels=ch_out,
                                       is_test=is_test,
                                       param_attr=paddle.ParamAttr(
                                           initializer=Normal(0., 0.02),
                                           regularizer=L2Decay(0.)))
        self.act = act
Example #5
 def __init__(
     self,
     ch_in,
     ch_out=128,
     num_classes=80,
     conv_num=2,
     dcn_head=False,
     lite_head=False,
     norm_type='bn',
 ):
     super(HMHead, self).__init__()
     head_conv = nn.Sequential()
     for i in range(conv_num):
         name = 'conv.{}'.format(i)
         if lite_head:
             lite_name = 'hm.' + name
             head_conv.add_sublayer(
                 lite_name,
                 LiteConv(in_channels=ch_in if i == 0 else ch_out,
                          out_channels=ch_out,
                          norm_type=norm_type))
         else:
             if dcn_head:
                 head_conv.add_sublayer(
                     name,
                     DeformableConvV2(
                         in_channels=ch_in if i == 0 else ch_out,
                         out_channels=ch_out,
                         kernel_size=3,
                         weight_attr=ParamAttr(
                             initializer=Normal(0, 0.01))))
             else:
                 head_conv.add_sublayer(
                     name,
                     nn.Conv2D(
                         in_channels=ch_in if i == 0 else ch_out,
                         out_channels=ch_out,
                         kernel_size=3,
                         padding=1,
                         weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
                         bias_attr=ParamAttr(learning_rate=2.,
                                             regularizer=L2Decay(0.))))
             head_conv.add_sublayer(name + '.act', nn.ReLU())
     self.feat = head_conv
     bias_init = float(-np.log((1 - 0.01) / 0.01))
     weight_attr = None if lite_head else ParamAttr(
         initializer=Normal(0, 0.01))
     self.head = nn.Conv2D(in_channels=ch_out,
                           out_channels=num_classes,
                           kernel_size=1,
                           weight_attr=weight_attr,
                           bias_attr=ParamAttr(
                               learning_rate=2.,
                               regularizer=L2Decay(0.),
                               initializer=Constant(bias_init)))
Example #6
    def __init__(self,
                 ch_in,
                 ch_out=64,
                 conv_num=2,
                 dcn_head=False,
                 lite_head=False,
                 norm_type='bn'):
        super(WHHead, self).__init__()
        head_conv = nn.Sequential()
        for i in range(conv_num):
            name = 'conv.{}'.format(i)
            if lite_head:
                lite_name = 'wh.' + name
                head_conv.add_sublayer(
                    lite_name,
                    LiteConv(in_channels=ch_in if i == 0 else ch_out,
                             out_channels=ch_out,
                             norm_type=norm_type))
                head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
            else:
                if dcn_head:
                    head_conv.add_sublayer(
                        name,
                        DeformableConvV2(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            weight_attr=ParamAttr(
                                initializer=Normal(0, 0.01))))
                else:
                    head_conv.add_sublayer(
                        name,
                        nn.Conv2D(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            padding=1,
                            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
                            bias_attr=ParamAttr(learning_rate=2.,
                                                regularizer=L2Decay(0.))))
                head_conv.add_sublayer(name + '.act', nn.ReLU())

        self.feat = head_conv
        self.head = nn.Conv2D(
            in_channels=ch_out,
            out_channels=4,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
            bias_attr=ParamAttr(learning_rate=2., regularizer=L2Decay(0.)))
Example #7
    def basic_branch(self, num_conv_out_channels, input_ch):
        # the level indexes are defined from fine to coarse,
        # the branch will contain one more part than that of its previous level
        # the sliding step is set to 1
        pyramid_conv_list = nn.LayerList()
        pyramid_fc_list = nn.LayerList()

        idx_levels = 0
        for idx_branches in range(self.num_branches):
            if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
                idx_levels += 1

            pyramid_conv_list.append(
                nn.Sequential(nn.Conv2D(input_ch, num_conv_out_channels, 1),
                              nn.BatchNorm2D(num_conv_out_channels),
                              nn.ReLU()))

        idx_levels = 0
        for idx_branches in range(self.num_branches):
            if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
                idx_levels += 1

            fc = nn.Linear(
                in_features=num_conv_out_channels,
                out_features=self.num_classes,
                weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.001)),
                bias_attr=ParamAttr(initializer=Constant(value=0.)))
            pyramid_fc_list.append(fc)
        return pyramid_conv_list, pyramid_fc_list
Example #8
    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 dilation=1,
                 groups=1,
                 act=None,
                 lr_mult=1.0,
                 name=None,
                 data_format="NCHW"):
        super(ConvBNLayer, self).__init__()
        conv_stdv = filter_size * filter_size * num_filters
        self._conv = nn.Conv2D(in_channels=num_channels,
                               out_channels=num_filters,
                               kernel_size=filter_size,
                               stride=stride,
                               padding=(filter_size - 1) // 2,
                               dilation=dilation,
                               groups=groups,
                               weight_attr=ParamAttr(learning_rate=lr_mult,
                                                     initializer=Normal(
                                                         0,
                                                         math.sqrt(
                                                             2. / conv_stdv))),
                               bias_attr=False,
                               data_format=data_format)

        self._batch_norm = nn.BatchNorm(num_filters,
                                        act=act,
                                        data_layout=data_format)
Example #9
def ConvTranspose2d(in_channels,
                    out_channels,
                    kernel_size,
                    stride=1,
                    padding=0,
                    output_padding=0,
                    groups=1,
                    bias=True,
                    dilation=1,
                    weight_init=Normal(std=0.001),
                    bias_init=Constant(0.)):
    weight_attr = paddle.framework.ParamAttr(initializer=weight_init)
    if bias:
        bias_attr = paddle.framework.ParamAttr(initializer=bias_init)
    else:
        bias_attr = False
    # pass the optional arguments by keyword so each value binds to the intended
    # parameter regardless of Conv2DTranspose's positional order
    conv = nn.Conv2DTranspose(in_channels,
                              out_channels,
                              kernel_size,
                              stride=stride,
                              padding=padding,
                              output_padding=output_padding,
                              groups=groups,
                              dilation=dilation,
                              weight_attr=weight_attr,
                              bias_attr=bias_attr)
    return conv
Example #10
    def __init__(self,
                 bbox_feat,
                 in_feat=1024,
                 num_classes=81,
                 cls_agnostic=False,
                 roi_stages=1,
                 with_pool=False,
                 score_stage=[0, 1, 2],
                 delta_stage=[2]):
        super(BBoxHead, self).__init__()
        self.num_classes = num_classes
        self.cls_agnostic = cls_agnostic
        self.delta_dim = 2 if cls_agnostic else num_classes
        self.bbox_feat = bbox_feat
        self.roi_stages = roi_stages
        self.bbox_score_list = []
        self.bbox_delta_list = []
        self.roi_feat_list = [[] for i in range(roi_stages)]
        self.with_pool = with_pool
        self.score_stage = score_stage
        self.delta_stage = delta_stage
        for stage in range(roi_stages):
            score_name = 'bbox_score_{}'.format(stage)
            delta_name = 'bbox_delta_{}'.format(stage)
            lr_factor = 2**stage
            bbox_score = self.add_sublayer(
                score_name,
                nn.Linear(in_feat,
                          1 * self.num_classes,
                          weight_attr=ParamAttr(learning_rate=lr_factor,
                                                initializer=Normal(mean=0.0,
                                                                   std=0.01)),
                          bias_attr=ParamAttr(learning_rate=2. * lr_factor,
                                              regularizer=L2Decay(0.))))

            bbox_delta = self.add_sublayer(
                delta_name,
                nn.Linear(in_feat,
                          4 * self.delta_dim,
                          weight_attr=ParamAttr(learning_rate=lr_factor,
                                                initializer=Normal(mean=0.0,
                                                                   std=0.001)),
                          bias_attr=ParamAttr(learning_rate=2. * lr_factor,
                                              regularizer=L2Decay(0.))))
            self.bbox_score_list.append(bbox_score)
            self.bbox_delta_list.append(bbox_delta)
Example #11
    def __init__(
            self,
            num_classes=1,
            num_identifiers=1,  # defined by dataset.total_identities
            anchor_levels=3,
            anchor_scales=4,
            embedding_dim=512,
            emb_loss='JDEEmbeddingLoss',
            jde_loss='JDELoss'):
        super(JDEEmbeddingHead, self).__init__()
        self.num_classes = num_classes
        self.num_identifiers = num_identifiers
        self.anchor_levels = anchor_levels
        self.anchor_scales = anchor_scales
        self.embedding_dim = embedding_dim
        self.emb_loss = emb_loss
        self.jde_loss = jde_loss

        self.emb_scale = math.sqrt(2) * math.log(
            self.num_identifiers - 1) if self.num_identifiers > 1 else 1

        self.identify_outputs = []
        self.loss_params_cls = []
        self.loss_params_reg = []
        self.loss_params_ide = []
        for i in range(self.anchor_levels):
            name = 'identify_output.{}'.format(i)
            identify_output = self.add_sublayer(
                name,
                nn.Conv2D(in_channels=64 * (2**self.anchor_levels) // (2**i),
                          out_channels=self.embedding_dim,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          weight_attr=ParamAttr(name=name + '.conv.weights'),
                          bias_attr=ParamAttr(name=name + '.conv.bias',
                                              regularizer=L2Decay(0.))))
            self.identify_outputs.append(identify_output)

            loss_p_cls = self.add_sublayer('cls.{}'.format(i),
                                           LossParam(-4.15))
            self.loss_params_cls.append(loss_p_cls)
            loss_p_reg = self.add_sublayer('reg.{}'.format(i),
                                           LossParam(-4.85))
            self.loss_params_reg.append(loss_p_reg)
            loss_p_ide = self.add_sublayer('ide.{}'.format(i), LossParam(-2.3))
            self.loss_params_ide.append(loss_p_ide)

        self.classifier = self.add_sublayer(
            'classifier',
            nn.Linear(self.embedding_dim,
                      self.num_identifiers,
                      weight_attr=ParamAttr(learning_rate=1.,
                                            initializer=Normal(mean=0.0,
                                                               std=0.01)),
                      bias_attr=ParamAttr(learning_rate=2.,
                                          regularizer=L2Decay(0.))))
Example #12
 def __init__(self, feat_in=1024, feat_out=1024):
     super(RPNFeat, self).__init__()
     # rpn feat is shared with each level
     self.rpn_conv = nn.Conv2D(in_channels=feat_in,
                               out_channels=feat_out,
                               kernel_size=3,
                               padding=1,
                               weight_attr=paddle.ParamAttr(
                                   initializer=Normal(mean=0., std=0.01)))
Example #13
    def __init__(self,
                 anchor_generator=AnchorGenerator().__dict__,
                 rpn_target_assign=RPNTargetAssign().__dict__,
                 train_proposal=ProposalGenerator(12000, 2000).__dict__,
                 test_proposal=ProposalGenerator().__dict__,
                 in_channel=1024,
                 export_onnx=False):
        super(RPNHead, self).__init__()
        self.anchor_generator = anchor_generator
        self.rpn_target_assign = rpn_target_assign
        self.train_proposal = train_proposal
        self.test_proposal = test_proposal
        self.export_onnx = export_onnx
        if isinstance(anchor_generator, dict):
            self.anchor_generator = AnchorGenerator(**anchor_generator)
        if isinstance(rpn_target_assign, dict):
            self.rpn_target_assign = RPNTargetAssign(**rpn_target_assign)
        if isinstance(train_proposal, dict):
            self.train_proposal = ProposalGenerator(**train_proposal)
        if isinstance(test_proposal, dict):
            self.test_proposal = ProposalGenerator(**test_proposal)

        num_anchors = self.anchor_generator.num_anchors
        self.rpn_feat = RPNFeat(in_channel, in_channel)
        # rpn head is shared with each level
        # rpn roi classification scores
        self.rpn_rois_score = nn.Conv2D(
            in_channels=in_channel,
            out_channels=num_anchors,
            kernel_size=1,
            padding=0,
            weight_attr=paddle.ParamAttr(
                initializer=Normal(mean=0., std=0.01)))
        self.rpn_rois_score.skip_quant = True

        # rpn roi bbox regression deltas
        self.rpn_rois_delta = nn.Conv2D(
            in_channels=in_channel,
            out_channels=4 * num_anchors,
            kernel_size=1,
            padding=0,
            weight_attr=paddle.ParamAttr(
                initializer=Normal(mean=0., std=0.01)))
        self.rpn_rois_delta.skip_quant = True
Example #14
    def __init__(self,
                 head,
                 in_channel,
                 roi_extractor=RoIAlign().__dict__,
                 bbox_assigner='BboxAssigner',
                 num_classes=80,
                 bbox_weight=[[10., 10., 5., 5.], [20.0, 20.0, 10.0, 10.0],
                              [30.0, 30.0, 15.0, 15.0]],
                 num_cascade_stages=3,
                 bbox_loss=None):
        nn.Layer.__init__(self, )
        self.head = head
        self.roi_extractor = roi_extractor
        if isinstance(roi_extractor, dict):
            self.roi_extractor = RoIAlign(**roi_extractor)
        self.bbox_assigner = bbox_assigner

        self.num_classes = num_classes
        self.bbox_weight = bbox_weight
        self.num_cascade_stages = num_cascade_stages
        self.bbox_loss = bbox_loss

        self.bbox_score_list = []
        self.bbox_delta_list = []
        for i in range(num_cascade_stages):
            score_name = 'bbox_score_stage{}'.format(i)
            delta_name = 'bbox_delta_stage{}'.format(i)
            bbox_score = self.add_sublayer(
                score_name,
                nn.Linear(in_channel,
                          self.num_classes + 1,
                          weight_attr=paddle.ParamAttr(
                              initializer=Normal(mean=0.0, std=0.01))))

            bbox_delta = self.add_sublayer(
                delta_name,
                nn.Linear(in_channel,
                          4,
                          weight_attr=paddle.ParamAttr(
                              initializer=Normal(mean=0.0, std=0.001))))
            self.bbox_score_list.append(bbox_score)
            self.bbox_delta_list.append(bbox_delta)
        self.assigned_label = None
        self.assigned_rois = None
Example #15
 def __init__(self, feat_in=1024, feat_out=1024):
     super(RPNFeat, self).__init__()
     # rpn feat is shared with each level
     self.rpn_conv = Conv2D(
         in_channels=feat_in,
         out_channels=feat_out,
         kernel_size=3,
         padding=1,
         weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.01)),
         bias_attr=ParamAttr(learning_rate=2., regularizer=L2Decay(0.)))
Example #16
 def __init__(self, in_channel=1024, out_channel=1024):
     super(RPNFeat, self).__init__()
     # rpn feat is shared with each level
     self.rpn_conv = nn.Conv2D(in_channels=in_channel,
                               out_channels=out_channel,
                               kernel_size=3,
                               padding=1,
                               weight_attr=paddle.ParamAttr(
                                   initializer=Normal(mean=0., std=0.01)))
     self.rpn_conv.skip_quant = True
Example #17
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride=1,
                 groups=1,
                 norm_type=None,
                 norm_groups=32,
                 norm_decay=0.,
                 freeze_norm=False,
                 act=None):
        super(ConvNormLayer, self).__init__()
        self.act = act
        norm_lr = 0. if freeze_norm else 1.
        if norm_type is not None:
            assert norm_type in ['bn', 'sync_bn', 'gn'], \
                "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
            param_attr = ParamAttr(
                initializer=Constant(1.0),
                learning_rate=norm_lr,
                regularizer=L2Decay(norm_decay),
            )
            bias_attr = ParamAttr(learning_rate=norm_lr,
                                  regularizer=L2Decay(norm_decay))
            global_stats = True if freeze_norm else None
            if norm_type in ['bn', 'sync_bn']:
                self.norm = nn.BatchNorm2D(
                    ch_out,
                    weight_attr=param_attr,
                    bias_attr=bias_attr,
                    use_global_stats=global_stats,
                )
            elif norm_type == 'gn':
                self.norm = nn.GroupNorm(num_groups=norm_groups,
                                         num_channels=ch_out,
                                         weight_attr=param_attr,
                                         bias_attr=bias_attr)
            norm_params = self.norm.parameters()
            if freeze_norm:
                for param in norm_params:
                    param.stop_gradient = True
            conv_bias_attr = False
        else:
            conv_bias_attr = True
            self.norm = None

        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.001)),
            bias_attr=conv_bias_attr)
Example #18
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride=1,
                 norm_type='bn',
                 norm_groups=32,
                 use_dcn=False,
                 norm_decay=0.,
                 freeze_norm=False,
                 act=None,
                 name=None):
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn']

        self.act = act
        self.conv = nn.Conv2D(in_channels=ch_in,
                              out_channels=ch_out,
                              kernel_size=filter_size,
                              stride=stride,
                              padding=(filter_size - 1) // 2,
                              groups=1,
                              weight_attr=ParamAttr(name=name + "_weights",
                                                    initializer=Normal(
                                                        mean=0., std=0.01)),
                              bias_attr=False)

        norm_lr = 0. if freeze_norm else 1.

        norm_name = name + '_bn'
        param_attr = ParamAttr(name=norm_name + "_scale",
                               learning_rate=norm_lr,
                               regularizer=L2Decay(norm_decay))
        bias_attr = ParamAttr(name=norm_name + "_offset",
                              learning_rate=norm_lr,
                              regularizer=L2Decay(norm_decay))
        global_stats = True if freeze_norm else False
        if norm_type in ['bn', 'sync_bn']:
            self.norm = nn.BatchNorm(ch_out,
                                     param_attr=param_attr,
                                     bias_attr=bias_attr,
                                     use_global_stats=global_stats,
                                     moving_mean_name=norm_name + '_mean',
                                     moving_variance_name=norm_name +
                                     '_variance')
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(num_groups=norm_groups,
                                     num_channels=ch_out,
                                     weight_attr=param_attr,
                                     bias_attr=bias_attr)
        norm_params = self.norm.parameters()
        if freeze_norm:
            for param in norm_params:
                param.stop_gradient = True
Example #19
 def __init__(self, in_channels, out_channels, kernel_size=3, groups=1):
     super(AlignConv, self).__init__()
     self.kernel_size = kernel_size
     self.align_conv = paddle.vision.ops.DeformConv2D(
         in_channels,
         out_channels,
         kernel_size=self.kernel_size,
         padding=(self.kernel_size - 1) // 2,
         groups=groups,
         weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
         bias_attr=None)
Example #20
    def __init__(self,
                 num_classes=80,
                 conv_feat='RetinaFeat',
                 anchor_generator='RetinaAnchorGenerator',
                 bbox_assigner='MaxIoUAssigner',
                 loss_class='FocalLoss',
                 loss_bbox='SmoothL1Loss',
                 nms='MultiClassNMS',
                 prior_prob=0.01,
                 nms_pre=1000,
                 weights=[1., 1., 1., 1.]):
        super(RetinaHead, self).__init__()
        self.num_classes = num_classes
        self.conv_feat = conv_feat
        self.anchor_generator = anchor_generator
        self.bbox_assigner = bbox_assigner
        self.loss_class = loss_class
        self.loss_bbox = loss_bbox
        self.nms = nms
        self.nms_pre = nms_pre
        self.weights = weights

        bias_init_value = -math.log((1 - prior_prob) / prior_prob)
        num_anchors = self.anchor_generator.num_anchors
        self.retina_cls = nn.Conv2D(
            in_channels=self.conv_feat.feat_out,
            out_channels=self.num_classes * num_anchors,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(mean=0.0, std=0.01)),
            bias_attr=ParamAttr(initializer=Constant(value=bias_init_value)))
        self.retina_reg = nn.Conv2D(
            in_channels=self.conv_feat.feat_out,
            out_channels=4 * num_anchors,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(mean=0.0, std=0.01)),
            bias_attr=ParamAttr(initializer=Constant(value=0)))
Example #21
    def __init__(self, rpn_feat, anchor_per_position=15, rpn_channel=1024):
        super(RPNHead, self).__init__()
        self.rpn_feat = rpn_feat
        if isinstance(rpn_feat, dict):
            self.rpn_feat = RPNFeat(**rpn_feat)
        # rpn head is shared with each level
        # rpn roi classification scores
        self.rpn_rois_score = Conv2D(
            in_channels=rpn_channel,
            out_channels=anchor_per_position,
            kernel_size=1,
            padding=0,
            weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.01)),
            bias_attr=ParamAttr(learning_rate=2., regularizer=L2Decay(0.)))

        # rpn roi bbox regression deltas
        self.rpn_rois_delta = Conv2D(
            in_channels=rpn_channel,
            out_channels=4 * anchor_per_position,
            kernel_size=1,
            padding=0,
            weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.01)),
            bias_attr=ParamAttr(learning_rate=2., regularizer=L2Decay(0.)))
Example #22
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride,
                 norm_type='bn',
                 norm_groups=32,
                 use_dcn=False,
                 norm_name=None,
                 bias_on=False,
                 lr_scale=1.,
                 name=None):
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn']

        if bias_on:
            bias_attr = ParamAttr(name=name + "_bias",
                                  initializer=Constant(value=0.),
                                  learning_rate=lr_scale)
        else:
            bias_attr = False

        self.conv = nn.Conv2D(in_channels=ch_in,
                              out_channels=ch_out,
                              kernel_size=filter_size,
                              stride=stride,
                              padding=(filter_size - 1) // 2,
                              groups=1,
                              weight_attr=ParamAttr(name=name + "_weight",
                                                    initializer=Normal(
                                                        mean=0., std=0.01),
                                                    learning_rate=1.),
                              bias_attr=bias_attr)

        param_attr = ParamAttr(name=norm_name + "_scale",
                               learning_rate=1.,
                               regularizer=L2Decay(0.))
        bias_attr = ParamAttr(name=norm_name + "_offset",
                              learning_rate=1.,
                              regularizer=L2Decay(0.))
        if norm_type in ['bn', 'sync_bn']:
            self.norm = nn.BatchNorm2D(ch_out,
                                       weight_attr=param_attr,
                                       bias_attr=bias_attr)
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(num_groups=norm_groups,
                                     num_channels=ch_out,
                                     weight_attr=param_attr,
                                     bias_attr=bias_attr)
Example #23
 def __init__(self, ch_in, ch_out=64, conv_num=2):
     super(WHHead, self).__init__()
     head_conv = nn.Sequential()
     for i in range(conv_num):
         name = 'conv.{}'.format(i)
         head_conv.add_sublayer(
             name,
             nn.Conv2D(in_channels=ch_in if i == 0 else ch_out,
                       out_channels=ch_out,
                       kernel_size=3,
                       padding=1,
                       weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
                       bias_attr=ParamAttr(learning_rate=2.,
                                           regularizer=L2Decay(0.))))
         head_conv.add_sublayer(name + '.act', nn.ReLU())
     self.feat = self.add_sublayer('wh_feat', head_conv)
     self.head = self.add_sublayer(
         'wh_head',
         nn.Conv2D(in_channels=ch_out,
                   out_channels=4,
                   kernel_size=1,
                   weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
                   bias_attr=ParamAttr(learning_rate=2.,
                                       regularizer=L2Decay(0.))))
Example #24
    def initialize_parameters(self):
        Normal(std=0.02)(self.token_embedding.weight)
        Normal(std=0.01)(self.positional_embedding)

        if isinstance(self.visual, ModifiedResNet):
            if self.visual.attnpool is not None:
                std = self.embed_dim ** -0.5
                normal_ = Normal(std=std)
                normal_(self.visual.attnpool.attn.q_proj.weight)
                normal_(self.visual.attnpool.attn.k_proj.weight)
                normal_(self.visual.attnpool.attn.v_proj.weight)
                normal_(self.visual.attnpool.attn.out_proj.weight)

            for resnet_block in [
                self.visual.layer1,
                self.visual.layer2,
                self.visual.layer3,
                self.visual.layer4,
            ]:
                for name, param in resnet_block.named_parameters():
                    if name.endswith("bn3.weight"):
                        Constant(value=0.0)(param)

        proj_std = (self.transformer.width ** -0.5) * (
            (2 * self.transformer.layers) ** -0.5
        )
        attn_std = self.transformer.width ** -0.5
        fc_std = (2 * self.transformer.width) ** -0.5

        for resblock in self.transformer.resblocks:
            normal_ = Normal(std=attn_std)
            normal_(resblock.attn.q_proj.weight)
            normal_(resblock.attn.k_proj.weight)
            normal_(resblock.attn.v_proj.weight)
            Normal(std=proj_std)(resblock.attn.out_proj.weight)
            Normal(std=fc_std)(resblock.mlp.c_fc.weight)
            Normal(std=proj_std)(resblock.mlp.c_proj.weight)

        if self.text_projection is not None:
            Normal(std=self.transformer.width ** -0.5)(self.text_projection)
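
Example #24 above applies initializers outside of ParamAttr: it calls the initializer instance directly on an already-created parameter. The snippet below is a minimal sketch of that calling convention, using nothing beyond the paddle.nn.initializer API shown in these examples; the nn.Linear sizes are placeholders.

import paddle.nn as nn
from paddle.nn.initializer import Constant, Normal

linear = nn.Linear(16, 4)  # placeholder layer sizes

# An initializer instance is callable; applying it to an existing parameter
# re-initializes that parameter in place, as Example #24 does for CLIP weights.
Normal(std=0.02)(linear.weight)
Constant(value=0.0)(linear.bias)
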
Example #25
 def __init__(self, layer_num, ch_out, name=None):
     super(ShortCut, self).__init__()
     shortcut_conv = Sequential()
     ch_in = ch_out * 2
     for i in range(layer_num):
         fan_out = 3 * 3 * ch_out
         std = math.sqrt(2. / fan_out)
         in_channels = ch_in if i == 0 else ch_out
         shortcut_name = name + '.conv.{}'.format(i)
         shortcut_conv.add_sublayer(
             shortcut_name,
             Conv2D(in_channels=in_channels,
                    out_channels=ch_out,
                    kernel_size=3,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0, std)),
                    bias_attr=ParamAttr(learning_rate=2.,
                                        regularizer=L2Decay(0.))))
         if i < layer_num - 1:
             shortcut_conv.add_sublayer(shortcut_name + '.act', ReLU())
     self.shortcut = self.add_sublayer('short', shortcut_conv)
Example #26
 def __init__(self,
              num_channels,
              num_filters,
              filter_size,
              stride=1,
              dilation=1,
              groups=1,
              act=None,
              lr_mult=1.0,
              name=None,
              data_format="NCHW"):
     super(ConvBNLayer, self).__init__()
     conv_stdv = filter_size * filter_size * num_filters
     self._conv = nn.Conv2D(in_channels=num_channels,
                            out_channels=num_filters,
                            kernel_size=filter_size,
                            stride=stride,
                            padding=(filter_size - 1) // 2,
                            dilation=dilation,
                            groups=groups,
                            weight_attr=ParamAttr(name=name + "_weights",
                                                  learning_rate=lr_mult,
                                                  initializer=Normal(
                                                      0,
                                                      math.sqrt(
                                                          2. / conv_stdv))),
                            bias_attr=False,
                            data_format=data_format)
     if name == "conv1":
         bn_name = "bn_" + name
     else:
         bn_name = "bn" + name[3:]
     self._batch_norm = nn.BatchNorm(
         num_filters,
         act=act,
         param_attr=ParamAttr(name=bn_name + "_scale"),
         bias_attr=ParamAttr(bn_name + "_offset"),
         moving_mean_name=bn_name + "_mean",
         moving_variance_name=bn_name + "_variance",
         data_layout=data_format)
Example #27
 def __init__(self,
              layer_num,
              ch_in,
              ch_out,
              norm_type='bn',
              lite_neck=False,
              name=None):
     super(ShortCut, self).__init__()
     shortcut_conv = nn.Sequential()
     for i in range(layer_num):
         fan_out = 3 * 3 * ch_out
         std = math.sqrt(2. / fan_out)
         in_channels = ch_in if i == 0 else ch_out
         shortcut_name = name + '.conv.{}'.format(i)
         if lite_neck:
             shortcut_conv.add_sublayer(
                 shortcut_name,
                 LiteConv(in_channels=in_channels,
                          out_channels=ch_out,
                          with_act=i < layer_num - 1,
                          norm_type=norm_type))
         else:
             shortcut_conv.add_sublayer(
                 shortcut_name,
                 nn.Conv2D(
                     in_channels=in_channels,
                     out_channels=ch_out,
                     kernel_size=3,
                     padding=1,
                     weight_attr=ParamAttr(initializer=Normal(0, std)),
                     bias_attr=ParamAttr(learning_rate=2.,
                                         regularizer=L2Decay(0.))))
             if i < layer_num - 1:
                 shortcut_conv.add_sublayer(shortcut_name + '.act',
                                            nn.ReLU())
     self.shortcut = self.add_sublayer('shortcut', shortcut_conv)
Example #28
    def __init__(
        self,
        levels,
        channels,
        output_stride=32,
        in_chans=3,
        cardinality=1,
        base_width=64,
        block=DlaBottleneck,
        residual_root=False,
        drop_rate=0.0,
        global_pool="avg",
        class_dim=1000,
        with_pool=True,
    ):
        super(DLA, self).__init__()
        self.channels = channels
        self.class_dim = class_dim
        self.with_pool = with_pool
        self.cardinality = cardinality
        self.base_width = base_width
        self.drop_rate = drop_rate
        assert output_stride == 32  # FIXME support dilation

        self.base_layer = nn.Sequential(
            nn.Conv2D(
                in_chans,
                channels[0],
                kernel_size=7,
                stride=1,
                padding=3,
                bias_attr=False,
            ),
            nn.BatchNorm2D(channels[0]),
            nn.ReLU(),
        )
        self.level0 = self._make_conv_level(channels[0], channels[0],
                                            levels[0])
        self.level1 = self._make_conv_level(channels[0],
                                            channels[1],
                                            levels[1],
                                            stride=2)
        cargs = dict(cardinality=cardinality,
                     base_width=base_width,
                     root_residual=residual_root)
        self.level2 = DlaTree(levels[2],
                              block,
                              channels[1],
                              channels[2],
                              2,
                              level_root=False,
                              **cargs)
        self.level3 = DlaTree(levels[3],
                              block,
                              channels[2],
                              channels[3],
                              2,
                              level_root=True,
                              **cargs)
        self.level4 = DlaTree(levels[4],
                              block,
                              channels[3],
                              channels[4],
                              2,
                              level_root=True,
                              **cargs)
        self.level5 = DlaTree(levels[5],
                              block,
                              channels[4],
                              channels[5],
                              2,
                              level_root=True,
                              **cargs)
        self.feature_info = [
            # rare to have a meaningful stride 1 level
            dict(num_chs=channels[0], reduction=1, module="level0"),
            dict(num_chs=channels[1], reduction=2, module="level1"),
            dict(num_chs=channels[2], reduction=4, module="level2"),
            dict(num_chs=channels[3], reduction=8, module="level3"),
            dict(num_chs=channels[4], reduction=16, module="level4"),
            dict(num_chs=channels[5], reduction=32, module="level5"),
        ]

        self.num_features = channels[-1]

        if with_pool:
            self.global_pool = nn.AdaptiveAvgPool2D(1)

        if class_dim > 0:
            self.fc = nn.Conv2D(self.num_features, class_dim, 1)

        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
                normal_ = Normal(mean=0.0, std=math.sqrt(2.0 / n))
                normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2D):
                ones_(m.weight)
                zeros_(m.bias)
Example #29
    def __init__(self,
                 stacked_convs=2,
                 feat_in=256,
                 feat_out=256,
                 num_classes=15,
                 anchor_strides=[8, 16, 32, 64, 128],
                 anchor_scales=[4],
                 anchor_ratios=[1.0],
                 target_means=0.0,
                 target_stds=1.0,
                 align_conv_type='AlignConv',
                 align_conv_size=3,
                 use_sigmoid_cls=True,
                 anchor_assign=RBoxAssigner().__dict__,
                 reg_loss_weight=[1.0, 1.0, 1.0, 1.0, 1.1],
                 cls_loss_weight=[1.1, 1.05],
                 reg_loss_type='l1',
                 is_training=True):
        super(S2ANetHead, self).__init__()
        self.stacked_convs = stacked_convs
        self.feat_in = feat_in
        self.feat_out = feat_out
        self.anchor_list = None
        self.anchor_scales = anchor_scales
        self.anchor_ratios = anchor_ratios
        self.anchor_strides = anchor_strides
        self.anchor_strides = paddle.to_tensor(anchor_strides)
        self.anchor_base_sizes = list(anchor_strides)
        self.means = paddle.ones(shape=[5]) * target_means
        self.stds = paddle.ones(shape=[5]) * target_stds
        assert align_conv_type in ['AlignConv', 'Conv', 'DCN']
        self.align_conv_type = align_conv_type
        self.align_conv_size = align_conv_size

        self.use_sigmoid_cls = use_sigmoid_cls
        self.cls_out_channels = num_classes if self.use_sigmoid_cls else 1
        self.sampling = False
        self.anchor_assign = anchor_assign
        self.reg_loss_weight = reg_loss_weight
        self.cls_loss_weight = cls_loss_weight
        self.alpha = 1.0
        self.beta = 1.0
        self.reg_loss_type = reg_loss_type
        self.is_training = is_training

        self.s2anet_head_out = None

        # anchor
        self.anchor_generators = []
        for anchor_base in self.anchor_base_sizes:
            self.anchor_generators.append(
                S2ANetAnchorGenerator(anchor_base, anchor_scales,
                                      anchor_ratios))

        self.anchor_generators = nn.LayerList(self.anchor_generators)
        self.fam_cls_convs = nn.Sequential()
        self.fam_reg_convs = nn.Sequential()

        for i in range(self.stacked_convs):
            chan_in = self.feat_in if i == 0 else self.feat_out

            self.fam_cls_convs.add_sublayer(
                'fam_cls_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=chan_in,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))

            self.fam_cls_convs.add_sublayer('fam_cls_conv_{}_act'.format(i),
                                            nn.ReLU())

            self.fam_reg_convs.add_sublayer(
                'fam_reg_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=chan_in,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))

            self.fam_reg_convs.add_sublayer('fam_reg_conv_{}_act'.format(i),
                                            nn.ReLU())

        self.fam_reg = nn.Conv2D(
            self.feat_out,
            5,
            1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(0)))
        prior_prob = 0.01
        bias_init = float(-np.log((1 - prior_prob) / prior_prob))
        self.fam_cls = nn.Conv2D(
            self.feat_out,
            self.cls_out_channels,
            1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(bias_init)))

        if self.align_conv_type == "AlignConv":
            self.align_conv = AlignConv(self.feat_out, self.feat_out,
                                        self.align_conv_size)
        elif self.align_conv_type == "Conv":
            self.align_conv = nn.Conv2D(
                self.feat_out,
                self.feat_out,
                self.align_conv_size,
                padding=(self.align_conv_size - 1) // 2,
                bias_attr=ParamAttr(initializer=Constant(0)))

        elif self.align_conv_type == "DCN":
            self.align_conv_offset = nn.Conv2D(
                self.feat_out,
                2 * self.align_conv_size**2,
                1,
                weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                bias_attr=ParamAttr(initializer=Constant(0)))

            self.align_conv = paddle.vision.ops.DeformConv2D(
                self.feat_out,
                self.feat_out,
                self.align_conv_size,
                padding=(self.align_conv_size - 1) // 2,
                weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                bias_attr=False)

        self.or_conv = nn.Conv2D(
            self.feat_out,
            self.feat_out,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(0)))

        # ODM
        self.odm_cls_convs = nn.Sequential()
        self.odm_reg_convs = nn.Sequential()

        for i in range(self.stacked_convs):
            ch_in = self.feat_out
            # ch_in = int(self.feat_out / 8) if i == 0 else self.feat_out

            self.odm_cls_convs.add_sublayer(
                'odm_cls_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=ch_in,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))

            self.odm_cls_convs.add_sublayer('odm_cls_conv_{}_act'.format(i),
                                            nn.ReLU())

            self.odm_reg_convs.add_sublayer(
                'odm_reg_conv_{}'.format(i),
                nn.Conv2D(
                    in_channels=self.feat_out,
                    out_channels=self.feat_out,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                    bias_attr=ParamAttr(initializer=Constant(0))))

            self.odm_reg_convs.add_sublayer('odm_reg_conv_{}_act'.format(i),
                                            nn.ReLU())

        self.odm_cls = nn.Conv2D(
            self.feat_out,
            self.cls_out_channels,
            3,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(bias_init)))
        self.odm_reg = nn.Conv2D(
            self.feat_out,
            5,
            3,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(0)))

        self.featmap_sizes = []
        self.base_anchors_list = []
        self.refine_anchor_list = []
Example #30
    def __init__(self,
                 ch_in: int = 3,
                 class_num: int = 20,
                 ignore_thresh: float = 0.7,
                 valid_thresh: float = 0.005,
                 nms_topk: int = 400,
                 nms_posk: int = 100,
                 nms_thresh: float = 0.45,
                 is_train: bool = True,
                 load_checkpoint: str = None):
        super(YOLOv3, self).__init__()

        self.is_train = is_train
        self.block = DarkNet53_conv_body(ch_in=ch_in,
                                         is_test=not self.is_train)
        self.block_outputs = []
        self.yolo_blocks = []
        self.route_blocks_2 = []
        self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        self.anchors = [
            10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
            373, 326
        ]
        self.class_num = class_num
        self.ignore_thresh = ignore_thresh
        self.valid_thresh = valid_thresh
        self.nms_topk = nms_topk
        self.nms_posk = nms_posk
        self.nms_thresh = nms_thresh
        ch_in_list = [1024, 768, 384]

        for i in range(3):
            yolo_block = self.add_sublayer(
                "yolo_detecton_block_%d" % (i),
                YoloDetectionBlock(ch_in_list[i],
                                   channel=512 // (2**i),
                                   is_test=not self.is_train))
            self.yolo_blocks.append(yolo_block)

            num_filters = len(self.anchor_masks[i]) * (self.class_num + 5)
            block_out = self.add_sublayer(
                "block_out_%d" % (i),
                nn.Conv2d(
                    1024 // (2**i),
                    num_filters,
                    1,
                    stride=1,
                    padding=0,
                    weight_attr=paddle.ParamAttr(initializer=Normal(0., 0.02)),
                    bias_attr=paddle.ParamAttr(initializer=Constant(0.0),
                                               regularizer=L2Decay(0.))))
            self.block_outputs.append(block_out)

            if i < 2:
                route = self.add_sublayer(
                    "route2_%d" % i,
                    ConvBNLayer(ch_in=512 // (2**i),
                                ch_out=256 // (2**i),
                                filter_size=1,
                                stride=1,
                                padding=0,
                                is_test=(not self.is_train)))
                self.route_blocks_2.append(route)
            self.upsample = Upsample()

        if load_checkpoint is not None:
            model_dict = paddle.load(load_checkpoint)[0]
            self.set_dict(model_dict)
            print("load custom checkpoint success")

        else:
            checkpoint = os.path.join(self.directory,
                                      'yolov3_darknet53_voc.pdparams')
            if not os.path.exists(checkpoint):
                os.system(
                    'wget https://paddlehub.bj.bcebos.com/dygraph/detection/yolov3_darknet53_voc.pdparams -O ' \
                    + checkpoint)
            model_dict = paddle.load(checkpoint)[0]
            self.set_dict(model_dict)
            print("load pretrained checkpoint success")