def __init__(self, ch_in, ch_out=128, num_classes=80, conv_num=2):
    super(HMHead, self).__init__()
    head_conv = nn.Sequential()
    for i in range(conv_num):
        name = 'conv.{}'.format(i)
        head_conv.add_sublayer(
            name,
            nn.Conv2D(
                in_channels=ch_in if i == 0 else ch_out,
                out_channels=ch_out,
                kernel_size=3,
                padding=1,
                weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
                bias_attr=ParamAttr(
                    learning_rate=2., regularizer=L2Decay(0.))))
        head_conv.add_sublayer(name + '.act', nn.ReLU())
    self.feat = self.add_sublayer('hm_feat', head_conv)
    bias_init = float(-np.log((1 - 0.01) / 0.01))
    self.head = self.add_sublayer(
        'hm_head',
        nn.Conv2D(
            in_channels=ch_out,
            out_channels=num_classes,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
            bias_attr=ParamAttr(
                learning_rate=2.,
                regularizer=L2Decay(0.),
                initializer=Constant(bias_init))))
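# Why bias_init = -log((1 - pi) / pi): with a foreground prior pi = 0.01,
# setting the classification bias this way makes sigmoid(bias) == pi at the
# start of training, so the heatmap head begins by predicting the rare
# foreground at its prior rate (the focal-loss initialization trick).
# A minimal sanity check of that identity:
import numpy as np

pi = 0.01
bias_init = float(-np.log((1 - pi) / pi))
sigmoid = 1.0 / (1.0 + np.exp(-bias_init))
assert abs(sigmoid - pi) < 1e-12  # initial foreground probability == prior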
def __init__(self, reg_topk=4, reg_channels=64, add_mean=True):
    super(DGQP, self).__init__()
    self.reg_topk = reg_topk
    self.reg_channels = reg_channels
    self.add_mean = add_mean
    self.total_dim = reg_topk
    if add_mean:
        self.total_dim += 1
    self.reg_conv1 = self.add_sublayer(
        'dgqp_reg_conv1',
        nn.Conv2D(
            in_channels=4 * self.total_dim,
            out_channels=self.reg_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.01)),
            bias_attr=ParamAttr(initializer=Constant(value=0))))
    self.reg_conv2 = self.add_sublayer(
        'dgqp_reg_conv2',
        nn.Conv2D(
            in_channels=self.reg_channels,
            out_channels=1,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.01)),
            bias_attr=ParamAttr(initializer=Constant(value=0))))
def __init__(self,
             head,
             in_channel,
             roi_extractor=RoIAlign().__dict__,
             bbox_assigner='BboxAssigner',
             with_pool=False,
             num_classes=80,
             bbox_weight=[10., 10., 5., 5.]):
    super(BBoxHead, self).__init__()
    self.head = head
    self.roi_extractor = roi_extractor
    if isinstance(roi_extractor, dict):
        self.roi_extractor = RoIAlign(**roi_extractor)
    self.bbox_assigner = bbox_assigner
    self.with_pool = with_pool
    self.num_classes = num_classes
    self.bbox_weight = bbox_weight

    self.bbox_score = nn.Linear(
        in_channel,
        self.num_classes + 1,
        weight_attr=paddle.ParamAttr(initializer=Normal(mean=0.0, std=0.01)))
    self.bbox_delta = nn.Linear(
        in_channel,
        4 * self.num_classes,
        weight_attr=paddle.ParamAttr(initializer=Normal(mean=0.0, std=0.001)))
    self.assigned_label = None
    self.assigned_rois = None
def __init__(self,
             ch_in: int,
             ch_out: int,
             filter_size: int = 3,
             stride: int = 1,
             groups: int = 1,
             padding: int = 0,
             act: str = 'leakly',  # note: spelling kept verbatim; this string is the activation key matched elsewhere in the module
             is_test: bool = False):
    super(ConvBNLayer, self).__init__()
    # paddle.nn exposes Conv2D (capital D), not Conv2d
    self.conv = nn.Conv2D(
        ch_in,
        ch_out,
        filter_size,
        padding=padding,
        stride=stride,
        groups=groups,
        weight_attr=paddle.ParamAttr(initializer=Normal(0., 0.02)),
        bias_attr=False)
    self.batch_norm = nn.BatchNorm(
        num_channels=ch_out,
        is_test=is_test,
        param_attr=paddle.ParamAttr(
            initializer=Normal(0., 0.02), regularizer=L2Decay(0.)))
    self.act = act
def __init__(self,
             ch_in,
             ch_out=128,
             num_classes=80,
             conv_num=2,
             dcn_head=False,
             lite_head=False,
             norm_type='bn'):
    super(HMHead, self).__init__()
    head_conv = nn.Sequential()
    for i in range(conv_num):
        name = 'conv.{}'.format(i)
        if lite_head:
            lite_name = 'hm.' + name
            head_conv.add_sublayer(
                lite_name,
                LiteConv(
                    in_channels=ch_in if i == 0 else ch_out,
                    out_channels=ch_out,
                    norm_type=norm_type))
        else:
            if dcn_head:
                head_conv.add_sublayer(
                    name,
                    DeformableConvV2(
                        in_channels=ch_in if i == 0 else ch_out,
                        out_channels=ch_out,
                        kernel_size=3,
                        weight_attr=ParamAttr(initializer=Normal(0, 0.01))))
            else:
                head_conv.add_sublayer(
                    name,
                    nn.Conv2D(
                        in_channels=ch_in if i == 0 else ch_out,
                        out_channels=ch_out,
                        kernel_size=3,
                        padding=1,
                        weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
                        bias_attr=ParamAttr(
                            learning_rate=2., regularizer=L2Decay(0.))))
            head_conv.add_sublayer(name + '.act', nn.ReLU())
    self.feat = head_conv
    bias_init = float(-np.log((1 - 0.01) / 0.01))
    weight_attr = None if lite_head else ParamAttr(
        initializer=Normal(0, 0.01))
    self.head = nn.Conv2D(
        in_channels=ch_out,
        out_channels=num_classes,
        kernel_size=1,
        weight_attr=weight_attr,
        bias_attr=ParamAttr(
            learning_rate=2.,
            regularizer=L2Decay(0.),
            initializer=Constant(bias_init)))
def __init__(self,
             ch_in,
             ch_out=64,
             conv_num=2,
             dcn_head=False,
             lite_head=False,
             norm_type='bn'):
    super(WHHead, self).__init__()
    head_conv = nn.Sequential()
    for i in range(conv_num):
        name = 'conv.{}'.format(i)
        if lite_head:
            lite_name = 'wh.' + name
            head_conv.add_sublayer(
                lite_name,
                LiteConv(
                    in_channels=ch_in if i == 0 else ch_out,
                    out_channels=ch_out,
                    norm_type=norm_type))
            head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
        else:
            if dcn_head:
                head_conv.add_sublayer(
                    name,
                    DeformableConvV2(
                        in_channels=ch_in if i == 0 else ch_out,
                        out_channels=ch_out,
                        kernel_size=3,
                        weight_attr=ParamAttr(initializer=Normal(0, 0.01))))
            else:
                head_conv.add_sublayer(
                    name,
                    nn.Conv2D(
                        in_channels=ch_in if i == 0 else ch_out,
                        out_channels=ch_out,
                        kernel_size=3,
                        padding=1,
                        weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
                        bias_attr=ParamAttr(
                            learning_rate=2., regularizer=L2Decay(0.))))
            head_conv.add_sublayer(name + '.act', nn.ReLU())
    self.feat = head_conv
    self.head = nn.Conv2D(
        in_channels=ch_out,
        out_channels=4,
        kernel_size=1,
        weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
        bias_attr=ParamAttr(learning_rate=2., regularizer=L2Decay(0.)))
def basic_branch(self, num_conv_out_channels, input_ch):
    # the level indexes are defined from fine to coarse,
    # the branch will contain one more part than that of its previous level
    # the sliding step is set to 1
    pyramid_conv_list = nn.LayerList()
    pyramid_fc_list = nn.LayerList()

    idx_levels = 0
    for idx_branches in range(self.num_branches):
        if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
            idx_levels += 1
        pyramid_conv_list.append(
            nn.Sequential(
                nn.Conv2D(input_ch, num_conv_out_channels, 1),
                nn.BatchNorm2D(num_conv_out_channels), nn.ReLU()))

    idx_levels = 0
    for idx_branches in range(self.num_branches):
        if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
            idx_levels += 1
        fc = nn.Linear(
            in_features=num_conv_out_channels,
            out_features=self.num_classes,
            weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.001)),
            bias_attr=ParamAttr(initializer=Constant(value=0.)))
        pyramid_fc_list.append(fc)
    return pyramid_conv_list, pyramid_fc_list
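# A minimal standalone sketch (with hypothetical per-level counts) of the
# index bookkeeping above: a flat branch index is assigned to the first level
# whose cumulative branch count still exceeds it.
num_in_each_level = [4, 3, 2, 1]  # assumed example, fine to coarse
num_branches = sum(num_in_each_level)

idx_levels = 0
for idx_branches in range(num_branches):
    if idx_branches >= sum(num_in_each_level[0:idx_levels + 1]):
        idx_levels += 1
    print('branch {} -> level {}'.format(idx_branches, idx_levels))
# branches 0-3 land on level 0, 4-6 on level 1, 7-8 on level 2, 9 on level 3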
def __init__(self,
             num_channels,
             num_filters,
             filter_size,
             stride=1,
             dilation=1,
             groups=1,
             act=None,
             lr_mult=1.0,
             name=None,
             data_format="NCHW"):
    super(ConvBNLayer, self).__init__()
    conv_stdv = filter_size * filter_size * num_filters
    self._conv = nn.Conv2D(
        in_channels=num_channels,
        out_channels=num_filters,
        kernel_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        dilation=dilation,
        groups=groups,
        weight_attr=ParamAttr(
            learning_rate=lr_mult,
            initializer=Normal(0, math.sqrt(2. / conv_stdv))),
        bias_attr=False,
        data_format=data_format)
    self._batch_norm = nn.BatchNorm(
        num_filters, act=act, data_layout=data_format)
def ConvTranspose2d(in_channels,
                    out_channels,
                    kernel_size,
                    stride=1,
                    padding=0,
                    output_padding=0,
                    groups=1,
                    bias=True,
                    dilation=1,
                    weight_init=Normal(std=0.001),
                    bias_init=Constant(0.)):
    weight_attr = paddle.framework.ParamAttr(initializer=weight_init)
    if bias:
        bias_attr = paddle.framework.ParamAttr(initializer=bias_init)
    else:
        bias_attr = False
    # Keyword arguments are used here because nn.Conv2DTranspose takes
    # `groups` before `dilation`; passing them positionally in this wrapper's
    # order would silently swap the two.
    conv = nn.Conv2DTranspose(
        in_channels,
        out_channels,
        kernel_size,
        stride=stride,
        padding=padding,
        output_padding=output_padding,
        dilation=dilation,
        groups=groups,
        weight_attr=weight_attr,
        bias_attr=bias_attr)
    return conv
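# Example usage of the factory above (shapes are illustrative, and the same
# imports as the snippet are assumed: paddle, paddle.nn as nn, Normal,
# Constant): a stride-2 transposed conv that doubles spatial resolution, a
# common upsampling step in keypoint heads.
up = ConvTranspose2d(256, 256, kernel_size=4, stride=2, padding=1)
x = paddle.randn([1, 256, 32, 32])
y = up(x)
print(y.shape)  # [1, 256, 64, 64]: (32 - 1) * 2 - 2 * 1 + 4 = 64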
def __init__(self,
             bbox_feat,
             in_feat=1024,
             num_classes=81,
             cls_agnostic=False,
             roi_stages=1,
             with_pool=False,
             score_stage=[0, 1, 2],
             delta_stage=[2]):
    super(BBoxHead, self).__init__()
    self.num_classes = num_classes
    self.cls_agnostic = cls_agnostic
    self.delta_dim = 2 if cls_agnostic else num_classes
    self.bbox_feat = bbox_feat
    self.roi_stages = roi_stages
    self.bbox_score_list = []
    self.bbox_delta_list = []
    self.roi_feat_list = [[] for i in range(roi_stages)]
    self.with_pool = with_pool
    self.score_stage = score_stage
    self.delta_stage = delta_stage
    for stage in range(roi_stages):
        score_name = 'bbox_score_{}'.format(stage)
        delta_name = 'bbox_delta_{}'.format(stage)
        lr_factor = 2**stage
        bbox_score = self.add_sublayer(
            score_name,
            nn.Linear(
                in_feat,
                1 * self.num_classes,
                weight_attr=ParamAttr(
                    learning_rate=lr_factor,
                    initializer=Normal(mean=0.0, std=0.01)),
                bias_attr=ParamAttr(
                    learning_rate=2. * lr_factor,
                    regularizer=L2Decay(0.))))
        bbox_delta = self.add_sublayer(
            delta_name,
            nn.Linear(
                in_feat,
                4 * self.delta_dim,
                weight_attr=ParamAttr(
                    learning_rate=lr_factor,
                    initializer=Normal(mean=0.0, std=0.001)),
                bias_attr=ParamAttr(
                    learning_rate=2. * lr_factor,
                    regularizer=L2Decay(0.))))
        self.bbox_score_list.append(bbox_score)
        self.bbox_delta_list.append(bbox_delta)
def __init__(
        self,
        num_classes=1,
        num_identifiers=1,  # defined by dataset.total_identities
        anchor_levels=3,
        anchor_scales=4,
        embedding_dim=512,
        emb_loss='JDEEmbeddingLoss',
        jde_loss='JDELoss'):
    super(JDEEmbeddingHead, self).__init__()
    self.num_classes = num_classes
    self.num_identifiers = num_identifiers
    self.anchor_levels = anchor_levels
    self.anchor_scales = anchor_scales
    self.embedding_dim = embedding_dim
    self.emb_loss = emb_loss
    self.jde_loss = jde_loss

    self.emb_scale = math.sqrt(2) * math.log(
        self.num_identifiers - 1) if self.num_identifiers > 1 else 1

    self.identify_outputs = []
    self.loss_params_cls = []
    self.loss_params_reg = []
    self.loss_params_ide = []
    for i in range(self.anchor_levels):
        name = 'identify_output.{}'.format(i)
        identify_output = self.add_sublayer(
            name,
            nn.Conv2D(
                in_channels=64 * (2**self.anchor_levels) // (2**i),
                out_channels=self.embedding_dim,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(name=name + '.conv.weights'),
                bias_attr=ParamAttr(
                    name=name + '.conv.bias', regularizer=L2Decay(0.))))
        self.identify_outputs.append(identify_output)

        loss_p_cls = self.add_sublayer('cls.{}'.format(i), LossParam(-4.15))
        self.loss_params_cls.append(loss_p_cls)
        loss_p_reg = self.add_sublayer('reg.{}'.format(i), LossParam(-4.85))
        self.loss_params_reg.append(loss_p_reg)
        loss_p_ide = self.add_sublayer('ide.{}'.format(i), LossParam(-2.3))
        self.loss_params_ide.append(loss_p_ide)

    self.classifier = self.add_sublayer(
        'classifier',
        nn.Linear(
            self.embedding_dim,
            self.num_identifiers,
            weight_attr=ParamAttr(
                learning_rate=1., initializer=Normal(mean=0.0, std=0.01)),
            bias_attr=ParamAttr(learning_rate=2., regularizer=L2Decay(0.))))
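# The embedding scale follows the JDE formulation: with nID identities the
# scale s = sqrt(2) * ln(nID - 1) grows slowly with the identity count. A
# quick illustration with hypothetical nID values:
import math

for n_id in (2, 100, 10000):
    s = math.sqrt(2) * math.log(n_id - 1) if n_id > 1 else 1
    print(n_id, round(s, 3))  # 2 -> 0.0, 100 -> 6.498, 10000 -> 13.025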
def __init__(self, feat_in=1024, feat_out=1024):
    super(RPNFeat, self).__init__()
    # rpn feat is shared with each level
    self.rpn_conv = nn.Conv2D(
        in_channels=feat_in,
        out_channels=feat_out,
        kernel_size=3,
        padding=1,
        weight_attr=paddle.ParamAttr(initializer=Normal(mean=0., std=0.01)))
def __init__(self,
             anchor_generator=AnchorGenerator().__dict__,
             rpn_target_assign=RPNTargetAssign().__dict__,
             train_proposal=ProposalGenerator(12000, 2000).__dict__,
             test_proposal=ProposalGenerator().__dict__,
             in_channel=1024,
             export_onnx=False):
    super(RPNHead, self).__init__()
    self.anchor_generator = anchor_generator
    self.rpn_target_assign = rpn_target_assign
    self.train_proposal = train_proposal
    self.test_proposal = test_proposal
    self.export_onnx = export_onnx
    if isinstance(anchor_generator, dict):
        self.anchor_generator = AnchorGenerator(**anchor_generator)
    if isinstance(rpn_target_assign, dict):
        self.rpn_target_assign = RPNTargetAssign(**rpn_target_assign)
    if isinstance(train_proposal, dict):
        self.train_proposal = ProposalGenerator(**train_proposal)
    if isinstance(test_proposal, dict):
        self.test_proposal = ProposalGenerator(**test_proposal)

    num_anchors = self.anchor_generator.num_anchors
    self.rpn_feat = RPNFeat(in_channel, in_channel)
    # rpn head is shared with each level
    # rpn roi classification scores
    self.rpn_rois_score = nn.Conv2D(
        in_channels=in_channel,
        out_channels=num_anchors,
        kernel_size=1,
        padding=0,
        weight_attr=paddle.ParamAttr(initializer=Normal(mean=0., std=0.01)))
    self.rpn_rois_score.skip_quant = True

    # rpn roi bbox regression deltas
    self.rpn_rois_delta = nn.Conv2D(
        in_channels=in_channel,
        out_channels=4 * num_anchors,
        kernel_size=1,
        padding=0,
        weight_attr=paddle.ParamAttr(initializer=Normal(mean=0., std=0.01)))
    self.rpn_rois_delta.skip_quant = True
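# The `Klass().__dict__` default plus the isinstance(..., dict) rebuild is a
# config-injection idiom: defaults are exposed as a plain attribute dict so a
# config file can override individual fields, and the live component is
# reconstructed from the merged dict. A minimal sketch with a hypothetical
# component (names are illustrative, not from the source):
class ProposalCfg(object):
    def __init__(self, pre_nms_top_n=6000, post_nms_top_n=1000):
        self.pre_nms_top_n = pre_nms_top_n
        self.post_nms_top_n = post_nms_top_n

def build(cfg=ProposalCfg().__dict__):
    # a dict (possibly carrying user overrides) becomes a live object
    return ProposalCfg(**cfg) if isinstance(cfg, dict) else cfg

print(build().post_nms_top_n)  # 1000, from defaults
print(build({'pre_nms_top_n': 12000,
             'post_nms_top_n': 2000}).pre_nms_top_n)  # 12000, overridden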
def __init__(self,
             head,
             in_channel,
             roi_extractor=RoIAlign().__dict__,
             bbox_assigner='BboxAssigner',
             num_classes=80,
             bbox_weight=[[10., 10., 5., 5.], [20.0, 20.0, 10.0, 10.0],
                          [30.0, 30.0, 15.0, 15.0]],
             num_cascade_stages=3,
             bbox_loss=None):
    nn.Layer.__init__(self)
    self.head = head
    self.roi_extractor = roi_extractor
    if isinstance(roi_extractor, dict):
        self.roi_extractor = RoIAlign(**roi_extractor)
    self.bbox_assigner = bbox_assigner

    self.num_classes = num_classes
    self.bbox_weight = bbox_weight
    self.num_cascade_stages = num_cascade_stages
    self.bbox_loss = bbox_loss

    self.bbox_score_list = []
    self.bbox_delta_list = []
    for i in range(num_cascade_stages):
        score_name = 'bbox_score_stage{}'.format(i)
        delta_name = 'bbox_delta_stage{}'.format(i)
        bbox_score = self.add_sublayer(
            score_name,
            nn.Linear(
                in_channel,
                self.num_classes + 1,
                weight_attr=paddle.ParamAttr(
                    initializer=Normal(mean=0.0, std=0.01))))
        bbox_delta = self.add_sublayer(
            delta_name,
            nn.Linear(
                in_channel,
                4,
                weight_attr=paddle.ParamAttr(
                    initializer=Normal(mean=0.0, std=0.001))))
        self.bbox_score_list.append(bbox_score)
        self.bbox_delta_list.append(bbox_delta)
    self.assigned_label = None
    self.assigned_rois = None
def __init__(self, feat_in=1024, feat_out=1024):
    super(RPNFeat, self).__init__()
    # rpn feat is shared with each level
    self.rpn_conv = Conv2D(
        in_channels=feat_in,
        out_channels=feat_out,
        kernel_size=3,
        padding=1,
        weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.01)),
        bias_attr=ParamAttr(learning_rate=2., regularizer=L2Decay(0.)))
def __init__(self, in_channel=1024, out_channel=1024):
    super(RPNFeat, self).__init__()
    # rpn feat is shared with each level
    self.rpn_conv = nn.Conv2D(
        in_channels=in_channel,
        out_channels=out_channel,
        kernel_size=3,
        padding=1,
        weight_attr=paddle.ParamAttr(initializer=Normal(mean=0., std=0.01)))
    self.rpn_conv.skip_quant = True
def __init__(self,
             ch_in,
             ch_out,
             filter_size,
             stride=1,
             groups=1,
             norm_type=None,
             norm_groups=32,
             norm_decay=0.,
             freeze_norm=False,
             act=None):
    super(ConvNormLayer, self).__init__()
    self.act = act
    norm_lr = 0. if freeze_norm else 1.
    if norm_type is not None:
        assert norm_type in ['bn', 'sync_bn', 'gn'], \
            "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
        param_attr = ParamAttr(
            initializer=Constant(1.0),
            learning_rate=norm_lr,
            regularizer=L2Decay(norm_decay))
        bias_attr = ParamAttr(
            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
        global_stats = True if freeze_norm else None
        if norm_type in ['bn', 'sync_bn']:
            self.norm = nn.BatchNorm2D(
                ch_out,
                weight_attr=param_attr,
                bias_attr=bias_attr,
                use_global_stats=global_stats)
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(
                num_groups=norm_groups,
                num_channels=ch_out,
                weight_attr=param_attr,
                bias_attr=bias_attr)
        norm_params = self.norm.parameters()
        if freeze_norm:
            for param in norm_params:
                param.stop_gradient = True
        conv_bias_attr = False
    else:
        conv_bias_attr = True
        self.norm = None

    self.conv = nn.Conv2D(
        in_channels=ch_in,
        out_channels=ch_out,
        kernel_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        groups=groups,
        weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.001)),
        bias_attr=conv_bias_attr)
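# Design note on freeze_norm above: freezing a norm layer takes three
# coordinated steps -- learning_rate=0 on the attrs, stop_gradient=True on the
# parameters, and use_global_stats=True so running statistics are used even in
# train mode. A minimal standalone sketch of the same pattern:
import paddle
import paddle.nn as nn

bn = nn.BatchNorm2D(8, use_global_stats=True)  # always use running stats
for p in bn.parameters():
    p.stop_gradient = True  # scale/offset receive no gradients

x = paddle.randn([2, 8, 4, 4])
print(bn(x).shape)  # [2, 8, 4, 4]; bn now acts as a fixed affine transform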
def __init__(self,
             ch_in,
             ch_out,
             filter_size,
             stride=1,
             norm_type='bn',
             norm_groups=32,
             use_dcn=False,
             norm_decay=0.,
             freeze_norm=False,
             act=None,
             name=None):
    super(ConvNormLayer, self).__init__()
    assert norm_type in ['bn', 'sync_bn', 'gn']
    self.act = act

    self.conv = nn.Conv2D(
        in_channels=ch_in,
        out_channels=ch_out,
        kernel_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        groups=1,
        weight_attr=ParamAttr(
            name=name + "_weights", initializer=Normal(mean=0., std=0.01)),
        bias_attr=False)

    norm_lr = 0. if freeze_norm else 1.
    norm_name = name + '_bn'
    param_attr = ParamAttr(
        name=norm_name + "_scale",
        learning_rate=norm_lr,
        regularizer=L2Decay(norm_decay))
    bias_attr = ParamAttr(
        name=norm_name + "_offset",
        learning_rate=norm_lr,
        regularizer=L2Decay(norm_decay))
    global_stats = True if freeze_norm else False
    if norm_type in ['bn', 'sync_bn']:
        self.norm = nn.BatchNorm(
            ch_out,
            param_attr=param_attr,
            bias_attr=bias_attr,
            use_global_stats=global_stats,
            moving_mean_name=norm_name + '_mean',
            moving_variance_name=norm_name + '_variance')
    elif norm_type == 'gn':
        self.norm = nn.GroupNorm(
            num_groups=norm_groups,
            num_channels=ch_out,
            weight_attr=param_attr,
            bias_attr=bias_attr)
    norm_params = self.norm.parameters()
    if freeze_norm:
        for param in norm_params:
            param.stop_gradient = True
def __init__(self, in_channels, out_channels, kernel_size=3, groups=1):
    super(AlignConv, self).__init__()
    self.kernel_size = kernel_size
    self.align_conv = paddle.vision.ops.DeformConv2D(
        in_channels,
        out_channels,
        kernel_size=self.kernel_size,
        padding=(self.kernel_size - 1) // 2,
        groups=groups,
        weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
        bias_attr=None)
def __init__(self,
             num_classes=80,
             conv_feat='RetinaFeat',
             anchor_generator='RetinaAnchorGenerator',
             bbox_assigner='MaxIoUAssigner',
             loss_class='FocalLoss',
             loss_bbox='SmoothL1Loss',
             nms='MultiClassNMS',
             prior_prob=0.01,
             nms_pre=1000,
             weights=[1., 1., 1., 1.]):
    super(RetinaHead, self).__init__()
    self.num_classes = num_classes
    self.conv_feat = conv_feat
    self.anchor_generator = anchor_generator
    self.bbox_assigner = bbox_assigner
    self.loss_class = loss_class
    self.loss_bbox = loss_bbox
    self.nms = nms
    self.nms_pre = nms_pre
    self.weights = weights

    bias_init_value = -math.log((1 - prior_prob) / prior_prob)
    num_anchors = self.anchor_generator.num_anchors
    self.retina_cls = nn.Conv2D(
        in_channels=self.conv_feat.feat_out,
        out_channels=self.num_classes * num_anchors,
        kernel_size=3,
        stride=1,
        padding=1,
        weight_attr=ParamAttr(initializer=Normal(mean=0.0, std=0.01)),
        bias_attr=ParamAttr(initializer=Constant(value=bias_init_value)))
    self.retina_reg = nn.Conv2D(
        in_channels=self.conv_feat.feat_out,
        out_channels=4 * num_anchors,
        kernel_size=3,
        stride=1,
        padding=1,
        weight_attr=ParamAttr(initializer=Normal(mean=0.0, std=0.01)),
        bias_attr=ParamAttr(initializer=Constant(value=0)))
def __init__(self, rpn_feat, anchor_per_position=15, rpn_channel=1024):
    super(RPNHead, self).__init__()
    self.rpn_feat = rpn_feat
    if isinstance(rpn_feat, dict):
        self.rpn_feat = RPNFeat(**rpn_feat)
    # rpn head is shared with each level
    # rpn roi classification scores
    self.rpn_rois_score = Conv2D(
        in_channels=rpn_channel,
        out_channels=anchor_per_position,
        kernel_size=1,
        padding=0,
        weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.01)),
        bias_attr=ParamAttr(learning_rate=2., regularizer=L2Decay(0.)))
    # rpn roi bbox regression deltas
    self.rpn_rois_delta = Conv2D(
        in_channels=rpn_channel,
        out_channels=4 * anchor_per_position,
        kernel_size=1,
        padding=0,
        weight_attr=ParamAttr(initializer=Normal(mean=0., std=0.01)),
        bias_attr=ParamAttr(learning_rate=2., regularizer=L2Decay(0.)))
def __init__(self,
             ch_in,
             ch_out,
             filter_size,
             stride,
             norm_type='bn',
             norm_groups=32,
             use_dcn=False,
             norm_name=None,
             bias_on=False,
             lr_scale=1.,
             name=None):
    super(ConvNormLayer, self).__init__()
    assert norm_type in ['bn', 'sync_bn', 'gn']

    if bias_on:
        bias_attr = ParamAttr(
            name=name + "_bias",
            initializer=Constant(value=0.),
            learning_rate=lr_scale)
    else:
        bias_attr = False

    self.conv = nn.Conv2D(
        in_channels=ch_in,
        out_channels=ch_out,
        kernel_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        groups=1,
        weight_attr=ParamAttr(
            name=name + "_weight",
            initializer=Normal(mean=0., std=0.01),
            learning_rate=1.),
        bias_attr=bias_attr)

    param_attr = ParamAttr(
        name=norm_name + "_scale",
        learning_rate=1.,
        regularizer=L2Decay(0.))
    bias_attr = ParamAttr(
        name=norm_name + "_offset",
        learning_rate=1.,
        regularizer=L2Decay(0.))
    if norm_type in ['bn', 'sync_bn']:
        self.norm = nn.BatchNorm2D(
            ch_out, weight_attr=param_attr, bias_attr=bias_attr)
    elif norm_type == 'gn':
        self.norm = nn.GroupNorm(
            num_groups=norm_groups,
            num_channels=ch_out,
            weight_attr=param_attr,
            bias_attr=bias_attr)
def __init__(self, ch_in, ch_out=64, conv_num=2):
    super(WHHead, self).__init__()
    head_conv = nn.Sequential()
    for i in range(conv_num):
        name = 'conv.{}'.format(i)
        head_conv.add_sublayer(
            name,
            nn.Conv2D(
                in_channels=ch_in if i == 0 else ch_out,
                out_channels=ch_out,
                kernel_size=3,
                padding=1,
                weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
                bias_attr=ParamAttr(
                    learning_rate=2., regularizer=L2Decay(0.))))
        head_conv.add_sublayer(name + '.act', nn.ReLU())
    self.feat = self.add_sublayer('wh_feat', head_conv)
    self.head = self.add_sublayer(
        'wh_head',
        nn.Conv2D(
            in_channels=ch_out,
            out_channels=4,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
            bias_attr=ParamAttr(learning_rate=2., regularizer=L2Decay(0.))))
def initialize_parameters(self):
    Normal(std=0.02)(self.token_embedding.weight)
    Normal(std=0.01)(self.positional_embedding)

    if isinstance(self.visual, ModifiedResNet):
        if self.visual.attnpool is not None:
            std = self.embed_dim ** -0.5
            normal_ = Normal(std=std)
            normal_(self.visual.attnpool.attn.q_proj.weight)
            normal_(self.visual.attnpool.attn.k_proj.weight)
            normal_(self.visual.attnpool.attn.v_proj.weight)
            normal_(self.visual.attnpool.attn.out_proj.weight)

        for resnet_block in [
                self.visual.layer1,
                self.visual.layer2,
                self.visual.layer3,
                self.visual.layer4,
        ]:
            for name, param in resnet_block.named_parameters():
                if name.endswith("bn3.weight"):
                    Constant(value=0.0)(param)

    proj_std = (self.transformer.width ** -0.5) * (
        (2 * self.transformer.layers) ** -0.5)
    attn_std = self.transformer.width ** -0.5
    fc_std = (2 * self.transformer.width) ** -0.5
    for resblock in self.transformer.resblocks:
        normal_ = Normal(std=attn_std)
        normal_(resblock.attn.q_proj.weight)
        normal_(resblock.attn.k_proj.weight)
        normal_(resblock.attn.v_proj.weight)
        Normal(std=proj_std)(resblock.attn.out_proj.weight)
        Normal(std=fc_std)(resblock.mlp.c_fc.weight)
        Normal(std=proj_std)(resblock.mlp.c_proj.weight)

    if self.text_projection is not None:
        Normal(std=self.transformer.width ** -0.5)(self.text_projection)
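# The residual projections above use a depth-scaled std,
# proj_std = width**-0.5 * (2 * layers)**-0.5, the GPT-2-style scheme that
# keeps residual-stream variance roughly constant with depth (each block
# writes to the residual twice: attention out_proj and mlp c_proj).
# Illustrative values, assuming a hypothetical width and depth:
width, layers = 512, 12
attn_std = width ** -0.5                             # ~0.0442
proj_std = (width ** -0.5) * ((2 * layers) ** -0.5)  # ~0.0090
fc_std = (2 * width) ** -0.5                         # ~0.0313
print(attn_std, proj_std, fc_std)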
def __init__(self, layer_num, ch_out, name=None):
    super(ShortCut, self).__init__()
    shortcut_conv = Sequential()
    ch_in = ch_out * 2
    for i in range(layer_num):
        fan_out = 3 * 3 * ch_out
        std = math.sqrt(2. / fan_out)
        in_channels = ch_in if i == 0 else ch_out
        shortcut_name = name + '.conv.{}'.format(i)
        shortcut_conv.add_sublayer(
            shortcut_name,
            Conv2D(
                in_channels=in_channels,
                out_channels=ch_out,
                kernel_size=3,
                padding=1,
                weight_attr=ParamAttr(initializer=Normal(0, std)),
                bias_attr=ParamAttr(
                    learning_rate=2., regularizer=L2Decay(0.))))
        if i < layer_num - 1:
            shortcut_conv.add_sublayer(shortcut_name + '.act', ReLU())
    self.shortcut = self.add_sublayer('short', shortcut_conv)
def __init__(self,
             num_channels,
             num_filters,
             filter_size,
             stride=1,
             dilation=1,
             groups=1,
             act=None,
             lr_mult=1.0,
             name=None,
             data_format="NCHW"):
    super(ConvBNLayer, self).__init__()
    conv_stdv = filter_size * filter_size * num_filters
    self._conv = nn.Conv2D(
        in_channels=num_channels,
        out_channels=num_filters,
        kernel_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        dilation=dilation,
        groups=groups,
        weight_attr=ParamAttr(
            name=name + "_weights",
            learning_rate=lr_mult,
            initializer=Normal(0, math.sqrt(2. / conv_stdv))),
        bias_attr=False,
        data_format=data_format)
    if name == "conv1":
        bn_name = "bn_" + name
    else:
        bn_name = "bn" + name[3:]
    self._batch_norm = nn.BatchNorm(
        num_filters,
        act=act,
        param_attr=ParamAttr(name=bn_name + "_scale"),
        bias_attr=ParamAttr(bn_name + "_offset"),
        moving_mean_name=bn_name + "_mean",
        moving_variance_name=bn_name + "_variance",
        data_layout=data_format)
def __init__(self,
             layer_num,
             ch_in,
             ch_out,
             norm_type='bn',
             lite_neck=False,
             name=None):
    super(ShortCut, self).__init__()
    shortcut_conv = nn.Sequential()
    for i in range(layer_num):
        fan_out = 3 * 3 * ch_out
        std = math.sqrt(2. / fan_out)
        in_channels = ch_in if i == 0 else ch_out
        shortcut_name = name + '.conv.{}'.format(i)
        if lite_neck:
            shortcut_conv.add_sublayer(
                shortcut_name,
                LiteConv(
                    in_channels=in_channels,
                    out_channels=ch_out,
                    with_act=i < layer_num - 1,
                    norm_type=norm_type))
        else:
            shortcut_conv.add_sublayer(
                shortcut_name,
                nn.Conv2D(
                    in_channels=in_channels,
                    out_channels=ch_out,
                    kernel_size=3,
                    padding=1,
                    weight_attr=ParamAttr(initializer=Normal(0, std)),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))
            if i < layer_num - 1:
                shortcut_conv.add_sublayer(shortcut_name + '.act', nn.ReLU())
    self.shortcut = self.add_sublayer('shortcut', shortcut_conv)
def __init__(self,
             levels,
             channels,
             output_stride=32,
             in_chans=3,
             cardinality=1,
             base_width=64,
             block=DlaBottleneck,
             residual_root=False,
             drop_rate=0.0,
             global_pool="avg",
             class_dim=1000,
             with_pool=True):
    super(DLA, self).__init__()
    self.channels = channels
    self.class_dim = class_dim
    self.with_pool = with_pool
    self.cardinality = cardinality
    self.base_width = base_width
    self.drop_rate = drop_rate
    assert output_stride == 32  # FIXME support dilation

    self.base_layer = nn.Sequential(
        nn.Conv2D(
            in_chans,
            channels[0],
            kernel_size=7,
            stride=1,
            padding=3,
            bias_attr=False),
        nn.BatchNorm2D(channels[0]),
        nn.ReLU())
    self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
    self.level1 = self._make_conv_level(
        channels[0], channels[1], levels[1], stride=2)
    cargs = dict(
        cardinality=cardinality,
        base_width=base_width,
        root_residual=residual_root)
    self.level2 = DlaTree(
        levels[2], block, channels[1], channels[2], 2,
        level_root=False, **cargs)
    self.level3 = DlaTree(
        levels[3], block, channels[2], channels[3], 2,
        level_root=True, **cargs)
    self.level4 = DlaTree(
        levels[4], block, channels[3], channels[4], 2,
        level_root=True, **cargs)
    self.level5 = DlaTree(
        levels[5], block, channels[4], channels[5], 2,
        level_root=True, **cargs)
    self.feature_info = [
        # rare to have a meaningful stride 1 level
        dict(num_chs=channels[0], reduction=1, module="level0"),
        dict(num_chs=channels[1], reduction=2, module="level1"),
        dict(num_chs=channels[2], reduction=4, module="level2"),
        dict(num_chs=channels[3], reduction=8, module="level3"),
        dict(num_chs=channels[4], reduction=16, module="level4"),
        dict(num_chs=channels[5], reduction=32, module="level5"),
    ]
    self.num_features = channels[-1]

    if with_pool:
        self.global_pool = nn.AdaptiveAvgPool2D(1)
    if class_dim > 0:
        self.fc = nn.Conv2D(self.num_features, class_dim, 1)

    for m in self.sublayers():
        if isinstance(m, nn.Conv2D):
            n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
            normal_ = Normal(mean=0.0, std=math.sqrt(2.0 / n))
            normal_(m.weight)
        elif isinstance(m, nn.BatchNorm2D):
            ones_(m.weight)
            zeros_(m.bias)
def __init__(self,
             stacked_convs=2,
             feat_in=256,
             feat_out=256,
             num_classes=15,
             anchor_strides=[8, 16, 32, 64, 128],
             anchor_scales=[4],
             anchor_ratios=[1.0],
             target_means=0.0,
             target_stds=1.0,
             align_conv_type='AlignConv',
             align_conv_size=3,
             use_sigmoid_cls=True,
             anchor_assign=RBoxAssigner().__dict__,
             reg_loss_weight=[1.0, 1.0, 1.0, 1.0, 1.1],
             cls_loss_weight=[1.1, 1.05],
             reg_loss_type='l1',
             is_training=True):
    super(S2ANetHead, self).__init__()
    self.stacked_convs = stacked_convs
    self.feat_in = feat_in
    self.feat_out = feat_out
    self.anchor_list = None
    self.anchor_scales = anchor_scales
    self.anchor_ratios = anchor_ratios
    self.anchor_strides = paddle.to_tensor(anchor_strides)
    self.anchor_base_sizes = list(anchor_strides)
    self.means = paddle.ones(shape=[5]) * target_means
    self.stds = paddle.ones(shape=[5]) * target_stds
    assert align_conv_type in ['AlignConv', 'Conv', 'DCN']
    self.align_conv_type = align_conv_type
    self.align_conv_size = align_conv_size

    self.use_sigmoid_cls = use_sigmoid_cls
    self.cls_out_channels = num_classes if self.use_sigmoid_cls else 1
    self.sampling = False
    self.anchor_assign = anchor_assign
    self.reg_loss_weight = reg_loss_weight
    self.cls_loss_weight = cls_loss_weight
    self.alpha = 1.0
    self.beta = 1.0
    self.reg_loss_type = reg_loss_type
    self.is_training = is_training
    self.s2anet_head_out = None

    # anchor
    self.anchor_generators = []
    for anchor_base in self.anchor_base_sizes:
        self.anchor_generators.append(
            S2ANetAnchorGenerator(anchor_base, anchor_scales, anchor_ratios))
    self.anchor_generators = nn.LayerList(self.anchor_generators)

    self.fam_cls_convs = nn.Sequential()
    self.fam_reg_convs = nn.Sequential()
    for i in range(self.stacked_convs):
        chan_in = self.feat_in if i == 0 else self.feat_out
        self.fam_cls_convs.add_sublayer(
            'fam_cls_conv_{}'.format(i),
            nn.Conv2D(
                in_channels=chan_in,
                out_channels=self.feat_out,
                kernel_size=3,
                padding=1,
                weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                bias_attr=ParamAttr(initializer=Constant(0))))
        self.fam_cls_convs.add_sublayer('fam_cls_conv_{}_act'.format(i),
                                        nn.ReLU())
        self.fam_reg_convs.add_sublayer(
            'fam_reg_conv_{}'.format(i),
            nn.Conv2D(
                in_channels=chan_in,
                out_channels=self.feat_out,
                kernel_size=3,
                padding=1,
                weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                bias_attr=ParamAttr(initializer=Constant(0))))
        self.fam_reg_convs.add_sublayer('fam_reg_conv_{}_act'.format(i),
                                        nn.ReLU())

    self.fam_reg = nn.Conv2D(
        self.feat_out,
        5,
        1,
        weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
        bias_attr=ParamAttr(initializer=Constant(0)))
    prior_prob = 0.01
    bias_init = float(-np.log((1 - prior_prob) / prior_prob))
    self.fam_cls = nn.Conv2D(
        self.feat_out,
        self.cls_out_channels,
        1,
        weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
        bias_attr=ParamAttr(initializer=Constant(bias_init)))

    if self.align_conv_type == "AlignConv":
        self.align_conv = AlignConv(self.feat_out, self.feat_out,
                                    self.align_conv_size)
    elif self.align_conv_type == "Conv":
        self.align_conv = nn.Conv2D(
            self.feat_out,
            self.feat_out,
            self.align_conv_size,
            padding=(self.align_conv_size - 1) // 2,
            bias_attr=ParamAttr(initializer=Constant(0)))
    elif self.align_conv_type == "DCN":
        self.align_conv_offset = nn.Conv2D(
            self.feat_out,
            2 * self.align_conv_size**2,
            1,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=ParamAttr(initializer=Constant(0)))
        self.align_conv = paddle.vision.ops.DeformConv2D(
            self.feat_out,
            self.feat_out,
            self.align_conv_size,
            padding=(self.align_conv_size - 1) // 2,
            weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
            bias_attr=False)

    self.or_conv = nn.Conv2D(
        self.feat_out,
        self.feat_out,
        kernel_size=3,
        padding=1,
        weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
        bias_attr=ParamAttr(initializer=Constant(0)))

    # ODM
    self.odm_cls_convs = nn.Sequential()
    self.odm_reg_convs = nn.Sequential()
    for i in range(self.stacked_convs):
        ch_in = self.feat_out
        # ch_in = int(self.feat_out / 8) if i == 0 else self.feat_out
        self.odm_cls_convs.add_sublayer(
            'odm_cls_conv_{}'.format(i),
            nn.Conv2D(
                in_channels=ch_in,
                out_channels=self.feat_out,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                bias_attr=ParamAttr(initializer=Constant(0))))
        self.odm_cls_convs.add_sublayer('odm_cls_conv_{}_act'.format(i),
                                        nn.ReLU())
        self.odm_reg_convs.add_sublayer(
            'odm_reg_conv_{}'.format(i),
            nn.Conv2D(
                in_channels=self.feat_out,
                out_channels=self.feat_out,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
                bias_attr=ParamAttr(initializer=Constant(0))))
        self.odm_reg_convs.add_sublayer('odm_reg_conv_{}_act'.format(i),
                                        nn.ReLU())

    self.odm_cls = nn.Conv2D(
        self.feat_out,
        self.cls_out_channels,
        3,
        padding=1,
        weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
        bias_attr=ParamAttr(initializer=Constant(bias_init)))
    self.odm_reg = nn.Conv2D(
        self.feat_out,
        5,
        3,
        padding=1,
        weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
        bias_attr=ParamAttr(initializer=Constant(0)))

    self.featmap_sizes = []
    self.base_anchors_list = []
    self.refine_anchor_list = []
def __init__(self,
             ch_in: int = 3,
             class_num: int = 20,
             ignore_thresh: float = 0.7,
             valid_thresh: float = 0.005,
             nms_topk: int = 400,
             nms_posk: int = 100,
             nms_thresh: float = 0.45,
             is_train: bool = True,
             load_checkpoint: str = None):
    super(YOLOv3, self).__init__()

    self.is_train = is_train
    self.block = DarkNet53_conv_body(ch_in=ch_in, is_test=not self.is_train)
    self.block_outputs = []
    self.yolo_blocks = []
    self.route_blocks_2 = []
    self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    self.anchors = [
        10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
        373, 326
    ]
    self.class_num = class_num
    self.ignore_thresh = ignore_thresh
    self.valid_thresh = valid_thresh
    self.nms_topk = nms_topk
    self.nms_posk = nms_posk
    self.nms_thresh = nms_thresh
    ch_in_list = [1024, 768, 384]

    for i in range(3):
        # the sublayer name keeps the original spelling ("detecton") so the
        # parameter keys still match the pretrained checkpoint
        yolo_block = self.add_sublayer(
            "yolo_detecton_block_%d" % (i),
            YoloDetectionBlock(
                ch_in_list[i],
                channel=512 // (2**i),
                is_test=not self.is_train))
        self.yolo_blocks.append(yolo_block)

        num_filters = len(self.anchor_masks[i]) * (self.class_num + 5)
        # paddle.nn exposes Conv2D (capital D), not Conv2d
        block_out = self.add_sublayer(
            "block_out_%d" % (i),
            nn.Conv2D(
                1024 // (2**i),
                num_filters,
                1,
                stride=1,
                padding=0,
                weight_attr=paddle.ParamAttr(initializer=Normal(0., 0.02)),
                bias_attr=paddle.ParamAttr(
                    initializer=Constant(0.0), regularizer=L2Decay(0.))))
        self.block_outputs.append(block_out)

        if i < 2:
            route = self.add_sublayer(
                "route2_%d" % i,
                ConvBNLayer(
                    ch_in=512 // (2**i),
                    ch_out=256 // (2**i),
                    filter_size=1,
                    stride=1,
                    padding=0,
                    is_test=(not self.is_train)))
            self.route_blocks_2.append(route)
    self.upsample = Upsample()

    if load_checkpoint is not None:
        model_dict = paddle.load(load_checkpoint)[0]
        self.set_dict(model_dict)
        print("load custom checkpoint success")
    else:
        checkpoint = os.path.join(self.directory,
                                  'yolov3_darknet53_voc.pdparams')
        if not os.path.exists(checkpoint):
            os.system(
                'wget https://paddlehub.bj.bcebos.com/dygraph/detection/yolov3_darknet53_voc.pdparams -O '
                + checkpoint)
        model_dict = paddle.load(checkpoint)[0]
        self.set_dict(model_dict)
        print("load pretrained checkpoint success")