def get_loss(self, conv_fpn_feat, gt_bbox, im_info):
    """Build the FCOS training losses on top of the per-stride head outputs.

    Returns a tuple of loss symbols: (centerness_loss, cls_loss, offset_loss).
    """
    p = self.p
    bs = p.batch_image  # batch_size on a single gpu
    # NOTE(review): `bs` and the three *_loss_list accumulators below are never
    # used in this method.
    centerness_logit_dict, cls_logit_dict, offset_logit_dict = self.get_output(conv_fpn_feat)
    centerness_loss_list = []
    cls_loss_list = []
    offset_loss_list = []

    # prepare gt
    # Constant graph inputs holding the ignore sentinels; gradients blocked.
    ignore_label = X.block_grad(X.var('ignore_label', init=X.constant(p.loss_setting.ignore_label), shape=(1, 1)))
    ignore_offset = X.block_grad(X.var('ignore_offset', init=X.constant(p.loss_setting.ignore_offset), shape=(1, 1, 1)))
    # NOTE(review): the gt_bbox/im_info parameters are shadowed by fresh symbol
    # variables of the same name — presumably the data iterator binds them by
    # name at execution time; confirm against the training harness.
    gt_bbox = X.var('gt_bbox')
    im_info = X.var('im_info')
    centerness_labels, cls_labels, offset_labels = make_fcos_gt(
        gt_bbox, im_info,
        p.loss_setting.ignore_offset,
        p.loss_setting.ignore_label,
        p.FCOSParam.num_classifier)
    # Targets are constants w.r.t. the network parameters.
    centerness_labels = X.block_grad(centerness_labels)
    cls_labels = X.block_grad(cls_labels)
    offset_labels = X.block_grad(offset_labels)

    # gather output logits
    cls_logit_dict_list = []
    centerness_logit_dict_list = []
    offset_logit_dict_list = []
    for idx, stride in enumerate(p.FCOSParam.stride):
        # (c,H1,W1), (c,H2,W2), ..., (c,H5,W5) -> (H1W1+H2W2+...+H5W5), ...c..., (H1W1+H2W2+...+H5W5)
        cls_logit_dict_list.append(mx.sym.reshape(cls_logit_dict[stride], shape=(0, 0, -1)))
        centerness_logit_dict_list.append(mx.sym.reshape(centerness_logit_dict[stride], shape=(0, 0, -1)))
        offset_logit_dict_list.append(mx.sym.reshape(offset_logit_dict[stride], shape=(0, 0, -1)))
    # Concatenate all pyramid levels along the flattened-spatial axis.
    cls_logits = mx.sym.reshape(mx.sym.concat(*cls_logit_dict_list, dim=2), shape=(0, -1))
    centerness_logits = mx.sym.reshape(mx.sym.concat(*centerness_logit_dict_list, dim=2), shape=(0, -1))
    offset_logits = mx.sym.reshape(mx.sym.concat(*offset_logit_dict_list, dim=2), shape=(0, 4, -1))

    # make losses
    # Classification: sigmoid focal loss over positions whose label is not
    # the ignore sentinel.
    nonignore_mask = mx.sym.broadcast_not_equal(lhs=cls_labels, rhs=ignore_label)
    nonignore_mask = X.block_grad(nonignore_mask)
    cls_loss = make_sigmoid_focal_loss(gamma=p.loss_setting.focal_loss_gamma,
                                       alpha=p.loss_setting.focal_loss_alpha,
                                       logits=cls_logits,
                                       labels=cls_labels,
                                       nonignore_mask=nonignore_mask)
    cls_loss = X.loss(cls_loss, grad_scale=1)
    # Centerness: binary cross-entropy over positions that are neither ignored
    # nor have a non-positive centerness target.
    nonignore_mask = mx.sym.broadcast_logical_and(
        lhs=mx.sym.broadcast_not_equal(
            lhs=X.block_grad(centerness_labels),
            rhs=ignore_label
        ),
        rhs=mx.sym.broadcast_greater(
            lhs=centerness_labels,
            rhs=mx.sym.full((1, 1), 0)
        )
    )
    nonignore_mask = X.block_grad(nonignore_mask)
    centerness_loss = make_binary_cross_entropy_loss(centerness_logits, centerness_labels, nonignore_mask)
    centerness_loss = X.loss(centerness_loss, grad_scale=1)
    # Offset regression: IoU loss; presumably IoULoss masks out ignored
    # positions via ignore_offset / centerness_labels — confirm in its impl.
    offset_loss = IoULoss(offset_logits, offset_labels, ignore_offset, centerness_labels, name='offset_loss')
    return centerness_loss, cls_loss, offset_loss
def __init__(self, pRpn):
    """Initialize the head, re-initializing the classification bias prior.

    Overrides the parent's cls prediction bias so that, at the start of
    training, sigmoid(bias) equals the chosen foreground prior (0.02).
    """
    super().__init__(pRpn)
    # Focal-loss style bias init: keeps early cls scores near the prior so
    # the background-dominated loss does not explode.
    cls_prior = 0.02
    bias_init_value = -math.log((1 - cls_prior) / cls_prior)
    self.cls_pred_bias = X.var("cls_pred_bias", init=X.constant(bias_init_value))
    # Anchors are generated lazily and cached here.
    self.anchor_dict = None
def __init__(self, pRpn):
    """Declare the shared weights/biases of the cls and bbox subnets.

    Both four-conv towers are shared across FPN levels; every conv weight is
    gaussian-initialized (std 0.01) and every bias zero-initialized, except
    the final cls bias which encodes a 0.01 foreground prior.
    """
    super().__init__(pRpn)
    # init bias for cls: sigmoid(bias) == prior at the start of training
    prior = 0.01
    cls_bias_init = -math.log((1 - prior) / prior)

    # shared classification / regression conv towers (4 layers each)
    for branch in ("cls", "bbox"):
        for layer in range(1, 5):
            w_name = "%s_conv%d_weight" % (branch, layer)
            b_name = "%s_conv%d_bias" % (branch, layer)
            setattr(self, w_name, X.var(w_name, init=X.gauss(std=0.01)))
            setattr(self, b_name, X.var(b_name, init=X.zero_init()))

    # prediction layers
    self.cls_pred_weight = X.var("cls_pred_weight", init=X.gauss(std=0.01))
    self.cls_pred_bias = X.var("cls_pred_bias", init=X.constant(cls_bias_init))
    self.bbox_pred_weight = X.var("bbox_pred_weight", init=X.gauss(std=0.01))
    self.bbox_pred_bias = X.var("bbox_pred_bias", init=X.zero_init())

    # output caches, filled on first get_output call
    self._cls_logit_dict = None
    self._bbox_delta_dict = None
def __init__(self, pHead):
    """Set up the RepPoints-style head parameters.

    Derives the DCN kernel geometry from the configured number of points,
    declares the learnable moment-transfer vector, and creates all shared
    conv weights/biases for the cls and points branches.
    """
    self.p = patch_config_as_nothrow(pHead)

    # The point set is laid out as a square dcn_kernel x dcn_kernel grid.
    num_points = self.p.point_generate.num_points
    self.dcn_kernel = int(math.sqrt(num_points))
    self.dcn_pad = int((self.dcn_kernel - 1) / 2)
    assert self.dcn_kernel * self.dcn_kernel == num_points, \
        "The points number should be square."
    assert self.dcn_kernel % 2 == 1, "The dcn kernel size should be odd."

    # init moment method: small learnable 2-vector, trained with reduced lr
    dtype = "float16" if self.p.fp16 else "float32"
    self.moment_transfer = X.var(name="moment_transfer", shape=(2, ),
                                 init=X.zero_init(), lr_mult=0.01, dtype=dtype)

    # init bias for cls: sigmoid(bias) == 0.01 foreground prior
    prior_prob = 0.01
    pi = -math.log((1 - prior_prob) / prior_prob)

    # Shared conv weight/bias pairs: gaussian(0.01) weights, zero biases.
    pair_prefixes = (
        "cls_conv1", "cls_conv2", "cls_conv3", "cls_conv",
        "reg_conv1", "reg_conv2", "reg_conv3",
        "pts_init_conv", "pts_init_out",
        "pts_refine_conv", "pts_refine_out",
    )
    for prefix in pair_prefixes:
        setattr(self, prefix + "_weight",
                X.var(prefix + "_weight", init=X.gauss(std=0.01)))
        setattr(self, prefix + "_bias",
                X.var(prefix + "_bias", init=X.zero_init()))

    # cls output layer gets the prior-encoding bias instead of zero
    self.cls_out_weight = X.var("cls_out_weight", init=X.gauss(std=0.01))
    self.cls_out_bias = X.var("cls_out_bias", init=X.constant(pi))

    # output caches, filled on first forward construction
    self._pts_out_inits = None
    self._pts_out_refines = None
    self._cls_outs = None
def get_output(self, conv_feat):
    """Build (or return cached) per-level cls logits and bbox deltas.

    Parameters
    ----------
    conv_feat : sequence of feature symbols, one per configured stride.

    Returns
    -------
    (cls_logit_list, bbox_delta_list) : one entry per stride level.
    """
    # Cache so repeated calls reuse the same sub-graph.
    if self._cls_logit_list is not None and self._bbox_delta_list is not None:
        return self._cls_logit_list, self._bbox_delta_list

    p = self.p
    stride = p.anchor_generate.stride
    if not isinstance(stride, tuple):
        # BUGFIX: the original wrote `(stride)`, which is just a parenthesized
        # scalar, not a tuple — enumerate() below would then fail for a
        # single-stride configuration. A trailing comma makes a real tuple.
        stride = (stride,)
    conv_channel = p.head.conv_channel
    num_base_anchor = len(p.anchor_generate.ratio) * len(
        p.anchor_generate.scale)
    num_class = p.num_class

    # Focal-loss bias init: sigmoid(pi) equals the 0.01 foreground prior.
    prior_prob = 0.01
    pi = -math.log((1 - prior_prob) / prior_prob)

    # shared classification weight and bias
    self.cls_conv1_weight = X.var("cls_conv1_weight", init=X.gauss(std=0.01))
    self.cls_conv1_bias = X.var("cls_conv1_bias", init=X.zero_init())
    self.cls_conv2_weight = X.var("cls_conv2_weight", init=X.gauss(std=0.01))
    self.cls_conv2_bias = X.var("cls_conv2_bias", init=X.zero_init())
    self.cls_conv3_weight = X.var("cls_conv3_weight", init=X.gauss(std=0.01))
    self.cls_conv3_bias = X.var("cls_conv3_bias", init=X.zero_init())
    self.cls_conv4_weight = X.var("cls_conv4_weight", init=X.gauss(std=0.01))
    self.cls_conv4_bias = X.var("cls_conv4_bias", init=X.zero_init())
    self.cls_pred_weight = X.var("cls_pred_weight", init=X.gauss(std=0.01))
    self.cls_pred_bias = X.var("cls_pred_bias", init=X.constant(pi))

    # shared regression weight and bias
    self.bbox_conv1_weight = X.var("bbox_conv1_weight", init=X.gauss(std=0.01))
    self.bbox_conv1_bias = X.var("bbox_conv1_bias", init=X.zero_init())
    self.bbox_conv2_weight = X.var("bbox_conv2_weight", init=X.gauss(std=0.01))
    self.bbox_conv2_bias = X.var("bbox_conv2_bias", init=X.zero_init())
    self.bbox_conv3_weight = X.var("bbox_conv3_weight", init=X.gauss(std=0.01))
    self.bbox_conv3_bias = X.var("bbox_conv3_bias", init=X.zero_init())
    self.bbox_conv4_weight = X.var("bbox_conv4_weight", init=X.gauss(std=0.01))
    self.bbox_conv4_bias = X.var("bbox_conv4_bias", init=X.zero_init())
    self.bbox_pred_weight = X.var("bbox_pred_weight", init=X.gauss(std=0.01))
    self.bbox_pred_bias = X.var("bbox_pred_bias", init=X.zero_init())

    cls_logit_list = []
    bbox_delta_list = []
    for i, s in enumerate(stride):
        # Both subnets are shared across levels; only the input feature varies.
        cls_logit = self._cls_subnet(conv_feat=conv_feat[i],
                                     conv_channel=conv_channel,
                                     num_base_anchor=num_base_anchor,
                                     num_class=num_class)
        bbox_delta = self._bbox_subnet(conv_feat=conv_feat[i],
                                       conv_channel=conv_channel,
                                       num_base_anchor=num_base_anchor,
                                       num_class=num_class)
        cls_logit_list.append(cls_logit)
        bbox_delta_list.append(bbox_delta)

    self._cls_logit_list = cls_logit_list
    self._bbox_delta_list = bbox_delta_list
    return self._cls_logit_list, self._bbox_delta_list
def get_output(self, conv_fpn_feat):
    """Build the FCOS head outputs for every FPN stride.

    Parameters
    ----------
    conv_fpn_feat : dict mapping 'stride%s' keys to FPN feature symbols.

    Returns
    -------
    (centerness_logit_dict, cls_logit_dict, offset_logit_dict), each keyed
    by stride; offsets are exp-activated and per-stride scaled.
    """
    p = self.p
    centerness_logit_dict = {}
    cls_logit_dict = {}
    offset_logit_dict = {}

    # heads are shared across stages: declare every weight/bias once, reuse
    # the same symbols at each stride.
    shared_conv1_w = X.var(name="shared_conv1_3x3_weight", init=X.gauss(0.01))
    shared_conv1_b = X.var(name="shared_conv1_3x3_bias", init=X.zero_init(), lr_mult=2, wd_mult=0)
    shared_conv2_w = X.var(name="shared_conv2_3x3_weight", init=X.gauss(0.01))
    shared_conv2_b = X.var(name="shared_conv2_3x3_bias", init=X.zero_init(), lr_mult=2, wd_mult=0)
    shared_conv3_w = X.var(name="shared_conv3_3x3_weight", init=X.gauss(0.01))
    shared_conv3_b = X.var(name="shared_conv3_3x3_bias", init=X.zero_init(), lr_mult=2, wd_mult=0)
    shared_conv4_w = X.var(name="shared_conv4_3x3_weight", init=X.gauss(0.01))
    shared_conv4_b = X.var(name="shared_conv4_3x3_bias", init=X.zero_init(), lr_mult=2, wd_mult=0)
    centerness_conv_w = X.var(name="centerness_conv_3x3_weight", init=X.gauss(0.01))
    centerness_conv_b = X.var(name="centerness_conv_3x3_bias", init=X.zero_init(), lr_mult=2, wd_mult=0)
    cls_conv_w = X.var(name="cls_conv_3x3_weight", init=X.gauss(0.01))
    # -log(99) == -log((1-0.01)/0.01): focal-loss bias init for a 0.01 prior
    cls_conv_b = X.var(name="cls_conv_3x3_bias", init=X.constant(-math.log(99)), lr_mult=2, wd_mult=0)
    offset_conv1_w = X.var(name="offset_conv1_3x3_weight", init=X.gauss(0.01))
    offset_conv1_b = X.var(name="offset_conv1_3x3_bias", init=X.zero_init(), lr_mult=2, wd_mult=0)
    offset_conv2_w = X.var(name="offset_conv2_3x3_weight", init=X.gauss(0.01))
    offset_conv2_b = X.var(name="offset_conv2_3x3_bias", init=X.zero_init(), lr_mult=2, wd_mult=0)
    offset_conv3_w = X.var(name="offset_conv3_3x3_weight", init=X.gauss(0.01))
    offset_conv3_b = X.var(name="offset_conv3_3x3_bias", init=X.zero_init(), lr_mult=2, wd_mult=0)
    offset_conv4_w = X.var(name="offset_conv4_3x3_weight", init=X.gauss(0.01))
    offset_conv4_b = X.var(name="offset_conv4_3x3_bias", init=X.zero_init(), lr_mult=2, wd_mult=0)
    offset_conv5_w = X.var(name="offset_conv5_3x3_weight", init=X.gauss(0.01))
    offset_conv5_b = X.var(name="offset_conv5_3x3_bias", init=X.zero_init(), lr_mult=2, wd_mult=0)

    for stride in p.FCOSParam.stride:
        # centerness & cls shared tower: 4 x (conv3x3 -> GN -> ReLU)
        shared_conv1 = X.conv(
            conv_fpn_feat['stride%s' % stride],
            kernel=3,
            filter=256,
            no_bias=False,
            name="shared_conv1_3x3_%s" % stride,
            weight=shared_conv1_w,
            bias=shared_conv1_b,
        )
        shared_gn1 = X.gn(shared_conv1, name='shared_gn1_3x3_%s' % stride, num_group=32)
        shared_relu1 = X.relu(shared_gn1, name='shared_relu1_3x3_%s' % stride)
        shared_conv2 = X.conv(
            shared_relu1,
            kernel=3,
            filter=256,
            no_bias=False,
            name="shared_conv2_3x3_%s" % stride,
            weight=shared_conv2_w,
            bias=shared_conv2_b,
        )
        shared_gn2 = X.gn(shared_conv2, name='shared_gn2_3x3_%s' % stride, num_group=32)
        shared_relu2 = X.relu(shared_gn2, name='shared_relu2_3x3_%s' % stride)
        shared_conv3 = X.conv(
            shared_relu2,
            kernel=3,
            filter=256,
            no_bias=False,
            name="shared_conv3_3x3_%s" % stride,
            weight=shared_conv3_w,
            bias=shared_conv3_b,
        )
        shared_gn3 = X.gn(shared_conv3, name='shared_gn3_3x3_%s' % stride, num_group=32)
        shared_relu3 = X.relu(shared_gn3, name='shared_relu3_3x3_%s' % stride)
        shared_conv4 = X.conv(
            shared_relu3,
            kernel=3,
            filter=256,
            no_bias=False,
            name="shared_conv4_3x3_%s" % stride,
            weight=shared_conv4_w,
            bias=shared_conv4_b,
        )
        shared_gn4 = X.gn(shared_conv4, name='shared_gn4_3x3_%s' % stride, num_group=32)
        shared_relu4 = X.relu(shared_gn4, name='shared_relu4_3x3_%s' % stride)

        # centerness head: single-channel logit
        center_logit = X.conv(
            shared_relu4,
            kernel=3,
            filter=1,
            name="center_conv_3x3_%s" % stride,
            no_bias=False,
            weight=centerness_conv_w,
            bias=centerness_conv_b,
        )
        # cls head
        cls_logit = X.conv(
            shared_relu4,
            kernel=3,
            filter=p.FCOSParam.num_classifier,  # remove bg channel
            name="cls_conv_3x3_%s" % stride,
            no_bias=False,
            weight=cls_conv_w,
            bias=cls_conv_b,
        )

        # offset head: its own 4-conv tower from the raw FPN feature
        offset_conv1 = X.conv(
            conv_fpn_feat['stride%s' % stride],
            kernel=3,
            filter=256,
            name="offset_conv1_3x3_%s" % stride,
            no_bias=False,
            weight=offset_conv1_w,
            bias=offset_conv1_b,
        )
        offset_gn1 = X.gn(offset_conv1, name='offset_gn1_3x3_%s' % stride, num_group=32)
        offset_relu1 = X.relu(offset_gn1, name='offset_relu1_3x3_%s' % stride)
        offset_conv2 = X.conv(
            offset_relu1,
            kernel=3,
            filter=256,
            name="offset_conv2_3x3_%s" % stride,
            no_bias=False,
            weight=offset_conv2_w,
            bias=offset_conv2_b,
        )
        offset_gn2 = X.gn(offset_conv2, name='offset_gn2_3x3_%s' % stride, num_group=32)
        offset_relu2 = X.relu(offset_gn2, name='offset_relu2_3x3_%s' % stride)
        offset_conv3 = X.conv(
            offset_relu2,
            kernel=3,
            filter=256,
            name="offset_conv3_3x3_%s" % stride,
            no_bias=False,
            weight=offset_conv3_w,
            bias=offset_conv3_b,
        )
        offset_gn3 = X.gn(offset_conv3, name='offset_gn3_3x3_%s' % stride, num_group=32)
        offset_relu3 = X.relu(offset_gn3, name='offset_relu3_3x3_%s' % stride)
        offset_conv4 = X.conv(
            offset_relu3,
            kernel=3,
            filter=256,
            # BUGFIX: this op was named "offset_conv1_3x3_%s", colliding with
            # the first offset conv of the same stride in the symbol graph.
            name="offset_conv4_3x3_%s" % stride,
            no_bias=False,
            weight=offset_conv4_w,
            bias=offset_conv4_b,
        )
        offset_gn4 = X.gn(offset_conv4, name='offset_gn4_3x3_%s' % stride, num_group=32)
        offset_relu4 = X.relu(offset_gn4, name='offset_relu4_3x3_%s' % stride)
        offset_logit = X.conv(
            offset_relu4,
            kernel=3,
            filter=4,
            name="offset_conv5_3x3_%s" % stride,
            no_bias=False,
            weight=offset_conv5_w,
            bias=offset_conv5_b,
        )
        # per-stride learnable scale, then exp to keep offsets positive
        offset_logit = mx.sym.broadcast_mul(
            lhs=offset_logit,
            rhs=X.var(name="offset_scale_%s_w" % stride, init=X.constant(1), shape=(1, 1, 1, 1)))
        offset_logit = mx.sym.exp(offset_logit)

        centerness_logit_dict[stride] = center_logit
        cls_logit_dict[stride] = cls_logit
        offset_logit_dict[stride] = offset_logit

    self.centerness_logit_dict = centerness_logit_dict
    self.cls_logit_dict = cls_logit_dict
    self.offset_logit_dict = offset_logit_dict
    return self.centerness_logit_dict, self.cls_logit_dict, self.offset_logit_dict