Example #1
    def forward(self, data):
        """ only used in training
        """
        template = data['template'].cuda()
        search = data['search'].cuda()
        template_box = data['template_box'].cuda()
        search_box = data['search_box'].cuda()
        # 12: from template to search
        label_cls12 = data['label_cls12'].cuda()
        label_loc12 = data['label_loc12'].cuda()
        label_loc_weight12 = data['label_loc_weight12'].cuda()
        # 21: from search to template
        label_cls21 = data['label_cls21'].cuda()
        label_loc21 = data['label_loc21'].cuda()
        label_loc_weight21 = data['label_loc_weight21'].cuda()

        # get feature
        zf = self.backbone(template)
        xf = self.backbone(search)
        # neck
        zf = self.neck(zf)
        xf = self.neck(xf)
        # non-local
        # zf = self.non_local(zf)
        # xf = self.non_local(xf)

        # crop
        template_box = torch.split(template_box, 1, dim=0)
        search_box = torch.split(search_box, 1, dim=0)

        if isinstance(zf, (list, tuple)):
            zf_crop = [self.roi_align(zi, template_box) for zi in zf]
            xf_crop = [self.roi_align(xi, search_box) for xi in xf]
        else:
            zf_crop = self.roi_align(zf, template_box)
            xf_crop = self.roi_align(xf, search_box)
        # head
        cls12, loc12 = self.rpn_head(zf_crop, xf)
        cls21, loc21 = self.rpn_head(xf_crop, zf)

        # get loss
        cls12 = self.log_softmax(cls12)
        cls_loss12 = select_cross_entropy_loss(cls12, label_cls12)
        loc_loss12 = weight_l1_loss(loc12, label_loc12, label_loc_weight12)

        cls21 = self.log_softmax(cls21)
        cls_loss21 = select_cross_entropy_loss(cls21, label_cls21)
        loc_loss21 = weight_l1_loss(loc21, label_loc21, label_loc_weight21)

        cls_loss = 0.5 * (cls_loss12 + cls_loss21)
        loc_loss = 0.5 * (loc_loss12 + loc_loss21)

        outputs = {}
        outputs['total_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
            cfg.TRAIN.LOC_WEIGHT * loc_loss
        outputs['cls_loss'] = cls_loss
        outputs['loc_loss'] = loc_loss
        # done
        return outputs
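
The loss helpers select_cross_entropy_loss and weight_l1_loss are used by every example here but defined elsewhere. A minimal sketch of how pysot-style trackers typically implement them (the exact bodies are an assumption inferred from the call sites and the shape comments in the examples below):

import torch
import torch.nn.functional as F

def get_cls_loss(pred, label, select):
    # NLL loss restricted to the selected (positive or negative) anchors
    if len(select.size()) == 0 or select.size() == torch.Size([0]):
        return 0
    pred = torch.index_select(pred, 0, select)
    label = torch.index_select(label, 0, select)
    return F.nll_loss(pred, label)

def select_cross_entropy_loss(pred, label):
    # pred: log-probabilities [..., 2]; label: 1 = positive, 0 = negative, -1 = ignore
    pred = pred.view(-1, 2)
    label = label.view(-1)
    pos = label.data.eq(1).nonzero().squeeze().cuda()
    neg = label.data.eq(0).nonzero().squeeze().cuda()
    loss_pos = get_cls_loss(pred, label, pos)
    loss_neg = get_cls_loss(pred, label, neg)
    return loss_pos * 0.5 + loss_neg * 0.5

def weight_l1_loss(pred_loc, label_loc, loss_weight):
    # pred_loc: [b, 4k, h, w]; label_loc: [b, 4, k, h, w]; loss_weight: [b, k, h, w]
    b, _, sh, sw = pred_loc.size()
    pred_loc = pred_loc.view(b, 4, -1, sh, sw)
    diff = (pred_loc - label_loc).abs().sum(dim=1).view(b, -1, sh, sw)
    return (diff * loss_weight).sum().div(b)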
Example #2
    def forward(self, data):
        """ only used in training
        """
        template = data['template'].cuda()  # [28, 3, 127, 127]: batch 28, 3 color channels, size 127x127
        search = data['search'].cuda()
        label_cls = data['label_cls'].type(
            torch.FloatTensor).cuda()  # torch.Size([28, 5, 25, 25])
        label_loc = data['label_loc'].cuda()

        # get feature
        zf = self.backbone(template)
        xf = self.backbone(search)

        if cfg.TRANSFORMER.TRANSFORMER:
            zf = zf[-1]
            xf = xf[-1]
            if cfg.ADJUST.ADJUST:
                zf = self.neck(zf)
                xf = self.neck(xf)
            output = self.tr_head(zf, xf)

            outputs = self.criterion(output, (label_cls, label_loc))
            return outputs
        else:
            label_loc_weight = data['label_loc_weight'].cuda()
            if cfg.MASK.MASK:
                zf = zf[-1]
                self.xf_refine = xf[:-1]
                xf = xf[-1]
            if cfg.ADJUST.ADJUST:
                zf = self.neck(zf)
                xf = self.neck(xf)
            cls, loc = self.rpn_head(zf, xf)

            # loc torch.Size([28, 20, 25, 25])
            # label_loc torch.Size([28, 4, 5, 25, 25])
            # label_loc_weight torch.Size([28, 5, 25, 25])
            # get loss
            cls = self.log_softmax(cls)  # torch.Size([28, 5, 25, 25, 2])
            cls_loss = select_cross_entropy_loss(
                cls, label_cls)  # cls_loss torch.Size([])
            loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)
            outputs = {}
            outputs['total_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
                cfg.TRAIN.LOC_WEIGHT * loc_loss
            outputs['cls_loss'] = cls_loss
            outputs['loc_loss'] = loc_loss

            if cfg.MASK.MASK:
                # TODO: mask loss is not implemented; with mask_loss = None the
                # addition below would raise a TypeError if this branch ran
                mask, self.mask_corr_feature = self.mask_head(zf, xf)
                mask_loss = None
                outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
                outputs['mask_loss'] = mask_loss
            return outputs
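
log_softmax above turns the raw [b, 2k, h, w] classification map into the [28, 5, 25, 25, 2] tensor noted in the comment, i.e. it regroups the channels into k anchor slots and normalizes over the two-class dimension. A sketch of that reshaping (the body is an assumption consistent with the shape comments; F is torch.nn.functional):

    def log_softmax(self, cls):
        # [b, 2k, h, w] -> [b, k, h, w, 2], log-softmax over the last (class) dim
        b, a2, h, w = cls.size()
        cls = cls.view(b, 2, a2 // 2, h, w)
        cls = cls.permute(0, 2, 3, 4, 1).contiguous()
        return F.log_softmax(cls, dim=4)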
Example #3
    def forward(self, data):
        """ only used in training
        """

        template = data['template'].cuda()
        search = data['search'].cuda()
        bbox = data['bbox'].cuda()
        labelcls2 = data['label_cls2'].cuda()
        labelxff = data['labelxff'].cuda()
        weightcls3 = data['weightcls3'].cuda()
        labelcls3 = data['labelcls3'].cuda()
        weightxff = data['weightxff'].cuda()

        zf1, zf = self.backbone(template)
        xf1, xf = self.backbone(search)
        xff, ress = self.grader(xf1, zf1)

        anchors = self.getcenter(xff)

        label_cls, label_loc, label_loc_weight = \
            self.fin2.get(anchors, bbox, xff.size()[3])

        cls1, cls2, cls3, loc = self.new(xf, zf, ress)

        cls1 = self.log_softmax(cls1)
        cls2 = self.log_softmax(cls2)

        cls_loss1 = select_cross_entropy_loss(cls1, label_cls)
        cls_loss2 = select_cross_entropy_loss(cls2, labelcls2)
        cls_loss3 = self.cls3loss(cls3, labelcls3)

        cls_loss = cfg.TRAIN.w3 * cls_loss3 + cfg.TRAIN.w1 * cls_loss1 + cfg.TRAIN.w2 * cls_loss2

        loc_loss1 = weight_l1_loss(loc, label_loc, label_loc_weight)

        pre_bbox = self._convert_bbox(loc, anchors)
        label_bbox = self._convert_bbox(label_loc, anchors)

        loc_loss2 = self.IOULOSS(pre_bbox, label_bbox, label_loc_weight)

        loc_loss = cfg.TRAIN.w4 * loc_loss1 + cfg.TRAIN.w5 * loc_loss2

        shapeloss = l1loss(xff, labelxff, weightxff)

        outputs = {}
        outputs['total_loss'] = cfg.TRAIN.LOC_WEIGHT * loc_loss + \
            cfg.TRAIN.CLS_WEIGHT * cls_loss + \
            cfg.TRAIN.SHAPE_WEIGHT * shapeloss
        outputs['cls_loss'] = cls_loss
        outputs['loc_loss'] = loc_loss
        outputs['shapeloss'] = shapeloss
        # 2, 4, 1: all use loss2

        return outputs
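
_convert_bbox is not shown; since its outputs feed IOULOSS, it presumably decodes the regression offsets into boxes while staying in tensor form. A sketch under the standard SiamRPN (dx, dy, dw, dh) anchor encoding (the layout and names are assumptions, not the repository's actual helper):

import torch

def convert_bbox(delta, anchors):
    # delta: [4, N] predicted offsets; anchors: [N, 4] as (cx, cy, w, h) -- assumed layout
    cx = delta[0, :] * anchors[:, 2] + anchors[:, 0]
    cy = delta[1, :] * anchors[:, 3] + anchors[:, 1]
    w = torch.exp(delta[2, :]) * anchors[:, 2]
    h = torch.exp(delta[3, :]) * anchors[:, 3]
    return torch.stack([cx, cy, w, h], dim=0)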
Example #4
    def get_grads(self, cls_feas, loc_feas, label_cls, label_loc,
                  label_loc_weight):
        cls = []
        loc = []
        cls_lws, loc_lws = [], []
        for idx, (cls_fea, loc_fea) in enumerate(zip(cls_feas, loc_feas),
                                                 start=2):
            rpn = getattr(self, 'rpn' + str(idx))
            cls_fea = cls_fea.data.detach()
            cls_fea.requires_grad = True
            c = F.conv2d(cls_fea,
                         weight=rpn.cls.last_weights.detach(),
                         bias=rpn.cls.last_bias.detach())
            loc_fea = loc_fea.data.detach()
            loc_fea.requires_grad = True
            l = F.conv2d(loc_fea,
                         weight=rpn.loc.last_weights.detach(),
                         bias=rpn.loc.last_bias.detach())
            cls.append(c)
            loc.append(l)
            cls_feas[idx - 2] = cls_fea
            loc_feas[idx - 2] = loc_fea

            clw = rpn.cls.layer_weight
            llw = rpn.loc.layer_weight
            cls_lws.append(clw)
            loc_lws.append(llw)

        if self.weighted:
            cls_weight = F.softmax(
                self.cls_weight + torch.cat(cls_lws).detach(), 0)
            loc_weight = F.softmax(
                self.loc_weight + torch.cat(loc_lws).detach(), 0)
            cls = self.weighted_avg(cls, cls_weight)
            loc = self.weighted_avg(loc, loc_weight)
        else:
            cls, loc = self.avg(cls), self.avg(loc)

        # get loss
        cls = self.log_softmax(cls)
        cls_loss = select_cross_entropy_loss(cls, label_cls)
        loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)
        loss = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
            cfg.TRAIN.LOC_WEIGHT * loc_loss
        loss.backward()

        cls_grads = []
        loc_grads = []
        for cls_fea, loc_fea in zip(cls_feas, loc_feas):
            cls_grads.append(cls_fea.grad.data.detach() * 10000)
            loc_grads.append(loc_fea.grad.data.detach() * 10000)

        return cls_grads, loc_grads
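
avg and weighted_avg fuse the per-level RPN outputs before the loss; in pysot's multi-level RPN they are one-liners. A sketch assumed to match the call sites above:

def avg(lst):
    # uniform mean over the per-level outputs
    return sum(lst) / len(lst)

def weighted_avg(lst, weight):
    # weight: softmax-normalized per-level weights, one scalar per level
    s = 0
    for i in range(len(weight)):
        s += lst[i] * weight[i]
    return s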
Example #5
    def forward(self, data):
        """ only used in training
        """
        template = data['template'].cuda()
        search = data['search'].cuda()
        label_cls = data['label_cls'].cuda()
        label_loc = data['label_loc'].cuda()
        label_loc_weight = data['label_loc_weight'].cuda()

        # get feature
        zf = self.backbone(template)
        xf = self.backbone(search)
        if cfg.MASK.MASK:
            zf = zf[-1]
            self.xf_refine = xf[:-1]
            xf = xf[-1]
        if cfg.ADJUST.ADJUST:
            zf = self.neck(zf)
            xf = self.neck(xf)

        if cfg.RPN.RPN:
            cls, loc = self.rpn_head(zf, xf)

            # get loss
            cls = self.log_softmax(cls)
            cls_loss = select_cross_entropy_loss(cls, label_cls)
            loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)

            outputs = {}
            outputs['total_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
                cfg.TRAIN.LOC_WEIGHT * loc_loss
            outputs['cls_loss'] = cls_loss
            outputs['loc_loss'] = loc_loss
        else:
            b, _, h, w = xf.size()
            cls = F.conv2d(xf.view(1, -1, h, w), zf,
                           groups=b) * 1e-3 + self.backbone.corr_bias
            cls = cls.transpose(0, 1)

            # get loss
            cls = self.log_softmax(cls)
            cls_loss = select_cross_entropy_loss(cls, label_cls)

            outputs = {}
            outputs['total_loss'] = cls_loss

        if cfg.MASK.MASK:
            # TODO
            mask, self.mask_corr_feature = self.mask_head(zf, xf)
            mask_loss = None
            outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
            outputs['mask_loss'] = mask_loss
        return outputs
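
In the else branch above, F.conv2d(..., groups=b) correlates each search feature map with its own template kernel in a single call, producing the SiamFC-style response. The same trick as a standalone helper, for reference (a sketch; the name mirrors pysot's xcorr_fast):

import torch.nn.functional as F

def xcorr_fast(x, kernel):
    # x: [b, c, H, W] search features; kernel: [b, c, h, w] template features
    batch = kernel.size(0)
    pk = kernel.view(-1, x.size(1), kernel.size(2), kernel.size(3))
    px = x.view(1, -1, x.size(2), x.size(3))
    po = F.conv2d(px, pk, groups=batch)
    return po.view(batch, -1, po.size(2), po.size(3))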
Example #6
    def forward(self, data):
        """ only used in training
        """
        template = data['template'].cuda()
        search = data['search'].cuda()
        label_cls = data['label_cls'].cuda()
        label_loc = data['label_loc'].cuda()
        label_loc_weight = data['label_loc_weight'].cuda()
        label = data['label'].cuda()

        # get feature
        zf = self.backbone(template)
        xf = self.backbone(search)
        if cfg.MASK.MASK:
            zf = zf[-1]
            self.xf_refine = xf[:-1]
            xf = xf[-1]
        if cfg.ADJUST.ADJUST:
            zf = self.neck(zf)
            xf = self.neck(xf)

        if self.use_yolo:
            loss, loss_cls, loss_l2 = self.rpn_head(zf, xf, label)
            outputs = {}
            outputs['total_loss'] = loss
            outputs['cls_loss'] = loss_cls
            outputs['loc_loss'] = loss_l2
        else:
            cls, loc = self.rpn_head(zf, xf)
            # get loss
            cls = self.log_softmax(cls)
            cls_loss = select_cross_entropy_loss(cls, label_cls)
            loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)

            outputs = {}
            outputs['total_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
                cfg.TRAIN.LOC_WEIGHT * loc_loss
            outputs['cls_loss'] = cls_loss
            outputs['loc_loss'] = loc_loss

            if cfg.MASK.MASK:
                # TODO
                mask, self.mask_corr_feature = self.mask_head(zf, xf)
                mask_loss = None
                outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
                outputs['mask_loss'] = mask_loss

        return outputs
Example #7
    def forward(self, data, epsilon):
        """ only used in training
                """
        template = data['template']
        search = data['search']
        label_cls = data['label_cls']
        label_loc = data['label_loc']
        label_loc_weight = data['label_loc_weight']

        # get feature
        zf = self.backbone(template)

        search = search + 0.1 * self.cn(search)

        xf = self.backbone(search)

        if cfg.ADJUST.ADJUST:
            zf = self.neck(zf)
            xf = self.neck(xf)
        cls, loc = self.rpn_head(zf, xf)

        # debug-only inspection of the score distribution (results are unused)
        score = self._convert_score(cls)
        idx = np.argwhere(score < 0.5)
        lt = score[idx]
        idx05 = idx[np.argmax(lt)]
        lt05 = score[idx05]

        # get loss
        cls = self.log_softmax(cls)

        cls_loss = select_cross_entropy_loss(cls, label_cls)
        loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)

        outputs = {'search': search, 'cls_loss': cls_loss}
        outputs['total_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
                                cfg.TRAIN.LOC_WEIGHT * loc_loss
        outputs['loc_loss'] = loc_loss

        return outputs
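
_convert_score is not defined in this snippet; the np.argwhere call above implies it returns a flat numpy array of per-anchor foreground probabilities, as pysot's tracker does. A sketch under that assumption (F is torch.nn.functional):

    def _convert_score(self, score):
        # [b, 2k, h, w] -> flat numpy array of foreground probabilities
        score = score.permute(1, 2, 3, 0).contiguous().view(2, -1).permute(1, 0)
        return F.softmax(score, dim=1).data[:, 1].cpu().numpy()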
Example #8
    def forward(self, data):
        """ only used in training, different training set
        """
        template = data['template'].cuda()
        search = data['search'].cuda()
        label_cls = data['label_cls'].cuda()
        label_loc = data['label_loc'].cuda()
        label_loc_weight = data['label_loc_weight'].cuda()

        # get feature
        zf = self.backbone(template)
        xf = self.backbone(search)
        if cfg.MASK.MASK:
            # only feature map is employed
            zf = zf[-1]
            self.xf_refine = xf[:-1]
            xf = xf[-1]
        if cfg.ADJUST.ADJUST:
            # Downsample Layer
            zf = self.neck(zf)
            xf = self.neck(xf)
        cls, loc = self.rpn_head(zf, xf)

        # get loss
        cls = self.log_softmax(cls)
        cls_loss = select_cross_entropy_loss(cls, label_cls)
        loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)

        outputs = {}
        # Total loss include classification loss and localization loss
        outputs['total_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
            cfg.TRAIN.LOC_WEIGHT * loc_loss
        outputs['cls_loss'] = cls_loss
        outputs['loc_loss'] = loc_loss

        if cfg.MASK.MASK:
            # no mask loss, 3969 * H * W, 256 * H * W
            mask, self.mask_corr_feature = self.mask_head(zf, xf)
            mask_loss = None
            outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
            outputs['mask_loss'] = mask_loss
        return outputs
Example #9
    def forward(self, data):
        """ only used in training  对应到特征图上每个anchor的信息: , , overlap(正样本和所有anchor的IOU)
        """

        # without the GRU, template and search features are both extracted from a single frame
        if not cfg.GRU.USE_GRU:

            template = data['template'].cuda()
            search = data['search'].cuda()
            label_cls = data['label_cls'].cuda()                # cls: anchor is positive (1), negative (0), or ignored (-1)
            label_loc = data['label_loc'].cuda()                # delta: encoded offset of the positive box relative to the anchor
            label_loc_weight = data['label_loc_weight'].cuda()  # weights of the anchors matched to positives; 0 elsewhere

            # get feature
            zf = self.backbone(template)
            xf = self.backbone(search)

        # with the GRU, the template is accumulated over the first t frames,
        # while the search region is extracted from the last frame only
        else:

            zfs = [None] * self.grus.seq_in_len  # features of the multi-frame templates z
            for i in range(self.grus.seq_in_len):
                # each data[i] contains 'template', 'search', 'label_cls',
                # 'label_loc', 'label_loc_weight', 't_bbox', 's_bbox', 'neg'
                zfs[i] = self.backbone(data[i]["template"].cuda())

            zfs = torch.stack(zfs, dim=1)  # stack the inputs into [n, t, c, h, w]
            zf = self.grus(zfs).squeeze()  # GRU output [n, 1, c, h, w], squeezed to [n, c, h, w]

            # the search region is simply the frame that follows the template sequence
            xf = self.backbone(data[self.grus.seq_in_len]["search"].cuda())

# ---------------------------- parallelized feature extraction ----------------------------

            # batch, _, _, _ = data[0]["template"].shape
            # zfs = [None] * self.grus.seq_in_len  # features of the multi-frame templates z
            # for i in range(self.grus.seq_in_len):
            #     # each data[i] contains 'template', 'search', 'label_cls',
            #     # 'label_loc', 'label_loc_weight', 't_bbox', 's_bbox', 'neg'
            #     zfs[i] = data[i]["template"]
            #
            # # run the t consecutive frames through the backbone in parallel along
            # # the batch dimension to speed up the computation
            # zfs = self.backbone(torch.cat(zfs, dim=0).cuda())
            # zfs = zfs.reshape(self.grus.seq_in_len, batch, self.grus.input_channels,
            #                   self.grus.input_height, self.grus.input_width)
            # zfs = zfs.permute(1, 0, 2, 3, 4).contiguous()
            #
            # zf = self.grus(zfs).squeeze()  # GRU output [n, 1, c, h, w], squeezed to [n, c, h, w]
            #
            # # the search region is simply the frame that follows the template sequence
            # xf = self.backbone(data[self.grus.seq_in_len]["search"].cuda())

# ------------------------------------------------------------------------------------

            # labels are taken from the same frame as the search region
            label_cls = data[self.grus.seq_in_len]['label_cls'].cuda()                # cls: anchor is positive (1), negative (0), or ignored (-1)
            label_loc = data[self.grus.seq_in_len]['label_loc'].cuda()                # delta: encoded offset of the positive box relative to the anchor
            label_loc_weight = data[self.grus.seq_in_len]['label_loc_weight'].cuda()  # weights of the anchors matched to positives; 0 elsewhere



        if cfg.MASK.MASK:               # siamese mask
            zf = zf[-1]
            self.xf_refine = xf[:-1]
            xf = xf[-1]
        if cfg.ADJUST.ADJUST:           # siamese rpn++
            zf = self.neck(zf)
            xf = self.neck(xf)
        cls, loc = self.rpn_head(zf, xf)        # RPN computations

        # get loss
        # softmax followed by log maps the [0, 1] probabilities to (-inf, 0], and the
        # result feeds an NLL loss; softmax + log + NLLLoss is equivalent to
        # CrossEntropyLoss. It is decomposed here so the loss can be computed only
        # over the anchors selected by the label mask.
        cls_log = self.log_softmax(cls)
        cls_loss = select_cross_entropy_loss(cls_log, label_cls)
        loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)



        outputs = {}
        outputs['total_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
            cfg.TRAIN.LOC_WEIGHT * loc_loss
        outputs['cls_loss'] = cls_loss * cfg.GRU.NONE_GRU_LR_COFF
        outputs['loc_loss'] = loc_loss * cfg.GRU.NONE_GRU_LR_COFF

        # optionally compute a loss on the GRU-predicted template features
        if cfg.GRU.FeatLoss:
            zf_gt = self.backbone(data[self.grus.seq_in_len]["template"].cuda())
            feat_loss = weight_feat_loss(zf, zf_gt, data[self.grus.seq_in_len]["t_bbox"])
            outputs['total_loss'] += cfg.TRAIN.FEAT_WEIGHT * feat_loss
            outputs['feat_loss'] = feat_loss

            # passed out for monitoring in tensorboard
            outputs['zf_gt'] = zf_gt
            outputs['zf'] = zf
            outputs['zfs'] = zfs




        if cfg.MASK.MASK:
            # TODO
            mask, self.mask_corr_feature = self.mask_head(zf, xf)
            mask_loss = None
            outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
            outputs['mask_loss'] = mask_loss * cfg.GRU.NONE_GRU_LR_COFF


        if data[0]['iter'] % cfg.TRAIN.ShowPeriod == 0:
            # detach from the graph; only the values are needed for display
            locd = loc.detach()
            clsd = cls.detach()
            outputs['box_img'] = self.show_result(clsd, locd, data[self.grus.seq_in_len]["search"])

        return outputs
Example #10
    def forward(self, data):
        """ only used in training
        """
        if cfg.TRACK.TYPE == 'SiamCARTracker':
            template = data['template'].cuda()
            search = data['search'].cuda()
            label_cls = data['label_cls'].cuda()
            label_loc = data['bbox'].cuda()

            # get feature
            zf = self.backbone(template)
            xf = self.backbone(search)
            if cfg.ADJUST.ADJUST:
                zf = self.neck(zf)
                xf = self.neck(xf)

            features = self.xcorr_depthwise(xf[0], zf[0])
            for i in range(len(xf) - 1):
                features_new = self.xcorr_depthwise(xf[i + 1], zf[i + 1])
                features = torch.cat([features, features_new], 1)
            features = self.down(features)

            cls, loc, cen = self.car_head(features)
            locations = compute_locations(cls, cfg.TRACK.STRIDE)
            cls = self.log_softmax(cls)
            cls_loss, loc_loss, cen_loss = self.loss_evaluator(
                locations, cls, loc, cen, label_cls, label_loc)

            # get loss
            outputs = {}
            outputs['total_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
                cfg.TRAIN.LOC_WEIGHT * loc_loss + cfg.TRAIN.CEN_WEIGHT * cen_loss
            outputs['cls_loss'] = cls_loss
            outputs['loc_loss'] = loc_loss
            outputs['cen_loss'] = cen_loss
            return outputs
        else:
            template = data['template'].cuda()
            search = data['search'].cuda()
            label_cls = data['label_cls'].cuda()
            label_loc = data['label_loc'].cuda()
            label_loc_weight = data['label_loc_weight'].cuda()

            # get feature
            zf = self.backbone(template)
            xf = self.backbone(search)
            if cfg.MASK.MASK:
                zf = zf[-1]
                self.xf_refine = xf[:-1]
                xf = xf[-1]
            if cfg.ADJUST.ADJUST:
                zf = self.neck(zf)
                xf = self.neck(xf)
            cls, loc = self.rpn_head(zf, xf)

            # get loss
            cls = self.log_softmax(cls)
            cls_loss = select_cross_entropy_loss(cls, label_cls)
            loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)

            outputs = {}
            outputs['total_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
                cfg.TRAIN.LOC_WEIGHT * loc_loss
            outputs['cls_loss'] = cls_loss
            outputs['loc_loss'] = loc_loss

            if cfg.MASK.MASK:
                # TODO
                mask, self.mask_corr_feature = self.mask_head(zf, xf)
                mask_loss = None
                outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
                outputs['mask_loss'] = mask_loss
            return outputs
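
xcorr_depthwise, used in the SiamCAR branch above, correlates template and search features channel by channel; pysot-style code implements it as a grouped convolution with one group per (batch, channel) pair. A sketch along those lines:

import torch.nn.functional as F

def xcorr_depthwise(x, kernel):
    # x: [b, c, H, W] search features; kernel: [b, c, h, w] template features
    batch = kernel.size(0)
    channel = kernel.size(1)
    x = x.view(1, batch * channel, x.size(2), x.size(3))
    kernel = kernel.view(batch * channel, 1, kernel.size(2), kernel.size(3))
    out = F.conv2d(x, kernel, groups=batch * channel)
    return out.view(batch, channel, out.size(2), out.size(3))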
Example #11
    def forward(self, data):
        """ only used in training
        """
        template = data['template'].cuda()
        search = data['search'].cuda()
        label_cls = data['label_cls'].cuda()
        label_loc = data['label_loc'].cuda()
        label_loc_weight = data['label_loc_weight'].cuda()
        bbox = data['bbox']

        # get feature
        zf = self.backbone(template)
        xf = self.backbone(search)
        if cfg.MASK.MASK:
            zf = zf[-1]
            self.xf_refine = xf[:-1]
            xf = xf[-1]
        if cfg.ADJUST.ADJUST:
            zf = self.neck(zf)
            xf = self.neck(xf)
        if cfg.LATENT:
            cls_features, loc_features = self.rpn_head(zf, xf)
            if cfg.LATENTS.NEW_LABEL:
                label_cls = self.get_new_label_cls(cls_features, loc_features,
                                                   label_cls)

            kl = self.rpn_head.update_weights(cls_features, loc_features,
                                              label_cls)
            cls, loc = self.rpn_head.get_cls_loc(cls_features, loc_features)

        else:
            cls, loc = self.rpn_head(zf, xf)
            # keep the features dict below from failing when cfg.LATENT is off
            cls_features = loc_features = None

        # get loss
        cls = self.log_softmax(cls)
        cls_loss = select_cross_entropy_loss(cls, label_cls)
        loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)

        features = {}
        features['cls'] = cls_features
        features['loc'] = loc_features

        outputs = {}

        outputs['inner_loop_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
            cfg.TRAIN.LOC_WEIGHT * loc_loss  # + 0.001*kl
        outputs['cls_loss'] = cls_loss
        outputs['loc_loss'] = loc_loss

        if cfg.LATENTS:
            outputs['out_loop_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
                                    cfg.TRAIN.LOC_WEIGHT * loc_loss

        if cfg.MASK.MASK:
            # TODO: mask_loss is None and 'total_loss' is never set in this
            # method (only 'inner_loop_loss'), so this branch would fail as written
            mask, self.mask_corr_feature = self.mask_head(zf, xf)
            mask_loss = None
            outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
            outputs['mask_loss'] = mask_loss

        return outputs, features