Ejemplo n.º 1
0
    def __init__(self):
        """Create the anchor generator and build the anchors for the training grid.

        All settings come from the global ``cfg``: stride, aspect ratios and
        scales define the generator; the anchors are then generated around the
        centre of the search image (``SEARCH_SIZE // 2``) for an output map of
        side ``OUTPUT_SIZE``.
        """
        stride = cfg.ANCHOR.STRIDE
        ratios = cfg.ANCHOR.RATIOS
        scales = cfg.ANCHOR.SCALES
        self.anchors = Anchors(stride, ratios, scales)

        search_center = cfg.TRAIN.SEARCH_SIZE // 2
        output_size = cfg.TRAIN.OUTPUT_SIZE
        self.anchors.generate_all_anchors(im_c=search_center, size=output_size)
Ejemplo n.º 2
0
    def __init__(self):
        """Build the tracker sub-networks and the buffers used for visualisation.

        The backbone and the RPN head are always created. The GRU template
        module, the adjust (neck) layer, the mask head and the refine head are
        created only when the matching switch in the global ``cfg`` is on.
        A Hanning window and the anchor grid are also prepared so that
        predictions can be decoded and drawn for TensorBoard.

        Raises:
            ValueError: if the GRU module is enabled and its ``seq_out_len``
                is not 1.
        """
        super(ModelBuilder, self).__init__()

        # build backbone
        self.backbone = get_backbone(cfg.BACKBONE.TYPE,
                                     **cfg.BACKBONE.KWARGS)

        # optionally add the GRU module
        if cfg.GRU.USE_GRU:
            self.grus = GRU_Model(cfg.GRU.SEQ_IN, cfg.GRU.SEQ_OUT)
            if self.grus.seq_out_len != 1:
                # One message string: passing two strings to ValueError would
                # make the error text render as a tuple.
                raise ValueError(
                    "For tracking task GRU_Model.seq_out_len must be set as 1\n"
                    "please check the value of __C.GRU.SEQ_OUT in config.py file"
                )
            # per-frame template features, used by the forward pass at test time
            self.zfs = [None] * self.grus.seq_in_len

        # build adjust layer (only present in SiamRPN++)
        if cfg.ADJUST.ADJUST:
            self.neck = get_neck(cfg.ADJUST.TYPE,
                                 **cfg.ADJUST.KWARGS)

        # build rpn head
        self.rpn_head = get_rpn_head(cfg.RPN.TYPE,
                                     **cfg.RPN.KWARGS)

        # build mask head (only present in SiamMask)
        if cfg.MASK.MASK:
            self.mask_head = get_mask_head(cfg.MASK.TYPE,
                                           **cfg.MASK.KWARGS)

            if cfg.REFINE.REFINE:
                self.refine_head = get_refine_head(cfg.REFINE.TYPE)

        # --------------------- TensorBoard monitoring ---------------------
        # Hanning window, prepared in advance for displaying predictions.
        hanning = np.hanning(cfg.TRAIN.OUTPUT_SIZE)  # same side length as the output map
        window = np.outer(hanning, hanning)  # outer product of the 1-D window gives the 2-D window

        self.anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
        # flattened window repeated once per anchor: anchor_num * size * size values
        self.window = torch.from_numpy(
            np.tile(window.flatten(), self.anchor_num)).cuda().float()

        # Anchor generator for a single location (one anchor per ratio/scale pair).
        self.anchors = Anchors(cfg.ANCHOR.STRIDE,
                               cfg.ANCHOR.RATIOS,
                               cfg.ANCHOR.SCALES)
        # Generates two anchor sets: index 0 is corner form (x1, y1, x2, y2),
        # index 1 is centre form (cx, cy, w, h).
        self.anchors.generate_all_anchors(im_c=cfg.TRAIN.SEARCH_SIZE // 2,
                                          size=cfg.TRAIN.OUTPUT_SIZE)
        # Box decoding needs (cx, cy, w, h), so keep the centre-form set
        # (index 1); the corner-form set (index 0) would decode to wrong boxes.
        self.anchors_tensor = torch.from_numpy(self.anchors.all_anchors[1]).cuda()
Ejemplo n.º 3
0
    def generate_anchor(self, score_size):
        """Return centre-form anchors for every cell of a square score map.

        Args:
            score_size: side length of the score map.

        Returns:
            Array with ``anchor_num * score_size * score_size`` rows and four
            columns ``(cx, cy, w, h)``. Centres are laid out on a grid spaced
            by the anchor stride and starting at ``-(score_size // 2) * stride``.
        """
        generator = Anchors(cfg.ANCHOR.STRIDE, cfg.ANCHOR.RATIOS,
                            cfg.ANCHOR.SCALES)

        # Base anchors, one row per ratio/scale pair, in corner form.
        corners = generator.anchors
        left, top = corners[:, 0], corners[:, 1]
        right, bottom = corners[:, 2], corners[:, 3]

        # Convert to (cx, cy, w, h).
        centered = np.stack(
            [(left + right) * 0.5, (top + bottom) * 0.5,
             right - left, bottom - top],
            1)

        stride = generator.stride
        shape_count = centered.shape[0]

        # One copy of each base anchor per grid cell.
        grid = np.tile(centered, score_size * score_size).reshape((-1, 4))

        # Grid coordinates along one axis; both axes share the same values.
        origin = -(score_size // 2) * stride
        ticks_x = [origin + stride * step for step in range(score_size)]
        ticks_y = [origin + stride * step for step in range(score_size)]
        mesh_x, mesh_y = np.meshgrid(ticks_x, ticks_y)

        # Repeat the cell coordinates for every anchor shape.
        flat_x = np.tile(mesh_x.flatten(), (shape_count, 1)).flatten()
        flat_y = np.tile(mesh_y.flatten(), (shape_count, 1)).flatten()

        # Overwrite the zero centres with the real grid positions.
        grid[:, 0] = flat_x.astype(np.float32)
        grid[:, 1] = flat_y.astype(np.float32)
        return grid
 def test_get_training_data(self, data, bbox):
     """Decode ``data['label_loc']`` against the training anchors, then print and return ``bbox``.

     The decoded centre and size values are computed but never used
     afterwards. The only visible effects are printing ``bbox`` and returning
     it unchanged.
     """
     generator = Anchors(cfg.ANCHOR.STRIDE,
                         cfg.ANCHOR.RATIOS,
                         cfg.ANCHOR.SCALES)
     base_anchors = generator.anchors  # read here, not used below
     generator.generate_all_anchors(im_c=cfg.TRAIN.SEARCH_SIZE // 2,
                                    size=cfg.TRAIN.OUTPUT_SIZE)

     # Centre-form anchors: rows are cx, cy, w, h.
     centers = generator.all_anchors[1]
     cx = centers[0]
     cy = centers[1]
     w = centers[2]
     h = centers[3]

     # Regression labels as a NumPy array.
     offsets = data['label_loc'].data.cpu().detach().numpy()

     # Invert the anchor encoding (linear for the centre, exponential for the size).
     decoded_cx = offsets[0] * w + cx
     decoded_cy = offsets[1] * h + cy
     decoded_w = np.exp(offsets[2]) * w
     decoded_h = np.exp(offsets[3]) * h

     print(bbox)
     return bbox
Ejemplo n.º 5
0
 def generate_anchor(self, score_size):
     """Build the full set of centre-form anchors for a square score map.

     Args:
         score_size: side length of the score map.

     Returns:
         Array of shape ``(anchor_num * score_size * score_size, 4)`` whose
         columns are ``(cx, cy, w, h)``.
     """
     source = Anchors(cfg.ANCHOR.STRIDE,
                      cfg.ANCHOR.RATIOS,
                      cfg.ANCHOR.SCALES)
     boxes = source.anchors
     x_min, y_min = boxes[:, 0], boxes[:, 1]
     x_max, y_max = boxes[:, 2], boxes[:, 3]

     # Corner form -> centre form.
     columns = [
         (x_min + x_max) * 0.5,
         (y_min + y_max) * 0.5,
         x_max - x_min,
         y_max - y_min,
     ]
     per_cell = np.stack(columns, 1)

     step = source.stride
     kinds = per_cell.shape[0]
     cells = score_size * score_size
     result = np.tile(per_cell, cells).reshape((-1, 4))

     # Coordinates of the grid cells, centred around zero.
     start = - (score_size // 2) * step
     xs = [start + step * dx for dx in range(score_size)]
     ys = [start + step * dy for dy in range(score_size)]
     grid_x, grid_y = np.meshgrid(xs, ys)

     # Same cell coordinates for each anchor shape.
     all_x = np.tile(grid_x.flatten(), (kinds, 1)).flatten()
     all_y = np.tile(grid_y.flatten(), (kinds, 1)).flatten()

     result[:, 0] = all_x.astype(np.float32)
     result[:, 1] = all_y.astype(np.float32)
     return result
Ejemplo n.º 6
0
 def generate_anchor(self, score_size):
     """Lay the base anchors out over every position of the score map.

     Args:
         score_size: side length of the (square) score map.

     Returns:
         Array with one row per (anchor shape, grid cell) pair and columns
         ``(cx, cy, w, h)``; the row count is
         ``anchor_num * score_size * score_size``.
     """
     def spread(mesh, repeats):
         # Flatten a 2-D coordinate mesh and repeat it once per anchor shape.
         return np.tile(mesh.flatten(), (repeats, 1)).flatten()

     anchor_gen = Anchors(cfg.ANCHOR.STRIDE, cfg.ANCHOR.RATIOS,
                          cfg.ANCHOR.SCALES)

     # Base anchors in corner form: one row (x0, y0, x1, y1) per shape.
     base = anchor_gen.anchors
     x0, y0, x1, y1 = base[:, 0], base[:, 1], base[:, 2], base[:, 3]

     # Switch to (center_x, center_y, w, h).
     center_x = (x0 + x1) * 0.5
     center_y = (y0 + y1) * 0.5
     widths = x1 - x0
     heights = y1 - y0
     shapes = np.stack([center_x, center_y, widths, heights], 1)

     spacing = anchor_gen.stride
     n_shapes = shapes.shape[0]

     # Replicate every shape for every cell of the map.
     tiled = np.tile(shapes, score_size * score_size).reshape((-1, 4))

     first = -(score_size // 2) * spacing
     mesh_x, mesh_y = np.meshgrid(
         [first + spacing * col for col in range(score_size)],
         [first + spacing * row for row in range(score_size)])
     coord_x = spread(mesh_x, n_shapes)
     coord_y = spread(mesh_y, n_shapes)

     # Fill in the centre point of each anchor box.
     tiled[:, 0] = coord_x.astype(np.float32)
     tiled[:, 1] = coord_y.astype(np.float32)
     return tiled
Ejemplo n.º 7
0
class AnchorTarget:
    """Turn one ground-truth box into per-anchor training targets.

    The anchor grid is built once in the constructor from the global ``cfg``.
    Calling the instance labels every anchor as positive (1), negative (0) or
    ignored (-1) and computes the box-regression targets.
    """

    def __init__(self, ):
        self.anchors = Anchors(
            cfg.ANCHOR.STRIDE,
            cfg.ANCHOR.RATIOS,
            cfg.ANCHOR.SCALES,
        )
        self.anchors.generate_all_anchors(
            im_c=cfg.TRAIN.SEARCH_SIZE // 2,
            size=cfg.TRAIN.OUTPUT_SIZE,
        )

    def __call__(self, target, size, neg=False):
        """Compute classification and regression targets for ``target``.

        Args:
            target: ground-truth box, passed to ``corner2center`` and ``IoU``.
            size: side length of the output feature map.
            neg: when true the pair is treated as a negative pair: only
                negative labels near the target position are produced and
                the regression outputs stay zero.

        Returns:
            ``(cls, delta, delta_weight, overlap)`` where ``cls`` is int64 of
            shape ``(anchor_num, size, size)``, ``delta`` is float32 of shape
            ``(4, anchor_num, size, size)``, ``delta_weight`` is float32 of
            shape ``(anchor_num, size, size)`` and ``overlap`` is the IoU map
            (all zeros for a negative pair).
        """
        num_anchors = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
        grid_shape = (num_anchors, size, size)

        # -1 ignore, 0 negative, 1 positive for anchor classification
        cls = np.full(grid_shape, -1, dtype=np.int64)
        delta = np.zeros((4,) + grid_shape, dtype=np.float32)
        delta_weight = np.zeros(grid_shape, dtype=np.float32)

        def select(position, keep_num=16):
            # Keep at most keep_num randomly chosen index tuples.
            total = position[0].shape[0]
            if total <= keep_num:
                return position, total
            order = np.arange(total)
            np.random.shuffle(order)
            kept = order[:keep_num]
            return tuple(axis[kept] for axis in position), keep_num

        tcx, tcy, tw, th = corner2center(target)

        if neg:
            # Grid cell closest to the target centre.
            col = size // 2 + int(
                np.ceil((tcx - cfg.TRAIN.SEARCH_SIZE // 2) /
                        cfg.ANCHOR.STRIDE + 0.5))
            row = size // 2 + int(
                np.ceil((tcy - cfg.TRAIN.SEARCH_SIZE // 2) /
                        cfg.ANCHOR.STRIDE + 0.5))

            # 7x7 window around that cell, clipped to the map.
            left, right = max(0, col - 3), min(size, col + 4)
            top, bottom = max(0, row - 3), min(size, row + 4)
            cls[:, top:bottom, left:right] = 0

            # Sub-sample the candidates and reset everything else to ignore.
            chosen, _ = select(np.where(cls == 0), cfg.TRAIN.NEG_NUM)
            cls[:] = -1
            cls[chosen] = 0

            empty_overlap = np.zeros(grid_shape, dtype=np.float32)
            return cls, delta, delta_weight, empty_overlap

        # Corner-form anchors for IoU, centre-form anchors for regression.
        corner_anchors = self.anchors.all_anchors[0]
        center_anchors = self.anchors.all_anchors[1]
        x1, y1 = corner_anchors[0], corner_anchors[1]
        x2, y2 = corner_anchors[2], corner_anchors[3]
        cx, cy = center_anchors[0], center_anchors[1]
        w, h = center_anchors[2], center_anchors[3]

        # Regression targets for every anchor.
        delta[0] = (tcx - cx) / w
        delta[1] = (tcy - cy) / h
        delta[2] = np.log(tw / w)
        delta[3] = np.log(th / h)

        overlap = IoU([x1, y1, x2, y2], target)

        # Select positive and negative samples by IoU threshold.
        pos_idx, pos_count = select(
            np.where(overlap > cfg.TRAIN.THR_HIGH), cfg.TRAIN.POS_NUM)
        neg_idx, _ = select(
            np.where(overlap < cfg.TRAIN.THR_LOW),
            cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

        cls[pos_idx] = 1
        delta_weight[pos_idx] = 1. / (pos_count + 1e-6)

        cls[neg_idx] = 0
        return cls, delta, delta_weight, overlap
Ejemplo n.º 8
0
class AnchorTarget:
    """Label anchors and compute regression targets for one target box."""

    def __init__(self, ):
        # Anchor generator configured from the global cfg.
        anchor_gen = Anchors(cfg.ANCHOR.STRIDE, cfg.ANCHOR.RATIOS,
                             cfg.ANCHOR.SCALES)
        self.anchors = anchor_gen

        # Anchor grid centred on the search image.
        self.anchors.generate_all_anchors(im_c=cfg.TRAIN.SEARCH_SIZE // 2,
                                          size=cfg.TRAIN.OUTPUT_SIZE)

    @staticmethod
    def _subsample(position, keep_num=16):
        """Randomly keep at most ``keep_num`` entries of an index tuple.

        Returns the (possibly reduced) index tuple and the number kept.
        """
        available = position[0].shape[0]
        if available <= keep_num:
            return position, available
        shuffled = np.arange(available)
        np.random.shuffle(shuffled)
        shuffled = shuffled[:keep_num]
        return tuple(coords[shuffled] for coords in position), keep_num

    def __call__(self, target, size, neg=False):
        """Build ``(cls, delta, delta_weight, overlap)`` for ``target``.

        Args:
            target: ground-truth box, passed to ``corner2center`` and ``IoU``.
            size: side length of the output feature map.
            neg: true for a negative pair; then only negative labels around
                the target position are set and ``overlap`` is all zeros.

        Returns:
            ``cls`` (int64, -1 ignore / 0 negative / 1 positive),
            ``delta`` (float32 regression targets, leading axis of 4),
            ``delta_weight`` (float32, non-zero only on positives) and
            ``overlap`` (IoU of each anchor with the target).
        """
        anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)

        # -1 ignore, 0 negative, 1 positive
        cls = np.full((anchor_num, size, size), -1, dtype=np.int64)
        delta = np.zeros((4, anchor_num, size, size), dtype=np.float32)
        delta_weight = np.zeros((anchor_num, size, size), dtype=np.float32)

        # Target centre x, centre y, width and height.
        tcx, tcy, tw, th = corner2center(target)

        if not neg:
            # Anchors in corner form and in centre form.
            box_form = self.anchors.all_anchors[0]
            center_form = self.anchors.all_anchors[1]
            x1, y1, x2, y2 = box_form[0], box_form[1], box_form[2], box_form[3]
            cx, cy, w, h = (center_form[0], center_form[1],
                            center_form[2], center_form[3])

            # Offsets of the target relative to each anchor.
            delta[0] = (tcx - cx) / w
            delta[1] = (tcy - cy) / h
            delta[2] = np.log(tw / w)
            delta[3] = np.log(th / h)

            overlap = IoU([x1, y1, x2, y2], target)

            above = np.where(overlap > cfg.TRAIN.THR_HIGH)
            below = np.where(overlap < cfg.TRAIN.THR_LOW)

            above, kept_pos = self._subsample(above, cfg.TRAIN.POS_NUM)
            below, _ = self._subsample(
                below, cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

            cls[above] = 1
            delta_weight[above] = 1. / (kept_pos + 1e-6)

            cls[below] = 0
            return cls, delta, delta_weight, overlap

        # Negative pair: mark a window around the target position as negative.
        shift_x = int(
            np.ceil((tcx - cfg.TRAIN.SEARCH_SIZE // 2) /
                    cfg.ANCHOR.STRIDE + 0.5))
        shift_y = int(
            np.ceil((tcy - cfg.TRAIN.SEARCH_SIZE // 2) /
                    cfg.ANCHOR.STRIDE + 0.5))
        cell_x = size // 2 + shift_x
        cell_y = size // 2 + shift_y

        x_lo = max(0, cell_x - 3)
        x_hi = min(size, cell_x + 4)
        y_lo = max(0, cell_y - 3)
        y_hi = min(size, cell_y + 4)
        cls[:, y_lo:y_hi, x_lo:x_hi] = 0

        # Keep only a random subset of those negatives.
        sampled, _ = self._subsample(np.where(cls == 0), cfg.TRAIN.NEG_NUM)
        cls[:] = -1
        cls[sampled] = 0

        overlap = np.zeros((anchor_num, size, size), dtype=np.float32)
        return cls, delta, delta_weight, overlap
Ejemplo n.º 9
0
class ModelBuilder(nn.Module):
    """Siamese tracker assembled from config-selected parts.

    The backbone and RPN head are always present. A GRU module that fuses
    template features over several frames, an adjust (neck) layer, a mask
    head and a refine head are added when enabled in the global ``cfg``.
    """

    def __init__(self):
        """Build the sub-networks and the buffers used for visualisation.

        Raises:
            ValueError: if the GRU module is enabled and its ``seq_out_len``
                is not 1.
        """
        super(ModelBuilder, self).__init__()

        # build backbone
        self.backbone = get_backbone(cfg.BACKBONE.TYPE,
                                     **cfg.BACKBONE.KWARGS)

        # optionally add the GRU module
        if cfg.GRU.USE_GRU:
            self.grus = GRU_Model(cfg.GRU.SEQ_IN, cfg.GRU.SEQ_OUT)
            if self.grus.seq_out_len != 1:
                # One message string: passing two strings to ValueError would
                # make the error text render as a tuple.
                raise ValueError(
                    "For tracking task GRU_Model.seq_out_len must be set as 1\n"
                    "please check the value of __C.GRU.SEQ_OUT in config.py file"
                )
            # ring buffer of per-frame template features, used at test time
            self.zfs = [None] * self.grus.seq_in_len

        # build adjust layer (only present in SiamRPN++)
        if cfg.ADJUST.ADJUST:
            self.neck = get_neck(cfg.ADJUST.TYPE,
                                 **cfg.ADJUST.KWARGS)

        # build rpn head
        self.rpn_head = get_rpn_head(cfg.RPN.TYPE,
                                     **cfg.RPN.KWARGS)

        # build mask head (only present in SiamMask)
        if cfg.MASK.MASK:
            self.mask_head = get_mask_head(cfg.MASK.TYPE,
                                           **cfg.MASK.KWARGS)

            if cfg.REFINE.REFINE:
                self.refine_head = get_refine_head(cfg.REFINE.TYPE)

        # --------------------- TensorBoard monitoring ---------------------
        # Hanning window, prepared in advance for displaying predictions.
        hanning = np.hanning(cfg.TRAIN.OUTPUT_SIZE)  # same side length as the output map
        window = np.outer(hanning, hanning)  # outer product of the 1-D window gives the 2-D window

        self.anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
        # flattened window repeated once per anchor: anchor_num * size * size values
        self.window = torch.from_numpy(
            np.tile(window.flatten(), self.anchor_num)).cuda().float()

        # Anchor generator for a single location (one anchor per ratio/scale pair).
        self.anchors = Anchors(cfg.ANCHOR.STRIDE,
                               cfg.ANCHOR.RATIOS,
                               cfg.ANCHOR.SCALES)
        # Generates two anchor sets: index 0 is corner form (x1, y1, x2, y2),
        # index 1 is centre form (cx, cy, w, h).
        self.anchors.generate_all_anchors(im_c=cfg.TRAIN.SEARCH_SIZE // 2,
                                          size=cfg.TRAIN.OUTPUT_SIZE)
        # show_result decodes boxes as loc * (w, h) + (cx, cy), so it needs the
        # centre-form set (index 1); the corner-form set would give wrong boxes.
        self.anchors_tensor = torch.from_numpy(self.anchors.all_anchors[1]).cuda()

    def template(self, z):
        """Run the template branch on ``z`` and cache the feature in ``self.zf``.

        During tracking the template branch and the search branch are run
        separately; this method only handles the template.
        """
        zf = self.backbone(z)
        if cfg.MASK.MASK:
            zf = zf[-1]
        if cfg.ADJUST.ADJUST:
            zf = self.neck(zf)
        self.zf = zf

    def gru_template(self, z, idx):
        """Update the template feature using the GRU over the latest frames.

        Args:
            z: template image of the current frame.
            idx: index of the current frame; used modulo ``seq_in_len`` to
                address the feature ring buffer ``self.zfs``.

        The fused (or, during warm-up, the raw) feature is stored in ``self.zf``.
        """
        buf_idx = idx % self.grus.seq_in_len  # slot of the current frame in the buffer
        self.zfs[buf_idx] = self.backbone(z)

        if idx < self.grus.seq_in_len - 1:
            # Warm-up: the buffer is not full yet, use the current feature only.
            zf = self.zfs[buf_idx]
        else:
            # Fuse the last seq_in_len features, oldest first.
            T = self.grus.seq_in_len
            gru_zfs = [None] * T
            for t in range(T):
                # (idx - t) % T is the buffer slot of the frame t steps back
                gru_zfs[T - 1 - t] = self.zfs[(idx - t) % T]

            gru_zfs = torch.stack(gru_zfs, dim=1)  # -> [n, t, c, h, w]
            zf = self.grus(gru_zfs).squeeze(dim=1)  # [n, 1, c, h, w] -> [n, c, h, w]

        if cfg.MASK.MASK:
            zf = zf[-1]
        if cfg.ADJUST.ADJUST:
            zf = self.neck(zf)
        self.zf = zf

    def track(self, x):
        """Run the search branch on ``x`` against the cached template feature.

        Returns:
            Dict with ``'cls'``, ``'loc'`` and ``'mask'`` (``None`` unless the
            mask head is enabled).
        """
        xf = self.backbone(x)
        if cfg.MASK.MASK:
            self.xf = xf[:-1]
            xf = xf[-1]
        if cfg.ADJUST.ADJUST:
            xf = self.neck(xf)
        cls, loc = self.rpn_head(self.zf, xf)
        if cfg.MASK.MASK:
            mask, self.mask_corr_feature = self.mask_head(self.zf, xf)
        return {
                'cls': cls,
                'loc': loc,
                'mask': mask if cfg.MASK.MASK else None
               }

    def mask_refine(self, pos):
        """Refine the mask at ``pos`` using the features cached by ``track``."""
        return self.refine_head(self.xf, self.mask_corr_feature, pos)

    def log_softmax(self, cls):
        """Return log-probabilities over the two classes for every anchor.

        Args:
            cls: tensor of shape ``(b, 2 * a, h, w)``.

        Returns:
            Tensor of shape ``(b, a, h, w, 2)``; the last axis holds the
            log-softmax over the two classes.
        """
        b, a2, h, w = cls.size()
        cls = cls.view(b, 2, a2//2, h, w)
        # move the class axis last so the softmax runs over it
        cls = cls.permute(0, 2, 3, 4, 1).contiguous()
        cls = F.log_softmax(cls, dim=4)
        return cls

    def show_result(self, cls, loc, search):
        """Draw the best-scoring predicted box of each sample on its search image.

        Args:
            cls: raw classification output of the RPN head.
            loc: raw regression output of the RPN head.
            search: batch of search images to draw on.

        Returns:
            Image grid produced by ``vutils.make_grid`` for at most
            ``cfg.TRAIN.MaxShowBatch`` samples.
        """
        b, _, h, w = cls.shape
        score = cls.reshape(b, 2, -1, h, w)
        score = F.softmax(score, dim=1)[:, 1, ...]  # channel 1 is the positive (target) class
        score = score.view(b, -1)  # one score per anchor: anchor_num * size * size
        score = (score * (1 - 0.4) + self.window.view(1, -1) * 0.4)  # Hanning-window weighting
        pos = torch.argmax(score, dim=1).view(-1)  # best anchor per sample
        index = torch.arange(pos.shape[0], device=pos.device)  # batch indices

        # self.anchors_tensor is centre form (cx, cy, w, h) with shape
        # [4, anchor_num, size, size].
        loc1d = loc.view(b, 4, -1, h, w)
        anchors = self.anchors_tensor
        batch = loc1d.shape[0]

        # Decode centre/size for every anchor, then pick the best anchor per sample.
        XYWH = torch.stack(
            [(loc1d[:, 0, ...] * anchors[2, ...] + anchors[0, ...]).view(batch, -1)[index, pos],
             (loc1d[:, 1, ...] * anchors[3, ...] + anchors[1, ...]).view(batch, -1)[index, pos],
             (torch.exp(loc1d[:, 2, ...]) * anchors[2, ...]).view(batch, -1)[index, pos],
             (torch.exp(loc1d[:, 3, ...]) * anchors[3, ...]).view(batch, -1)[index, pos]],
            dim=1)

        # centre form -> corner form
        x1 = XYWH[:, 0] - XYWH[:, 2] * 0.5 + 0.5
        y1 = XYWH[:, 1] - XYWH[:, 3] * 0.5 + 0.5
        x2 = XYWH[:, 0] + XYWH[:, 2] * 0.5 - 0.5
        y2 = XYWH[:, 1] + XYWH[:, 3] * 0.5 - 0.5

        bboxes = torch.stack([x1, y1, x2, y2], dim=1)
        bboxes = bboxes.reshape(b, 1, 4)
        b = min(b, cfg.TRAIN.MaxShowBatch)
        box_img = draw_rect(search[:b, ...], bboxes[:b, ...])
        box_img = vutils.make_grid(box_img, normalize=True, scale_each=True)

        return box_img

    def forward(self, data):
        """Compute the training losses (only used in training).

        Args:
            data: without the GRU, one sample dict with keys ``'template'``,
                ``'search'``, ``'label_cls'``, ``'label_loc'`` and
                ``'label_loc_weight'``. With the GRU, an indexable sequence
                of such dicts: entries ``0 .. seq_in_len - 1`` supply the
                templates, entry ``seq_in_len`` supplies the search image
                and the labels, and entry 0 carries ``'iter'``.

        Returns:
            Dict with ``'total_loss'``, ``'cls_loss'``, ``'loc_loss'`` and,
            depending on the config and the iteration, ``'feat_loss'``,
            ``'zf_gt'``, ``'zf'``, ``'zfs'``, ``'mask_loss'`` and ``'box_img'``.
        """
        if not cfg.GRU.USE_GRU:
            # Without the GRU, template and search come from a single sample.
            sample = data
            # NOTE(review): assumes the single-sample dict also carries 'iter'
            # (the original indexed data[0], which cannot work on this dict) -
            # confirm against the data loader.
            step_info = data
            zf = self.backbone(sample['template'].cuda())
        else:
            # With the GRU, template features are accumulated over the first
            # seq_in_len frames and fused; the search image is the next frame.
            # (The per-frame backbone calls could be batched into one call.)
            seq_len = self.grus.seq_in_len
            zfs = [self.backbone(data[i]["template"].cuda())
                   for i in range(seq_len)]
            zfs = torch.stack(zfs, dim=1)  # -> [n, t, c, h, w]
            # Squeeze only the time axis; a bare squeeze() would also drop the
            # batch axis when the batch size is 1.
            zf = self.grus(zfs).squeeze(dim=1)  # [n, 1, c, h, w] -> [n, c, h, w]
            sample = data[seq_len]
            step_info = data[0]

        xf = self.backbone(sample['search'].cuda())

        # Labels come from the same sample as the search image.
        label_cls = sample['label_cls'].cuda()  # 1 positive, 0 negative, -1 ignore
        label_loc = sample['label_loc'].cuda()  # encoded offsets of the target w.r.t. each anchor
        label_loc_weight = sample['label_loc_weight'].cuda()  # non-zero only on positive anchors

        if cfg.MASK.MASK:  # SiamMask
            zf = zf[-1]
            self.xf_refine = xf[:-1]
            xf = xf[-1]
        if cfg.ADJUST.ADJUST:  # SiamRPN++
            zf = self.neck(zf)
            xf = self.neck(xf)
        cls, loc = self.rpn_head(zf, xf)

        # get loss
        # log-softmax followed by a masked NLL-style loss: equivalent to cross
        # entropy, but split so the loss can be restricted by the anchor labels.
        cls_log = self.log_softmax(cls)
        cls_loss = select_cross_entropy_loss(cls_log, label_cls)
        loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)

        outputs = {}
        outputs['total_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
            cfg.TRAIN.LOC_WEIGHT * loc_loss
        outputs['cls_loss'] = cls_loss*cfg.GRU.NONE_GRU_LR_COFF
        outputs['loc_loss'] = loc_loss*cfg.GRU.NONE_GRU_LR_COFF

        # Optional loss on the feature predicted by the GRU (needs GRU mode:
        # it reads self.grus and zfs).
        if cfg.GRU.FeatLoss:
            zf_gt = self.backbone(data[self.grus.seq_in_len]["template"].cuda())
            feat_loss = weight_feat_loss(zf, zf_gt, data[self.grus.seq_in_len]["t_bbox"])
            outputs['total_loss'] += cfg.TRAIN.FEAT_WEIGHT * feat_loss
            outputs['feat_loss'] = feat_loss

            # exported for TensorBoard monitoring
            outputs['zf_gt'] = zf_gt
            outputs['zf'] = zf
            outputs['zfs'] = zfs

        if cfg.MASK.MASK:
            # TODO: the mask loss is not implemented; mask_loss is a None
            # placeholder, so the arithmetic below fails if this branch runs.
            mask, self.mask_corr_feature = self.mask_head(zf, xf)
            mask_loss = None
            outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
            outputs['mask_loss'] = mask_loss*cfg.GRU.NONE_GRU_LR_COFF

        if step_info['iter'] % cfg.TRAIN.ShowPeriod == 0:
            # detach: only the values are needed for drawing, no gradients
            locd = loc.detach()
            clsd = cls.detach()
            outputs['box_img'] = self.show_result(clsd, locd, sample["search"])

        return outputs
Ejemplo n.º 10
0
class AnchorTarget:
    """Produce anchor labels and regression targets for a target bounding box."""

    def __init__(self,):
        self.anchors = Anchors(cfg.ANCHOR.STRIDE, cfg.ANCHOR.RATIOS,
                               cfg.ANCHOR.SCALES)

        half_search = cfg.TRAIN.SEARCH_SIZE//2
        self.anchors.generate_all_anchors(im_c=half_search,
                                          size=cfg.TRAIN.OUTPUT_SIZE)

    def __call__(self, target, size, neg=False):
        """Label every anchor against ``target``.

        Args:
            target: bounding box of the target.
            size: side length of the output feature map.
            neg: whether the sample is a negative pair.

        Returns:
            ``(cls, delta, delta_weight, overlap)``:
            ``cls`` holds -1 (ignore), 0 (negative) or 1 (positive);
            ``delta`` holds the four regression targets per anchor;
            ``delta_weight`` is ``1 / (pos_num + 1e-6)`` on the selected
            positives and 0 elsewhere; ``overlap`` is the IoU of every anchor
            with the target (all zeros when ``neg`` is true).
        """
        anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
        map_shape = (anchor_num, size, size)

        # -1 ignore, 0 negative, 1 positive
        cls = np.full(map_shape, -1, dtype=np.int64)
        delta = np.zeros((4, anchor_num, size, size), dtype=np.float32)
        delta_weight = np.zeros(map_shape, dtype=np.float32)

        def select(position, keep_num=16):
            # Pick keep_num of the given indices at random (all of them when
            # there are no more than keep_num).
            count = position[0].shape[0]
            if count > keep_num:
                picks = np.arange(count)
                np.random.shuffle(picks)
                picks = picks[:keep_num]
                return tuple(idx[picks] for idx in position), keep_num
            return position, count

        tcx, tcy, tw, th = corner2center(target)

        if neg:
            # Map cell nearest to the target centre.
            center_col = size // 2
            center_row = size // 2
            center_col += int(np.ceil((tcx - cfg.TRAIN.SEARCH_SIZE // 2) /
                                      cfg.ANCHOR.STRIDE + 0.5))
            center_row += int(np.ceil((tcy - cfg.TRAIN.SEARCH_SIZE // 2) /
                                      cfg.ANCHOR.STRIDE + 0.5))

            # Window of +-3 cells around it, clipped to the map borders.
            col_start = max(0, center_col - 3)
            col_stop = min(size, center_col + 4)
            row_start = max(0, center_row - 3)
            row_stop = min(size, center_row + 4)
            cls[:, row_start:row_stop, col_start:col_stop] = 0

            # Keep NEG_NUM of these negatives, ignore the rest.
            negatives, _ = select(np.where(cls == 0), cfg.TRAIN.NEG_NUM)
            cls[:] = -1
            cls[negatives] = 0

            return cls, delta, delta_weight, np.zeros(map_shape, dtype=np.float32)

        # all_anchors holds two sets: [0] corner form, [1] centre form.
        corners = self.anchors.all_anchors[0]
        centers = self.anchors.all_anchors[1]
        x1, y1, x2, y2 = corners[0], corners[1], corners[2], corners[3]
        cx, cy, w, h = centers[0], centers[1], centers[2], centers[3]

        # Vectorised over all anchors and map positions.
        delta[0] = (tcx - cx) / w
        delta[1] = (tcy - cy) / h
        delta[2] = np.log(tw / w)
        delta[3] = np.log(th / h)

        overlap = IoU([x1, y1, x2, y2], target)  # intersection over union

        high_iou = np.where(overlap > cfg.TRAIN.THR_HIGH)  # indices with IoU above THR_HIGH
        low_iou = np.where(overlap < cfg.TRAIN.THR_LOW)  # indices with IoU below THR_LOW

        # At most POS_NUM positives and TOTAL_NUM - POS_NUM negatives.
        positives, pos_num = select(high_iou, cfg.TRAIN.POS_NUM)
        negatives, _ = select(low_iou, cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

        cls[positives] = 1
        delta_weight[positives] = 1. / (pos_num + 1e-6)

        cls[negatives] = 0
        return cls, delta, delta_weight, overlap
Ejemplo n.º 11
0
class AnchorTarget:
    """Anchor labelling where the regression target is relative to the template box.

    Unlike anchor-relative encodings, ``delta`` here is computed from the
    template box and is therefore the same value at every anchor position.
    """

    def __init__(self, ):
        self.anchors = Anchors(cfg.ANCHOR.STRIDE,
                               cfg.ANCHOR.RATIOS,
                               cfg.ANCHOR.SCALES)

        search_center = cfg.TRAIN.SEARCH_SIZE / 2.0
        self.anchors.generate_all_anchors(im_c=search_center,
                                          size=cfg.TRAIN.OUTPUT_SIZE)

    def __call__(self, target, template, size, neg=False):
        """Compute ``(cls, delta, delta_weight, overlap)``.

        Args:
            target: target box, passed to ``corner2center`` and ``IoU``.
            template: template box, passed to ``corner2center``; the
                regression target is expressed relative to it.
            size: side length of the output feature map.
            neg: true for a negative pair; then only negative labels around
                the target position are set and ``overlap`` is all zeros.

        Returns:
            ``cls`` (int64; -1 ignore, 0 negative, 1 positive), ``delta``
            (float32, leading axis of 4), ``delta_weight`` (float32) and
            ``overlap``.
        """
        anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
        label_shape = (anchor_num, size, size)

        # -1 ignore, 0 negative, 1 positive
        cls = np.full(label_shape, -1, dtype=np.int64)
        delta = np.zeros((4,) + label_shape, dtype=np.float32)
        delta_weight = np.zeros(label_shape, dtype=np.float32)

        def select(position, keep_num=16):
            # Random subset of at most keep_num index tuples.
            n_found = position[0].shape[0]
            if n_found <= keep_num:
                return position, n_found
            perm = np.arange(n_found)
            np.random.shuffle(perm)
            perm = perm[:keep_num]
            return tuple(dim[perm] for dim in position), keep_num

        tcx, tcy, tw, th = corner2center(target)
        ref_cx, ref_cy, ref_w, ref_h = corner2center(template)

        # Regress from the template, not from the anchors.
        delta[0] = (tcx - ref_cx) / ref_w
        delta[1] = (tcy - ref_cy) / ref_h
        delta[2] = np.log(tw / ref_w)
        delta[3] = np.log(th / ref_h)

        if neg:
            # Map cell nearest to the target centre (rounded).
            col = int(
                np.around(size // 2 + (tcx - cfg.TRAIN.SEARCH_SIZE / 2.0) /
                          cfg.ANCHOR.STRIDE))
            row = int(
                np.around(size // 2 + (tcy - cfg.TRAIN.SEARCH_SIZE / 2.0) /
                          cfg.ANCHOR.STRIDE))

            # Clipped window of +-3 cells becomes the negative candidate area.
            col_lo, col_hi = max(0, col - 3), min(size, col + 4)
            row_lo, row_hi = max(0, row - 3), min(size, row + 4)
            cls[:, row_lo:row_hi, col_lo:col_hi] = 0

            picked, _ = select(np.where(cls == 0), cfg.TRAIN.NEG_NUM)
            cls[:] = -1
            cls[picked] = 0

            overlap = np.zeros(label_shape, dtype=np.float32)
            return cls, delta, delta_weight, overlap

        corner_set = self.anchors.all_anchors[0]
        x1, y1 = corner_set[0], corner_set[1]
        x2, y2 = corner_set[2], corner_set[3]

        overlap = IoU([x1, y1, x2, y2], target)
        best = np.max(overlap)

        # Positives: above the high threshold, or the best-overlapping anchor.
        is_pos = np.logical_or(overlap > cfg.TRAIN.THR_HIGH, overlap == best)
        # Negatives: below the low threshold and not the best-overlapping anchor.
        is_neg = np.logical_and(overlap < cfg.TRAIN.THR_LOW, overlap < best)

        pos_idx, pos_kept = select(np.where(is_pos), cfg.TRAIN.POS_NUM)
        neg_idx, _ = select(np.where(is_neg),
                            cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

        cls[pos_idx] = 1
        delta_weight[pos_idx] = 1. / (pos_kept + 1e-6)

        cls[neg_idx] = 0
        return cls, delta, delta_weight, overlap