def __init__(self):
    """Create the anchor generator and precompute anchors for the training grid.

    The anchors are generated for a grid of ``cfg.TRAIN.OUTPUT_SIZE`` cells
    per side, with ``cfg.TRAIN.SEARCH_SIZE // 2`` passed as the image centre.
    """
    stride = cfg.ANCHOR.STRIDE
    ratios = cfg.ANCHOR.RATIOS
    scales = cfg.ANCHOR.SCALES
    self.anchors = Anchors(stride, ratios, scales)

    image_center = cfg.TRAIN.SEARCH_SIZE // 2
    self.anchors.generate_all_anchors(im_c=image_center,
                                      size=cfg.TRAIN.OUTPUT_SIZE)
def __init__(self):
    """Assemble the tracker: backbone, optional GRU / neck / mask heads, RPN head.

    Also precomputes the Hanning window and the anchor tensor used only for
    the TensorBoard visualisation of predictions.

    Raises:
        ValueError: if the GRU module is enabled and its output sequence
            length is not 1.
    """
    super(ModelBuilder, self).__init__()

    # Backbone feature extractor.
    self.backbone = get_backbone(cfg.BACKBONE.TYPE, **cfg.BACKBONE.KWARGS)

    # Optional GRU module that fuses template features over several frames.
    if cfg.GRU.USE_GRU:
        self.grus = GRU_Model(cfg.GRU.SEQ_IN, cfg.GRU.SEQ_OUT)
        if self.grus.seq_out_len != 1:
            # A single message string: passing two positional strings made
            # the exception text render as a tuple.
            raise ValueError(
                "For tracking task GRU_Model.seq_out_len must be set as 1\n"
                "please check the value of __C.GRU.SEQ_OUT in config.py file")
        # Ring buffer of per-frame template features, used at test time.
        self.zfs = [None] * self.grus.seq_in_len

    # Adjust layer (only present in SiamRPN++ style configs).
    if cfg.ADJUST.ADJUST:
        self.neck = get_neck(cfg.ADJUST.TYPE, **cfg.ADJUST.KWARGS)

    # RPN head.
    self.rpn_head = get_rpn_head(cfg.RPN.TYPE, **cfg.RPN.KWARGS)

    # Mask head (only present in SiamMask style configs) and its refine head.
    if cfg.MASK.MASK:
        self.mask_head = get_mask_head(cfg.MASK.TYPE, **cfg.MASK.KWARGS)

        if cfg.REFINE.REFINE:
            self.refine_head = get_refine_head(cfg.REFINE.TYPE)

    # ---------------- TensorBoard monitoring helpers ----------------
    # 2-D Hanning window (outer product of two 1-D windows) with the same
    # side length as the output score map, tiled once per anchor and
    # flattened to anchor_num * size * size.
    hanning = np.hanning(cfg.TRAIN.OUTPUT_SIZE)
    window = np.outer(hanning, hanning)
    self.anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
    self.window = torch.from_numpy(
        np.tile(window.flatten(), self.anchor_num)).cuda().float()

    self.anchors = Anchors(cfg.ANCHOR.STRIDE,
                           cfg.ANCHOR.RATIOS,
                           cfg.ANCHOR.SCALES)
    # all_anchors holds two layouts: [0] corner (x1, y1, x2, y2) and
    # [1] centre (cx, cy, w, h).
    self.anchors.generate_all_anchors(im_c=cfg.TRAIN.SEARCH_SIZE // 2,
                                      size=cfg.TRAIN.OUTPUT_SIZE)
    # show_result decodes predictions as delta * w + cx, so it needs the
    # centre-format anchors ([1]); the corner-format set ([0]) was used
    # here before and produced wrong visualised boxes.
    self.anchors_tensor = torch.from_numpy(self.anchors.all_anchors[1]).cuda()
def generate_anchor(self, score_size):
    """Return centre-format anchors for every cell of a square score map.

    Args:
        score_size: side length of the score map, in cells.

    Returns:
        Array of shape ``(anchor_num * score_size * score_size, 4)`` whose
        columns are ``(cx, cy, w, h)``; the centres lie on a grid spaced by
        the anchor stride and centred on zero.
    """
    generator = Anchors(cfg.ANCHOR.STRIDE,
                        cfg.ANCHOR.RATIOS,
                        cfg.ANCHOR.SCALES)

    # Base anchors come as one corner box (x1, y1, x2, y2) per row.
    corners = generator.anchors
    left, top, right, bottom = (corners[:, col] for col in range(4))
    centered = np.stack([(left + right) * 0.5,
                         (top + bottom) * 0.5,
                         right - left,
                         bottom - top], 1)

    stride = generator.stride
    num_anchors = centered.shape[0]

    # Replicate every base anchor once per grid cell.
    cell_count = score_size * score_size
    grid = np.tile(centered, cell_count).reshape((-1, 4))

    # Grid coordinates, with the middle cell at the origin.
    origin = -(score_size // 2) * stride
    ticks_x = [origin + stride * step for step in range(score_size)]
    ticks_y = [origin + stride * step for step in range(score_size)]
    grid_x, grid_y = np.meshgrid(ticks_x, ticks_y)

    # Repeat the flattened grid for each anchor so it lines up with `grid`.
    grid_x = np.tile(grid_x.flatten(), (num_anchors, 1)).flatten()
    grid_y = np.tile(grid_y.flatten(), (num_anchors, 1)).flatten()

    grid[:, 0] = grid_x.astype(np.float32)
    grid[:, 1] = grid_y.astype(np.float32)
    return grid
def test_get_training_data(self, data, bbox):
    """Debug helper: decode ``data['label_loc']`` against the training anchors.

    The decoded box (``tcx``, ``tcy``, ``tw``, ``th``) is computed but not
    returned or compared; the method only prints and returns ``bbox``.
    NOTE(review): presumably the decoded values were meant to be checked
    against ``bbox`` -- confirm the intended assertion with the author.

    Args:
        data: mapping whose ``'label_loc'`` entry is a tensor of regression
            targets indexable as ``delta[0..3]``.
        bbox: value that is printed and returned unchanged.

    Returns:
        ``bbox`` unchanged.
    """
    anchors = Anchors(cfg.ANCHOR.STRIDE,
                      cfg.ANCHOR.RATIOS,
                      cfg.ANCHOR.SCALES)
    anchors.generate_all_anchors(im_c=cfg.TRAIN.SEARCH_SIZE // 2,
                                 size=cfg.TRAIN.OUTPUT_SIZE)

    # Second entry of all_anchors is unpacked as (cx, cy, w, h).
    anchor_center = anchors.all_anchors[1]
    cx, cy, w, h = (anchor_center[0], anchor_center[1],
                    anchor_center[2], anchor_center[3])

    delta = data['label_loc'].data.cpu().detach().numpy()

    # Inverse of the target encoding: offsets scaled by anchor size,
    # sizes stored as log-ratios.
    tcx = delta[0] * w + cx
    tcy = delta[1] * h + cy
    tw = np.exp(delta[2]) * w
    th = np.exp(delta[3]) * h

    print(bbox)
    return bbox
def generate_anchor(self, score_size):
    """Build the dense ``(cx, cy, w, h)`` anchor table for a score map.

    Args:
        score_size: number of cells along each side of the score map.

    Returns:
        Array with one row per (anchor, cell) pair and four columns
        ``(cx, cy, w, h)``. Rows are grouped by anchor, then by grid cell.
    """
    anchors = Anchors(cfg.ANCHOR.STRIDE, cfg.ANCHOR.RATIOS, cfg.ANCHOR.SCALES)
    base = anchors.anchors

    # Corner boxes -> centre / size representation.
    x1 = base[:, 0]
    y1 = base[:, 1]
    x2 = base[:, 2]
    y2 = base[:, 3]
    boxes = np.stack([(x1 + x2) * 0.5, (y1 + y2) * 0.5, x2 - x1, y2 - y1], 1)

    step = anchors.stride
    count = boxes.shape[0]

    # One copy of each base anchor per grid cell.
    table = np.tile(boxes, score_size * score_size).reshape((-1, 4))

    # Centre coordinates of the cells, symmetric around zero.
    start = -(score_size // 2) * step
    axis = [start + step * k for k in range(score_size)]
    mesh_x, mesh_y = np.meshgrid(axis, list(axis))

    flat_x = np.tile(mesh_x.flatten(), (count, 1)).flatten()
    flat_y = np.tile(mesh_y.flatten(), (count, 1)).flatten()

    table[:, 0], table[:, 1] = (flat_x.astype(np.float32),
                                flat_y.astype(np.float32))
    return table
def generate_anchor(self, score_size):
    """Tile the base anchors over a ``score_size`` x ``score_size`` grid.

    Args:
        score_size: side length of the score map (the original author
            notes 25 as the typical value).

    Returns:
        Array of shape ``(anchor_num * score_size * score_size, 4)`` in
        ``(cx, cy, w, h)`` form.
    """
    anchor_gen = Anchors(cfg.ANCHOR.STRIDE,
                         cfg.ANCHOR.RATIOS,
                         cfg.ANCHOR.SCALES)

    # Base anchors are corner boxes, one (x0, y0, x1, y1) row per anchor.
    corner = anchor_gen.anchors
    x_min, y_min = corner[:, 0], corner[:, 1]
    x_max, y_max = corner[:, 2], corner[:, 3]

    # Convert to (center_x, center_y, w, h).
    center_form = np.stack(
        [(x_min + x_max) * 0.5, (y_min + y_max) * 0.5,
         x_max - x_min, y_max - y_min],
        1)

    total_stride = anchor_gen.stride
    n_anchor = center_form.shape[0]

    # Repeating each row once per cell gives the same layout as tiling
    # along the last axis and reshaping to (-1, 4).
    result = np.repeat(center_form, score_size * score_size, axis=0)

    first = -(score_size // 2) * total_stride
    xs = [first + total_stride * i for i in range(score_size)]
    ys = [first + total_stride * j for j in range(score_size)]
    grid_x, grid_y = np.meshgrid(xs, ys)

    # Centre point for every anchor box, ordered anchor-major then row-major.
    centers_x = np.tile(grid_x.flatten(), (n_anchor, 1)).flatten()
    centers_y = np.tile(grid_y.flatten(), (n_anchor, 1)).flatten()
    result[:, 0] = centers_x.astype(np.float32)
    result[:, 1] = centers_y.astype(np.float32)
    return result
class AnchorTarget:
    """Produces per-anchor classification and regression training targets."""

    def __init__(self):
        """Precompute all anchors for the configured training output grid."""
        self.anchors = Anchors(cfg.ANCHOR.STRIDE,
                               cfg.ANCHOR.RATIOS,
                               cfg.ANCHOR.SCALES)
        self.anchors.generate_all_anchors(im_c=cfg.TRAIN.SEARCH_SIZE // 2,
                                          size=cfg.TRAIN.OUTPUT_SIZE)

    def __call__(self, target, size, neg=False):
        """Compute labels for one target box.

        Args:
            target: target box, converted with ``corner2center`` and passed
                to ``IoU`` (presumably corner format -- confirm with caller).
            size: side length of the output grid.
            neg: when true, build a negative-pair sample: only negatives
                around the target location are labelled and no regression
                weights are set.

        Returns:
            Tuple ``(cls, delta, delta_weight, overlap)``:
            ``cls`` is int64 ``(anchor_num, size, size)`` with -1 ignore,
            0 negative, 1 positive; ``delta`` is float32
            ``(4, anchor_num, size, size)``; ``delta_weight`` and ``overlap``
            are float32 ``(anchor_num, size, size)``.
        """
        num_anchors = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
        grid_shape = (num_anchors, size, size)

        # Start with every anchor ignored (-1).
        cls = np.full(grid_shape, -1, dtype=np.int64)
        delta = np.zeros((4,) + grid_shape, dtype=np.float32)
        delta_weight = np.zeros(grid_shape, dtype=np.float32)

        def select(position, keep_num=16):
            """Randomly keep at most ``keep_num`` of the indexed positions."""
            available = position[0].shape[0]
            if available <= keep_num:
                return position, available
            order = np.arange(available)
            np.random.shuffle(order)
            chosen = order[:keep_num]
            return tuple(axis[chosen] for axis in position), keep_num

        tcx, tcy, tw, th = corner2center(target)

        if neg:
            # Grid cell closest to the target centre.
            half_search = cfg.TRAIN.SEARCH_SIZE // 2
            col = size // 2 + int(
                np.ceil((tcx - half_search) / cfg.ANCHOR.STRIDE + 0.5))
            row = size // 2 + int(
                np.ceil((tcy - half_search) / cfg.ANCHOR.STRIDE + 0.5))

            # 7x7 window around that cell, clipped to the grid.
            left, right = max(0, col - 3), min(size, col + 4)
            top, bottom = max(0, row - 3), min(size, row + 4)
            cls[:, top:bottom, left:right] = 0

            # Keep a random subset of the window as negatives, ignore the rest.
            kept, _ = select(np.where(cls == 0), cfg.TRAIN.NEG_NUM)
            cls[:] = -1
            cls[kept] = 0

            no_overlap = np.zeros(grid_shape, dtype=np.float32)
            return cls, delta, delta_weight, no_overlap

        # Corner anchors and centre anchors.
        corner_anchors = self.anchors.all_anchors[0]
        center_anchors = self.anchors.all_anchors[1]
        x1, y1, x2, y2 = (corner_anchors[i] for i in range(4))
        cx, cy, w, h = (center_anchors[i] for i in range(4))

        # Regression targets for every anchor.
        delta[0] = (tcx - cx) / w
        delta[1] = (tcy - cy) / h
        delta[2] = np.log(tw / w)
        delta[3] = np.log(th / h)

        overlap = IoU([x1, y1, x2, y2], target)

        # Sample positives and negatives by IoU threshold.
        pos_idx, pos_num = select(np.where(overlap > cfg.TRAIN.THR_HIGH),
                                  cfg.TRAIN.POS_NUM)
        neg_idx, _ = select(np.where(overlap < cfg.TRAIN.THR_LOW),
                            cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

        cls[pos_idx] = 1
        # Normalise the regression weight by the number of positives.
        delta_weight[pos_idx] = 1. / (pos_num + 1e-6)
        cls[neg_idx] = 0
        return cls, delta, delta_weight, overlap
class AnchorTarget:
    """Generates anchor labels (class, box deltas, weights, IoU) for training."""

    def __init__(self):
        """Build the anchor set for the configured search size and output grid."""
        self.anchors = Anchors(cfg.ANCHOR.STRIDE,
                               cfg.ANCHOR.RATIOS,
                               cfg.ANCHOR.SCALES)
        self.anchors.generate_all_anchors(im_c=cfg.TRAIN.SEARCH_SIZE // 2,
                                          size=cfg.TRAIN.OUTPUT_SIZE)

    def __call__(self, target, size, neg=False):
        """Label every anchor with respect to ``target``.

        Args:
            target: target box handed to ``corner2center`` and ``IoU``.
            size: side length of the output grid.
            neg: true for a negative pair; then only a random subset of
                anchors near the target location is labelled negative.

        Returns:
            ``(cls, delta, delta_weight, overlap)`` where ``cls`` uses
            -1 = ignore, 0 = negative, 1 = positive.
        """
        anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)

        # -1 ignore, 0 negative, 1 positive
        cls = -1 * np.ones((anchor_num, size, size), dtype=np.int64)
        delta = np.zeros((4, anchor_num, size, size), dtype=np.float32)
        delta_weight = np.zeros((anchor_num, size, size), dtype=np.float32)

        def select(position, keep_num=16):
            """Return at most ``keep_num`` random entries of an index tuple."""
            num = position[0].shape[0]
            if num <= keep_num:
                return position, num
            slt = np.arange(num)
            np.random.shuffle(slt)
            slt = slt[:keep_num]
            return tuple(p[slt] for p in position), keep_num

        # Target centre and size, e.g. (125.47, 125.47, 71.39, 60.51) for one
        # sample observed by the original author.
        tcx, tcy, tw, th = corner2center(target)

        if neg:
            # Grid cell of the target centre.
            cx = size // 2
            cy = size // 2
            cx += int(np.ceil((tcx - cfg.TRAIN.SEARCH_SIZE // 2) /
                              cfg.ANCHOR.STRIDE + 0.5))
            cy += int(np.ceil((tcy - cfg.TRAIN.SEARCH_SIZE // 2) /
                              cfg.ANCHOR.STRIDE + 0.5))

            # 7x7 neighbourhood clipped to the grid.
            l = max(0, cx - 3)
            r = min(size, cx + 4)
            u = max(0, cy - 3)
            d = min(size, cy + 4)
            cls[:, u:d, l:r] = 0

            # Use a separate local name so the ``neg`` flag is not rebound.
            neg_index, _ = select(np.where(cls == 0), cfg.TRAIN.NEG_NUM)
            cls[:] = -1
            cls[neg_index] = 0

            overlap = np.zeros((anchor_num, size, size), dtype=np.float32)
            return cls, delta, delta_weight, overlap

        # Anchors were generated from the configured ratios and scales:
        # [0] is unpacked as corner boxes, [1] as centre boxes.
        anchor_box = self.anchors.all_anchors[0]
        anchor_center = self.anchors.all_anchors[1]
        x1, y1, x2, y2 = (anchor_box[0], anchor_box[1],
                          anchor_box[2], anchor_box[3])
        cx, cy, w, h = (anchor_center[0], anchor_center[1],
                        anchor_center[2], anchor_center[3])

        # tcx / tcy are the target centre; deltas are offsets relative to
        # each anchor, normalised by the anchor size.
        delta[0] = (tcx - cx) / w
        delta[1] = (tcy - cy) / h
        delta[2] = np.log(tw / w)
        delta[3] = np.log(th / h)

        overlap = IoU([x1, y1, x2, y2], target)

        pos_index = np.where(overlap > cfg.TRAIN.THR_HIGH)
        neg_index = np.where(overlap < cfg.TRAIN.THR_LOW)

        pos_index, pos_num = select(pos_index, cfg.TRAIN.POS_NUM)
        neg_index, _ = select(neg_index,
                              cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

        cls[pos_index] = 1
        delta_weight[pos_index] = 1. / (pos_num + 1e-6)
        cls[neg_index] = 0
        return cls, delta, delta_weight, overlap
class ModelBuilder(nn.Module):
    """Siamese RPN tracker with optional GRU template fusion, neck and mask head."""

    def __init__(self):
        """Assemble sub-modules and the helpers used for TensorBoard previews.

        Raises:
            ValueError: if the GRU module is enabled and its output sequence
                length is not 1.
        """
        super(ModelBuilder, self).__init__()

        # Backbone feature extractor.
        self.backbone = get_backbone(cfg.BACKBONE.TYPE, **cfg.BACKBONE.KWARGS)

        # Optional GRU module fusing template features over several frames.
        if cfg.GRU.USE_GRU:
            self.grus = GRU_Model(cfg.GRU.SEQ_IN, cfg.GRU.SEQ_OUT)
            if self.grus.seq_out_len != 1:
                # Single message string: two positional strings made the
                # exception text render as a tuple.
                raise ValueError(
                    "For tracking task GRU_Model.seq_out_len must be set as 1\n"
                    "please check the value of __C.GRU.SEQ_OUT in config.py file")
            # Ring buffer of template features for test-time forward passes.
            self.zfs = [None] * self.grus.seq_in_len

        # Adjust layer (SiamRPN++ only).
        if cfg.ADJUST.ADJUST:
            self.neck = get_neck(cfg.ADJUST.TYPE, **cfg.ADJUST.KWARGS)

        # RPN head.
        self.rpn_head = get_rpn_head(cfg.RPN.TYPE, **cfg.RPN.KWARGS)

        # Mask head (SiamMask only) and its refine head.
        if cfg.MASK.MASK:
            self.mask_head = get_mask_head(cfg.MASK.TYPE, **cfg.MASK.KWARGS)

            if cfg.REFINE.REFINE:
                self.refine_head = get_refine_head(cfg.REFINE.TYPE)

        # ---------------- TensorBoard monitoring helpers ----------------
        # 2-D Hanning window (outer product of two 1-D windows) matching the
        # output score map, tiled per anchor and flattened.
        hanning = np.hanning(cfg.TRAIN.OUTPUT_SIZE)
        window = np.outer(hanning, hanning)
        self.anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
        self.window = torch.from_numpy(
            np.tile(window.flatten(), self.anchor_num)).cuda().float()

        self.anchors = Anchors(cfg.ANCHOR.STRIDE,
                               cfg.ANCHOR.RATIOS,
                               cfg.ANCHOR.SCALES)
        # all_anchors holds two layouts: [0] corner (x1, y1, x2, y2) and
        # [1] centre (cx, cy, w, h).
        self.anchors.generate_all_anchors(im_c=cfg.TRAIN.SEARCH_SIZE // 2,
                                          size=cfg.TRAIN.OUTPUT_SIZE)
        # show_result decodes boxes as delta * w + cx, so it needs the
        # centre-format anchors; the corner-format set was used before.
        self.anchors_tensor = torch.from_numpy(
            self.anchors.all_anchors[1]).cuda()

    def template(self, z):
        """Compute and cache template features (no template update while tracking)."""
        zf = self.backbone(z)
        if cfg.MASK.MASK:
            zf = zf[-1]
        if cfg.ADJUST.ADJUST:
            zf = self.neck(zf)
        self.zf = zf

    def gru_template(self, z, idx):
        """Update the cached template using GRU fusion of recent frames.

        Args:
            z: template image batch for the current frame.
            idx: zero-based index of the current frame.

        Until ``seq_in_len`` frames have been seen, the current frame's
        features are used directly; afterwards the last ``seq_in_len``
        frames (oldest first) are fused by the GRU.
        """
        seq_len = self.grus.seq_in_len
        buf_idx = idx % seq_len  # slot of the current frame in the ring buffer
        self.zfs[buf_idx] = self.backbone(z)

        if idx < seq_len - 1:
            # Warm-up: the buffer is not full yet.
            zf = self.zfs[buf_idx]
        else:
            # Oldest frame first, current frame last.
            history = [self.zfs[(idx - t) % seq_len]
                       for t in range(seq_len - 1, -1, -1)]
            gru_zfs = torch.stack(history, dim=1)  # stacked along a new time axis at dim 1
            # Drop the length-1 output-sequence axis only.
            zf = self.grus(gru_zfs).squeeze(dim=1)

        if cfg.MASK.MASK:
            zf = zf[-1]
        if cfg.ADJUST.ADJUST:
            zf = self.neck(zf)
        self.zf = zf

    def track(self, x):
        """Run the search branch against the cached template features.

        Returns:
            Dict with keys ``'cls'``, ``'loc'`` and ``'mask'`` (``None`` when
            the mask head is disabled).
        """
        xf = self.backbone(x)
        if cfg.MASK.MASK:
            self.xf = xf[:-1]
            xf = xf[-1]
        if cfg.ADJUST.ADJUST:
            xf = self.neck(xf)
        cls, loc = self.rpn_head(self.zf, xf)
        if cfg.MASK.MASK:
            mask, self.mask_corr_feature = self.mask_head(self.zf, xf)
        return {
            'cls': cls,
            'loc': loc,
            'mask': mask if cfg.MASK.MASK else None
        }

    def mask_refine(self, pos):
        """Refine the mask at ``pos`` using features cached by ``track``."""
        return self.refine_head(self.xf, self.mask_corr_feature, pos)

    def log_softmax(self, cls):
        """Return log-softmax over the 2 classes, shaped (b, a, h, w, 2)."""
        b, a2, h, w = cls.size()
        cls = cls.view(b, 2, a2 // 2, h, w)
        # Move the class axis last so the softmax runs over it.
        cls = cls.permute(0, 2, 3, 4, 1).contiguous()
        cls = F.log_softmax(cls, dim=4)
        return cls

    def show_result(self, cls, loc, search):
        """Draw the top-scoring predicted box on each search image.

        Args:
            cls: raw classification output, (b, 2 * anchor_num, h, w).
            loc: raw regression output, viewed as (b, 4, anchor_num, h, w).
            search: search images passed to ``draw_rect``.

        Returns:
            Image grid produced by ``vutils.make_grid``.
        """
        b, _, h, w = cls.shape

        # Channel 1 of the class axis is the foreground probability.
        score = cls.reshape(b, 2, -1, h, w)
        score = F.softmax(score, dim=1)[:, 1, ...]
        score = score.view(b, -1)  # (b, anchor_num * h * w)
        # Blend with the Hanning window.
        score = (score * (1 - 0.4) + self.window.view(1, -1) * 0.4)
        pos = torch.argmax(score, dim=1).view(-1)
        index = torch.arange(pos.shape[0], device=pos.device)  # batch indices

        # Centre-format anchors, (4, anchor_num, size, size).
        anchors = self.anchors_tensor
        loc1d = loc.view(b, 4, -1, h, w)

        def best(values):
            """Pick, per batch item, the value at its top-scoring anchor."""
            return values.view(loc1d.shape[0], -1)[index, pos]

        XYWH = torch.stack(
            [best(loc1d[:, 0, ...] * anchors[2, ...] + anchors[0, ...]),
             best(loc1d[:, 1, ...] * anchors[3, ...] + anchors[1, ...]),
             best(torch.exp(loc1d[:, 2, ...]) * anchors[2, ...]),
             best(torch.exp(loc1d[:, 3, ...]) * anchors[3, ...])],
            dim=1)

        # Centre / size -> corner coordinates.
        x1 = XYWH[:, 0] - XYWH[:, 2] * 0.5 + 0.5
        y1 = XYWH[:, 1] - XYWH[:, 3] * 0.5 + 0.5
        x2 = XYWH[:, 0] + XYWH[:, 2] * 0.5 - 0.5
        y2 = XYWH[:, 1] + XYWH[:, 3] * 0.5 - 0.5
        bboxes = torch.stack([x1, y1, x2, y2], dim=1)
        bboxes = bboxes.reshape(b, 1, 4)

        b = min(b, cfg.TRAIN.MaxShowBatch)
        box_img = draw_rect(search[:b, ...], bboxes[:b, ...])
        box_img = vutils.make_grid(box_img, normalize=True, scale_each=True)
        return box_img

    def forward(self, data):
        """Training forward pass; returns a dict of losses and monitor tensors.

        Without the GRU, ``data`` is a single sample dict. With the GRU,
        ``data`` is indexable by frame: templates of frames
        ``0 .. seq_in_len - 1`` are fused, and the search image and labels
        come from frame ``seq_in_len``.

        Label entries: ``label_cls`` (1 positive, 0 negative, -1 ignore),
        ``label_loc`` (encoded box offsets relative to the anchors),
        ``label_loc_weight`` (non-zero only at positive anchors).
        """
        if not cfg.GRU.USE_GRU:
            # Single-frame template and search.
            sample = data
            # NOTE(review): assumes the sample dict carries 'iter' in this
            # mode; the previous code indexed data[0] here and could not
            # run without the GRU -- confirm against the data loader.
            iter_source = data
            zfs = None
            zf = self.backbone(data['template'].cuda())
            xf = self.backbone(data['search'].cuda())
        else:
            seq_len = self.grus.seq_in_len
            sample = data[seq_len]
            iter_source = data[0]
            # Template features of the seq_len frames, stacked on a time axis at dim 1.
            # NOTE: the per-frame backbone passes could be batched into a
            # single call to speed this up.
            zfs = torch.stack(
                [self.backbone(data[i]["template"].cuda())
                 for i in range(seq_len)],
                dim=1)
            # Squeeze only the length-1 sequence axis; a bare squeeze()
            # would also drop the batch axis when the batch size is 1.
            zf = self.grus(zfs).squeeze(dim=1)
            # The search image is the frame right after the template sequence.
            xf = self.backbone(sample["search"].cuda())

        # Labels come from the same frame as the search image.
        label_cls = sample['label_cls'].cuda()
        label_loc = sample['label_loc'].cuda()
        label_loc_weight = sample['label_loc_weight'].cuda()

        if cfg.MASK.MASK:  # SiamMask
            zf = zf[-1]
            self.xf_refine = xf[:-1]
            xf = xf[-1]
        if cfg.ADJUST.ADJUST:  # SiamRPN++
            zf = self.neck(zf)
            xf = self.neck(xf)
        cls, loc = self.rpn_head(zf, xf)

        # log-softmax is applied separately from the loss so the loss can be
        # restricted to the selected anchors.
        cls_log = self.log_softmax(cls)
        cls_loss = select_cross_entropy_loss(cls_log, label_cls)
        loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)

        outputs = {}
        outputs['total_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
            cfg.TRAIN.LOC_WEIGHT * loc_loss
        outputs['cls_loss'] = cls_loss * cfg.GRU.NONE_GRU_LR_COFF
        outputs['loc_loss'] = loc_loss * cfg.GRU.NONE_GRU_LR_COFF

        # Optional loss on the GRU-predicted template features; it needs the
        # GRU frame sequence, so it is skipped when the GRU is disabled.
        if cfg.GRU.USE_GRU and cfg.GRU.FeatLoss:
            # NOTE(review): zf may already have passed through the neck
            # while zf_gt has not -- confirm this is intended.
            zf_gt = self.backbone(sample["template"].cuda())
            feat_loss = weight_feat_loss(zf, zf_gt, sample["t_bbox"])
            outputs['total_loss'] += cfg.TRAIN.FEAT_WEIGHT * feat_loss
            outputs['feat_loss'] = feat_loss
            # Exported for TensorBoard monitoring.
            outputs['zf_gt'] = zf_gt
            outputs['zf'] = zf
            outputs['zfs'] = zfs

        if cfg.MASK.MASK:
            # TODO: mask loss is not implemented; mask_loss is None, so the
            # arithmetic below raises TypeError when the mask head is on.
            mask, self.mask_corr_feature = self.mask_head(zf, xf)
            mask_loss = None
            outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
            outputs['mask_loss'] = mask_loss * cfg.GRU.NONE_GRU_LR_COFF

        if iter_source['iter'] % cfg.TRAIN.ShowPeriod == 0:
            # Detach: only values are needed for the preview.
            locd = loc.detach()
            clsd = cls.detach()
            outputs['box_img'] = self.show_result(clsd, locd, sample["search"])

        return outputs
class AnchorTarget:
    """Assigns class labels and box-regression targets to every anchor."""

    def __init__(self):
        """Generate all anchors for the training output grid."""
        self.anchors = Anchors(cfg.ANCHOR.STRIDE,
                               cfg.ANCHOR.RATIOS,
                               cfg.ANCHOR.SCALES)
        self.anchors.generate_all_anchors(im_c=cfg.TRAIN.SEARCH_SIZE // 2,
                                          size=cfg.TRAIN.OUTPUT_SIZE)

    def __call__(self, target, size, neg=False):
        """Build labels for one sample.

        Args:
            target: bounding box of the target.
            size: side length of the output feature map.
            neg: true for a negative pair.

        Returns:
            ``(cls, delta, delta_weight, overlap)``; ``cls`` uses -1 ignore,
            0 negative, 1 positive.
        """
        n_anchors = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
        per_anchor = (n_anchors, size, size)

        # -1 ignore, 0 negative, 1 positive
        cls = -1 * np.ones(per_anchor, dtype=np.int64)
        delta = np.zeros((4, n_anchors, size, size), dtype=np.float32)
        delta_weight = np.zeros(per_anchor, dtype=np.float32)

        def select(position, keep_num=16):
            """Randomly keep ``keep_num`` of the index entries in ``position``."""
            total = position[0].shape[0]
            if total <= keep_num:
                return position, total
            shuffled = np.arange(total)
            np.random.shuffle(shuffled)
            keep = shuffled[:keep_num]
            return tuple(coords[keep] for coords in position), keep_num

        tcx, tcy, tw, th = corner2center(target)

        if neg:
            search_half = cfg.TRAIN.SEARCH_SIZE // 2
            stride = cfg.ANCHOR.STRIDE
            # Grid cell corresponding to the target centre.
            grid_x = size // 2
            grid_y = size // 2
            grid_x += int(np.ceil((tcx - search_half) / stride + 0.5))
            grid_y += int(np.ceil((tcy - search_half) / stride + 0.5))

            # Candidate negatives: 7x7 window around that cell, clipped.
            x_lo = max(0, grid_x - 3)
            x_hi = min(size, grid_x + 4)
            y_lo = max(0, grid_y - 3)
            y_hi = min(size, grid_y + 4)
            cls[:, y_lo:y_hi, x_lo:x_hi] = 0

            candidates = np.where(cls == 0)
            sampled, _ = select(candidates, cfg.TRAIN.NEG_NUM)
            cls[:] = -1
            cls[sampled] = 0

            overlap = np.zeros(per_anchor, dtype=np.float32)
            return cls, delta, delta_weight, overlap

        # all_anchors has two entries: [0] corner form, [1] centre form.
        boxes = self.anchors.all_anchors[0]
        centers = self.anchors.all_anchors[1]
        x1, y1, x2, y2 = boxes[0], boxes[1], boxes[2], boxes[3]
        cx, cy, w, h = centers[0], centers[1], centers[2], centers[3]

        # Vectorised over the whole (anchor_num, size, size) grid.
        delta[0] = (tcx - cx) / w
        delta[1] = (tcy - cy) / h
        delta[2] = np.log(tw / w)
        delta[3] = np.log(th / h)

        # Intersection-over-union of every anchor with the target.
        overlap = IoU([x1, y1, x2, y2], target)

        # Indices above THR_HIGH are positive candidates, below THR_LOW negative.
        above = np.where(overlap > cfg.TRAIN.THR_HIGH)
        below = np.where(overlap < cfg.TRAIN.THR_LOW)

        # At most POS_NUM positives and TOTAL_NUM - POS_NUM negatives.
        positives, pos_num = select(above, cfg.TRAIN.POS_NUM)
        negatives, _ = select(below, cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

        cls[positives] = 1
        delta_weight[positives] = 1. / (pos_num + 1e-6)
        cls[negatives] = 0
        return cls, delta, delta_weight, overlap
class AnchorTarget:
    """Anchor labelling where box deltas are regressed from the template box."""

    def __init__(self):
        """Generate all anchors around the (fractional) search-image centre."""
        self.anchors = Anchors(cfg.ANCHOR.STRIDE,
                               cfg.ANCHOR.RATIOS,
                               cfg.ANCHOR.SCALES)
        self.anchors.generate_all_anchors(im_c=cfg.TRAIN.SEARCH_SIZE / 2.0,
                                          size=cfg.TRAIN.OUTPUT_SIZE)

    def __call__(self, target, template, size, neg=False):
        """Build labels for one sample.

        Args:
            target: target box, used for IoU and as regression goal.
            template: reference box the deltas are measured from.
            size: side length of the output grid.
            neg: true for a negative pair.

        Returns:
            ``(cls, delta, delta_weight, overlap)``; ``cls`` uses -1 ignore,
            0 negative, 1 positive. ``delta`` holds the same four values at
            every anchor because it is computed from the two boxes only.
        """
        anchor_count = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
        label_shape = (anchor_count, size, size)

        # -1 ignore, 0 negative, 1 positive
        cls = np.full(label_shape, -1, dtype=np.int64)
        delta = np.zeros((4,) + label_shape, dtype=np.float32)
        delta_weight = np.zeros(label_shape, dtype=np.float32)

        def select(position, keep_num=16):
            """Randomly subsample an index tuple down to ``keep_num`` entries."""
            count = position[0].shape[0]
            if count <= keep_num:
                return position, count
            perm = np.arange(count)
            np.random.shuffle(perm)
            perm = perm[:keep_num]
            return tuple(dim[perm] for dim in position), keep_num

        tcx, tcy, tw, th = corner2center(target)
        ref_cx, ref_cy, ref_w, ref_h = corner2center(template)

        # Regress from the template box rather than from each anchor.
        delta[0] = (tcx - ref_cx) / ref_w
        delta[1] = (tcy - ref_cy) / ref_h
        delta[2] = np.log(tw / ref_w)
        delta[3] = np.log(th / ref_h)

        if neg:
            # Grid cell nearest to the target centre.
            center = cfg.TRAIN.SEARCH_SIZE / 2.0
            col = int(np.around(size // 2 + (tcx - center) / cfg.ANCHOR.STRIDE))
            row = int(np.around(size // 2 + (tcy - center) / cfg.ANCHOR.STRIDE))

            # 7x7 window around it, clipped to the grid.
            col_lo, col_hi = max(0, col - 3), min(size, col + 4)
            row_lo, row_hi = max(0, row - 3), min(size, row + 4)
            cls[:, row_lo:row_hi, col_lo:col_hi] = 0

            chosen, _ = select(np.where(cls == 0), cfg.TRAIN.NEG_NUM)
            cls[:] = -1
            cls[chosen] = 0

            overlap = np.zeros(label_shape, dtype=np.float32)
            return cls, delta, delta_weight, overlap

        corner = self.anchors.all_anchors[0]
        x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3]

        overlap = IoU([x1, y1, x2, y2], target)
        best = np.max(overlap)

        # Positives: above the high threshold, or the best-matching anchor(s).
        pos_mask = np.logical_or(overlap > cfg.TRAIN.THR_HIGH, overlap == best)
        # Negatives: below the low threshold and not the best match.
        neg_mask = np.logical_and(overlap < cfg.TRAIN.THR_LOW, overlap < best)

        pos_idx, pos_num = select(np.where(pos_mask), cfg.TRAIN.POS_NUM)
        neg_idx, _ = select(np.where(neg_mask),
                            cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

        cls[pos_idx] = 1
        delta_weight[pos_idx] = 1. / (pos_num + 1e-6)
        cls[neg_idx] = 0
        return cls, delta, delta_weight, overlap