def __call__(self, image, bbox, size, gray=False):
    """Run the augmentation pipeline on one cropped frame.

    Order of operations: optional gray augmentation, shift/scale crop,
    then colour, blur and flip, each gated by its own probability
    (``self.color``, ``self.blur``, ``self.flip``).

    :param image: cropped frame as an array; per the original notes it is
        511x511 with the target aligned to the image centre
    :param bbox: context-padded target box, expressed in the coordinate
        frame of ``image``
    :param size: side length of the patch to produce; per the original
        notes 127 for the template and 255 for the search region
    :param gray: when truthy, apply ``_gray_aug`` before anything else
    :return: tuple ``(image, bbox)`` after augmentation
    """
    dim0, dim1 = image.shape[0], image.shape[1]
    # Crop window positioned at the middle of the frame.
    # NOTE(review): shape[0] // 2 is passed as the first Center field and
    # shape[1] // 2 as the second; this only matters for non-square
    # frames -- confirm against the Center field order.
    window = Center(dim0 // 2, dim1 // 2, size - 1, size - 1)
    crop_bbox = center2corner(window)

    # Gray augmentation happens on the full frame, before cropping.
    if gray:
        image = self._gray_aug(image)

    # The actual crop (plus random shift/scale) is done here.
    image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size)

    # Colour augmentation.
    if np.random.random() < self.color:
        image = self._color_aug(image)

    # Blur augmentation receives the target/patch area ratio; per the
    # original notes this is used to bound the blur kernel so the target
    # stays visible.
    _cx, _cy, box_w, box_h = corner2center(bbox)
    area_ratio = (box_w * box_h * 1.0) / (size * size)
    if np.random.random() < self.blur:
        image = self._blur_aug(image, area_ratio)

    # Flip augmentation; no random draw is made when self.flip is falsy.
    if self.flip and np.random.random() < self.flip:
        image, bbox = self._flip_aug(image, bbox)

    return image, bbox
def _convert_bbox(self, delta, point):
    """Turn per-location side distances into centre-format boxes.

    :param delta: 4-D torch tensor; it is permuted with ``(1, 2, 3, 0)``
        and flattened to 4 rows (presumably laid out as
        (batch, 4, H, W) -- TODO confirm)
    :param point: 2-D array whose columns 0 and 1 are the x and y of each
        location
    :return: numpy array with 4 rows holding the output of
        ``corner2center`` (cx, cy, w, h) for every location
    """
    flat = delta.permute(1, 2, 3, 0).contiguous().view(4, -1)
    boxes = flat.detach().cpu().numpy()

    px = point[:, 0]
    py = point[:, 1]

    # Rows are rewritten in place so results keep the array's own dtype.
    boxes[0] = px - boxes[0]
    boxes[1] = py - boxes[1]
    boxes[2] = px + boxes[2]
    boxes[3] = py + boxes[3]

    # Corner form -> centre form, stored back into the same rows.
    cx, cy, w, h = corner2center(boxes)
    boxes[0], boxes[1], boxes[2], boxes[3] = cx, cy, w, h
    return boxes
def __call__(self, target, shape, neg=False):
    """Build the classification label and normalised box target.

    :param target: target box in corner form, accepted by
        ``corner2center``
    :param shape: ``(h, w)`` pair used to normalise the box
    :param neg: when truthy the sample is a negative pair
    :return: ``(cls, delta)`` where ``cls`` is a float64 array of shape
        (1,) holding 1 (positive) or 0 (negative), and ``delta`` is a
        float32 array ``[cx / w, cy / h, tw / w, th / h]`` (all zeros
        for negatives)
    """
    # np.float was removed from NumPy (1.24); np.float64 is the same
    # dtype the old alias resolved to.
    if neg:
        # Negative pairs carry no regression target; ``shape`` and
        # ``target`` are not inspected on this path.
        return np.array([0], dtype=np.float64), np.zeros((4), dtype=np.float32)

    h, w = shape
    tcx, tcy, tw, th = corner2center(target)

    cls = np.array([1], dtype=np.float64)
    # Centre and size, each normalised by the matching image dimension.
    delta = np.array([tcx / w, tcy / h, tw / w, th / h], dtype=np.float32)
    return cls, delta
def _shift_scale_aug(self, image, bbox, crop_bbox, size):
    """Randomly scale and shift the crop window, then crop the image.

    The ground-truth box is re-expressed relative to the adjusted crop
    window so that it stays consistent with the returned patch.

    :param image: source frame (array); only its first two dimensions
        are read here
    :param bbox: context-padded ground-truth box in the coordinate frame
        of ``image``
    :param crop_bbox: corner-form window to crop (per the original notes,
        127x127 or 255x255)
    :param size: side length expected for the cropped output
    :return: ``(image, bbox)`` -- the patch produced by ``_crop_roi`` and
        the ground-truth box moved into the patch coordinate frame
    """
    img_h, img_w = image.shape[:2]

    # Resize the crop window by independent random factors per axis.
    window = corner2center(crop_bbox)
    if self.scale:
        scale_x = 1.0 + Augmentation.random() * self.scale
        scale_y = 1.0 + Augmentation.random() * self.scale
        win_h, win_w = window.h, window.w
        # Cap the factors so the window never grows past the image.
        scale_x = min(scale_x, float(img_w) / win_w)
        scale_y = min(scale_y, float(img_h) / win_h)
        window = Center(window.x, window.y,
                        window.w * scale_x, window.h * scale_y)
    crop_bbox = center2corner(window)

    # Translate the crop window by a random offset.
    # (Original note: the SiamRPN++ paper discusses how a large shift
    # range mitigates the network's position bias.)
    if self.shift:
        shift_x = Augmentation.random() * self.shift
        shift_y = Augmentation.random() * self.shift
        left, top, right, bottom = crop_bbox
        # min(...) keeps the far edge within img - 1 after shifting;
        # max(...) keeps the near edge from going below 0.
        shift_x = max(-left, min(img_w - 1 - right, shift_x))
        shift_y = max(-top, min(img_h - 1 - bottom, shift_y))
        crop_bbox = Corner(left + shift_x, top + shift_y,
                           right + shift_x, bottom + shift_y)

    # Re-express the ground truth relative to the window's top-left
    # corner so it matches the cropped content.
    origin_x, origin_y = crop_bbox.x1, crop_bbox.y1
    bbox = Corner(bbox.x1 - origin_x, bbox.y1 - origin_y,
                  bbox.x2 - origin_x, bbox.y2 - origin_y)

    # Undo the window scaling on the ground-truth coordinates.
    if self.scale:
        bbox = Corner(bbox.x1 / scale_x, bbox.y1 / scale_y,
                      bbox.x2 / scale_x, bbox.y2 / scale_y)

    # Extract the region of interest.
    image = self._crop_roi(image, crop_bbox, size)
    return image, bbox
def generate_all_anchors(self, im_c, size):
    """Tile the per-location anchors over a ``size`` x ``size`` grid.

    Uses ``self.anchors`` (the anchors of a single location),
    ``self.stride`` and ``self.anchor_num``. The result is cached in
    ``self.all_anchors`` and only rebuilt when ``im_c`` or ``size``
    differ from the previous call.

    im_c: image center (per the original notes, the search-region
          centre, e.g. 255 // 2)
    size: grid size (per the original notes, the side of the
          correlation output feature map, e.g. 17)

    Returns False when the cached anchors are still valid, True after
    rebuilding them.
    """
    cache_valid = self.image_center == im_c and self.size == size
    if cache_valid:
        return False
    self.image_center, self.size = im_c, size

    # Offset of the top-left grid cell so the grid is centred on im_c.
    origin = im_c - size // 2 * self.stride
    shifted = self.anchors + np.array([origin] * 4, dtype=np.float32)

    # Columns 0..3 are x1, y1, x2, y2; each becomes (anchor_num, 1, 1).
    corners = [shifted[:, k].reshape(self.anchor_num, 1, 1)
               for k in range(4)]
    cx, cy, w, h = corner2center(corners)

    # Displacement of every grid cell relative to the top-left one.
    steps = np.arange(0, size) * self.stride
    cx = cx + steps.reshape(1, 1, -1)   # varies along the last axis
    cy = cy + steps.reshape(1, -1, 1)   # varies along the middle axis

    # Adding a zero array broadcasts everything to
    # (anchor_num, size, size).
    canvas = np.zeros((self.anchor_num, size, size), dtype=np.float32)
    cx, cy, w, h = [value + canvas for value in (cx, cy, w, h)]
    x1, y1, x2, y2 = center2corner([cx, cy, w, h])

    # Two layouts, each stacked along a new leading axis of length 4:
    # corner form first, centre form second.
    corner_form = np.stack([x1, y1, x2, y2]).astype(np.float32)
    center_form = np.stack([cx, cy, w, h]).astype(np.float32)
    self.all_anchors = (corner_form, center_form)
    return True
def generate_all_anchors(self, im_c, size):
    """Expand ``self.anchors`` to every cell of the output grid.

    im_c: image center - centre position of the search image
    size: image size - side length of the output feature map

    Stores ``(corner_form, center_form)`` in ``self.all_anchors``; each
    is a float32 array stacked along a new leading axis of length 4.
    Returns False if ``im_c`` and ``size`` match the cached values (no
    work done), True otherwise.
    """
    if self.image_center == im_c and self.size == size:
        return False
    self.image_center = im_c
    self.size = size

    first_cell = im_c - size // 2 * self.stride
    offset = np.array([first_cell] * 4, dtype=np.float32)
    base = self.anchors + offset  # still in corner form

    n = self.anchor_num
    x1, y1, x2, y2 = (base[:, col].reshape(n, 1, 1) for col in range(4))
    cx, cy, w, h = corner2center([x1, y1, x2, y2])

    grid = np.arange(0, size)
    cx = cx + grid.reshape(1, 1, -1) * self.stride
    cy = cy + grid.reshape(1, -1, 1) * self.stride

    # Broadcast every component to (anchor_num, size, size).
    filler = np.zeros((n, size, size), dtype=np.float32)
    cx = cx + filler
    cy = cy + filler
    w = w + filler
    h = h + filler

    # Centre form -> corner form for the tiled anchors.
    x1, y1, x2, y2 = center2corner([cx, cy, w, h])

    as_corners = np.stack([x1, y1, x2, y2]).astype(np.float32)
    as_centers = np.stack([cx, cy, w, h]).astype(np.float32)
    self.all_anchors = (as_corners, as_centers)
    return True
def _shift_scale_aug(self, image, bbox, crop_bbox, size):
    """Apply random scale and shift to the crop window and crop.

    :param image: source frame (array); only ``image.shape[:2]`` is read
        here
    :param bbox: target box with ``x1, y1, x2, y2`` fields, in the
        coordinate frame of ``image``
    :param crop_bbox: corner-form window to crop before augmentation
    :param size: output side length forwarded to ``_crop_roi``
    :return: ``(image, bbox)`` -- cropped patch and the target box
        expressed relative to the (augmented) crop window
    """
    frame_h, frame_w = image.shape[:2]

    # --- adjust crop bounding box: scale ---
    center = corner2center(crop_bbox)
    if self.scale:
        scale_x = 1.0 + Augmentation.random() * self.scale
        scale_y = 1.0 + Augmentation.random() * self.scale
        # The scaled window may not be larger than the frame.
        scale_x = min(scale_x, float(frame_w) / center.w)
        scale_y = min(scale_y, float(frame_h) / center.h)
        center = Center(center.x, center.y,
                        center.w * scale_x, center.h * scale_y)
    crop_bbox = center2corner(center)

    # --- adjust crop bounding box: shift ---
    if self.shift:
        dx = Augmentation.random() * self.shift
        dy = Augmentation.random() * self.shift
        cx1, cy1, cx2, cy2 = crop_bbox
        # Clamp so the shifted window stays inside [0, frame - 1].
        dx = max(-cx1, min(frame_w - 1 - cx2, dx))
        dy = max(-cy1, min(frame_h - 1 - cy2, dy))
        crop_bbox = Corner(cx1 + dx, cy1 + dy, cx2 + dx, cy2 + dy)

    # --- adjust target bounding box ---
    off_x = crop_bbox.x1
    off_y = crop_bbox.y1
    bbox = Corner(bbox.x1 - off_x, bbox.y1 - off_y,
                  bbox.x2 - off_x, bbox.y2 - off_y)
    if self.scale:
        bbox = Corner(bbox.x1 / scale_x, bbox.y1 / scale_y,
                      bbox.x2 / scale_x, bbox.y2 / scale_y)

    patch = self._crop_roi(image, crop_bbox, size)
    return patch, bbox
def generate_all_anchors(self, im_c, size):
    """Replicate the single-location anchors across the whole grid.

    ``self.anchors`` holds the anchors of one location; this method
    shifts them to every cell of a ``size`` x ``size`` grid spaced by
    ``self.stride`` and centred on ``im_c``.

    im_c: image center
    size: image size

    Returns False when called again with the same ``im_c`` and ``size``
    (cached result kept), True after ``self.all_anchors`` is rebuilt.
    """
    same_request = self.image_center == im_c and self.size == size
    if same_request:
        return False
    self.image_center, self.size = im_c, size

    start = im_c - size // 2 * self.stride
    anchors_at_start = self.anchors + np.array([start] * 4, dtype=np.float32)

    shape = (self.anchor_num, 1, 1)
    corner_parts = [anchors_at_start[:, idx].reshape(shape)
                    for idx in range(4)]
    cx, cy, w, h = corner2center(corner_parts)

    ticks = np.arange(0, size)
    cx = cx + ticks.reshape(1, 1, -1) * self.stride
    cy = cy + ticks.reshape(1, -1, 1) * self.stride

    # broadcast to (anchor_num, size, size)
    zero = np.zeros((self.anchor_num, size, size), dtype=np.float32)
    cx, cy, w, h = (part + zero for part in (cx, cy, w, h))
    x1, y1, x2, y2 = center2corner([cx, cy, w, h])

    self.all_anchors = (
        np.stack([x1, y1, x2, y2]).astype(np.float32),  # corner form
        np.stack([cx, cy, w, h]).astype(np.float32),    # centre form
    )
    return True
def generate_all_anchors(self, im_c, size):
    """Tile ``self.anchors`` over a ``size`` x ``size`` grid.

    im_c: image center
    size: image size

    The module-level helpers ``reshape_anchor_1_1`` and ``add_zero`` are
    applied to each of the four box components. The result is stored in
    ``self.all_anchors`` as ``(corner_form, center_form)``.
    Returns False if nothing changed since the last call, else True.
    """
    if self.image_center == im_c and self.size == size:
        return False
    self.image_center = im_c
    self.size = size

    origin = im_c - size // 2 * self.stride
    placed = self.anchors + np.array([origin] * 4, dtype=np.float32)

    columns = [placed[:, 0], placed[:, 1], placed[:, 2], placed[:, 3]]
    reshaped = [reshape_anchor_1_1(col, anchor_num=self.anchor_num)
                for col in columns]
    cx, cy, w, h = corner2center(reshaped)

    cells = np.arange(0, size)
    cx = cx + cells.reshape(1, 1, -1) * self.stride
    cy = cy + cells.reshape(1, -1, 1) * self.stride

    # broadcast
    zero = np.zeros((self.anchor_num, size, size), dtype=np.float32)
    cx, cy, w, h = [add_zero(item, zero=zero) for item in (cx, cy, w, h)]
    x1, y1, x2, y2 = center2corner([cx, cy, w, h])

    corner_form = np.stack([x1, y1, x2, y2]).astype(np.float32)
    center_form = np.stack([cx, cy, w, h]).astype(np.float32)
    self.all_anchors = (corner_form, center_form)
    return True
def __call__(self, target, size, neg=False):
    """Create anchor classification and regression targets.

    :param target: ground-truth box in corner form
    :param size: side length of the label grid
    :param neg: truthy for a negative pair
    :return: ``(cls, delta, delta_weight, overlap)``; ``cls`` uses
        -1 = ignore, 0 = negative, 1 = positive
    """
    anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
    grid = (anchor_num, size, size)

    # -1 ignore 0 negative 1 positive for anchor classification
    cls = np.full(grid, -1, dtype=np.int64)
    delta = np.zeros((4,) + grid, dtype=np.float32)
    delta_weight = np.zeros(grid, dtype=np.float32)

    def select(position, keep_num=16):
        """Keep at most ``keep_num`` randomly chosen index entries."""
        count = position[0].shape[0]
        if count > keep_num:
            order = np.arange(count)
            np.random.shuffle(order)
            chosen = order[:keep_num]
            return tuple(axis[chosen] for axis in position), keep_num
        return position, count

    tcx, tcy, tw, th = corner2center(target)

    if neg:
        # Grid cell derived from the target centre; negatives are drawn
        # from a 7x7 window (clipped to the grid) around it.
        half_search = cfg.TRAIN.SEARCH_SIZE // 2
        col = size // 2 + int(
            np.ceil((tcx - half_search) / cfg.ANCHOR.STRIDE + 0.5))
        row = size // 2 + int(
            np.ceil((tcy - half_search) / cfg.ANCHOR.STRIDE + 0.5))
        left, right = max(0, col - 3), min(size, col + 4)
        top, bottom = max(0, row - 3), min(size, row + 4)
        cls[:, top:bottom, left:right] = 0

        kept, _ = select(np.where(cls == 0), cfg.TRAIN.NEG_NUM)
        cls[:] = -1
        cls[kept] = 0

        overlap = np.zeros(grid, dtype=np.float32)
        return cls, delta, delta_weight, overlap

    # Corner-form anchors and centre-form anchors.
    corners = self.anchors.all_anchors[0]
    centers = self.anchors.all_anchors[1]
    x1, y1, x2, y2 = (corners[k] for k in range(4))
    cx, cy, w, h = (centers[k] for k in range(4))

    # Regression target for every anchor.
    delta[0] = (tcx - cx) / w
    delta[1] = (tcy - cy) / h
    delta[2] = np.log(tw / w)
    delta[3] = np.log(th / h)

    overlap = IoU([x1, y1, x2, y2], target)

    # Select positive and negative samples.
    pos, pos_num = select(np.where(overlap > cfg.TRAIN.THR_HIGH),
                          cfg.TRAIN.POS_NUM)
    negatives, _ = select(np.where(overlap < cfg.TRAIN.THR_LOW),
                          cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

    cls[pos] = 1
    delta_weight[pos] = 1. / (pos_num + 1e-6)
    cls[negatives] = 0
    return cls, delta, delta_weight, overlap
def __call__(self, target, size, neg=False):
    """Label every anchor as positive / negative / ignored.

    :param target: ground-truth box in corner form
    :param size: side length of the label grid
    :param neg: truthy when the pair is a negative sample
    :return: ``(cls, delta, delta_weight, overlap)`` where ``cls`` holds
        -1 (ignore), 0 (negative) or 1 (positive)
    """
    anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
    label_shape = (anchor_num, size, size)

    # -1 ignore 0 negative 1 positive
    cls = np.full(label_shape, -1, dtype=np.int64)
    delta = np.zeros((4, anchor_num, size, size), dtype=np.float32)
    delta_weight = np.zeros(label_shape, dtype=np.float32)

    def select(position, keep_num=16):
        """Randomly subsample ``position`` down to ``keep_num`` items."""
        total = position[0].shape[0]
        if total <= keep_num:
            return position, total
        perm = np.arange(total)
        np.random.shuffle(perm)
        picked = perm[:keep_num]
        return tuple(idx[picked] for idx in position), keep_num

    tcx, tcy, tw, th = corner2center(target)

    if neg:
        stride = cfg.ANCHOR.STRIDE
        search_half = cfg.TRAIN.SEARCH_SIZE // 2
        gx = size // 2
        gy = size // 2
        gx += int(np.ceil((tcx - search_half) / stride + 0.5))
        gy += int(np.ceil((tcy - search_half) / stride + 0.5))

        # 7x7 neighbourhood around (gx, gy), clipped to the grid.
        x_lo, x_hi = max(0, gx - 3), min(size, gx + 4)
        y_lo, y_hi = max(0, gy - 3), min(size, gy + 4)
        cls[:, y_lo:y_hi, x_lo:x_hi] = 0

        candidates = np.where(cls == 0)
        sampled, _ = select(candidates, cfg.TRAIN.NEG_NUM)
        cls[:] = -1
        cls[sampled] = 0

        overlap = np.zeros(label_shape, dtype=np.float32)
        return cls, delta, delta_weight, overlap

    # Anchors are generated beforehand from the configured ratios.
    anchor_box = self.anchors.all_anchors[0]
    anchor_center = self.anchors.all_anchors[1]
    x1, y1, x2, y2 = [anchor_box[i] for i in range(4)]
    cx, cy, w, h = [anchor_center[i] for i in range(4)]

    # tcx / tcy are the target centre; delta encodes the target
    # relative to each anchor.
    delta[0] = (tcx - cx) / w
    delta[1] = (tcy - cy) / h
    delta[2] = np.log(tw / w)
    delta[3] = np.log(th / h)

    overlap = IoU([x1, y1, x2, y2], target)

    positives = np.where(overlap > cfg.TRAIN.THR_HIGH)
    negatives = np.where(overlap < cfg.TRAIN.THR_LOW)
    positives, pos_num = select(positives, cfg.TRAIN.POS_NUM)
    negatives, _ = select(negatives,
                          cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

    cls[positives] = 1
    delta_weight[positives] = 1. / (pos_num + 1e-6)
    cls[negatives] = 0
    return cls, delta, delta_weight, overlap
def __call__(self, target, size, neg=False):
    """Build heatmap, object-size and (optionally) offset labels.

    :param target: ground-truth box in corner form
    :param size: side length of every label map
    :param neg: truthy for a negative pair; all labels stay zero
    :return: ``(heatmap_label, offsets_label, objsize_label)`` when
        ``cfg.TRAIN.OFFSETS`` is truthy, otherwise
        ``(heatmap_label, objsize_label)``. ``heatmap_label`` is a list
        with one (1, size, size) float32 array per stage (one entry when
        ``cfg.TRAIN.STACK`` is 0); the other labels are (2, size, size).

    NOTE(review): the source this was reconstructed from had lost its
    indentation. The offset, object-size and peak assignments are placed
    inside the ``< self.radius`` gate, which is the reading that makes
    the outer gate meaningful -- confirm against the original file.
    NOTE(review): ``std`` and ``radius`` have three entries, so more than
    three stages would raise IndexError, as before.
    """
    use_offsets = cfg.TRAIN.OFFSETS
    stages = 1 if cfg.TRAIN.STACK == 0 else cfg.TRAIN.STACK

    # One independent array per stage. Previously every list slot
    # referenced the same array, so the per-stage std values used with
    # DIF_STD overwrote each other and all stages ended up identical.
    heatmap_label = [np.zeros((1, size, size), dtype=np.float32)
                     for _ in range(stages)]
    objsize_label = np.zeros((2, size, size), dtype=np.float32)
    if use_offsets:
        offsets_label = np.zeros((2, size, size), dtype=np.float32)

    if neg:
        if use_offsets:
            return heatmap_label, offsets_label, objsize_label
        return heatmap_label, objsize_label

    tcx, tcy, tw, th = corner2center(target)

    # Target centre expressed in heatmap cells (fractional).
    out_size = cfg.TRAIN.OUTPUT_SIZE
    search_centre = (cfg.TRAIN.SEARCH_SIZE - 1) / 2
    heat_cx = out_size // 2 + (tcx - search_centre) / self.stride
    heat_cy = out_size // 2 + (tcy - search_centre) / self.stride
    # Nearest integer cell; it receives the heatmap peak value 1.
    pos_x = round(heat_cx)
    pos_y = round(heat_cy)

    if cfg.TRAIN.DIF_STD:
        std = [self.std, self.std * 0.9, self.std * 0.81]
    else:
        std = [self.std, self.std, self.std]
    radius = [self.radius, self.radius, self.radius]

    for i in range(out_size):
        for j in range(out_size):
            distance = (i - heat_cy) ** 2 + (j - heat_cx) ** 2
            dist = math.sqrt(distance)
            if dist < self.radius:
                # Gaussian response per stage, limited to its radius.
                for idx, hm in enumerate(heatmap_label):
                    if dist < radius[idx]:
                        hm[0, i, j] = np.exp(-distance / (2 * std[idx] ** 2))

                if use_offsets:
                    if cfg.TRAIN.SAMEOFF:
                        # Same sub-cell offset for every labelled cell.
                        offsets_label[0, i, j] = (heat_cx - pos_x) * self.stride / 64
                        offsets_label[1, i, j] = (heat_cy - pos_y) * self.stride / 64
                    else:
                        # Offset from this cell's keypoint to the target
                        # centre.
                        offsets_label[0, i, j] = (tcx - self.keypoints[0, i, j]) / 64
                        offsets_label[1, i, j] = (tcy - self.keypoints[1, i, j]) / 64

                if cfg.TRAIN.NORMWH:
                    objsize_label[0, i, j] = np.log(tw / 64)
                    objsize_label[1, i, j] = np.log(th / 64)
                else:
                    objsize_label[0, i, j] = tw
                    objsize_label[1, i, j] = th

                if i == pos_y and j == pos_x:
                    for hm in heatmap_label:
                        hm[0, i, j] = 1

    if use_offsets:
        return heatmap_label, offsets_label, objsize_label
    return heatmap_label, objsize_label
def __call__(self, target, size, neg=False):
    """Assign classification labels and regression targets to anchors.

    :param target: ground-truth box in corner form
    :param size: side length of the label grid
    :param neg: truthy when the training pair is a negative sample
    :return: ``(cls, delta, delta_weight, overlap)``; ``cls`` is
        -1 (ignore), 0 (negative) or 1 (positive) per anchor
    """
    anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
    per_anchor = (anchor_num, size, size)

    # -1 ignore 0 negative 1 positive
    cls = np.full(per_anchor, -1, dtype=np.int64)
    delta = np.zeros((4,) + per_anchor, dtype=np.float32)
    delta_weight = np.zeros(per_anchor, dtype=np.float32)

    def select(position, keep_num=16):
        """Pick ``keep_num`` entries at random from the index tuple."""
        available = position[0].shape[0]
        if available <= keep_num:
            return position, available
        shuffled = np.arange(available)
        np.random.shuffle(shuffled)
        head = shuffled[:keep_num]
        return tuple(component[head] for component in position), keep_num

    tcx, tcy, tw, th = corner2center(target)

    if neg:
        centre_cell = size // 2
        search_half = cfg.TRAIN.SEARCH_SIZE // 2
        cell_x = centre_cell + int(
            np.ceil((tcx - search_half) / cfg.ANCHOR.STRIDE + 0.5))
        cell_y = centre_cell + int(
            np.ceil((tcy - search_half) / cfg.ANCHOR.STRIDE + 0.5))

        # Candidate negatives: 7x7 window around the cell, clipped.
        l, r = max(0, cell_x - 3), min(size, cell_x + 4)
        u, d = max(0, cell_y - 3), min(size, cell_y + 4)
        cls[:, u:d, l:r] = 0

        chosen, _ = select(np.where(cls == 0), cfg.TRAIN.NEG_NUM)
        cls[:] = -1
        cls[chosen] = 0

        overlap = np.zeros(per_anchor, dtype=np.float32)
        return cls, delta, delta_weight, overlap

    # self.anchors.all_anchors has two entries: [0] is corner form,
    # [1] is centre form.
    corner_form = self.anchors.all_anchors[0]
    center_form = self.anchors.all_anchors[1]
    x1, y1, x2, y2 = (corner_form[k] for k in range(4))
    cx, cy, w, h = (center_form[k] for k in range(4))

    # Vectorised over all anchors at once.
    delta[0] = (tcx - cx) / w
    delta[1] = (tcy - cy) / h
    delta[2] = np.log(tw / w)
    delta[3] = np.log(th / h)

    # Intersection-over-union of every anchor with the target.
    overlap = IoU([x1, y1, x2, y2], target)

    # Indices above THR_HIGH are positive candidates, indices below
    # THR_LOW are negative candidates.
    pos_idx = np.where(overlap > cfg.TRAIN.THR_HIGH)
    neg_idx = np.where(overlap < cfg.TRAIN.THR_LOW)

    # Keep at most POS_NUM positives and at most
    # TOTAL_NUM - POS_NUM negatives.
    pos_idx, pos_num = select(pos_idx, cfg.TRAIN.POS_NUM)
    neg_idx, _ = select(neg_idx, cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

    cls[pos_idx] = 1
    # Each positive gets weight 1 / (pos_num + 1e-6).
    delta_weight[pos_idx] = 1. / (pos_num + 1e-6)
    cls[neg_idx] = 0
    return cls, delta, delta_weight, overlap
def __call__(self, target, template, size, neg=False):
    """Create anchor labels with regression relative to the template.

    Unlike the anchor-relative variant, ``delta`` is computed from the
    ``template`` box, so the same four values are broadcast to every
    anchor. ``delta`` is filled before the ``neg`` check and is therefore
    also returned for negative pairs.

    :param target: ground-truth box in corner form
    :param template: reference box in corner form to regress from
    :param size: side length of the label grid
    :param neg: truthy for a negative pair
    :return: ``(cls, delta, delta_weight, overlap)``; ``cls`` is
        -1 (ignore), 0 (negative) or 1 (positive)
    """
    anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
    grid = (anchor_num, size, size)

    # -1 ignore 0 negative 1 positive
    cls = np.full(grid, -1, dtype=np.int64)
    delta = np.zeros((4,) + grid, dtype=np.float32)
    delta_weight = np.zeros(grid, dtype=np.float32)

    def select(position, keep_num=16):
        """Randomly keep at most ``keep_num`` of the given indices."""
        count = position[0].shape[0]
        if count > keep_num:
            order = np.arange(count)
            np.random.shuffle(order)
            subset = order[:keep_num]
            return tuple(axis[subset] for axis in position), keep_num
        return position, count

    tcx, tcy, tw, th = corner2center(target)
    ref_cx, ref_cy, ref_w, ref_h = corner2center(template)

    # regress from the template, not anchor
    delta[0] = (tcx - ref_cx) / ref_w
    delta[1] = (tcy - ref_cy) / ref_h
    delta[2] = np.log(tw / ref_w)
    delta[3] = np.log(th / ref_h)

    if neg:
        half_search = cfg.TRAIN.SEARCH_SIZE / 2.0
        col = int(np.around(
            size // 2 + (tcx - half_search) / cfg.ANCHOR.STRIDE))
        row = int(np.around(
            size // 2 + (tcy - half_search) / cfg.ANCHOR.STRIDE))

        # 7x7 window around (col, row), clipped to the grid.
        left, right = max(0, col - 3), min(size, col + 4)
        top, bottom = max(0, row - 3), min(size, row + 4)
        cls[:, top:bottom, left:right] = 0

        kept, _ = select(np.where(cls == 0), cfg.TRAIN.NEG_NUM)
        cls[:] = -1
        cls[kept] = 0

        overlap = np.zeros(grid, dtype=np.float32)
        return cls, delta, delta_weight, overlap

    corners = self.anchors.all_anchors[0]
    x1, y1, x2, y2 = (corners[k] for k in range(4))

    overlap = IoU([x1, y1, x2, y2], target)
    best = np.max(overlap)

    # The best-overlapping anchor is always a positive candidate and is
    # never a negative candidate, whatever the thresholds say.
    pos = np.where((overlap > cfg.TRAIN.THR_HIGH) | (overlap == best))
    negatives = np.where((overlap < cfg.TRAIN.THR_LOW) & (overlap < best))

    pos, pos_num = select(pos, cfg.TRAIN.POS_NUM)
    negatives, _ = select(negatives,
                          cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)

    cls[pos] = 1
    delta_weight[pos] = 1. / (pos_num + 1e-6)
    cls[negatives] = 0
    return cls, delta, delta_weight, overlap