Example #1
    def __call__(self, image, bbox, size, gray=False):
        '''
        :param image: cropped image of size 511*511; the template has already been aligned to the image center
        :param bbox: box including the context margin, in the 511-crop coordinate system
        :param size: network input size, 127*127 for the template or 255*255 for the search region
        :param gray: whether to apply grayscale augmentation
        :return:
        '''
        shape = image.shape                # fixed size 511*511
        # compute the top-left and bottom-right corners of the size*size region to be
        # cropped out, centered on the image center
        crop_bbox = center2corner(Center(shape[0]//2, shape[1]//2,
                                         size-1, size-1))
        # gray augmentation (if randomly selected, first convert the color image to
        # grayscale, then convert back to a 3-channel "color" image)
        if gray:
            image = self._gray_aug(image)

        # shift scale augmentation
        # the actual crop of the image, plus a simple rescaling, happens here
        image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size)

        # color augmentation
        if self.color > np.random.random():
            image = self._color_aug(image)

        # blur augmentation: limit the kernel size by the target-to-region area ratio,
        # otherwise an oversized blur kernel would make the target invisible
        _, _, w, h = corner2center(bbox)
        area_ratio = (w*h*1.0)/(size*size)
        if self.blur > np.random.random():
            image = self._blur_aug(image, area_ratio)

        # flip augmentation
        if self.flip and self.flip > np.random.random():
            image, bbox = self._flip_aug(image, bbox)
        return image, bbox
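The snippets above lean on the Center/Corner containers, the center2corner/corner2center converters, and _gray_aug. A minimal sketch of these helpers, assuming the usual namedtuple-based versions found in PySOT-style codebases (names and details may differ in the original repo):

    from collections import namedtuple

    import cv2

    # assumed containers: center-based and corner-based box representations
    Center = namedtuple('Center', 'x y w h')
    Corner = namedtuple('Corner', 'x1 y1 x2 y2')

    def center2corner(center):
        # (x, y, w, h) -> (x1, y1, x2, y2)
        x, y, w, h = center
        return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5)

    def corner2center(corner):
        # (x1, y1, x2, y2) -> (x, y, w, h)
        x1, y1, x2, y2 = corner
        return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, x2 - x1, y2 - y1)

    def gray_aug(image):
        # grayscale augmentation as described above: collapse to one channel,
        # then replicate back to 3 channels so the input shape is unchanged
        grayed = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return cv2.cvtColor(grayed, cv2.COLOR_GRAY2BGR)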
Example #2
    def _get_bbox(self, image, shape):
        '''
        The template image is assumed to sit at the center of the whole image. The gt bbox plus
        0.5x of surrounding context is taken as the template region for training; it is resized
        to 127*127, and the output is the resized template box relative to the image center.
        Note: although `shape` carries the gt bbox, those coordinates are in the original image
        coordinate system, while the input here is the 511*511 crop with the target already
        centered. So even though the bbox has four values [x1, y1, x2, y2], only w = x2 - x1
        and h = y2 - y1 are actually used, and only to get the width/height of the
        (context-padded) target within the 511*511 crop; the target center was already aligned
        to the center of the 511*511 image during the crop stage.
        :param image:
        :param shape:
        :return:
        '''
        imh, imw = image.shape[:2]
        if len(shape) == 4:
            w, h = shape[2] - shape[0], shape[3] - shape[1]
        else:
            w, h = shape

        # The computation below rescales the target region, but the scale factor is the
        # same along width and height, so the target is not deformed (the final template
        # is square). For a rectangular box the short side is padded with background,
        # which is unfriendly to elongated boxes: it pulls in a lot of background.

        context_amount = 0.5  # context fraction: the gt box plus this proportion of surrounding image content forms the template region
        exemplar_size = cfg.TRAIN.EXEMPLAR_SIZE
        wc_z = w + context_amount * (w + h)
        hc_z = h + context_amount * (w + h)
        s_z = np.sqrt(wc_z * hc_z)
        # the template fed to the network is 127*127; when the dataset was cropped, the
        # context-padded template region was resized to 127*127, so w and h must be
        # scaled by the same factor
        scale_z = exemplar_size / s_z
        w = w * scale_z
        h = h * scale_z
        cx, cy = imw // 2, imh // 2  # when the dataset was built, the template region was already aligned to the image center
        bbox = center2corner(Center(cx, cy, w, h))
        return bbox  # the bbox is centered in the 511*511 image; output converted to [x1, y1, x2, y2] form
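As a quick numeric check of the context computation above (a hypothetical 100x60 gt box, with cfg.TRAIN.EXEMPLAR_SIZE assumed to be 127 as in common SiamRPN-style configs):

    import numpy as np

    w, h = 100.0, 60.0                    # hypothetical gt box inside the 511*511 crop
    context_amount = 0.5
    wc_z = w + context_amount * (w + h)   # 100 + 0.5 * 160 = 180
    hc_z = h + context_amount * (w + h)   # 60 + 0.5 * 160 = 140
    s_z = np.sqrt(wc_z * hc_z)            # sqrt(180 * 140) ~ 158.7
    scale_z = 127 / s_z                   # ~ 0.80
    print(w * scale_z, h * scale_z)       # ~ 80.0, 48.0: box size in the 127*127 template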
Example #3
    def _shift_scale_aug(self, image, bbox, crop_bbox, size):
        ''' Shift and scale the crop box, then return the adjusted bbox together with the
        corresponding image region.
        :param image:
        :param bbox: gt box including the context margin, in the 511-crop coordinate system
        :param crop_bbox: position of the box to crop out, 127*127 or 255*255
        :param size: desired crop size: 127*127 for the template or 255*255 for the search region
        :return: the returned image is the ROI cropped out with the augmented crop_bbox; the
            returned bbox is the gt adjusted accordingly and converted to the cropped-image
            coordinate system
        '''
        im_h, im_w = image.shape[:2]

        # adjust crop bounding box
        crop_bbox_center = corner2center(crop_bbox)          # adjust the size and position of the box that will be cropped out
        if self.scale:
            scale_x = (1.0 + Augmentation.random() * self.scale)
            scale_y = (1.0 + Augmentation.random() * self.scale)
            h, w = crop_bbox_center.h, crop_bbox_center.w
            scale_x = min(scale_x, float(im_w) / w)         # adjust the crop box's w/h; take the min so the scaled search region does not exceed the image
            scale_y = min(scale_y, float(im_h) / h)
            crop_bbox_center = Center(crop_bbox_center.x,
                                      crop_bbox_center.y,
                                      crop_bbox_center.w * scale_x,
                                      crop_bbox_center.h * scale_y)

        crop_bbox = center2corner(crop_bbox_center)

        if self.shift:
            # the SiamRPN++ paper discusses how shifting over the maximum range helps
            # relieve the spatial bias the network learns during training
            sx = Augmentation.random() * self.shift
            sy = Augmentation.random() * self.shift
            x1, y1, x2, y2 = crop_bbox

            # min(im_w - 1 - x2, sx) ensures x2 + sx stays within the right image border,
            # and max(-x1, ...) ensures x1 + sx >= 0, so even after the shift the search
            # region does not leave the image on either side
            sx = max(-x1, min(im_w - 1 - x2, sx))
            sy = max(-y1, min(im_h - 1 - y2, sy))

            crop_bbox = Corner(x1 + sx, y1 + sy, x2 + sx, y2 + sy)

        # adjust target bounding box: the crop box transform is fixed above, so the gt
        # information must be adjusted in the same way
        x1, y1 = crop_bbox.x1, crop_bbox.y1
        # use the top-left corner of the crop box as the reference point to recompute the
        # bbox, i.e. modify the gt so it stays consistent with the cropped content;
        # the output bbox is in the 127*127 or 255*255 image coordinate system
        bbox = Corner(bbox.x1 - x1, bbox.y1 - y1,
                      bbox.x2 - x1, bbox.y2 - y1)

        if self.scale:
            bbox = Corner(bbox.x1 / scale_x, bbox.y1 / scale_y,
                          bbox.x2 / scale_x, bbox.y2 / scale_y)

        image = self._crop_roi(image, crop_bbox, size)     # actually crop out the region
        return image, bbox
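Both the scale and the shift branch draw from Augmentation.random(). A minimal sketch, assuming the common PySOT-style definition of a uniform draw over [-1, 1) (the original repo may differ):

    import numpy as np

    class Augmentation:
        @staticmethod
        def random():
            # uniform in [-1, 1), so the scale factor becomes 1 +/- self.scale
            # and the shift becomes +/- self.shift pixels
            return np.random.random() * 2 - 1.0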
Example #4
    def _get_bbox(self, image, shape):
        # training-time version: derive the template box from the gt shape
        imh, imw = image.shape[:2]
        if len(shape) == 4:
            w, h = shape[2] - shape[0], shape[3] - shape[1]
        else:
            w, h = shape
        context_amount = 0.5
        exemplar_size = cfg.TRAIN.EXEMPLAR_SIZE
        wc_z = w + context_amount * (w + h)
        hc_z = h + context_amount * (w + h)
        s_z = np.sqrt(wc_z * hc_z)
        scale_z = exemplar_size / s_z
        w = w * scale_z
        h = h * scale_z
        cx, cy = imw // 2, imh // 2
        bbox = center2corner(Center(cx, cy, w, h))
        return bbox

    def _get_bbox(self, s_z):
        # tracking-time variant: the context size s_z is already known, so the
        # commented-out recomputation from the training version is not needed
        # imh, imw = image.shape[:2]
        # if len(shape) == 4:
        #     w, h = shape[2]-shape[0], shape[3]-shape[1]
        # else:
        #     w, h = shape
        # context_amount = cfg.TRACK.CONTEXT_AMOUNT
        exemplar_size = cfg.TRACK.EXEMPLAR_SIZE
        # wc_z = w + context_amount * (w+h)
        # hc_z = h + context_amount * (w+h)
        # s_z = np.sqrt(wc_z * hc_z)
        scale_z = exemplar_size / s_z
        w, h = self.size
        imh, imw = cfg.TRACK.INSTANCE_SIZE, cfg.TRACK.INSTANCE_SIZE
        w = w * scale_z
        h = h * scale_z
        cx, cy = imw // 2, imh // 2
        bbox = center2corner(Center(cx, cy, w, h))
        return bbox
Example #6
    def _shift_scale_aug(self, image, bbox, crop_bbox, size):
        im_h, im_w = image.shape[:2]

        # adjust crop bounding box
        crop_bbox_center = corner2center(crop_bbox)
        if self.scale:
            scale_x = (1.0 + Augmentation.random() * self.scale)
            scale_y = (1.0 + Augmentation.random() * self.scale)
            h, w = crop_bbox_center.h, crop_bbox_center.w
            scale_x = min(scale_x, float(im_w) / w)
            scale_y = min(scale_y, float(im_h) / h)
            crop_bbox_center = Center(crop_bbox_center.x,
                                      crop_bbox_center.y,
                                      crop_bbox_center.w * scale_x,
                                      crop_bbox_center.h * scale_y)

        crop_bbox = center2corner(crop_bbox_center)
        if self.shift:
            sx = Augmentation.random() * self.shift
            sy = Augmentation.random() * self.shift

            x1, y1, x2, y2 = crop_bbox

            sx = max(-x1, min(im_w - 1 - x2, sx))
            sy = max(-y1, min(im_h - 1 - y2, sy))

            crop_bbox = Corner(x1 + sx, y1 + sy, x2 + sx, y2 + sy)

        # adjust target bounding box
        x1, y1 = crop_bbox.x1, crop_bbox.y1
        bbox = Corner(bbox.x1 - x1, bbox.y1 - y1,
                      bbox.x2 - x1, bbox.y2 - y1)

        if self.scale:
            bbox = Corner(bbox.x1 / scale_x, bbox.y1 / scale_y,
                          bbox.x2 / scale_x, bbox.y2 / scale_y)

        image = self._crop_roi(image, crop_bbox, size)
        return image, bbox
Example #7
    def __call__(self, image, bbox, size, gray=False):
        shape = image.shape
        crop_bbox = center2corner(
            Center(shape[0] // 2, shape[1] // 2, size - 1, size - 1))
        # gray augmentation
        if gray:
            image = self._gray_aug(image)

        # shift scale augmentation - here the original image is cropped down to 255 x 255 x 3
        image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size)

        # color augmentation
        if self.color > np.random.random():
            image = self._color_aug(image)

        # blur augmentation
        if self.blur > np.random.random():
            image = self._blur_aug(image)

        # flip augmentation
        if self.flip and self.flip > np.random.random():
            image, bbox = self._flip_aug(image, bbox)
        return image, bbox
Example #8
    def __call__(self, image, bbox, size, gray=False):
        shape = image.shape
        # sizes for the template and the search region are defined as [127, 255]
        crop_bbox = center2corner(Center(shape[0]//2, shape[1]//2,
                                         size-1, size-1))
        # gray augmentation
        if gray:
            image = self._gray_aug(image)

        # shift scale augmentation, two types of augmentation!
        image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size)

        # color augmentation
        if self.color > np.random.random():
            image = self._color_aug(image)

        # blur augmentation
        if self.blur > np.random.random():
            image = self._blur_aug(image)

        # flip augmentation
        if self.flip and self.flip > np.random.random():
            image, bbox = self._flip_aug(image, bbox)
        return image, bbox
    def track(self, img):
        """
        args:
            img(np.ndarray): BGR image
        return:
            bbox(list):[x, y, width, height]
        """
        w_z = self.size[0] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
        h_z = self.size[1] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
        s_z = np.sqrt(w_z * h_z)
        scale_z = cfg.TRACK.EXEMPLAR_SIZE / s_z
        s_x = s_z * (cfg.TRACK.INSTANCE_SIZE / cfg.TRACK.EXEMPLAR_SIZE)
        x_crop = self.get_subwindow(img, self.center_pos,
                                    cfg.TRACK.INSTANCE_SIZE,
                                    round(s_x), self.channel_average)

        outputs = self.model.track(x_crop)

        score = self._convert_score(outputs['cls'])
        pred_bbox = self._convert_bbox(outputs['loc'], self.anchors)

        def change(r):
            return np.maximum(r, 1. / r)

        def sz(w, h):
            pad = (w + h) * 0.5
            return np.sqrt((w + pad) * (h + pad))

        # scale penalty
        s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) /
                     (sz(self.size[0]*scale_z, self.size[1]*scale_z)))

        # aspect ratio penalty
        r_c = change((self.size[0]/self.size[1]) /
                     (pred_bbox[2, :]/pred_bbox[3, :]))
        penalty = np.exp(-(r_c * s_c - 1) * cfg.TRACK.PENALTY_K)
        pscore = penalty * score

        # window penalty
        pscore = pscore * (1 - cfg.TRACK.WINDOW_INFLUENCE) + \
            self.window * cfg.TRACK.WINDOW_INFLUENCE
        best_idx = np.argmax(pscore)

        bbox = pred_bbox[:, best_idx] / scale_z
        lr = penalty[best_idx] * score[best_idx] * cfg.TRACK.LR

        cx = bbox[0] + self.center_pos[0]
        cy = bbox[1] + self.center_pos[1]

        # smooth bbox
        width = self.size[0] * (1 - lr) + bbox[2] * lr
        height = self.size[1] * (1 - lr) + bbox[3] * lr

        # clip boundary
        cx, cy, width, height = self._bbox_clip(cx, cy, width,
                                                height, img.shape[:2])

        # update state
        self.center_pos = np.array([cx, cy])
        self.size = np.array([width, height])

        bbox = [cx - width / 2,
                cy - height / 2,
                width,
                height]
        best_score = score[best_idx]
        # for getting the label which is used to obtain the gradient
        # bbox0: the bbox on x_crop
        bbox0 = pred_bbox[:, best_idx]
        imh, imw = cfg.TRACK.INSTANCE_SIZE, cfg.TRACK.INSTANCE_SIZE
        w = width * scale_z
        h = height * scale_z
        cx, cy = imw // 2, imh // 2
        cx = bbox0[0] + cx
        cy = bbox0[1] + cy
        bbox0 = center2corner(Center(cx, cy, w, h))

        return {
                'bbox': bbox,
                'best_score': best_score,
                'bbox0': bbox0,
                'cls_feas': outputs.get('cls_feas'),
                'loc_feas': outputs.get('loc_feas')
        }
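The scale and aspect-ratio penalties in track() damp proposals whose size or shape deviates from the previous frame. A small numeric sketch of how they combine, using hypothetical values and assuming cfg.TRACK.PENALTY_K = 0.04 as in typical SiamRPN-style configs:

    import numpy as np

    def change(r):
        return np.maximum(r, 1. / r)

    def sz(w, h):
        pad = (w + h) * 0.5
        return np.sqrt((w + pad) * (h + pad))

    # hypothetical: previous target 80x48 (in crop scale), proposal 100x40
    s_c = change(sz(100, 40) / sz(80, 48))       # ~1.077: the proposal grew a little
    r_c = change((80 / 48) / (100 / 40))         # ~1.5:   the aspect ratio changed
    penalty = np.exp(-(r_c * s_c - 1) * 0.04)    # ~0.976: score damped by ~2.4%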
Example #10
    def __call__(self, image, bbox, size, data, gray=False):
        shape = image.shape
        cv2.imwrite('511.jpg', image)  # image:[511,511,3]

        if data == 'template':
            # constant-color canvas (BGR = 87, 135, 123) used below as the background
            # for the four border-strip crops
            image1 = np.zeros((127, 127, 3))
            image1[:, :] = (87, 135, 123)

        crop_bbox = center2corner(
            Center(shape[0] // 2, shape[1] // 2, size - 1, size - 1))
        # gray augmentation
        if gray:
            image = self._gray_aug(image)

        # shift scale augmentation
        image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size)
        #cv2.imwrite('127_255.jpg', image)  # image: [127,127,3] or [255,255,3]
        crop_bbox = center2corner(
            Center(shape[0] // 2, shape[1] // 2, size - 1, size - 1))
        # color augmentation
        if self.color > np.random.random():
            image = self._color_aug(image)

        # blur augmentation
        if self.blur > np.random.random():
            image = self._blur_aug(image)

        # flip augmentation
        if self.flip and self.flip > np.random.random():
            image, bbox = self._flip_aug(image, bbox)

        if data == 'template':
            # visual bounding box
            cv2.rectangle(image, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (0, 0, 255),
                          thickness=1)  # red (BGR [0, 0, 255])
            cv2.imwrite('127_bbox.jpg', image)  # image:[255,255,3]

            image_l = image1
            image_t = image1.copy()
            image_b = image1.copy()
            image_r = image1.copy()
            image_l[int(bbox[1]):(int(bbox[3])), int(bbox[0]):(int(bbox[0]+cfg.corners.crop_size)), :] =\
                image[int(bbox[1]):(int(bbox[3])), int(bbox[0]):(int(bbox[0]+cfg.corners.crop_size)), :]
            cv2.imwrite('crop_l.jpg', image_l)  # image:[255,255,3]

            image_t[int(bbox[1]):(int(bbox[1]+cfg.corners.crop_size)), int(bbox[0]):(int(bbox[2])), :] = \
                image[int(bbox[1]):(int(bbox[1]+cfg.corners.crop_size)), int(bbox[0]):(int(bbox[2])), :]
            cv2.imwrite('crop_t.jpg', image_t)  # image:[255,255,3]

            image_b[(int(bbox[3] - cfg.corners.crop_size)):int(bbox[3]), int(bbox[0]):(int(bbox[2])), :] = \
                image[(int(bbox[3] - cfg.corners.crop_size)):int(bbox[3]), int(bbox[0]):(int(bbox[2])), :]
            cv2.imwrite('crop_b.jpg', image_b)  # image:[255,255,3]

            image_r[int(bbox[1]):(int(bbox[3])), (int(bbox[2]-cfg.corners.crop_size)):int(bbox[2]), :] = \
                image[int(bbox[1]):(int(bbox[3])), (int(bbox[2]-cfg.corners.crop_size)):int(bbox[2]), :]
            cv2.imwrite('crop_r.jpg', image_r)  # image:[255,255,3]

        if data == 'search':
            attentions = [
                np.zeros((1, cfg.atts.att_size, cfg.atts.att_size),
                         dtype=np.float32)
            ]  # 25 is the attention map size
            # tl_heats_map
            tl_heats = np.zeros(
                (1, cfg.corners.cor_size, cfg.corners.cor_size),
                dtype=np.float32)  # [1,25,25]
            br_heats = np.zeros(
                (1, cfg.corners.cor_size, cfg.corners.cor_size),
                dtype=np.float32)
            # tl_valids
            tl_regrs = np.zeros((cfg.corners.offs_max_objects, 2),
                                dtype=np.float32)
            br_regrs = np.zeros((cfg.corners.offs_max_objects, 2),
                                dtype=np.float32)
            tl_tags = np.zeros((cfg.corners.offs_max_objects), dtype=np.int64)
            br_tags = np.zeros((cfg.corners.offs_max_objects), dtype=np.int64)
            tl_valids = np.zeros(
                (1, cfg.corners.cor_size, cfg.corners.cor_size),
                dtype=np.float32)  # [1,25,25]
            br_valids = np.zeros(
                (1, cfg.corners.cor_size, cfg.corners.cor_size),
                dtype=np.float32)
            tag_masks = np.ones((cfg.corners.offs_max_objects), dtype=np.uint8)
            tag_lens = 0

            #atts_map, x_int, y_int, x_float, y_float = self.create_attention_mask(attentions, cfg.TRAIN.ratios, bbox)  # image: [255,255,3]; x_int, y_int are the target center coordinates
            atts_map = []

            xtl, ytl = bbox[0], bbox[1]  # coordinates in the 255-sized image
            xbr, ybr = bbox[2], bbox[3]

            det_height = int(ybr) - int(ytl)
            det_width = int(xbr) - int(xtl)
            det_max = max(det_height, det_width)

            min_scale = 16
            valid = det_max >= min_scale  # min_scale:16

            fxtl = (xtl * cfg.corners.Ratios)  # width_ratio: scaling factor from 255 down to 25
            fytl = (ytl * cfg.corners.Ratios)
            fxbr = (xbr * cfg.corners.Ratios)
            fybr = (ybr * cfg.corners.Ratios)

            xtl = int(fxtl)
            ytl = int(fytl)
            xbr = int(fxbr)
            ybr = int(fybr)

            width = bbox[2] - bbox[0]
            height = bbox[3] - bbox[1]

            # visual bounding box
            #cv2.rectangle(image, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), thickness=1)  # red [0,0,255]
            #cv2.imwrite('255.jpg', image) # image:[255,255,3]

            width = math.ceil(width * cfg.corners.Ratios)
            height = math.ceil(height * cfg.corners.Ratios)

            if cfg.corners.gaussian_rad == -1:
                radius = gaussian_radius((height, width),
                                         cfg.corners.gaussian_iou)
                radius = max(0, int(radius))
            else:
                radius = cfg.corners.gaussian_rad

            if valid:
                draw_gaussian(tl_heats[0], [xtl, ytl], radius)
                draw_gaussian(br_heats[0], [xbr, ybr], radius)
                tl_regrs[0, :] = [fxtl - xtl, fytl - ytl]  # fractional part of the top-left corner offset
                br_regrs[0, :] = [fxbr - xbr, fybr - ybr]
                tl_tags[0] = max(
                    0,
                    min(ytl * cfg.corners.cor_size + xtl,
                        cfg.corners.cor_size * cfg.corners.cor_size -
                        1))  # flattened coordinate index; ytl/xtl are already floored to ints
                br_tags[0] = max(
                    0,
                    min(ybr * cfg.corners.cor_size + xbr,
                        cfg.corners.cor_size * cfg.corners.cor_size - 1))
            else:
                # tl_valids/br_valids have shape [1, cor_size, cor_size], so index
                # channel 0 (draws the masked heatmap for invalid, too-small boxes)
                draw_gaussian(tl_valids[0], [xtl, ytl], radius)  # top-left masked heatmap
                draw_gaussian(br_valids[0], [xbr, ybr], radius)

            tl_valids = (tl_valids == 0).astype(np.float32)
            br_valids = (br_valids == 0).astype(np.float32)

            #tag_masks[:1] = 1

        else:
            atts_map, tl_heats, br_heats, tl_valids, br_valids, tag_masks, \
                tl_regrs, br_regrs, tl_tags, br_tags = [[] for _ in range(10)]
        '''
        if x_int:
            tag_masks = np.ones((cfg.offs.max_objects), dtype=np.uint8)
            tl_regrs  = np.zeros((cfg.offs.max_objects, 2), dtype=np.float32)  # max_objects:1
            tl_regrs[0, :] = [x_float - x_int, y_float - y_int]  # tl_regrs:[5,128,2]
            tl_tags = np.zeros((cfg.offs.max_objects), dtype=np.int64)
            tl_tags[0] = y_int * cfg.offs.off_size + x_int  # coordinate index; y_int/x_int are floored
        else:
            tl_heats, br_heats, tl_valids, br_valids, tag_masks, tl_regrs, br_regrs, tl_tags, br_tags = [], [], [], [], [], [], [], [], []
        '''
        if data == 'template':
            return image_t, image_l, image_b, image_r, bbox, atts_map, tl_heats, br_heats, tl_valids, br_valids, tag_masks, tl_regrs, br_regrs, tl_tags,\
                br_tags
        else:
            return image, bbox, atts_map, tl_heats, br_heats, tl_valids, br_valids, tag_masks, tl_regrs, br_regrs, tl_tags, \
                   br_tags
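Example #10 relies on gaussian_radius and draw_gaussian, heatmap utilities from the CornerNet family. Below is a hedged sketch of draw_gaussian under the common CornerNet-style definition (sigma and border-clipping conventions may differ in the original repo):

    import numpy as np

    def gaussian2D(shape, sigma=1):
        # dense 2D Gaussian patch of the given (odd) shape
        m, n = [(ss - 1.) / 2. for ss in shape]
        y, x = np.ogrid[-m:m + 1, -n:n + 1]
        h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
        h[h < np.finfo(h.dtype).eps * h.max()] = 0
        return h

    def draw_gaussian(heatmap, center, radius, k=1):
        # splat a Gaussian bump onto heatmap at `center`, clipped at the borders;
        # np.maximum keeps existing peaks where bumps overlap
        diameter = 2 * radius + 1
        gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6.)
        x, y = int(center[0]), int(center[1])
        height, width = heatmap.shape[0:2]
        left, right = min(x, radius), min(width - x, radius + 1)
        top, bottom = min(y, radius), min(height - y, radius + 1)
        masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
        masked_gaussian = gaussian[radius - top:radius + bottom,
                                   radius - left:radius + right]
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
        return heatmap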