Example #1
0
    def track(self, im):
        """Track the target into frame ``im`` and update the model on it.

        One search patch per configured scale is cropped around the current
        target position and passed through the network. The scale whose
        penalised response peak is highest wins; ``self.target_pos`` is moved
        by that peak's offset and ``self.target_sz`` is rescaled and clamped
        to ``[self.min_sz, self.max_sz]``. A patch cropped at the new state is
        then fed to ``self.net.update`` with ``config.interp_factor`` as the
        learning rate.

        :param im: current frame, passed unchanged to ``crop_chw``
            (presumably HxWxC as returned by ``cv2.imread`` -- confirm)
        :return: ``cxy_wh_2_rect1(self.target_pos, self.target_sz)``
        """
        for i in range(self.config.num_scale):  # crop multi-scale search region
            window_sz = self.target_sz * (self.config.scale_factor[i] * (1 + self.config.padding))
            bbox = cxy_wh_2_bbox(self.target_pos, window_sz)
            self.patch_crop[i, :] = crop_chw(im, bbox, self.config.crop_sz)

        search = torch.Tensor(self.patch_crop - self.config.net_average_image)
        if self.gpu:
            search = search.cuda()
        response = self.net(search)

        # One flattened response map per scale; take each map's maximum.
        peak, idx = torch.max(response.view(self.config.num_scale, -1), 1)
        peak = peak.data.cpu().numpy() * self.config.scale_penalties
        # Bring the indices to host memory as well: NumPy cannot consume a
        # tensor that lives on the GPU.
        idx = idx.data.cpu().numpy()
        best_scale = np.argmax(peak)
        r_max, c_max = np.unravel_index(idx[best_scale], self.config.net_input_size)

        # Peak indices past the midpoint are mapped to negative offsets.
        if r_max > self.config.net_input_size[0] / 2:
            r_max = r_max - self.config.net_input_size[0]
        if c_max > self.config.net_input_size[1] / 2:
            c_max = c_max - self.config.net_input_size[1]
        window_sz = self.target_sz * (self.config.scale_factor[best_scale] * (1 + self.config.padding))

        # Convert the offset from response cells to image units of the
        # winning search window, then shrink the window back to target size.
        self.target_pos = self.target_pos + np.array([c_max, r_max]) * window_sz / self.config.net_input_size
        self.target_sz = np.minimum(np.maximum(window_sz / (1 + self.config.padding), self.min_sz), self.max_sz)

        # model update
        window_sz = self.target_sz * (1 + self.config.padding)
        bbox = cxy_wh_2_bbox(self.target_pos, window_sz)
        patch = crop_chw(im, bbox, self.config.crop_sz)
        target = torch.Tensor(np.expand_dims(patch - self.config.net_average_image, axis=0))
        # Only move the template to CUDA when the tracker was built for the
        # GPU, mirroring the handling of the search patches above.
        if self.gpu:
            target = target.cuda()
        self.net.update(target, lr=self.config.interp_factor)

        return cxy_wh_2_rect1(self.target_pos, self.target_sz)  # 1-index
def mouse_opt(event, x, y, flags, param):
    """Mouse callback implementing two-click bounding-box labelling.

    The first left click stores the anchor corner and enters labelling mode.
    While in labelling mode, mouse movement updates the global target box and
    redraws the preview. The second left click commits the box: it bumps
    ``n_reset``, writes the record into ``res``, calls ``update_template()``
    and leaves labelling mode.

    :param event: OpenCV mouse event code
    :param x: pointer x coordinate
    :param y: pointer y coordinate
    :param flags: unused
    :param param: unused
    """
    global tmp_p_1, tmp_p_2, target_pos, target_sz, n_reset, is_visible, res, is_labeling

    def _clamp(px, py):
        # Limit the point to the frame; the half-size limits apply when the
        # image is wider than 1080.
        if image_w > 1080:
            x_max, y_max = half_image_w, half_image_h
        else:
            x_max, y_max = image_w, image_h
        return (min(max(0, px), x_max), min(max(0, py), y_max))

    def _box(first, second):
        # Centre and size of the axis-aligned box spanned by two corners.
        centre = np.array([(first[0] + second[0]) / 2.0,
                           (first[1] + second[1]) / 2.0])
        size = np.array([float(abs(second[0] - first[0])),
                         float(abs(second[1] - first[1]))])
        return centre, size

    if event == cv2.EVENT_LBUTTONDOWN:
        if not is_labeling:
            # First click: remember the anchor corner.
            tmp_p_1 = _clamp(x, y)
            print("mouse on click at (%d, %d)" % (x, y))
            is_labeling = True
            return

        # Second click: commit the box.
        tmp_p_2 = _clamp(x, y)
        n_reset += 1
        target_pos, target_sz = _box(tmp_p_1, tmp_p_2)

        # The record is written with the visibility flag as it was before
        # this click; the flag is only raised afterwards.
        if len(res) > 0:
            res[n_frame-1] = np.concatenate((cxy_wh_2_bbox(target_pos, target_sz), np.array([is_visible])))
        else:
            print("no record found, append data")
            res.append(np.concatenate((cxy_wh_2_bbox(target_pos, target_sz), np.array([is_visible]))))

        # update template
        update_template()

        is_visible = 1
        is_labeling = False
        return

    if event == cv2.EVENT_MOUSEMOVE and is_labeling:
        # Live preview while the second corner is being dragged.
        tmp_p_2 = _clamp(x, y)
        target_pos, target_sz = _box(tmp_p_1, tmp_p_2)
        is_visible = 1
        draw()
        cv2.imshow('video', im_show)
        cv2.waitKey(1)
Example #3
0
    def __init__(self, im, init_rect, config=TrackerConfig(), gpu=True):
        """Build the network and initialise the tracker on the first frame.

        :param im: first frame (array with ``shape[:2] == (rows, cols)``)
        :param init_rect: initial target rectangle, converted with
            ``rect1_2_cxy_wh``
        :param config: tracker configuration
        :param gpu: when True the network and the template tensor are moved
            to CUDA; when False everything stays on the CPU
        """
        self.gpu = gpu
        self.config = config
        self.net = DCFNet(config)
        self.net.load_param(config.feature_path)
        self.net.eval()
        if gpu:
            self.net.cuda()

        # confine results
        target_pos, target_sz = rect1_2_cxy_wh(init_rect)
        self.min_sz = np.maximum(config.min_scale_factor * target_sz, 4)
        # target_sz follows the (w, h) convention whereas im.shape[:2] is
        # (rows, cols) == (h, w); reverse it so that width is limited by the
        # image width and height by the image height.
        self.max_sz = np.minimum(im.shape[1::-1],
                                 config.max_scale_factor * target_sz)

        # crop template
        window_sz = target_sz * (1 + config.padding)
        bbox = cxy_wh_2_bbox(target_pos, window_sz)
        patch = crop_chw(im, bbox, self.config.crop_sz)

        target = torch.Tensor(
            np.expand_dims(patch - config.net_average_image, axis=0))
        # Respect the ``gpu`` flag: an unconditional .cuda() would fail on a
        # CPU-only tracker.
        if gpu:
            target = target.cuda()
        self.net.update(target)
        self.target_pos, self.target_sz = target_pos, target_sz
        self.patch_crop = np.zeros(
            (config.num_scale, patch.shape[0], patch.shape[1], patch.shape[2]),
            np.float32)  # buff
def update_template():
    """Re-initialise the tracker from the current global target box.

    Reads the module-level ``config``, ``im``, ``target_pos``, ``target_sz``
    and ``net``. It recomputes the size limits, crops a fresh template around
    the target, feeds it to ``net.update`` and resets the multi-scale crop
    buffer.

    ``min_sz``, ``max_sz`` and ``patch_crop`` are published as module
    globals. Previously they were plain locals, so the recomputed values were
    discarded as soon as the function returned.
    """
    global min_sz, max_sz, patch_crop

    # confine results
    min_sz = np.maximum(config.min_scale_factor * target_sz, 4)
    # target_sz is (w, h) while im.shape[:2] is (rows, cols) == (h, w);
    # reverse the shape so each dimension is limited by the matching extent.
    max_sz = np.minimum(im.shape[1::-1], config.max_scale_factor * target_sz)

    # crop template
    window_sz = target_sz * (1 + config.padding)
    bbox = cxy_wh_2_bbox(target_pos, window_sz)
    patch = crop_chw(im, bbox, config.crop_sz)

    target = patch - config.net_average_image
    net.update(torch.Tensor(np.expand_dims(target, axis=0)).cuda())

    # Fresh buffer for the per-scale search crops.
    patch_crop = np.zeros((config.num_scale, patch.shape[0], patch.shape[1], patch.shape[2]), np.float32)
Example #5
0
def preprocess_patch(im, target_pos, target_sz, config):
    """
    Crop and normalise the network input patch around a target box.

    :param im: input image (h, w, 3)
    :param target_pos: center coordinate of the target box, (cx, cy)
    :param target_sz: width and height of the target box, (w, h)
    :param config: TrackConfig
    :return: patch: (3, config.crop_sz, config.crop_sz)
    """
    # Pad the target size, then express the padded window as a box.
    search_box = cxy_wh_2_bbox(target_pos, target_sz * (1 + config.padding))
    # Crop the window at network resolution and subtract the average image.
    return crop_chw(im, search_box, config.crop_sz) - config.net_average_image
Example #6
0
        n_images = len(image_files)

        tic = time.time()  # time start

        target_pos, target_sz = rect1_2_cxy_wh(
            init_rect)  # OTB label is 1-indexed

        im = cv2.imread(image_files[0])  # HxWxC

        # confine results
        min_sz = np.maximum(config.min_scale_factor * target_sz, 4)
        max_sz = np.minimum(im.shape[:2], config.max_scale_factor * target_sz)

        # crop template
        window_sz = target_sz * (1 + config.padding)
        bbox = cxy_wh_2_bbox(target_pos, window_sz)
        patch = crop_chw(im, bbox, config.crop_sz)

        target = patch - config.net_average_image
        net.update(torch.Tensor(np.expand_dims(target, axis=0)).cuda())

        res = [cxy_wh_2_rect1(target_pos, target_sz)]  # save in .txt
        patch_crop = np.zeros(
            (config.num_scale, patch.shape[0], patch.shape[1], patch.shape[2]),
            np.float32)
        for f in range(1, n_images):  # track
            im = cv2.imread(image_files[f])

            for i in range(config.num_scale):  # crop multi-scale search region
                window_sz = target_sz * (config.scale_factor[i] *
                                         (1 + config.padding))
                cv2.setMouseCallback('video', mouse_opt)
                key_first_frame = cv2.waitKey(1)
                # if cv2.getWindowProperty('video', 1) == -1: # can't work in MobaXterm
                #     sys.exit()

                # press space and first frame bounding box is choosed by user, then continue the video loop
                if key_first_frame == KEY_PAUSE:
                    if target_pos is not None:
                        is_visible = 1
                        # confine results
                        min_sz = np.maximum(config.min_scale_factor * target_sz, 4)
                        max_sz = np.minimum(im.shape[:2], config.max_scale_factor * target_sz)

                        # crop template
                        window_sz = target_sz * (1 + config.padding)
                        bbox = cxy_wh_2_bbox(target_pos, window_sz)
                        patch = crop_chw(im, bbox, config.crop_sz)

                        target = patch - config.net_average_image
                        net.update(torch.Tensor(np.expand_dims(target, axis=0)).cuda())

                        patch_crop = np.zeros((config.num_scale, patch.shape[0], patch.shape[1], patch.shape[2]), np.float32)
                    else:
                        is_visible = 0
                    if len(res) > 0:
                        res = res[:n_frame]
                    res.append(np.concatenate((cxy_wh_2_bbox(target_pos, target_sz), np.array([is_visible]))))  # save in .txt
                    start_from_half = False
                    break
                # previous frame
                elif key_first_frame == KEY_LEFT: