Example #1
    def track(self, image):
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        left = int(max(round(self.position[0] - float(self.window) / 2), 0))
        top = int(max(round(self.position[1] - float(self.window) / 2), 0))

        right = int(
            min(round(self.position[0] + float(self.window) / 2),
                image.shape[1] - 1))
        bottom = int(
            min(round(self.position[1] + float(self.window) / 2),
                image.shape[0] - 1))

        # search window too small to match the template: keep the previous
        # position and report it as a top-left based rectangle
        if (right - left < self.template.shape[1]
                or bottom - top < self.template.shape[0]):
            return vot.Rectangle(self.position[0] - self.size[0] / 2,
                                 self.position[1] - self.size[1] / 2,
                                 self.size[0], self.size[1])

        cut = image[top:bottom, left:right]

        matches = cv2.matchTemplate(cut, self.template, cv2.TM_CCOEFF_NORMED)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(matches)

        self.position = (left + max_loc[0] + float(self.size[0]) / 2,
                         top + max_loc[1] + float(self.size[1]) / 2)

        return vot.Rectangle(left + max_loc[0], top + max_loc[1], self.size[0],
                             self.size[1])
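The track() methods in these examples are driven by the VOT toolkit handle loop (visible in Examples #10 and #20). A minimal sketch of that loop, assuming a hypothetical NCCTracker class wrapping the method above:

import cv2
import vot

handle = vot.VOT("rectangle")
selection = handle.region()    # initial region as a vot.Rectangle
imagefile = handle.frame()     # path of the first frame
if imagefile:
    # NCCTracker is a stand-in name for the tracker class defined above
    tracker = NCCTracker(cv2.imread(imagefile), selection)
    while True:
        imagefile = handle.frame()
        if not imagefile:
            break              # sequence finished
        region = tracker.track(cv2.imread(imagefile))
        handle.report(region)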
Example #2
    def track(self, image):

        left = max(round(self.position[0] - float(self.window) / 2), 0)
        top = max(round(self.position[1] - float(self.window) / 2), 0)

        right = min(round(self.position[0] + float(self.window) / 2),
                    image.shape[1] - 1)
        bottom = min(round(self.position[1] + float(self.window) / 2),
                     image.shape[0] - 1)

        if (right - left < self.template.shape[1]
                or bottom - top < self.template.shape[0]):
            return vot.Rectangle(self.position[0] - self.size[0] / 2,
                                 self.position[1] - self.size[1] / 2,
                                 self.size[0], self.size[1])

        cut = image[int(top):int(bottom), int(left):int(right)]

        matches = cv2.matchTemplate(cut, self.template, cv2.TM_CCOEFF_NORMED)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(matches)

        self.position = (left + max_loc[0] + float(self.size[0]) / 2,
                         top + max_loc[1] + float(self.size[1]) / 2)

        return vot.Rectangle(left + max_loc[0], top + max_loc[1], self.size[0],
                             self.size[1])
Example #3
    def track(self, image):

        newg = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # cv2.cv API was removed in OpenCV 3

        # self.bb = [left, top, self.region.width, self.region.height]

        print(self.bb)
        if (self.bb[0] < 0 or self.bb[1] < 0 or self.bb[1] >= image.shape[1]
                or self.bb[3] >= image.shape[0]):
            newbb, shift = fbtrack(self.oldg, newg, self.bb, 12, 12, 3, 12)
            self.bb = newbb
        self.oldg = newg
        self.position = (self.bb[0] + self.size[0] / 2,
                         self.bb[1] + self.size[1] / 2)
        self.window = max(self.bb[2] - self.bb[0], self.bb[3] - self.bb[1]) * 2

        left = int(max(round(self.position[0] - float(self.window) / 2), 0))
        top = int(max(round(self.position[1] - float(self.window) / 2), 0))
        right = int(
            min(round(self.position[0] + float(self.window) / 2),
                image.shape[1] - 1))
        bottom = int(
            min(round(self.position[1] + float(self.window) / 2),
                image.shape[0] - 1))

        if (self.bb[0] < 0 or self.bb[1] < 0
                or self.bb[1] >= image.shape[1] - 1
                or self.bb[3] >= image.shape[0] - 1):
            print("REACHED HERE")
            return vot.Rectangle(1, 1, self.size[0], self.size[1])

        return vot.Rectangle(self.bb[0], self.bb[1], self.bb[2] - self.bb[0],
                             self.bb[3] - self.bb[1])
Example #4
def track(tracker, processor, frame, position, size):

    x, y, w, h = position
    x1, y1 = x + w, y + h

    position = NP.array([x, y, x1, y1])
    position = NP.expand_dims(position, axis=0)
    position = NP.expand_dims(position, axis=1)

    originalSize = frame.shape[:2][::-1]  # imageSize must be (width, height)
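    # Note: SCPM presumably aliases scipy.misc, whose imresize was removed in
    # SciPy 1.3; cv2.resize or PIL.Image.resize are the usual replacements.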
    frame = SCPM.imresize(frame, size)
    frame = NP.expand_dims(frame, axis=0)
    frame = NP.expand_dims(frame, axis=1)

    position = Preprocess.scalePosition(position, originalSize)
    position = Preprocess.rescalePosition(position, size)

    frame, position = processor.preprocess(frame, position)
    position = tracker.forward([frame], position[:, 0, :])

    x, y, x1, y1 = position[0, 0, :]

    #logging.info("Tracker prediction: [%s, %s, %s, %s]", x, y, x1, y1)

    return vot.Rectangle(x, y, x1 - x, y1 - y)
Example #5
    def track(self, image):

        p1 = (int(self.bbox[0]), int(self.bbox[1]))
        p2 = (int(self.bbox[0] + self.bbox[2]),
              int(self.bbox[1] + self.bbox[3]))

        vis = image.copy()
        self.gray = cv2.cvtColor(vis, cv2.COLOR_BGR2GRAY)
        h, w, _ = vis.shape
        flow = np.zeros((h, w, 1), np.float32)
        flow = cv2.calcOpticalFlowFarneback(self.prevgray, self.gray, flow,
                                            0.5, 5, 15, 3, 5, 1,
                                            cv2.OPTFLOW_FARNEBACK_GAUSSIAN)
        newflow = flow[int(p1[1]):int(p2[1]), int(p1[0]):int(p2[0]), :]
        fx = newflow[..., 0]
        fy = newflow[..., 1]
        xavg = np.average(fx)
        yavg = np.average(fy)
        xsum = 0
        count = 0
        for x in np.nditer(fx):
            if xavg < 0:
                if x <= -2.0:
                    xsum += x
                    count += 1
            else:
                if x >= 2.0:
                    xsum += x
                    count += 1
        if count > 0:
            xsum /= count
        deltax = xsum

        ysum = 0
        count = 0
        for y in np.nditer(fy):
            if yavg < 0:
                if y <= -2.0:
                    ysum += y
                    count += 1
            else:
                if y >= 2.0:
                    ysum += y
                    count += 1
        if count > 0:
            ysum /= count

        deltay = ysum
        self.bbox = (int(self.bbox[0] + deltax), int(self.bbox[1] + deltay),
                     int(self.bbox[2]), int(self.bbox[3]))

        p1 = (int(self.bbox[0]), int(self.bbox[1]))
        p2 = (int(self.bbox[0] + self.bbox[2]),
              int(self.bbox[1] + self.bbox[3]))
        self.prevgray = self.gray

        return vot.Rectangle(int(self.bbox[0]), int(self.bbox[1]),
                             int(self.bbox[2]), int(self.bbox[3]))
Example #6
    def track(self, image):
        test_patch = utils.get_subwindow(image, self.pos, self.sz, scale_factor=self.currentScaleFactor)
        hog_feature_t = pyhog.features_pedro(test_patch / 255., 1)
        hog_feature_t = np.lib.pad(hog_feature_t, ((1, 1), (1, 1), (0, 0)), 'edge')
        xt = np.multiply(hog_feature_t, self.cos_window[:, :, None])
        xtf = np.fft.fft2(xt, axes=(0, 1))
        response = np.real(np.fft.ifft2(np.divide(np.sum(np.multiply(self.x_num, xtf),
                                                         axis=2), (self.x_den + self.lamda))))

        v_centre, h_centre = np.unravel_index(response.argmax(), response.shape)
        vert_delta, horiz_delta = \
            [(v_centre - response.shape[0] / 2) * self.currentScaleFactor,
             (h_centre - response.shape[1] / 2) * self.currentScaleFactor]

        self.pos = [self.pos[0] + vert_delta, self.pos[1] + horiz_delta]

        st = utils.get_scale_subwindow(image, self.pos, self.base_target_size,
                                 self.currentScaleFactor * self.scaleSizeFactors, self.scale_window,
                                 self.scale_model_sz)
        stf = np.fft.fftn(st, axes=[0])

        scale_response = np.real(np.fft.ifftn(np.sum(np.divide(np.multiply(self.s_num, stf),
                                                               (self.s_den[:, None] + self.lamda_scale)), axis=1)))
        recovered_scale = np.argmax(scale_response)
        self.currentScaleFactor = self.currentScaleFactor * self.scaleFactors[recovered_scale]

        if self.currentScaleFactor < self.min_scale_factor:
            self.currentScaleFactor = self.min_scale_factor
        elif self.currentScaleFactor > self.max_scale_factor:
            self.currentScaleFactor = self.max_scale_factor

        # update
        update_patch = utils.get_subwindow(image, self.pos, self.sz, scale_factor=self.currentScaleFactor)
        hog_feature_l = pyhog.features_pedro(update_patch / 255., 1)
        hog_feature_l = np.lib.pad(hog_feature_l, ((1, 1), (1, 1), (0, 0)), 'edge')
        xl = np.multiply(hog_feature_l, self.cos_window[:, :, None])
        xlf = np.fft.fft2(xl, axes=(0, 1))
        new_x_num = np.multiply(self.yf[:, :, None], np.conj(xlf))
        new_x_den = np.real(np.sum(np.multiply(xlf, np.conj(xlf)), axis=2))

        sl = utils.get_scale_subwindow(image, self.pos, self.base_target_size,
                                       self.currentScaleFactor * self.scaleSizeFactors, self.scale_window,
                                       self.scale_model_sz)
        slf = np.fft.fftn(sl, axes=[0])
        new_s_num = np.multiply(self.ysf[:, None], np.conj(slf))
        new_s_den = np.real(np.sum(np.multiply(slf, np.conj(slf)), axis=1))

        self.x_num = (1 - self.interp_factor) * self.x_num + self.interp_factor * new_x_num
        self.x_den = (1 - self.interp_factor) * self.x_den + self.interp_factor * new_x_den
        self.s_num = (1 - self.interp_factor) * self.s_num + self.interp_factor * new_s_num
        self.s_den = (1 - self.interp_factor) * self.s_den + self.interp_factor * new_s_den

        self.target_size = self.base_target_size * self.currentScaleFactor

        return vot.Rectangle(self.pos[1] - self.target_size[1] / 2,
                             self.pos[0] - self.target_size[0] / 2,
                             self.target_size[1],
                             self.target_size[0]
                             )
Example #7
    def track(self, image):
        ok, bbox = self.tracker.update(image)
        if ok:
            val = 0.5
        else:
            val = 0.05

        return vot.Rectangle(bbox[0], bbox[1], bbox[2], bbox[3]), val
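This wrapper assumes self.tracker was created from OpenCV's tracking module. A hedged sketch of one possible initializer (the KCF choice and the class name are illustrative assumptions, not taken from the example):

import cv2

class CVTrackerWrapper(object):
    def __init__(self, image, region):
        # any tracker from the OpenCV contrib tracking module would fit here
        self.tracker = cv2.TrackerKCF_create()
        self.tracker.init(image, (int(region.x), int(region.y),
                                  int(region.width), int(region.height)))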
Example #8
    def track(self, imagefile, i):
        image = self._tracker._read_image(imagefile)
        res_rect = self._tracker.track(image)
        print("res_rect", res_rect)
        tracked_bb = np.array(res_rect).astype(int)
        print("tracked_bb ", tracked_bb)
        return vot.Rectangle(res_rect[0], res_rect[1], res_rect[2],
                             res_rect[3])
Example #9
    def track(self, image):
        left = int(max(round(self.position[0] - float(self.window) / 2), 0))
        top = int(max(round(self.position[1] - float(self.window) / 2), 0))

        right = int(min(round(self.position[0] + float(self.window) / 2), image.shape[1] - 1))
        bottom = int(min(round(self.position[1] + float(self.window) / 2), image.shape[0] - 1))

        if right - left < self.size[1] or bottom - top < self.size[0]:
            return vot.Rectangle(self.position[0] - self.size[0] / 2,
                                 self.position[1] - self.size[1] / 2,
                                 self.size[0], self.size[1])

        img = image[top:bottom, left:right]
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        dst = cv2.calcBackProject([hsv], [0], self.roi_hist, [0, 180], 1)
        ret, track_window = cv2.meanShift(
            dst, (int(self.bb[0] - left), int(self.bb[1] - top),
                  int(self.size[0]), int(self.size[1])), self.term_crit)
        self.position = (left + track_window[0] + int(track_window[2] / 2),
                         top + track_window[1] + int(track_window[3] / 2))
        self.bb = [left + track_window[0], top + track_window[1],
                   track_window[2], track_window[3]]
        self.size = (track_window[2], track_window[3])
        return vot.Rectangle(left + track_window[0], top + track_window[1], track_window[2], track_window[3])
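The mean-shift example relies on self.roi_hist and self.term_crit prepared at initialization. One common way to build them (a sketch following the standard OpenCV back-projection recipe, not the example's actual __init__):

import cv2
import numpy as np

def init_meanshift_state(image, x, y, w, h):
    roi = image[int(y):int(y + h), int(x):int(x + w)]
    hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
    # suppress dark / low-saturation pixels before building the hue histogram
    mask = cv2.inRange(hsv_roi, np.array((0., 60., 32.)),
                       np.array((180., 255., 255.)))
    roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180])
    cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)
    term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)
    return roi_hist, term_crit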
Example #10
def run_vot_exp(tracker_name, para_name, vis=False):

    torch.set_num_threads(1)
    save_root = os.path.join(
        '/data/sda/v-yanbi/iccv21/LittleBoy/vot20_lt_debug', para_name)
    if vis and (not os.path.exists(save_root)):
        os.makedirs(save_root)
    tracker = stark_vot20_lt(tracker_name=tracker_name, para_name=para_name)
    handle = vot.VOT("rectangle")
    selection = handle.region()
    imagefile = handle.frame()
    init_box = [selection.x, selection.y, selection.width, selection.height]
    if not imagefile:
        sys.exit(0)
    if vis:
        '''for vis'''
        seq_name = imagefile.split('/')[-3]
        save_v_dir = os.path.join(save_root, seq_name)
        if not os.path.exists(save_v_dir):
            os.mkdir(save_v_dir)
        cur_time = int(time.time() % 10000)
        save_dir = os.path.join(save_v_dir, str(cur_time))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

    image = cv2.cvtColor(cv2.imread(imagefile), cv2.COLOR_BGR2RGB)  # cv2 loads BGR; convert to RGB
    tracker.initialize(image, init_box)

    while True:
        imagefile = handle.frame()
        if not imagefile:
            break
        image = cv2.cvtColor(cv2.imread(imagefile), cv2.COLOR_BGR2RGB)  # cv2 loads BGR; convert to RGB
        b1, conf = tracker.track(image)
        x1, y1, w, h = b1
        handle.report(vot.Rectangle(x1, y1, w, h), conf)
        if vis:
            '''Visualization'''
            # original image
            image_ori = image[:, :, ::-1].copy()  # RGB --> BGR
            image_name = imagefile.split('/')[-1]
            save_path = os.path.join(save_dir, image_name)
            cv2.imwrite(save_path, image_ori)
            # tracker box
            image_b = image_ori.copy()
            cv2.rectangle(image_b, (int(b1[0]), int(b1[1])),
                          (int(b1[0] + b1[2]), int(b1[1] + b1[3])),
                          (0, 0, 255), 2)
            image_b_name = image_name.replace('.jpg', '_bbox.jpg')
            save_path = os.path.join(save_dir, image_b_name)
            cv2.imwrite(save_path, image_b)
Example #11
    def __init__(self, image, mask):
        region = self._rect_from_mask(mask)
        region = vot.Rectangle(region[0], region[1], region[2], region[3])
        self.window = max(region.width, region.height) * 2

        left = max(region.x, 0)
        top = max(region.y, 0)

        right = min(region.x + region.width, image.shape[1] - 1)
        bottom = min(region.y + region.height, image.shape[0] - 1)

        self.template = image[int(top):int(bottom), int(left):int(right)]
        self.position = (region.x + region.width / 2,
                         region.y + region.height / 2)
        self.size = (region.width, region.height)
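The initializer above calls a _rect_from_mask helper that is not shown. A plausible sketch (an assumption, not the original code): reduce a binary mask to an (x, y, width, height) bounding box.

import numpy as np

def _rect_from_mask(mask):
    ys, xs = np.nonzero(mask)            # coordinates of non-zero pixels
    x0, y0 = xs.min(), ys.min()
    return (float(x0), float(y0),
            float(xs.max() - x0 + 1), float(ys.max() - y0 + 1))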
Example #12
    def track(self, image):
        index = 0
        for scale_factor in self.scale_factors:
            test = utils.get_subwindow(image, self.pos, self.sz,
                                       self.scaling * scale_factor)
            test = transform.resize(test, (224, 224))
            test = (test - imgMean) / imgStd
            test = np.transpose(test, (2, 0, 1))
            feature = model(
                Variable(torch.from_numpy(test[None, :, :, :]).float()))
            feature = feature.data[0].numpy().transpose((1, 2, 0))
            xt = ndimage.zoom(
                feature,
                (float(self.cos_window.shape[0]) / feature.shape[0],
                 float(self.cos_window.shape[1]) / feature.shape[1], 1),
                order=1)
            xt = np.multiply(xt, self.cos_window[:, :, None])
            xtf = np.fft.fft2(xt, axes=(0, 1))
            response = np.real(
                np.fft.ifft2(
                    np.divide(np.sum(np.multiply(self.x_num, xtf), axis=2),
                              (self.x_den + self.lamda))))
            # keep the scale whose response peak is highest (compare peak
            # values rather than argmax indices, and avoid shadowing max())
            if index == 0 or response.max() > best_response:
                best_response = response.max()
                response_final = response
                scale_factor_final = scale_factor
            index += 1

        self.scaling *= scale_factor_final
        v_centre, h_centre = np.unravel_index(response_final.argmax(),
                                              response_final.shape)
        vert_delta, horiz_delta = \
                [(v_centre - response_final.shape[0] / 2) * self.scaling * self.cell_size,
                 (h_centre - response_final.shape[1] / 2) * self.scaling * self.cell_size]

        self.pos = [self.pos[0] + vert_delta, self.pos[1] + horiz_delta] - \
              self.target_size * self.scaling / 2.

        return vot.Rectangle(self.pos[1], self.pos[0],
                             self.target_size[1] * self.scaling,
                             self.target_size[0] * self.scaling)
Example #13
    def track(self, image):
        # ---------------------------------------track--------------------------------- #
        test_patch = utils.get_subwindow(image, self.pos, self.sz)

        hog_feature_t = pyhog.features_pedro(test_patch / 255., 1)
        hog_feature_t = np.lib.pad(hog_feature_t, ((1, 1), (1, 1), (0, 0)),
                                   'edge')
        xt = np.multiply(hog_feature_t, self.cos_window[:, :, None])
        xtf = np.fft.fft2(xt, axes=(0, 1))
        # compute the response, summing directly over the feature channels
        response = np.real(
            np.fft.ifft2(
                np.divide(np.sum(np.multiply(self.x_num, xtf), axis=2),
                          (self.x_den + self.lamda))))
        # locate the maximum of the response
        v_centre, h_centre = np.unravel_index(response.argmax(),
                                              response.shape)
        vert_delta, horiz_delta = \
            [(v_centre - response.shape[0] / 2),
             (h_centre - response.shape[1] / 2)]
        # new position
        self.pos = [self.pos[0] + vert_delta, self.pos[1] + horiz_delta]

        # ---------------------------------------update--------------------------------- #
        update_patch = utils.get_subwindow(image, self.pos, self.sz)
        hog_feature_l = pyhog.features_pedro(update_patch / 255., 1)
        hog_feature_l = np.lib.pad(hog_feature_l, ((1, 1), (1, 1), (0, 0)),
                                   'edge')
        xl = np.multiply(hog_feature_l, self.cos_window[:, :, None])
        xlf = np.fft.fft2(xl, axes=(0, 1))
        # update the position filter numerator/denominator
        new_x_num = np.multiply(self.yf[:, :, None], np.conj(xlf))
        new_x_den = np.real(np.sum(np.multiply(xlf, np.conj(xlf)), axis=2))

        # filter learning: running-average update
        self.x_num = (1 - self.interp_factor) * self.x_num + self.interp_factor * new_x_num
        self.x_den = (1 - self.interp_factor) * self.x_den + self.interp_factor * new_x_den
        self.target_size = self.base_target_size

        return vot.Rectangle(self.pos[1] - self.target_size[1] / 2,
                             self.pos[0] - self.target_size[0] / 2,
                             self.target_size[1], self.target_size[0])
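Most of the correlation-filter examples here multiply the features by self.cos_window before the FFT to reduce boundary effects. A common construction (an assumption about what the original utils code does) is the outer product of two Hann windows:

import numpy as np

def make_cos_window(height, width):
    # 2-D Hann (cosine) window tapering the patch toward its borders
    return np.outer(np.hanning(height), np.hanning(width))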
Example #14
    def track(self, image):

        # tracking(image,target_position,window_size,num,den,cos_window,scalefactor)
        self.pos, self.update_flag = tracking(image, self.pos, self.pos,
                                              self.sz, self.resnet_num,
                                              self.resnet_den, self.cos_window,
                                              self.current_scale_factor,
                                              self.update_flag, self.cell_size,
                                              self.lam)

        # scale_variation(image,target_position,target_size,scale_num,scale_den,scale_factor,ScaleFactors,scale_window,model_size)
        self.current_scale_factor = scale_variation(
            image, self.pos, self.target_size, self.s_num, self.s_den,
            self.current_scale_factor, self.scaleFactors, self.scale_window,
            self.scale_model_sz, self.lam)

        if self.current_scale_factor < self.min_scale_factor:
            self.current_scale_factor = self.min_scale_factor
        elif self.current_scale_factor > self.max_scale_factor:
            self.current_scale_factor = self.max_scale_factor

        # update
        # update_position_filter(image, target_position, window_size, scale_factor, position_yf, position_cos_window,
        #                      position_num, position_den, update_rate)
        if self.update_flag:
            self.resnet_num, self.resnet_den = update_position_filter(
                image, self.pos, self.sz, self.current_scale_factor, self.yf,
                self.cos_window, self.resnet_num, self.resnet_den,
                self.interp_factor)

        # update_scale_filter(image,target_position,target_size,scale_num,scale_den,scale_factor,ScaleFactors,scale_window,model_size,scale_ysf,update_rate)
        self.s_num, self.s_den = update_scale_filter(
            image, self.pos, self.target_size, self.s_num, self.s_den,
            self.current_scale_factor, self.scaleFactors, self.scale_window,
            self.scale_model_sz, self.ysf, self.interp_factor_scale)

        self.final_size = self.target_size * self.current_scale_factor

        return vot.Rectangle(self.pos[1] - self.final_size[1] / 2,
                             self.pos[0] - self.final_size[0] / 2,
                             self.final_size[1], self.final_size[0])
Example #15
    def track(self, image):

        test_crop = utils.get_subwindow(image, self.pos, self.patch_size)
        z = np.multiply(test_crop - test_crop.mean(), self.cos_window[:, :, None])
        zf = np.fft.fft2(z, axes=(0, 1))
        k_test = utils.dense_gauss_kernel(self.feature_bandwidth_sigma, self.xf, self.x, zf, z)
        kf_test = np.fft.fft2(k_test, axes=(0, 1))
        response = np.real(np.fft.ifft2(np.multiply(self.alphaf, kf_test)))

        # Max position in response map
        v_centre, h_centre = np.unravel_index(response.argmax(), response.shape)
        vert_delta, horiz_delta = [v_centre - response.shape[0] / 2,
                                   h_centre - response.shape[1] / 2]

        # Predicted position
        self.pos = [self.pos[0] + vert_delta, self.pos[1] + horiz_delta]
        return vot.Rectangle(self.pos[1] - self.target_size[1] / 2,
                             self.pos[0] - self.target_size[0] / 2,
                             self.target_size[1],
                             self.target_size[0]
                             )
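The utils.dense_gauss_kernel call above is not shown. A sketch in the spirit of CSK/KCF, matching the call signature used here (the original may differ in normalization details):

import numpy as np

def dense_gauss_kernel(sigma, xf, x, zf, z):
    xx = np.dot(x.flatten(), x.flatten())    # ||x||^2
    zz = np.dot(z.flatten(), z.flatten())    # ||z||^2
    # cross-correlation of x and z evaluated for all shifts via the FFT
    xy = np.real(np.fft.ifft2(np.sum(np.multiply(zf, np.conj(xf)), axis=2)))
    return np.exp(-1. / sigma ** 2 *
                  np.maximum(0., (xx + zz - 2. * xy) / x.size))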
Example #16
    def track(self, image):
        ft_target = self.target / self.stride
        anchors = generate_anchors(ft_target)

        search_window = generate_search_window(np.shape(image), self.target,
                                               self.window_scale).astype(int)
        im_input = image[search_window[0]:search_window[2],
                         search_window[1]:search_window[3], :]
        shape_in = np.round(np.shape(im_input)[:2] *
                            self.scale_factor).astype(int)
        im_input = cv2.resize(im_input,
                              dsize=(shape_in[1], shape_in[0]),
                              interpolation=cv2.INTER_CUBIC)
        im_input = np.expand_dims(im_input, axis=0)
        features = np.squeeze(self.model.predict(im_input), axis=0)

        ft_shape = np.shape(features)
        pad = FLAGS.template_size // 2

        pad_shape = np.array([
            ft_shape[0] + self.template_size, ft_shape[1] + self.template_size,
            ft_shape[2]
        ])
        ft_pad = np.zeros(pad_shape)
        ft_pad[pad:ft_shape[0] + pad, pad:ft_shape[1] + pad, :] = features
        features = ft_pad

        sim_map = self.compute_distance_map(features)
        sim_map_mean = sim_map.mean()
        orig_sim_map = sim_map.copy()

        if self.use_gauss:
            gauss_filter = gauss_kernel(np.shape(sim_map), FLAGS.gauss_sigma)
            d = np.round(
                (to_yxhw(ft_target)[:2] - to_yxhw(search_window // 8)[:2]) *
                self.scale_factor).astype(int)
            gauss_filter = shift(gauss_filter, d, cval=0)
            sim_map = np.multiply(gauss_filter, sim_map)

        new_target, max_score, max_slice = self.compute_target(
            anchors, sim_map, features, ft_target)

        strength = max_score**2 / sim_map_mean
        self.scores.insert(0, strength)
        if len(self.scores) > self.strength_queue_length:
            self.scores.pop()

        confidence = np.mean(self.scores) / strength

        if confidence > FLAGS.bad_detection_thresh:
            t_target = to_yxhw(ft_target)
            t_target[:2] = to_yxhw(new_target)[:2]
            ft_target = to_y1x1y2x2(t_target)
            ft_target += np.tile(search_window[:2] // 8, 2)
            self.window_scale = FLAGS.search_window_scale * 2

            self.use_gauss = False
        else:
            ft_target = new_target
            ft_target += np.tile(search_window[:2] // 8, 2)
            self.window_scale = FLAGS.search_window_scale

            self.use_gauss = True

        if (confidence < FLAGS.good_detection_thresh
                and np.shape(max_slice)[:2] == (self.template_size,
                                                self.template_size)):
            self.template = self.template * (
                1 - FLAGS.update_alpha) + max_slice * FLAGS.update_alpha

            # if confidence < FLAGS.good_detection_thresh / 2:
            #     train(frame, ft_target, orig_sim_map, search_window, epochs=FLAGS.tuning_epochs, learning_rate=FLAGS.tuning_learning_rate)

        self.target = np.multiply(ft_target, self.stride)

        target_w = self.target[3] - self.target[1]
        target_h = self.target[2] - self.target[0]
        pad_amount = (target_w + target_h) / (self.padding_divider + 2)

        self.target = to_yxhw(self.target)
        self.target[2] -= pad_amount
        self.target[3] -= pad_amount

        self.scale_factor = (self.template_size / target_h / self.stride,
                             FLAGS.template_size / target_w / self.stride)

        return vot.Rectangle(self.target[1],
                             self.target[0], int(self.target[3]),
                             int(self.target[2])), confidence
Example #17
    def track(self, image):

        return vot.Rectangle(self.position[0] - self.size[0] / 2,
                             self.position[1] - self.size[1] / 2, self.size[0],
                             self.size[1])
Example #18
    def track(self, image, i):

        # Estimate target bbox
        opts['n_samples'] = 512
        samples = gen_samples(self.sample_generator, self.target_bbox,
                              opts['n_samples'])
        sample_scores = forward_samples(self.model,
                                        image,
                                        samples,
                                        out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu().numpy()
        target_score = top_scores.mean()
        self.target_bbox = samples[top_idx].mean(axis=0)

        success = target_score > opts['success_thr']

        # Expand search area at failure

        if success:
            self.sample_generator.set_trans_f(opts['trans_f'])
        else:
            self.sample_generator.set_trans_f(opts['trans_f_expand'])

        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            bbreg_feats = forward_samples(self.model, image, bbreg_samples)
            bbreg_samples = self.bbreg.predict(bbreg_feats, bbreg_samples)
            self.bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            self.bbreg_bbox = self.target_bbox

        # Copy previous result at failure
        if not success:
            self.target_bbox = self.result[-1]
            self.bbreg_bbox = self.result_bb[-1]

        # Save result
        self.result.append(self.target_bbox)
        self.result_bb.append(self.bbreg_bbox)

        # Data collect
        if success:

            # Draw pos/neg samples
            pos_examples = gen_samples(self.pos_generator, self.target_bbox,
                                       opts['n_pos_update'],
                                       opts['overlap_pos_update'])
            if len(pos_examples) == 0:
                pos_examples = np.tile(self.target_bbox[None, :],
                                       (opts['n_pos_init'], 1))
            neg_examples = gen_samples(self.neg_generator, self.target_bbox,
                                       opts['n_neg_update'],
                                       opts['overlap_neg_update'])

            # Extract pos/neg features
            pos_feats = forward_samples(self.model, image, pos_examples)
            neg_feats = forward_samples(self.model, image, neg_examples)
            self.pos_feats_all.append(pos_feats)
            self.neg_feats_all.append(neg_feats)
            if len(self.pos_feats_all) > opts['n_frames_long']:
                del self.pos_feats_all[0]
            if len(self.neg_feats_all) > opts['n_frames_short']:
                del self.neg_feats_all[0]
            print('====================================')
            print('Distractor suppression!')
            print('====================================')
            ds_samples = gen_samples(self.ds_generator, self.target_bbox,
                                     opts['n_samples'])
            ds_sample_scores = forward_samples(self.model,
                                               image,
                                               ds_samples,
                                               out_layer='fc6')
            ds_idx = ds_sample_scores[:, 1].gt(0.0).nonzero().cpu().numpy()
            if len(ds_idx) > 0:
                print('Distractor suppression!')
                #ipdb.set_trace()
                for ds_i, ds_id in enumerate(ds_idx):
                    if ds_i == 0:
                        ds_neg_examples = gen_samples(
                            self.pos_generator, ds_samples[ds_id[0]],
                            opts['n_pos_update'], opts['overlap_pos_update'])
                    else:
                        ds_neg_examples = np.concatenate(
                            (ds_neg_examples,
                             gen_samples(self.pos_generator,
                                         ds_samples[ds_id[0]],
                                         opts['n_pos_update'],
                                         opts['overlap_pos_update'])),
                            axis=0)
                ds_neg_feats = forward_samples(self.model, image,
                                               ds_neg_examples)
                self.neg_feats_all.append(ds_neg_feats)
                nframes = min(opts['n_frames_short'], len(self.pos_feats_all))
                pos_data = torch.stack(self.pos_feats_all[-nframes:],
                                       0).view(-1, self.feat_dim)
                neg_data = stackList(self.neg_feats_all).view(
                    -1, self.feat_dim)
                train(self.model, self.criterion, self.update_optimizer,
                      pos_data, neg_data, opts['maxiter_update'])
        # Short term update
        if not success:

            nframes = min(opts['n_frames_short'], len(self.pos_feats_all))
            pos_data = stackList(self.pos_feats_all[-nframes:])
            neg_data = stackList(self.neg_feats_all)
            train(self.model, self.criterion, self.update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:

            pos_data = stackList(self.pos_feats_all)
            neg_data = stackList(self.neg_feats_all)
            train(self.model, self.criterion, self.update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'])
        return vot.Rectangle(self.result_bb[-1][0], self.result_bb[-1][1],
                             self.result_bb[-1][2], self.result_bb[-1][3])
Example #19
    def track(self, image):
        test = utils.get_subwindow(image, self.pos, self.sz,
                                   self.current_scale_factor)
        test = transform.resize(test, (224, 224))
        test = (test - imgMean) / imgStd
        test = np.transpose(test, (2, 0, 1))
        feature_ensemble = model(
            Variable(torch.from_numpy(test[None, :, :, :]).float()).cuda())

        for i in range(numlayers):

            feature = feature_ensemble[i].data[0].cpu().numpy().transpose(
                (1, 2, 0))
            xt = ndimage.zoom(
                feature,
                (float(self.cos_window.shape[0]) / feature.shape[0],
                 float(self.cos_window.shape[1]) / feature.shape[1], 1),
                order=1)
            xt = np.multiply(xt, self.cos_window[:, :, None])
            xtf = np.fft.fft2(xt, axes=(0, 1))
            response = np.real(
                np.fft.ifft2(
                    np.divide(np.sum(np.multiply(self.x_num[i], xtf), axis=2),
                              (self.x_den[i] + self.lamda)))) * layerweights[i]
            if i == 0:
                response_final = response
            else:
                response_final = np.add(response_final, response)

        v_centre, h_centre = np.unravel_index(response_final.argmax(),
                                              response_final.shape)
        vert_delta, horiz_delta = \
            [(v_centre - response_final.shape[0] / 2) * self.current_scale_factor * self.cell_size,
             (h_centre - response_final.shape[1] / 2) * self.current_scale_factor * self.cell_size]

        self.pos = [self.pos[0] + vert_delta, self.pos[1] + horiz_delta]

        st = utils.get_scale_subwindow(
            image, self.pos, self.target_size,
            self.current_scale_factor * self.scaleFactors, self.scale_window,
            self.scale_model_sz)
        stf = np.fft.fftn(st, axes=[0])

        scale_response = np.real(
            np.fft.ifftn(
                np.sum(np.divide(np.multiply(self.s_num, stf),
                                 (self.s_den[:, None] + self.lamda)),
                       axis=1)))
        recovered_scale = np.argmax(scale_response)
        self.current_scale_factor = (self.current_scale_factor *
                                     self.scaleFactors[recovered_scale])

        if self.current_scale_factor < self.min_scale_factor:
            self.current_scale_factor = self.min_scale_factor
        elif self.current_scale_factor > self.max_scale_factor:
            self.current_scale_factor = self.max_scale_factor

        # update

        update_patch = utils.get_subwindow(
            image, self.pos, self.sz, scale_factor=self.current_scale_factor)

        update_patch = transform.resize(update_patch, (224, 224))
        update_patch = (update_patch - imgMean) / imgStd
        update_patch = np.transpose(update_patch, (2, 0, 1))
        feature_ensemble = model(
            Variable(torch.from_numpy(
                update_patch[None, :, :, :]).float()).cuda())

        for i in range(numlayers):
            feature = feature_ensemble[i].data[0].cpu().numpy().transpose(
                (1, 2, 0))
            xl = ndimage.zoom(
                feature,
                (float(self.cos_window.shape[0]) / feature.shape[0],
                 float(self.cos_window.shape[1]) / feature.shape[1], 1),
                order=1)
            xl = np.multiply(xl, self.cos_window[:, :, None])
            xlf = np.fft.fft2(xl, axes=(0, 1))
            self.x_num[i] = ((1 - self.interp_factor) * self.x_num[i] +
                             self.interp_factor *
                             np.multiply(self.yf[:, :, None], np.conj(xlf)))
            self.x_den[i] = ((1 - self.interp_factor) * self.x_den[i] +
                             self.interp_factor *
                             np.real(np.sum(np.multiply(xlf, np.conj(xlf)), axis=2)))

        sl = utils.get_scale_subwindow(
            image, self.pos, self.target_size,
            self.current_scale_factor * self.scaleFactors, self.scale_window,
            self.scale_model_sz)
        slf = np.fft.fftn(sl, axes=[0])
        new_s_num = np.multiply(self.ysf[:, None], np.conj(slf))
        new_s_den = np.real(np.sum(np.multiply(slf, np.conj(slf)), axis=1))
        self.s_num = (1 - self.interp_factor) * self.s_num + self.interp_factor * new_s_num
        self.s_den = (1 - self.interp_factor) * self.s_den + self.interp_factor * new_s_den

        self.final_size = self.target_size * self.current_scale_factor

        return vot.Rectangle(self.pos[1] - self.final_size[1] / 2,
                             self.pos[0] - self.final_size[0] / 2,
                             self.final_size[1], self.final_size[0])
Example #20
        # Extract pos/neg features
        pos_feats = forward_samples(model, image, pos_examples)
        neg_feats = forward_samples(model, image, neg_examples)
        pos_feats_all.append(pos_feats)
        neg_feats_all.append(neg_feats)
        if len(pos_feats_all) > opts['n_frames_long']:
            del pos_feats_all[0]
        if len(neg_feats_all) > opts['n_frames_short']:
            del neg_feats_all[0]

    # Short term update
    if not success:

        nframes = min(opts['n_frames_short'], len(pos_feats_all))
        pos_data = stackList(pos_feats_all[-nframes:])
        neg_data = stackList(neg_feats_all)
        train(model, criterion, update_optimizer, pos_data, neg_data,
              opts['maxiter_update'])

    # Long term update
    elif i % opts['long_interval'] == 0:
        pos_data = stackList(pos_feats_all)
        neg_data = stackList(neg_feats_all)
        train(model, criterion, update_optimizer, pos_data, neg_data,
              opts['maxiter_update'])

    region = vot.Rectangle(target_bbox[0] + 1, target_bbox[1] + 1,
                           target_bbox[2], target_bbox[3])

    handle.report(region)
Example #21
    def track(self, imagepath):
        #Calculate the scaled params; the scale factors are computed in __init__
        scaled_exemplar = self.z_sz * self.scale_factors
        scaled_search_area = self.x_sz * self.scale_factors
        scaled_target_w = self.target_w * self.scale_factors
        scaled_target_h = self.target_h * self.scale_factors

        #Calculate Siamese scores wrt template
        image_, scores_, scores_original_, templates_x_, templates_z_ = self.CalcSiamScores(
            imagepath, scaled_search_area)
        self.siam_ret['image_'] = image_
        # self.siam_ret['scores_'] = self.NormScoreVector(np.squeeze(scores_))
        self.siam_ret['scores_original'] = self.NormScoreVector(
            np.squeeze(scores_original_))
        self.siam_ret['scores'] = np.squeeze(scores_)
        # self.siam_ret['scores_original'] = np.squeeze(scores_original_)
        self.siam_ret['templates_x_'] = templates_x_
        self.siam_ret['templates_z_'] = templates_z_

        #Calculate Color scores wrt template
        scores_ = self.CalcColorScores(imagepath, scaled_search_area)
        self.color_ret['scores_'] = self.NormScoreVector(np.squeeze(scores_))

        #Calculate weighted average of the scores
        alpha = 0.9
        scores_ = alpha * self.siam_ret['scores_original'] + (
            1.0 - alpha) * self.color_ret['scores_']
        scores_ = np.moveaxis(scores_, 0, -1)
        scores_ = cv2.resize(scores_,
                             dsize=(257, 257),
                             interpolation=cv2.INTER_CUBIC)
        scores_ = np.moveaxis(scores_, 2, 0)

        # penalize change of scale
        scores_[0, :, :] = self.hp.scale_penalty * scores_[0, :, :]
        scores_[2, :, :] = self.hp.scale_penalty * scores_[2, :, :]
        # find scale with highest peak (after penalty)
        new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
        # update scaled sizes
        self.x_sz = (
            1 - self.hp.scale_lr
        ) * self.x_sz + self.hp.scale_lr * scaled_search_area[new_scale_id]
        self.target_w = (
            1 - self.hp.scale_lr
        ) * self.target_w + self.hp.scale_lr * scaled_target_w[new_scale_id]
        self.target_h = (
            1 - self.hp.scale_lr
        ) * self.target_h + self.hp.scale_lr * scaled_target_h[new_scale_id]
        # select response with new_scale_id
        score_ = scores_[new_scale_id, :, :]
        score_ = score_ - np.min(score_)
        score_ = score_ / np.sum(score_)
        # apply displacement penalty
        score_ = (1 - self.hp.window_influence
                  ) * score_ + self.hp.window_influence * self.penalty
        # Calculate the new center location and confidence
        self.pos_x, self.pos_y, confidence = self._update_target_position(
            self.pos_x, self.pos_y, score_, self.final_score_sz,
            self.design.tot_stride, self.design.search_sz, self.hp.response_up,
            self.x_sz)

        # update the target representation with a rolling average
        if self.hp.z_lr > 0:
            with self.graph_siam.as_default():
                #Update siam tracker template
                new_templates_z_ = self.siam_sess.run(
                    [self.siam_params['templates_z']],
                    feed_dict={
                        siam.pos_x_ph: self.pos_x,
                        siam.pos_y_ph: self.pos_y,
                        siam.z_sz_ph: self.z_sz,
                        self.siam_params['image']: image_
                    })

            self.siam_ret['templates_z_'] = (1 - self.hp.z_lr) * np.asarray(
                self.siam_ret['templates_z_']) + self.hp.z_lr * np.asarray(
                    new_templates_z_)

            #Update color tracker template
            with self.graph_exemplar.as_default():
                new_templates_z_, z_crops_ = self.session_exemplar.run(
                    [
                        self.color_params['features_z'],
                        self.color_params['z_crops']
                    ],
                    feed_dict={
                        self.exemplar_ph['filename_ph']: imagepath,
                        self.exemplar_ph['pos_x_ph']: self.pos_x,
                        self.exemplar_ph['pos_y_ph']: self.pos_y,
                        self.exemplar_ph['z_sz_ph']: self.z_sz
                    })

            self.color_ret['templates_z_'] = (1 - self.hp.z_lr) * np.asarray(
                self.color_ret['templates_z_']) + self.hp.z_lr * np.asarray(
                    new_templates_z_)

        # update template patch size
        self.z_sz = (
            1 - self.hp.scale_lr
        ) * self.z_sz + self.hp.scale_lr * scaled_exemplar[new_scale_id]

        # convert <cx,cy,w,h> to <x,y,w,h> and save output
        return vot.Rectangle(self.pos_x - self.target_w / 2,
                             self.pos_y - self.target_h / 2, self.target_w,
                             self.target_h), confidence
Example #22
    def track(self, image):
        self.i += 1

        cur_ori_img = image

        cur_img_array, win_loc, scale \
            = crop_search_region(cur_ori_img, self.last_gt, 300, mean_rgb=128)

        detection_box_ori, scores = self.sess.run(
            [self.pre_box_tensor, self.scores_tensor],
            feed_dict={
                self.input_cur_image: cur_img_array,
                self.initConstantOp: self.init_feature_maps
            })

        detection_box_ori[:, 0] = detection_box_ori[:, 0] * scale[0] + win_loc[0]
        detection_box_ori[:, 1] = detection_box_ori[:, 1] * scale[1] + win_loc[1]
        detection_box_ori[:, 2] = detection_box_ori[:, 2] * scale[0] + win_loc[0]
        detection_box_ori[:, 3] = detection_box_ori[:, 3] * scale[1] + win_loc[1]

        A_candis = (
            (detection_box_ori[:self.k, 3] - detection_box_ori[:self.k, 1]) *
            (detection_box_ori[:self.k, 2] - detection_box_ori[:self.k, 0]))

        A_lastgt = ((self.last_gt[3] - self.last_gt[1]) *
                    (self.last_gt[2] - self.last_gt[0]))
        x1 = np.maximum(detection_box_ori[:self.k, 1], self.last_gt[1])
        y1 = np.maximum(detection_box_ori[:self.k, 0], self.last_gt[0])
        x2 = np.minimum(detection_box_ori[:self.k, 3], self.last_gt[3])
        y2 = np.minimum(detection_box_ori[:self.k, 2], self.last_gt[2])
        inter = np.maximum((x2 - x1), 0) * np.maximum((y2 - y1), 0)
        IOU = inter / (A_candis + A_lastgt - inter)
        ID = np.arange(self.k)

        threshold = 0.4
        I_mask = IOU > threshold
        ID_iou = ID[I_mask]

        if np.sum(I_mask) > 0:

            best_idx = ID_iou[np.argmax(scores[0, :self.k][I_mask])]
        else:
            best_idx = 0

        search_box1 = detection_box_ori[best_idx]
        search_box1[0] = np.clip(search_box1[0], 0, cur_ori_img.shape[0] - 1)
        search_box1[2] = np.clip(search_box1[2], 0, cur_ori_img.shape[0] - 1)
        search_box1[1] = np.clip(search_box1[1], 0, cur_ori_img.shape[1] - 1)
        search_box1[3] = np.clip(search_box1[3], 0, cur_ori_img.shape[1] - 1)

        if (int(search_box1[0]) == int(search_box1[2])
                or int(search_box1[1]) == int(search_box1[3])):
            dist_min = self.LargeDist
        else:
            unscaled_win = image[int(search_box1[0]):int(search_box1[2]),
                                 int(search_box1[1]):int(search_box1[3])]
            win = cv2.resize(unscaled_win, (128, 128)).astype(np.float64)
            win -= self.mean
            win_input = win[np.newaxis, :]
            candidate_feat = self.sess.run(
                self.V_feat_op, feed_dict={self.V_image_op: win_input})

            dist_min = np.sum(np.square(self.template_feat - candidate_feat))

        # if score_max < self.classi_threshold:

        if dist_min > self.V_thres:
            search_box1 = detection_box_ori[:self.k]
            search_box = np.zeros_like(search_box1)  # x1 y1 x2 y2
            search_box[:, 0] = search_box1[:, 1]
            search_box[:, 1] = search_box1[:, 0]
            search_box[:, 2] = search_box1[:, 3]
            search_box[:, 3] = search_box1[:, 2]
            search_box[:, 2] = search_box[:, 2] - search_box[:, 0]  # x y w h
            search_box[:, 3] = search_box[:, 3] - search_box[:, 1]

            search_box[:, 2] = np.maximum(search_box[:, 2], 3)
            search_box[:, 3] = np.maximum(search_box[:, 3], 3)

            search_box[:, 0] = np.maximum(search_box[:, 0], 0)
            search_box[:, 1] = np.maximum(search_box[:, 1], 0)

            search_box[:, 0] = np.minimum(
                search_box[:, 0], cur_ori_img.shape[1] - search_box[:, 2] - 1)

            search_box[:, 1] = np.minimum(
                search_box[:, 1], cur_ori_img.shape[0] - search_box[:, 3] - 1)

            if scores[0, 0] > self.Object_thres_low:
                O_mask = (scores[0, :self.k] > self.Object_thres_low)
                ID_obj = ID[O_mask]
                num_object = int(np.sum(O_mask))

                win_input = np.zeros((num_object, 128, 128, 3))

                starty = search_box[O_mask, 1]
                startx = search_box[O_mask, 0]
                endy = search_box[O_mask, 3] + search_box[O_mask, 1]
                endx = search_box[O_mask, 2] + search_box[O_mask, 0]

                for i in range(num_object):
                    unscaled_win = image[int(starty[i]):int(endy[i]),
                                         int(startx[i]):int(endx[i])]
                    win_input[i] = cv2.resize(unscaled_win,
                                              (128, 128)).astype(np.float64)

                win_input = win_input - self.mean.reshape((1, 1, 1, 3))
                candidate_feats = self.sess.run(
                    self.V_feat_op, feed_dict={self.V_image_op: win_input})
                dists = np.sum(np.square(self.template_feat - candidate_feats),
                               axis=-1)

                dists1 = dists.copy()
                for i in range(num_object):
                    if ID_obj[i] not in ID_iou:
                        dists1[i] = self.LargeDist  # IOU < threshold

                if np.min(dists1) < self.V_thres:

                    best_idx = ID_obj[np.argmin(dists1)]
                    dist_min = np.min(dists1)
                elif np.min(dists) < self.V_thres:

                    best_idx = ID_obj[np.argmin(dists)]
                    dist_min = np.min(dists)
                else:

                    dist_min = self.LargeDist

        detection_box = detection_box_ori[best_idx]

        if scores[0, best_idx] < self.Object_thres_low:

            scores, best_idx, detection_box, dist_min \
                = self.center_search(
                cur_ori_img,
                (self.last_gt[2] - self.last_gt[0]),
                (self.last_gt[3] - self.last_gt[1]),
                scores, best_idx, detection_box, dist_min)

            if dist_min > self.V_thres:

                scores, best_idx, detection_box, dist_min \
                    = self.center_search(
                    cur_ori_img,
                    self.first_h,
                    self.first_w,
                    scores, best_idx, detection_box, dist_min)

            if dist_min > self.V_thres:

                scores, best_idx, detection_box, dist_min \
                    = self.center_search(
                    cur_ori_img,
                    self.first_h / 2.0,
                    self.first_w / 2.0,
                    scores, best_idx, detection_box, dist_min)

            if dist_min > self.V_thres:

                scores, best_idx, detection_box, dist_min \
                    = self.center_search(
                    cur_ori_img,
                    self.first_h * 2.0,
                    self.first_w * 2.0,
                    scores, best_idx, detection_box, dist_min)

        # print scores[0,max_idx]
        if scores[0, best_idx] < self.Object_thres_low:
            """-------------------------------------------------------------------------"""
            softmax_test_, pos_i = gen_search_patch_Hao(
                cur_ori_img, self.first_w, self.first_h)
            softmax_test = softmax_test_.astype(np.float32)
            print('global')
            batch_sz = 64

            if softmax_test.shape[0] <= batch_sz:
                kk = softmax_test
                cls_out = self.branch_search.predict(
                    [self.z_feat.repeat(kk.shape[0], axis=0), kk]).reshape(-1)

            elif softmax_test.shape[0] > batch_sz:
                cls_out_list = []

                for_i = softmax_test.shape[0] // batch_sz  # integer batch count
                for jj in range(for_i):
                    kk = softmax_test[batch_sz * jj:batch_sz * (jj + 1)]
                    cls_out_list.append(
                        self.branch_search.predict(
                            [self.z_feat.repeat(kk.shape[0], axis=0),
                             kk]).reshape(-1))

                if softmax_test.shape[0] % batch_sz == 0:
                    pass
                else:
                    kk = softmax_test[batch_sz * (jj + 1):]
                    cls_out_list.append(
                        self.branch_search.predict(
                            [self.z_feat.repeat(kk.shape[0], axis=0),
                             kk]).reshape(-1))

                cls_out = np.concatenate(cls_out_list)

            search_rank = np.argsort(-cls_out)
            pos_i = pos_i[search_rank]
            cls_out = cls_out[search_rank]
            """-------------------------------------------------------------------------"""

            self.SEARCH_K = np.minimum(pos_i.shape[0], self.SEARCH_K)
            if self.SEARCH_K > 1:
                search_num = self.SEARCH_K - 1
            else:
                search_num = 1

            detection_box1_all = np.zeros([search_num, 4])
            scores1_all = np.zeros([1, search_num])

            for s_i in range(search_num):
                search_gt = pos_i[s_i]

                cur_img_array1, win_loc1, scale1 \
                    = crop_search_region(cur_ori_img, search_gt, 300, mean_rgb=128)
                detection_box1, scores1 = self.sess.run(
                    [self.pre_box_tensor, self.scores_tensor],
                    feed_dict={
                        self.input_cur_image: cur_img_array1,
                        self.initConstantOp: self.init_feature_maps
                    })

                detection_box1[0, 0] = detection_box1[0, 0] * scale1[0] + win_loc1[0]
                detection_box1[0, 1] = detection_box1[0, 1] * scale1[1] + win_loc1[1]
                detection_box1[0, 2] = detection_box1[0, 2] * scale1[0] + win_loc1[0]
                detection_box1[0, 3] = detection_box1[0, 3] * scale1[1] + win_loc1[1]

                scores1_all[0, s_i] = scores1[0, 0]
                detection_box1_all[s_i] = detection_box1[0].copy()

            rank_idx = np.argsort(-scores1_all).reshape(-1)
            scores1 = scores1_all[:, rank_idx]
            detection_box1 = detection_box1_all[rank_idx, :]

            if scores1[0, 0] > self.Object_thres_high:
                detection_box_ori = detection_box1.copy()
                # max_idx = 0
                search_box1 = detection_box_ori[0]
                search_box1[0] = np.clip(search_box1[0], 0,
                                         cur_ori_img.shape[0] - 1)
                search_box1[2] = np.clip(search_box1[2], 0,
                                         cur_ori_img.shape[0] - 1)
                search_box1[1] = np.clip(search_box1[1], 0,
                                         cur_ori_img.shape[1] - 1)
                search_box1[3] = np.clip(search_box1[3], 0,
                                         cur_ori_img.shape[1] - 1)
                if (int(search_box1[0]) == int(search_box1[2])
                        or int(search_box1[1]) == int(search_box1[3])):
                    # score_max = -1
                    # score_max = 0  # 0 is the minimum score for SINT
                    dist_min = self.LargeDist
                else:
                    search_box1 = [
                        search_box1[1], search_box1[0],
                        search_box1[3] - search_box1[1],
                        search_box1[2] - search_box1[0]
                    ]

                    search_box1 = np.reshape(search_box1, (4, ))

                    unscaled_win = image[
                        int(search_box1[1]):int(search_box1[3] +
                                                search_box1[1]),
                        int(search_box1[0]):int(search_box1[2] +
                                                search_box1[0])]

                    win = cv2.resize(unscaled_win,
                                     (128, 128)).astype(np.float64)
                    win -= self.mean
                    win_input = win[np.newaxis, :]
                    candidate_feat = self.sess.run(
                        self.V_feat_op, feed_dict={self.V_image_op: win_input})

                    dist_min = np.sum(
                        np.square(self.template_feat - candidate_feat))

                if dist_min < self.global_V_thres:

                    scores = scores1.copy()
                    best_idx = 0
                    detection_box = detection_box_ori[best_idx]

                elif dist_min > self.global_V_thres and self.SEARCH_K - search_num > 0:

                    search_gt = pos_i[search_num]

                    cur_img_array1, win_loc1, scale1 \
                        = crop_search_region(cur_ori_img, search_gt, 300, mean_rgb=128)
                    detection_box1, scores1 = self.sess.run(
                        [self.pre_box_tensor, self.scores_tensor],
                        feed_dict={
                            self.input_cur_image: cur_img_array1,
                            self.initConstantOp: self.init_feature_maps
                        })

                    detection_box1[0, 0] = detection_box1[0, 0] * scale1[0] + win_loc1[0]
                    detection_box1[0, 1] = detection_box1[0, 1] * scale1[1] + win_loc1[1]
                    detection_box1[0, 2] = detection_box1[0, 2] * scale1[0] + win_loc1[0]
                    detection_box1[0, 3] = detection_box1[0, 3] * scale1[1] + win_loc1[1]
                    detection_box_ori = detection_box1.copy()
                    # max_idx = 0
                    search_box1 = detection_box_ori[0]
                    search_box1[0] = np.clip(search_box1[0], 0,
                                             cur_ori_img.shape[0] - 1)
                    search_box1[2] = np.clip(search_box1[2], 0,
                                             cur_ori_img.shape[0] - 1)
                    search_box1[1] = np.clip(search_box1[1], 0,
                                             cur_ori_img.shape[1] - 1)
                    search_box1[3] = np.clip(search_box1[3], 0,
                                             cur_ori_img.shape[1] - 1)
                    if (int(search_box1[0]) == int(search_box1[2])
                            or int(search_box1[1]) == int(search_box1[3])):
                        # Degenerate (zero-area) box: mark it as unverifiable.
                        dist_min = self.LargeDist
                    else:
                        search_box1 = [
                            search_box1[1], search_box1[0],
                            search_box1[3] - search_box1[1],
                            search_box1[2] - search_box1[0]
                        ]

                        search_box1 = np.reshape(search_box1, (4, ))

                        unscaled_win = image[
                            int(search_box1[1]):int(search_box1[3] +
                                                    search_box1[1]),
                            int(search_box1[0]):int(search_box1[2] +
                                                    search_box1[0])]

                        win = cv2.resize(unscaled_win,
                                         (128, 128)).astype(np.float64)
                        win -= self.mean
                        win_input = win[np.newaxis, :]
                        candidate_feat = self.sess.run(
                            self.V_feat_op,
                            feed_dict={self.V_image_op: win_input})

                        dist_min = np.sum(
                            np.square(self.template_feat - candidate_feat))

                    if dist_min < self.global_V_thres:

                        scores = scores1.copy()
                        best_idx = 0
                        detection_box = detection_box_ori[best_idx]

        if scores[0, best_idx] < self.Object_thres_low:
            # Low confidence: keep the previous target size and only move the center.
            x_c = (detection_box[3] + detection_box[1]) / 2.0
            y_c = (detection_box[0] + detection_box[2]) / 2.0
            w1 = self.last_gt[3] - self.last_gt[1]
            h1 = self.last_gt[2] - self.last_gt[0]
            x1 = x_c - w1 / 2.0
            y1 = y_c - h1 / 2.0
            x2 = x_c + w1 / 2.0
            y2 = y_c + h1 / 2.0
            self.last_gt = np.float32([y1, x1, y2, x2])
        else:
            # Confident detection: adopt both position and size.
            self.last_gt = detection_box
            self.target_w = detection_box[3] - detection_box[1]
            self.target_h = detection_box[2] - detection_box[0]

        # Clamp the box to the image bounds while preserving the target size.
        if self.last_gt[0] < 0:
            self.last_gt[0] = 0
            self.last_gt[2] = self.target_h
        if self.last_gt[1] < 0:
            self.last_gt[1] = 0
            self.last_gt[3] = self.target_w
        if self.last_gt[2] > cur_ori_img.shape[0]:
            self.last_gt[2] = cur_ori_img.shape[0] - 1
            self.last_gt[0] = cur_ori_img.shape[0] - 1 - self.target_h
        if self.last_gt[3] > cur_ori_img.shape[1]:
            self.last_gt[3] = cur_ori_img.shape[1] - 1
            self.last_gt[1] = cur_ori_img.shape[1] - 1 - self.target_w

        self.target_w = (self.last_gt[3] - self.last_gt[1])
        self.target_h = (self.last_gt[2] - self.last_gt[0])

        width = self.last_gt[3] - self.last_gt[1]
        height = self.last_gt[2] - self.last_gt[0]

        if self.dis:
            show_res(image,
                     np.array(self.last_gt, dtype=np.int32),
                     '2',
                     score=scores[0, best_idx],
                     score_max=dist_min)

        # Fuse the detector score and the verifier distance into a confidence.
        if (scores[0, best_idx] > self.Object_thres_high
                and dist_min < self.V_thres):
            confidence_score = 0.99
        elif (scores[0, best_idx] < self.Object_thres_low
              and dist_min > self.V_thres):
            # Detector and verifier both reject the current box.
            confidence_score = np.nan
        elif dist_min < self.EXTREM:
            # A near-perfect verifier match overrides a weak detector score.
            confidence_score = 0.99
        else:
            confidence_score = scores[0, best_idx]

        if self.vot:
            return vot.Rectangle(float(self.last_gt[1]),
                                 float(self.last_gt[0]), float(width),
                                 float(height)), confidence_score
        else:
            return np.array([
                float(self.last_gt[1]),
                float(self.last_gt[0]),
                float(width),
                float(height)
            ]), confidence_score
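
The tracker above stores its state as [ymin, xmin, ymax, xmax] and converts to VOT's (x, y, width, height) only at the return statement. A minimal sketch of the two conversions, written as hypothetical helpers that are not part of the example:

import numpy as np

def gt_to_vot_rect(last_gt):
    # [ymin, xmin, ymax, xmax] -> (x, y, w, h) as expected by vot.Rectangle.
    y1, x1, y2, x2 = [float(v) for v in last_gt]
    return x1, y1, x2 - x1, y2 - y1

def vot_rect_to_gt(x, y, w, h):
    # (x, y, w, h) -> [ymin, xmin, ymax, xmax].
    return np.float32([y, x, y + h, x + w])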
Beispiel #23
0
    def track(self, img):
        # print('entry')
        params['height'], params['width'] = img.shape[:2]
        curr_bbox_old = self.curr_bbox
        self.move_counter = 0
        target_score = 0

        num_action_step_max = 20
        bb_step = np.zeros([num_action_step_max, 4])
        score_step = np.zeros([num_action_step_max, 1])
        self.is_negative = False
        prev_score = -9999
        self.this_actions = np.zeros([params['num_show_actions'], 1])
        action_history_oh_old = self.action_history_oh

        while (self.move_counter < num_action_step_max):
            bb_step[self.move_counter] = self.curr_bbox
            score_step[self.move_counter] = prev_score

            self.action_history_oh *= 0

            # One-hot encode the recent action history (only action ids < 11 count).
            for i, act in enumerate(
                    self.action_history[:params['num_action_history']]):
                if act < 11:
                    self.action_history_oh[i, int(act)] = 1

            pred, pred_score = sess.run(
                [nodes['action'], nodes['soft_conf']],
                feed_dict={
                    nodes['image']: [img],
                    nodes['cropped']: 1.0,
                    nodes['full_training']: 1.0,
                    nodes['boxes_ind']: np.array([0]),
                    nodes['boxes']: tutil.refine_box(
                        np.expand_dims(self.curr_bbox, 0), params),
                    nodes['action_hist']: self.action_history_oh.reshape(1, -1)
                })
            curr_score = pred_score[0, 1]
            max_action = np.argmax(pred[0])
            if curr_score < params['failedThre']:
                # Confidence below the failure threshold: mark failure, restore
                # the previous score, and push an out-of-range action id (12)
                # that the one-hot encoding ignores.
                self.is_negative = True
                curr_score = prev_score
                self.action_history[1:] = self.action_history[:-1]
                self.action_history[0] = 12
                self.cont_negatives += 1
                break

            self.curr_bbox = tutil.do_action(self.curr_bbox, max_action,
                                             params)

            # Oscillation guard: force a stop if the refined box revisits a
            # previous step.
            if (len(np.where(np.sum(np.equal(
                    np.round(bb_step), np.round(self.curr_bbox)), 1) == 4)[0]) > 0
                    and max_action != params['stop_action']):
                max_action = params['stop_action']

            self.action_history[1:] = self.action_history[:-1]
            self.action_history[0] = max_action
            target_score = curr_score

            if max_action == params['stop_action']:
                break

            self.move_counter += 1
            prev_score = curr_score

        #%% Tracking Fail --> Re-detection
        if self.f > 0 and self.is_negative:
            self.total_pos_data['%d' % self.f] = np.zeros([0, 3, 3, 512])
            self.total_neg_data['%d' % self.f] = np.zeros([0, 3, 3, 512])
            self.total_pos_action_labels['%d' % self.f] = np.zeros([0, 11])
            self.total_pos_examples['%d' % self.f] = np.zeros([0, 4])
            self.total_neg_examples['%d' % self.f] = np.zeros([0, 4])

            # Sample re-detection candidates around the last box; the search
            # radius grows with the number of consecutive failures.
            samples_redet = tutil.gen_samples(
                'gaussian', curr_bbox_old, params['redet_samples'], params,
                min(1.5, 0.6 * 1.15**self.cont_negatives),
                params['finetune_scale_factor'])
            red_score_pred = sess.run(
                nodes['soft_conf'],
                feed_dict={
                    nodes['image']: [img],
                    nodes['cropped']: 1.0,
                    nodes['full_training']: 1.0,
                    nodes['boxes_ind']: np.array([0] * samples_redet.shape[0]),
                    nodes['boxes']: tutil.refine_box(samples_redet, params),
                    nodes['action_hist']: np.vstack(
                        [self.action_history_oh.reshape(1, -1)] *
                        samples_redet.shape[0]),
                    nodes['is_training']: 0.0
                })

            # Rank the samples by confidence and average the top five.
            idx = np.lexsort(
                (np.arange(params['redet_samples']), red_score_pred[:, 1]))
            target_score = np.mean(red_score_pred[idx[-5:], 1])
            if target_score > curr_score:
                self.curr_bbox = np.mean(samples_redet[idx[-5:], :], 0)
            self.move_counter += params['redet_samples']

        #%% Tracking Success --> generate samples
        if self.f > 0 and (not self.is_negative
                           or target_score > params['successThre']):
            self.cont_negatives = 0
            self.pos_examples = tutil.gen_samples(
                'gaussian', self.curr_bbox, params['pos_on'] * 2, params,
                params['finetune_trans'], params['finetune_scale_factor'])
            self.r = tutil.overlap_ratio(self.pos_examples, self.curr_bbox)
            self.pos_examples = self.pos_examples[np.where(
                self.r > params['pos_thr_on'])]
            self.pos_examples = self.pos_examples[np.random.choice(
                self.pos_examples.shape[0],
                min(params['pos_on'], self.pos_examples.shape[0]),
                replace=False)]

            self.neg_examples = tutil.gen_samples('uniform', self.curr_bbox,
                                                  params['neg_on'] * 2, params,
                                                  2, 5)
            self.r = tutil.overlap_ratio(self.neg_examples, self.curr_bbox)
            self.neg_examples = self.neg_examples[np.where(
                self.r < params['neg_thr_on'])]
            self.neg_examples = self.neg_examples[np.random.choice(
                self.neg_examples.shape[0],
                min(params['neg_on'], self.neg_examples.shape[0]),
                replace=False)]

            self.examples = np.vstack((self.pos_examples, self.neg_examples))
            self.feat_conv = tutil.get_conv_feature(
                sess,
                nodes['conv_feat'],
                feed_dict={
                    nodes['cropped']: 1.0,
                    nodes['boxes_ind']: np.array([0] * self.examples.shape[0]),
                    nodes['image']: [img],
                    nodes['boxes']: tutil.refine_box(self.examples, params)
                })

            self.total_pos_data[
                '%d' % self.f] = self.feat_conv[:self.pos_examples.shape[0]]
            self.total_neg_data[
                '%d' % self.f] = self.feat_conv[self.pos_examples.shape[0]:]

            self.pos_action_labels = tutil.gen_action_labels(
                params, self.pos_examples, self.curr_bbox)

            self.total_pos_action_labels['%d' %
                                         self.f] = self.pos_action_labels
            self.total_pos_examples['%d' % self.f] = self.pos_examples
            self.total_neg_examples['%d' % self.f] = self.neg_examples

            self.frame_window.append(self.f)

            if (len(self.frame_window) > params['frame_long']):
                self.total_pos_data[
                    '%d' %
                    self.frame_window[-params['frame_long']]] = np.zeros(
                        [0, 3, 3, 512])
                self.total_pos_action_labels[
                    '%d' %
                    self.frame_window[-params['frame_long']]] = np.zeros(
                        [0, 11])
                self.total_pos_examples[
                    '%d' %
                    self.frame_window[-params['frame_long']]] = np.zeros(
                        [0, 4])

            if (len(self.frame_window) > params['frame_short']):
                self.total_neg_data[
                    '%d' %
                    self.frame_window[-params['frame_short']]] = np.zeros(
                        [0, 3, 3, 512])
                self.total_neg_examples[
                    '%d' %
                    self.frame_window[-params['frame_short']]] = np.zeros(
                        [0, 4])

        #%% Do online-training
        if (self.f + 1) % params['iterval'] == 0 or self.is_negative:
            # Scheduled updates gather positives over the long window,
            # failure-triggered updates over the short window; the gathering
            # itself is identical in both cases.
            if (self.f + 1) % params['iterval'] == 0:
                f_st = max(0, len(self.frame_window) - params['frame_long'])
            else:
                f_st = max(0, len(self.frame_window) - params['frame_short'])

            self.pos_data = []
            self.pos_action_labels = []
            for wind in self.frame_window[f_st:]:
                self.pos_data.append(self.total_pos_data['%d' % wind])
                self.pos_action_labels.append(
                    self.total_pos_action_labels['%d' % wind])

            self.pos_data = np.vstack(self.pos_data)
            self.pos_action_labels = np.vstack(self.pos_action_labels)

            f_st = max(0, len(self.frame_window) - params['frame_short'])
            self.neg_data = []
            for wind in self.frame_window[f_st:]:
                self.neg_data.append(self.total_neg_data['%d' % wind])

            self.neg_data = np.vstack(self.neg_data)

            self.feat_conv = np.vstack((self.pos_data, self.neg_data))
            # Reset the FC layers, then fine-tune them on the gathered samples.
            _ = sess.run(variables, feed_dict={reset: 1.0})
            iteration = params['iter_on']
            tutil.train_fc(sess, nodes, self.feat_conv, self.pos_action_labels,
                           iteration, params, params['on_learning_rate'])

        self.full_history.append(self.curr_bbox)
        self.full_gt.append(gt)  # NOTE: gt is not defined in this method; it must come from an enclosing scope
        self.total_moves += self.move_counter

        # Draw the current box for optional visualization or saving.
        frame = np.copy(img)
        frame = cv2.rectangle(frame,
                              (int(self.curr_bbox[0]), int(self.curr_bbox[1])),
                              (int(self.curr_bbox[0] + self.curr_bbox[2]),
                               int(self.curr_bbox[1] + self.curr_bbox[3])),
                              [255, 0, 0], 2)
        self.f += 1
        max_val = .99  # fixed confidence reported to the VOT toolkit
        return vot.Rectangle(self.curr_bbox[0], self.curr_bbox[1],
                             self.curr_bbox[2], self.curr_bbox[3]), max_val
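
The loop above repeatedly applies a predicted discrete action (shift or scale) to the current box until a stop action fires or the box revisits an earlier step. A standalone sketch of that control flow, with a stubbed predictor (all names hypothetical):

import numpy as np

def refine_box(bbox, predict_action, do_action, stop_action, max_steps=20):
    # Iteratively apply predicted actions to a box until 'stop' or a revisit.
    visited = [np.round(np.asarray(bbox))]
    for _ in range(max_steps):
        action = predict_action(bbox)
        if action == stop_action:
            break
        bbox = do_action(bbox, action)
        rounded = np.round(np.asarray(bbox))
        # Oscillation guard: stop if the new box matches any earlier step.
        if any(np.array_equal(rounded, v) for v in visited):
            break
        visited.append(rounded)
    return bbox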
Beispiel #24
0
    def tracking(self, image):
        self.i += 1
        mask = None
        candidate_bboxes = None
        # Seed the local tracker from the last box ([ymin, xmin, ymax, xmax]).
        self.local_Tracker.pos = torch.FloatTensor(
            [(self.last_gt[0] + self.last_gt[2] - 1) / 2, (self.last_gt[1] + self.last_gt[3] - 1) / 2])
        self.local_Tracker.target_sz = torch.FloatTensor(
            [(self.last_gt[2] - self.last_gt[0]), (self.last_gt[3] - self.last_gt[1])])
        tic = time.time()
        local_state, self.score_map, update, local_score, dis, flag = self.local_track(image)

        md_score = self.pymdnet_eval(image, np.array(local_state).reshape([-1, 4]))[0]
        self.score_max = md_score

        if md_score > 0 and flag == 'normal':
            # The local result is verified by the MDNet score.
            self.flag = 'found'
            if self.p.use_mask:
                self.siamstate['target_pos'] = self.local_Tracker.pos.numpy()[::-1]
                self.siamstate['target_sz'] = self.local_Tracker.target_sz.numpy()[::-1]
                siamscore, mask = self.siammask_track(cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
                self.local_Tracker.pos = torch.FloatTensor(self.siamstate['target_pos'][::-1].copy())
                self.local_Tracker.target_sz = torch.FloatTensor(self.siamstate['target_sz'][::-1].copy())
                local_state = torch.cat(
                    (self.local_Tracker.pos[[1, 0]] - (self.local_Tracker.target_sz[[1, 0]] - 1) / 2,
                     self.local_Tracker.target_sz[[1, 0]])).data.cpu().numpy()
            self.last_gt = np.array(
                [local_state[1], local_state[0], local_state[1] + local_state[3], local_state[0] + local_state[2]])
        elif md_score < 0 or flag == 'not_found':
            # Target lost: run the global detector and re-score its proposals.
            self.count += 1
            self.flag = 'not_found'
            candidate_bboxes = self.Global_Track_eval(image, 10)
            candidate_scores = self.pymdnet_eval(image, candidate_bboxes)
            max_id = np.argmax(candidate_scores)
            if candidate_scores[max_id] > 0:
                redet_bboxes = candidate_bboxes[max_id]
                # Only accept a re-detection after five consecutive lost frames.
                if self.count >= 5:
                    self.last_gt = np.array([redet_bboxes[1], redet_bboxes[0], redet_bboxes[1] + redet_bboxes[3],
                                             redet_bboxes[2] + redet_bboxes[0]])
                    self.local_Tracker.pos = torch.FloatTensor(
                        [(self.last_gt[0] + self.last_gt[2] - 1) / 2, (self.last_gt[1] + self.last_gt[3] - 1) / 2])
                    self.local_Tracker.target_sz = torch.FloatTensor(
                        [(self.last_gt[2] - self.last_gt[0]), (self.last_gt[3] - self.last_gt[1])])
                    self.score_max = candidate_scores[max_id]
                    self.count = 0
        if update:
            self.collect_samples_pymdnet(image)

        self.pymdnet_long_term_update()

        width = self.last_gt[3] - self.last_gt[1]
        height = self.last_gt[2] - self.last_gt[0]
        toc = time.time() - tic
        print('frame time: %.3fs' % toc)
        # Fuse the local score with the arctan-squashed MDNet score into [0, 1].
        confidence_score = np.clip(
            (local_score + np.arctan(0.2 * self.score_max) / math.pi + 0.5) / 2,
            0, 1)
        if self.p.visualization:
            show_res(cv2.cvtColor(image, cv2.COLOR_RGB2BGR), np.array(self.last_gt, dtype=np.int32), '2',
                     update=update, can_bboxes=candidate_bboxes,
                     frame_id=self.i, tracker_score=md_score, mask=mask)

        return vot.Rectangle(float(self.last_gt[1]), float(self.last_gt[0]),
                             float(width), float(height)), confidence_score
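
The confidence returned above squashes the unbounded MDNet score through arctan into (0, 1) and averages it with the bounded local score. A worked sketch of that mapping (the helper name is hypothetical):

import math
import numpy as np

def fuse_confidence(local_score, md_score):
    # arctan maps 0.2 * md_score into (-pi/2, pi/2); dividing by pi and adding
    # 0.5 yields (0, 1), which is then averaged with local_score and clipped.
    squashed = np.arctan(0.2 * md_score) / math.pi + 0.5
    return float(np.clip((local_score + squashed) / 2, 0, 1))

# e.g. fuse_confidence(0.8, 10.0) ~ 0.83 and fuse_confidence(0.2, -10.0) ~ 0.17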
Beispiel #25
0
    def track(self, image_path):
        # Calculate the scaled sizes; the scale factors are set in __init__.
        scaled_exemplar = self.z_sz * self.scale_factors
        scaled_search_area = self.x_sz * self.scale_factors
        scaled_target_w = self.target_w * self.scale_factors
        scaled_target_h = self.target_h * self.scale_factors

        # Run the network
        with self.sess.as_default():
            image_, scores_, scores_original_, self.templates_x_, self.templates_z_ = self.sess.run(
                [
                    self.image, self.scores, self.scores_original,
                    self.templates_x, self.templates_z
                ],
                feed_dict={
                    siam.pos_x_ph: self.pos_x,
                    siam.pos_y_ph: self.pos_y,
                    siam.x_sz0_ph: scaled_search_area[0],
                    siam.x_sz1_ph: scaled_search_area[1],
                    siam.x_sz2_ph: scaled_search_area[2],
                    self.templates_z: np.squeeze(self.templates_z_),
                    self.filename: image_path,
                },
                **self.run_opts)
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = self.hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = self.hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            self.x_sz = (
                1 - self.hp.scale_lr
            ) * self.x_sz + self.hp.scale_lr * scaled_search_area[new_scale_id]
            self.target_w = (
                1 - self.hp.scale_lr
            ) * self.target_w + self.hp.scale_lr * scaled_target_w[new_scale_id]
            self.target_h = (
                1 - self.hp.scale_lr
            ) * self.target_h + self.hp.scale_lr * scaled_target_h[new_scale_id]
            # select response with new_scale_id
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_ / np.sum(score_)
            # apply displacement penalty
            score_ = (1 - self.hp.window_influence
                      ) * score_ + self.hp.window_influence * self.penalty
            # Calculate the new center location and confidence
            self.pos_x, self.pos_y, confidence = self._update_target_position(
                self.pos_x, self.pos_y, score_, self.final_score_sz,
                self.design.tot_stride, self.design.search_sz,
                self.hp.response_up, self.x_sz)

            # update the target representation with a rolling average
            if self.hp.z_lr > 0:
                new_templates_z_ = self.sess.run(
                    [self.templates_z],
                    feed_dict={
                        siam.pos_x_ph: self.pos_x,
                        siam.pos_y_ph: self.pos_y,
                        siam.z_sz_ph: self.z_sz,
                        self.image: image_
                    })

                self.templates_z_ = (1 - self.hp.z_lr) * np.asarray(
                    self.templates_z_) + self.hp.z_lr * np.asarray(
                        new_templates_z_)

            # update template patch size
            self.z_sz = (
                1 - self.hp.scale_lr
            ) * self.z_sz + self.hp.scale_lr * scaled_exemplar[new_scale_id]

            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            return vot.Rectangle(self.pos_x - self.target_w / 2,
                                 self.pos_y - self.target_h / 2, self.target_w,
                                 self.target_h), confidence
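
Template features, window sizes, and target dimensions above all share one update rule: an exponential moving average controlled by a learning rate. A generic sketch of that rule (not part of the tracker itself):

import numpy as np

def ema_update(old, new, lr):
    # lr = 0 freezes the value, lr = 1 replaces it outright; a small lr
    # (like scale_lr or z_lr above) gives a slow rolling average.
    return (1 - lr) * np.asarray(old) + lr * np.asarray(new)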
Beispiel #26
0
    def track(self, image):
        start3 = time.time()

        # Crop the search window around the current position.
        test_crop = utils.get_subwindow(image, self.pos, self.patch_size)
        cv2.imshow('search patch', test_crop)

        hog_feature_t = utils.get_feature_map(test_crop, self.feature_list,
                                              self.num_feature_ch,
                                              self.patch_size_cell, self.w2c,
                                              self.cell_size,
                                              self.projection_matrix)

        z = np.multiply(hog_feature_t, self.cos_window[:, :, None])
        zf = np.fft.fft2(z, axes=(0, 1))
        # Evaluate the current angle and its two neighbours, wrapped to [0, 24).
        angle_index_series = (np.array((self.angle_index - 1, self.angle_index,
                                        self.angle_index + 1)) + 24) % 24
        response_map_series = np.zeros((24, int(self.patch_size_cell[0]),
                                        int(self.patch_size_cell[1])))
        j = 0

        end3 = time.time()
        print('Feature generation time: ' + str(end3 - start3))
        start4 = time.time()

        for i in angle_index_series:
            # Kernelized correlation response of the stored filter at angle i.
            k_test = utils.dense_gauss_kernel(self.feature_bandwidth_sigma,
                                              self.xf_sequence[i, :, :, :],
                                              self.x_sequence[i, :, :, :], zf, z)
            kf_test = np.fft.fft2(k_test, axes=(0, 1))
            alphaf_test = self.filter_sequence[i, :, :]
            response = np.real(np.fft.ifft2(np.multiply(alphaf_test, kf_test)))
            response_map_series[i, :, :] = response

            self.response_series[j] = np.max(response)
            self.v_centre[j], self.h_centre[j] = np.unravel_index(
                response.argmax(), response.shape)
            j += 1
        f2.write(str(np.max(self.response_series)) + '\n')
        max_response_index = int(np.argmax(self.response_series))
        v = self.v_centre[max_response_index]
        h = self.h_centre[max_response_index]
        self.angle_index = angle_index_series[max_response_index]
        response4show = np.reshape(
            response_map_series[self.angle_index, :, :],
            (int(self.patch_size_cell[0]), int(self.patch_size_cell[1])))
        cv2.imshow('response', response4show)
        print(self.angle_index)
        f1.write(str(self.angle_index) + '\n')
        end4 = time.time()
        print('Time spent in the three filters: ' + str(end4 - start4))
        # Peak displacement from the response centre, converted to pixels.
        vert_delta, horiz_delta = [v - response.shape[0] / 2,
                                   h - response.shape[1] / 2]
        self.pos = [self.pos[0] + vert_delta * self.cell_size,
                    self.pos[1] + horiz_delta * self.cell_size]

        return vot.Rectangle(self.pos[1] - self.target_size[1] / 2,
                             self.pos[0] - self.target_size[0] / 2,
                             self.target_size[1],
                             self.target_size[0]
                            )
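
The inner loop above is the usual kernelized correlation filter step: the response map is the inverse FFT of the learned filter spectrum multiplied with the kernel spectrum, and the peak location gives the translation. A minimal sketch using a linear kernel (the example itself uses a Gaussian kernel from its utils module):

import numpy as np

def correlation_response(alphaf, xf, zf):
    # Linear kernel in the Fourier domain: sum x_hat * conj(z_hat) over the
    # channel axis, then response = IFFT(alphaf * k_hat).
    kf = np.sum(xf * np.conj(zf), axis=2)
    return np.real(np.fft.ifft2(alphaf * kf))

# dy, dx = np.unravel_index(resp.argmax(), resp.shape) gives the peak offset.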
Beispiel #27
0
handle = vot.VOT("polygon")
selection = handle.region()
selection = selection.points
gt_bbox = []
for i in selection:
    gt_bbox.append(i.x)
    gt_bbox.append(i.y)
# A 4-value [x, y, w, h] region is expanded to the 8-value polygon form.
if len(gt_bbox) == 4:
    gt_bbox = [
        gt_bbox[0], gt_bbox[1], gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
    ]
cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
gt_bbox = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
imagefile = handle.frame()
if not imagefile:
    sys.exit(0)
image = cv.cvtColor(cv.imread(imagefile), cv.COLOR_BGR2RGB)
params = parameters()
tracker = DRNet(params)
tracker.initialize(image, list(gt_bbox))
while True:
    imagefile = handle.frame()
    if not imagefile:
        break
    image = cv.cvtColor(cv.imread(imagefile), cv.COLOR_BGR2RGB)
    region = tracker.track(image)
    region = vot.Rectangle(region[0], region[1], region[2], region[3])
    handle.report(region, 0.9)
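
get_axis_aligned_bbox reduces the 8-value polygon to a centred axis-aligned box. A common formulation, shown here as an assumed sketch (the actual helper used by the example may differ in detail):

import numpy as np

def axis_aligned_from_polygon(pts):
    # pts is a flat array [x1, y1, ..., x4, y4]; returns (cx, cy, w, h) whose
    # area matches the polygon while the aspect follows its bounding box.
    xs, ys = np.asarray(pts[0::2]), np.asarray(pts[1::2])
    cx, cy = xs.mean(), ys.mean()
    # Polygon area via the shoelace formula.
    area = 0.5 * abs(np.dot(xs, np.roll(ys, 1)) - np.dot(ys, np.roll(xs, 1)))
    bw, bh = xs.max() - xs.min(), ys.max() - ys.min()
    scale = np.sqrt(area / (bw * bh))
    return cx, cy, bw * scale, bh * scale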
Beispiel #28
0
    def track(self, image):

        res_rect = self._tracker.track(image)

        return vot.Rectangle(res_rect.x, res_rect.y, res_rect.w, res_rect.h)
Beispiel #29
0
#      Then get the initialization region
#      and the first image
# *****************************************

handle = vot.VOT("rectangle")
selection = handle.region()

# Process the first frame; exit before building the tracker if there is none.
imagefile = handle.frame()
if not imagefile:
    sys.exit(0)

tracker = SiamVGGTracker(imagefile, selection)

while True:
    # *****************************************
    # VOT: Call frame method to get path of the
    #      current image frame. If the result is
    #      null, the sequence is over.
    # *****************************************
    imagefile = handle.frame()
    if not imagefile:
        break
    region, confidence = tracker.track(imagefile)
    region = vot.Rectangle(region.x, region.y, region.width, region.height)
    # *****************************************
    # VOT: Report the position of the object
    #      every frame using report method.
    # *****************************************
    handle.report(region, confidence)
Beispiel #30
0
    def track(self, image_curr, tracknet, velocity, sess):
        """TODO: Docstring for tracker.
        :returns: TODO

        """
        target_pad, _, _, _ = cropPadImage(self.bbox_prev_tight,
                                           self.image_prev)
        cur_search_region, search_location, edge_spacing_x, edge_spacing_y = cropPadImage(
            self.bbox_curr_prior_tight, image_curr)

        # image, BGR(training type)
        cur_search_region_resize = self.preprocess(cur_search_region)
        target_pad_resize = self.preprocess(target_pad)

        cur_search_region_expdim = np.expand_dims(cur_search_region_resize,
                                                  axis=0)
        target_pad_expdim = np.expand_dims(target_pad_resize, axis=0)

        re_fc4_image, fc4_adj = sess.run(
            [tracknet.re_fc4_image, tracknet.fc4_adj],
            feed_dict={
                tracknet.image: cur_search_region_expdim,
                tracknet.target: target_pad_expdim
            })
        bbox_estimate, object_bool, objectness = calculate_box(
            re_fc4_image, fc4_adj)

        print('objectness is:', objectness)

        ########### original method ############
        # The box below is the NMS result; TODO: check all candidate boxes.

        if len(bbox_estimate) != 0:
            bbox_estimate = BoundingBox(bbox_estimate[0][0],
                                        bbox_estimate[0][1],
                                        bbox_estimate[0][2],
                                        bbox_estimate[0][3])

            # Inplace correction of bounding box
            bbox_estimate.unscale(cur_search_region)
            bbox_estimate.uncenter(image_curr, search_location, edge_spacing_x,
                                   edge_spacing_y)

            # self.image_prev = image_curr
            # self.bbox_prev_tight = bbox_estimate
            self.bbox_curr_prior_tight = bbox_estimate
        else:
            # No detection: keep the previous prior box.
            bbox_estimate = self.bbox_curr_prior_tight

        ########### original method ############

        ############ trick method (disabled) ############
        # An alternative update, kept here as a summary of the commented-out
        # experiment: smooth the box with an exponential moving average of the
        # centre displacement (self.DeltaBox). If the regressed centre stays
        # within 2 units of the search-region centre, adopt the new box and
        # update DeltaBox; otherwise (drift or occlusion) advance the previous
        # box by DeltaBox instead of replacing it, which damps sudden jumps.

        # Convert corner form (x1, y1, x2, y2) to (x, y, w, h) for VOT.
        left_x = bbox_estimate.x1
        left_y = bbox_estimate.y1
        width = bbox_estimate.x2 - bbox_estimate.x1
        height = bbox_estimate.y2 - bbox_estimate.y1
        return vot.Rectangle(left_x, left_y, width, height)
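
The unscale/uncenter pair above maps the regressed box from the padded, resized search crop back to full-image coordinates. A minimal sketch of that two-step mapping (names hypothetical; the real BoundingBox methods also account for the synthetic edge padding added when the crop extends past the image):

def crop_to_image_coords(x, y, crop_w, crop_h, crop_x0, crop_y0,
                         pad_x=0.0, pad_y=0.0, net_range=10.0):
    # Undo the output normalisation (e.g. a 0..net_range coordinate range),
    # then shift by the crop origin minus the edge padding.
    x_pix = x / net_range * crop_w
    y_pix = y / net_range * crop_h
    return x_pix + crop_x0 - pad_x, y_pix + crop_y0 - pad_y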