Example #1
def main():
    # load net
    net_file = join(realpath(dirname(__file__)), 'SiamRPNBIG.model')
    net = SiamRPNBIG()
    net.load_state_dict(torch.load(net_file))
    net.eval().cuda()

    # warm up
    for i in range(10):
        net.temple(
            torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda())
        net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

    # start to track
    handle = vot.VOT("polygon")
    Polygon = handle.region()
    cx, cy, w, h = get_axis_aligned_bbox(Polygon)

    image_file = handle.frame()
    if not image_file:
        sys.exit(0)

    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    im = cv2.imread(image_file)  # HxWxC
    state = SiamRPN_init(im, target_pos, target_sz, net)  # init tracker
    while True:
        image_file = handle.frame()
        if not image_file:
            break
        im = cv2.imread(image_file)  # HxWxC
        state = SiamRPN_track(state, im)  # track
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])

        handle.report(Rectangle(res[0], res[1], res[2], res[3]))
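Nearly every example here leans on the same small box-conversion helpers shipped with DaSiamRPN's utilities. A minimal sketch of what they compute; the polygon variant is simplified here, as the original get_axis_aligned_bbox also rescales the box by the polygon's area:

import numpy as np

def rect_2_cxy_wh(rect):
    # top-left (x, y, w, h) -> center (cx, cy) and size (w, h)
    return (np.array([rect[0] + rect[2] / 2, rect[1] + rect[3] / 2]),
            np.array([rect[2], rect[3]]))

def cxy_wh_2_rect(pos, sz):
    # center (cx, cy) and size (w, h) -> top-left (x, y, w, h)
    return np.array([pos[0] - sz[0] / 2, pos[1] - sz[1] / 2, sz[0], sz[1]])

def get_axis_aligned_bbox_sketch(region):
    # simplified: bound an 8-value polygon [x0, y0, ..., x3, y3] with an
    # axis-aligned box; the real helper also rescales by the polygon area
    region = np.asarray(region, dtype=float)
    cx, cy = region[0::2].mean(), region[1::2].mean()
    w = region[0::2].max() - region[0::2].min()
    h = region[1::2].max() - region[1::2].min()
    return cx, cy, w, h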
Example #2
    def on_tracking(self):
        # warm up
        for i in range(10):
            self.net.template(
                torch.autograd.Variable(torch.FloatTensor(1, 3, 127,
                                                          127)).cuda())
            self.net(
                torch.autograd.Variable(torch.FloatTensor(1, 3, 255,
                                                          255)).cuda())

        i = 1
        pred_bbx = self.gt_first
        print("{}th frame: {} {} {} {}".format(i, pred_bbx[0], pred_bbx[1],
                                               pred_bbx[2], pred_bbx[3]))
        cx, cy = pred_bbx[0] + pred_bbx[2] / 2.0, pred_bbx[1] + pred_bbx[3] / 2.0
        w, h = pred_bbx[2], pred_bbx[3]
        i += 1

        target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
        im = cv2.imread(self.path_seq + '/imgs/0001.jpg')  # HxWxC
        state = SiamRPN_init(im, target_pos, target_sz,
                             self.net)  # init tracker

        while i <= self.num_frames:
            self.index_frame = i
            im = cv2.imread(self.path_seq + '/imgs/' + str(i).zfill(4) +
                            '.jpg')
            state = SiamRPN_track(state, im)

            # convert cx, cy, w, h into rect
            res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            print(f"{i}th frame: ", res)
            i += 1
Example #3
def track_video(model, video):
    toc, regions = 0, []
    image_files, gt = video['image_files'], video['gt']
    for f, image_file in enumerate(image_files):
        im = cv2.imread(image_file)  # TODO: batch load
        tic = cv2.getTickCount()
        if f == 0:  # init
            target_pos, target_sz = rect_2_cxy_wh(gt[f])
            state = SiamRPN_init(im, target_pos, target_sz,
                                 model)  # init tracker
            location = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            regions.append(gt[f])
        elif f > 0:  # tracking
            state = SiamRPN_track(state, im)  # track
            location = cxy_wh_2_rect(state['target_pos'] + 1,
                                     state['target_sz'])
            regions.append(location)
        toc += cv2.getTickCount() - tic

        if args.visualization and f >= 0:  # visualization
            if f == 0:
                cv2.destroyAllWindows()
            if len(gt[f]) == 8:
                cv2.polylines(im,
                              [np.array(gt[f], int).reshape(
                                  (-1, 1, 2))], True, (0, 255, 0), 3)
            else:
                cv2.rectangle(im, (gt[f, 0], gt[f, 1]),
                              (gt[f, 0] + gt[f, 2], gt[f, 1] + gt[f, 3]),
                              (0, 255, 0), 3)
            if len(location) == 8:
                cv2.polylines(im, [location.reshape((-1, 1, 2))], True,
                              (0, 255, 255), 3)
            else:
                location = [int(l) for l in location]  #
                cv2.rectangle(
                    im, (location[0], location[1]),
                    (location[0] + location[2], location[1] + location[3]),
                    (0, 255, 255), 3)
            cv2.putText(im, str(f), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 255), 2)

            cv2.imshow(video['name'], im)
            cv2.waitKey(1)
    toc /= cv2.getTickFrequency()

    # save result
    video_path = join('../test', args.dataset, 'SiamRPN_AlexNet_OTB2015')
    if not isdir(video_path): makedirs(video_path)
    result_path = join(video_path, '{:s}.txt'.format(video['name']))
    with open(result_path, "w") as fout:
        for x in regions:
            fout.write(','.join([str(i) for i in x]) + '\n')

    print('({:d}) Video: {:12s} Time: {:02.1f}s Speed: {:3.1f}fps'.format(
        v_id, video['name'], toc, f / toc))
    return f / toc
Example #4
File: dasiamrpn.py Project: songheony/A3T
    def initialize(self, image_file, box):
        self.net = SiamRPNotb()
        self.net.load_state_dict(torch.load(self.net_file))
        self.net.eval().cuda()

        image = cv2.imread(image_file)
        box = box - np.array([1, 1, 0, 0])
        self.state = SiamRPN_init(image, box[:2] + box[2:] / 2.0, box[2:],
                                  self.net)  # init tracker
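The `box - np.array([1, 1, 0, 0])` line shifts a 1-based (x, y, w, h) annotation, as OTB-style ground truth is stored, to 0-based pixel coordinates before taking the box center; a small worked example with made-up numbers:

import numpy as np

box = np.array([10, 20, 40, 60])    # 1-based x, y, w, h from the annotation
box = box - np.array([1, 1, 0, 0])  # 0-based: [ 9, 19, 40, 60]
center = box[:2] + box[2:] / 2.0    # [29., 49.] -> target_pos
size = box[2:]                      # [40, 60]   -> target_sz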
Example #5
File: test.py Project: Cris-zj/DaSiamRPN
def main(imagedir, gtdir):
    # load net
    net_file = join(realpath(dirname(__file__)), 'SiamRPNBIG.model')
    net = SiamRPNBIG()
    net.load_state_dict(torch.load(net_file))
    net.eval().cuda()

    # warm up
    for i in range(10):
        net.temple(
            torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda())
        net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

    # start to track
    # get the first frame groundtruth
    gt_file = os.path.join(gtdir, 'gt.txt')
    with open(gt_file, 'r') as f:
        lines = f.readlines()
    gt = []
    for line in lines:
        line = line.split(' ')
        gt.append([int(float(x)) for x in line])
    init_bbox = gt[0]  # top-left x y,w,h
    target_pos, target_sz = rect_2_cxy_wh(
        init_bbox)  # top-left x y,w,h --> center x y,w,h

    image_list = glob.glob(os.path.join(imagedir, '*.jpg'))
    image_list.sort()
    im = cv2.imread(image_list[0])  # HxWxC

    state = SiamRPN_init(im, target_pos, target_sz, net)  # init tracker
    bboxes = []
    for i in range(1, len(gt)):
        im = cv2.imread(image_list[i])  # HxWxC
        state = SiamRPN_track(state, im)  # track
        res = cxy_wh_2_rect(
            state['target_pos'],
            state['target_sz'])  # center x y,w,h --> top-left x y,w,h
        bboxes.append(res.tolist())

    _, precision, precision_auc, iou = _compile_results(gt[1:], bboxes)
    print(' -- Precision ' + "(20 px)"  + ': ' + "%.2f" % precision +\
            ' -- Precision AUC: ' + "%.2f" % precision_auc + \
            ' -- IOU: ' + "%.2f" % iou + ' --')

    isSavebbox = True
    if isSavebbox:
        print('saving bbox...')
        res_bbox_file = os.path.join('results_bbox.json')
        json.dump(bboxes, open(res_bbox_file, 'w'), indent=2)

    isSavevideo = True
    if isSavevideo:
        print('saving video...')
        save_video(image_list, bboxes)
    print('done')
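_compile_results itself is not shown; a minimal sketch of the two headline numbers it reports, assuming per-frame (x, y, w, h) boxes: precision is the fraction of frames whose center error is within 20 px, and IOU is the mean overlap of prediction and ground truth.

import numpy as np

def compile_results_sketch(gt, bboxes, dist_threshold=20):
    # hypothetical stand-in for _compile_results; gt and bboxes are
    # per-frame [x, y, w, h] lists of equal length
    gt, bb = np.asarray(gt, float), np.asarray(bboxes, float)
    dist = np.linalg.norm((gt[:, :2] + gt[:, 2:] / 2) -
                          (bb[:, :2] + bb[:, 2:] / 2), axis=1)
    precision = 100.0 * np.mean(dist <= dist_threshold)
    # IoU of axis-aligned boxes
    tl = np.maximum(gt[:, :2], bb[:, :2])
    br = np.minimum(gt[:, :2] + gt[:, 2:], bb[:, :2] + bb[:, 2:])
    inter = np.prod(np.clip(br - tl, 0, None), axis=1)
    union = gt[:, 2:].prod(axis=1) + bb[:, 2:].prod(axis=1) - inter
    iou = 100.0 * np.mean(inter / union)
    return precision, iou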
Example #6
    def track_video(self, rgb_msg):
        color_image = self._bridge.imgmsg_to_cv2(rgb_msg)

        if self._is_first_frame and self._inital_bbox is not None:  # init
            print('init')
            target_pos, target_sz = rect_2_cxy_wh(self._inital_bbox)
            self.state = SiamRPN_init(color_image, target_pos, target_sz,
                                      self.model, device)  # init tracker
            location = cxy_wh_2_rect(self.state['target_pos'],
                                     self.state['target_sz'])
            self._last_bbox = self._inital_bbox
            self._is_first_frame = False

        elif not self._is_first_frame:  # tracking
            self.state = SiamRPN_track(self.state, color_image,
                                       device)  # track
            location = cxy_wh_2_rect(self.state['target_pos'] + 1,
                                     self.state['target_sz'])
            self._last_bbox = rect_2_cxy_wh(location)

        if self._last_bbox is not None:

            bbox_message = BoundingBox2D()
            bbox_message.size_x = self._last_bbox[1][0]
            bbox_message.size_y = self._last_bbox[1][1]
            bbox_message.center.theta = 0
            bbox_message.center.x = self._last_bbox[0][0] + self._last_bbox[1][0] / 2
            bbox_message.center.y = self._last_bbox[0][1] + self._last_bbox[1][1] / 2
            self._pub_bbox.publish(bbox_message)

            status_message = Int8()
            status_message.data = self._current_status
            self._pub_status.publish(status_message)

            location = [int(l) for l in location]  #
            cv2.rectangle(
                color_image, (location[0], location[1]),
                (location[0] + location[2], location[1] + location[3]),
                (0, 255, 255), 3)
            cv2.putText(color_image, str("DaSiamRPN"), (40, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
            imgmsg = self._bridge.cv2_to_imgmsg(color_image, encoding="mono8")
            self._pub_result_img.publish(imgmsg)
Example #7
def main():

    vid_file = os.path.expanduser("~/Videos/VID_20190327_195111.mp4")

    cap = cv2.VideoCapture(vid_file)

    # load net
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model')))
    net.eval().cuda()

    # # image and init box
    # image_files = sorted(glob.glob('./bag/*.jpg'))
    init_rbox = [
        334.02, 128.36, 438.19, 188.78, 396.39, 260.83, 292.23, 200.41
    ]
    [cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    ret, im = cap.read()

    state = SiamRPN_init(im, target_pos, target_sz, net, use_gpu=True)

    toc = 0
    while True:
        # Capture frame-by-frame
        ret, im = cap.read()
        if not ret:
            break

        tic = cv2.getTickCount()
        state = SiamRPN_track(state, im, use_gpu=True)  # track
        toc += cv2.getTickCount() - tic
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        res = [int(l) for l in res]
        cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]),
                      (0, 255, 255), 3)
        cv2.imshow('SiamRPN', im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything done, release the capture
    cap.release()
    cv2.destroyAllWindows()
Example #8
def main():
    # load net
    net = SiamRPNBIG()
    net.load_state_dict(torch.load('SiamRPNBIG.model'))
    net.eval().cuda()
    
    # warm up
    for i in range(10):
        net.temple(torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda())
        net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())
    
    # start to track
    cap = cv2.VideoCapture(0)
    while True:
        success, im = cap.read()
        if not success:
            break
        cv2.imshow('cam', im)
        k = cv2.waitKey(1)
        if k == 27:
            x, y, w, h = cv2.selectROI('cam', im)
            cx, cy = (x + w / 2, y + h / 2)
            break
    
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    state = SiamRPN_init(im, target_pos, target_sz, net)  # init tracker
    while True:
        success, im = cap.read()  # HxWxC
        if not success:
            break
        state = SiamRPN_track(state, im)  # track
        bbox = np.array([state['target_pos'][0] - state['target_sz'][0] / 2,
                         state['target_pos'][1] - state['target_sz'][1] / 2,
                         state['target_pos'][0] + state['target_sz'][0] / 2,
                         state['target_pos'][1] + state['target_sz'][1] / 2]).astype(int)
        im_bbox = cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                                (0, 255, 0), 3)
        cv2.imshow('cam', im_bbox)
        k = cv2.waitKey(1)
        if k == 27:
            cap.release()
            cv2.destroyAllWindows()
            break
Example #9
def SiamRPN_load(image, boxes, txt_path):
    if not os.path.exists(txt_path):
        os.makedirs(txt_path)
    multiTracker = cv2.MultiTracker_create()
    net = SiamRPNotb()
    net.load_state_dict(
        torch.load(join(realpath(dirname(__file__)), 'SiamRPNOTB.model')))
    net.eval().cuda()
    states, labels = [], []
    for bbox in boxes:
        #init_rbox = [bbox[0],bbox[1],bbox[0]+bbox[2],bbox[1],bbox[0]+bbox[2],bbox[1]+bbox[3],bbox[0],bbox[1]+bbox[3]]
        init_rbox = [
            bbox[0], bbox[1], bbox[2], bbox[1], bbox[2], bbox[3], bbox[0],
            bbox[3]
        ]
        [cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)
        # print(cx, cy, w, h,'-----',init_rbox)
        target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
        states.append(SiamRPN_init(image, target_pos, target_sz, net))
        labels.append(bbox[-1])
    return states, labels
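SiamRPN_load only builds one tracker state per box; the matching per-frame update would look roughly like this sketch, assuming the same imports as above and the states list returned by SiamRPN_load:

def SiamRPN_update_sketch(states, image):
    # advance every per-object tracker by one frame and collect
    # top-left (x, y, w, h) rectangles
    rects = []
    for i, state in enumerate(states):
        states[i] = SiamRPN_track(state, image)
        rects.append(cxy_wh_2_rect(states[i]['target_pos'],
                                   states[i]['target_sz']))
    return rects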
Example #10
def process_video(net, groundtruth_path, image_path, out_video):
    print('processing sequence', out_video)
    with open(groundtruth_path) as f:
        groundtruth = f.readlines()

    groundtruth = [x.rstrip() for x in groundtruth]

    image_filenames = [
        y for x in walk(image_path) for y in glob(join(x[0], '*.jpg'))
    ]
    image_filenames.sort()

    assert len(image_filenames) == len(groundtruth)

    image = cv2.imread(image_filenames[0])
    height, width = image.shape[:2]
    writer = cv2.VideoWriter(out_video,
                             cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), 15,
                             (width, height))

    if not writer.isOpened():
        print('Failed to open video')
        return

    # VOT sequence
    # polygon_ = parse_region(groundtruth[0])
    # cx, cy, w, h = get_axis_aligned_bbox(polygon_)
    # target_pos, target_sz = np.array([cx, cy]), np.array([w, h])

    polygon = [float(x) for x in groundtruth[0].split(',')]
    target_pos = np.array([polygon[0] + polygon[2] / 2, polygon[1] + polygon[3] / 2])
    target_sz = np.array([polygon[2], polygon[3]])
    state = SiamRPN_init(image, target_pos, target_sz, net)  # init tracker

    for i in range(len(image_filenames)):
        image = cv2.imread(image_filenames[i])
        polygon = [float(x) for x in groundtruth[i].split(',')]
        polygon = [int(x) for x in polygon]

        # VOT sequence
        # cv2.line(image, (polygon[0], polygon[1]), (polygon[2], polygon[3]), (0, 0, 255), 2)
        # cv2.line(image, (polygon[2], polygon[3]), (polygon[4], polygon[5]), (0, 0, 255), 2)
        # cv2.line(image, (polygon[4], polygon[5]), (polygon[6], polygon[7]), (0, 0, 255), 2)
        # cv2.line(image, (polygon[6], polygon[7]), (polygon[0], polygon[1]), (0, 0, 255), 2)

        cv2.rectangle(image, (polygon[0], polygon[1]),
                      (polygon[0] + polygon[2], polygon[1] + polygon[3]),
                      (0, 0, 255), 2)

        # Start timer
        timer = cv2.getTickCount()

        state = SiamRPN_track(state, image)  # track
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        res = [int(x) for x in res]

        cv2.rectangle(image, (res[0], res[1]),
                      (res[0] + res[2], res[1] + res[3]), (255, 0, 0), 2)

        # Calculate Frames per second (FPS)
        fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer)

        # Display tracker type on frame
        cv2.putText(image, "SiamRPN", (50, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75,
                    (230, 170, 50), 2)

        # Display FPS on frame
        cv2.putText(image, "FPS : " + str(int(fps)), (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (230, 170, 50), 2)

        writer.write(image)

    writer.release()
Example #11
	def start_labeling(self):
		self.cap.set(1,self.frame_num)
		
		ret, im = self.cap.read()
		im = cv2.resize(im,None, fx=self.scale, fy=self.scale)
		height, width, channels = im.shape
		# # image and init box
		self.draw_bbox()
		sel_rect_wh = (
			self.sel_rect[0][0], self.sel_rect[0][1],
			self.sel_rect[1][0]- self.sel_rect[0][0],
			self.sel_rect[1][1] - self.sel_rect[0][1])
		
		rect_gc = self.color_cut(im, sel_rect_wh)
		# tracker init
		target_pos, target_sz = np.array(rect_gc[0:2]) + np.array(rect_gc[2:4])/2, np.array(rect_gc[2:4])

		state = SiamRPN_init(im, target_pos, target_sz, self.net, use_gpu=True)

		state_hist = [state]
		toc = 0
		old_frame_num = self.frame_num - 1
		# self.frame_num = 0

		while(True):
			if old_frame_num != self.frame_num:
				old_frame_num = self.frame_num
				self.cap.set(1,self.frame_num)
				# Capture frame-by-frame
				ret, im = self.cap.read()
				if not ret:
					self.save()
					break
				im = cv2.resize(im,None, fx=self.scale, fy=self.scale)
				if len(self.labels) <= self.frame_num - self.start_frame_num:
					state_hist.append(SiamRPN_track(state_hist[-1], im, use_gpu=True))  # track
					state = state_hist[-1]
					rect_ccwh = np.concatenate([state['target_pos'], state['target_sz']])
					rect_xywh = BoxUtils.convert(rect_ccwh,
						BoxUtils.FMT_CCWH,
						BoxUtils.FMT_XYWH
						)

					rect_xywh = self.optimize_rect(rect_xywh, im)
					rect_xyxy = BoxUtils.convert(rect_xywh,
					BoxUtils.FMT_XYWH,
					BoxUtils.FMT_XYXY
					)
					
				else:
					state = state_hist[self.frame_num - self.start_frame_num] # Use previous frame state
					rect_xyxy = BoxUtils.unnormalize(self.labels[self.frame_num - self.start_frame_num].bounding_rect, BoxUtils.FMT_XYXY, [height, width])
					
				
				pt0, pt1 = tuple(np.asarray(rect_xyxy[:2], np.int32)), tuple(np.asarray(rect_xyxy[2:], np.int32))


				cv2.rectangle(im, pt0, pt1, (0, 255, 255), 3)

			k =  cv2.waitKey(33)

			if k & 0xFF == ord('d'):
				# Drawing Mode
				self.draw_bbox()
				rect_xyxy = [self.sel_rect[0][0],self.sel_rect[0][1], self.sel_rect[1][0], self.sel_rect[1][1]]
				# drawn_norm = 
				rect_xywh = BoxUtils.convert(
					rect_xyxy,
					BoxUtils.FMT_XYXY,
					BoxUtils.FMT_XYWH)
				# rect_xywh = self.optimize_rect(rect_xywh, im)
				# # self.labels[self.frame_num].bounding_rect = BoxUtils.normalize(rect_xywh, BoxUtils.FMT_XYWH, [height, width])
				# rect_xyxy = BoxUtils.convert(
				# 	rect_xywh,
				# 	BoxUtils.FMT_XYWH,
				# 	BoxUtils.FMT_XYXY)
				rect_ccwh = BoxUtils.convert(rect_xywh,
					BoxUtils.FMT_XYWH,
					BoxUtils.FMT_CCWH
					)

				state['target_pos'] = rect_ccwh[:2]
				state['target_sz']  = rect_ccwh[2:]
				old_frame_num = self.frame_num - 1


			if k & 0xFF == ord('r'):
				self.redo_annotation = not self.redo_annotation
				print("Annotation redo set to: ", self.redo_annotation)
			# If a new frame is being annotated, add it
			if len(self.labels) <= self.frame_num - self.start_frame_num:
				print("Normalized rect: ", BoxUtils.normalize(rect_xyxy, BoxUtils.FMT_XYXY, [height, width]))
				self.labels.append(Annotation(self.vid_file, self.frame_num, BoxUtils.normalize(rect_xyxy, BoxUtils.FMT_XYXY, [height, width]), self.redo_annotation))
				self.redo_annotation = False
			else: # Check if the cache should be used
				if k & 0xFF == ord('d'):
					self.labels[self.frame_num] = Annotation(self.vid_file, self.frame_num, BoxUtils.normalize(rect_xyxy, BoxUtils.FMT_XYXY, [height, width]), self.redo_annotation)
					self.redo_annotation = False
			cv2.imshow(self.disp_name, im)

			if k & 0xFF == ord('j'):
				if self.frame_num > 0:
					self.frame_num = self.frame_num - 1

			if k & 0xFF == ord('m'):
				self.mode_idx = self.mode_idx + 1
				self.mode = self.modes[self.mode_idx%len(self.modes)]
				print("Toggeling mode: ", self.mode)
				

			if k & 0xFF == ord('v'):
				print("Toggeling Video Mode: ", not self.run_video)
				self.run_video = not self.run_video

			if k & 0xFF == ord('k') or self.run_video:
				self.frame_num = self.frame_num + 1

			if k & 0xFF == ord('s') :
				self.save()

			if k & 0xFF == ord('q'):
				break

		# When everything done, release the capture
		self.cap.release()
		cv2.destroyAllWindows()
Example #12
# load net
net = SiamRPNvot()
net.load_state_dict(torch.load(join(realpath(dirname(__file__)), 'SOT.model')))
net.eval().cuda()

# image and init box
image_files = sorted(glob.glob('./12_video/*.png'))
image_files.reverse()
#init_rbox = [334.02,128.36,438.19,188.78,396.39,260.83,292.23,200.41]
init_rbox = [135.0, 141.0, 187.0, 141.0, 187.0, 168.0, 135.0, 168.0]
[cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

# tracker init
target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_files[0])  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net)

# tracking and visualization
toc = 0
for f, image_file in enumerate(image_files):
    im = cv2.imread(image_file)
    tic = cv2.getTickCount()
    state = SiamRPN_track(state, im)  # track
    toc += cv2.getTickCount() - tic
    res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
    res = [int(l) for l in res]
    cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]),
                  (0, 255, 255), 3)
    #cv2.imshow('SiamRPN', im)
    #cv2.waitKey(1)
    cv2.imwrite(join('./results', str(f) + '.jpg'), im)
Example #13
def trackerInit(im, p, net):
    target_pos, target_sz = np.array([p[0], p[1]]), np.array([p[2], p[3]])
    state = SiamRPN_init(im, target_pos, target_sz, net)
    return state
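A hypothetical caller for trackerInit, where p packs the center and size as [cx, cy, w, h], net is a loaded SiamRPN model, and frames is a list of BGR images; it follows the init-then-track pattern of the other examples:

state = trackerInit(frames[0], [cx, cy, w, h], net)
for im in frames[1:]:
    state = SiamRPN_track(state, im)
    rect = cxy_wh_2_rect(state['target_pos'], state['target_sz'])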
Example #14
def test(score):

    net = SiamRPNvot()
    net.load_state_dict(
        torch.load('/home/traker_hao/code/learn/train_RPN/model/30.model'))
    net.eval().cuda()

    version_name = 'jiasu'

    sequence_path = '/media/traker_hao/data/dataset/UAV1/sequences'
    init_path = '/media/traker_hao/data/dataset/UAV1/annotations'
    result_path = '/home/traker_hao/result/visdrone/' + version_name
    if os.path.exists(result_path) is False:
        os.mkdir(result_path)

    sequence_names = os.listdir(sequence_path)
    random.shuffle(sequence_names)
    #sequence_names.sort()
    i = 0
    for sequence_name in sequence_names:
        print(sequence_name)
        #if sequence_name != 'Suv':
        #continue
        #sequence_name='uav0000054_00000_s'
        imagenames = os.listdir(sequence_path + '/' + sequence_name)
        imagenames.sort()
        print(i)
        i = i + 1
        print(sequence_path + '/' + sequence_name)
        f = open(
            result_path + '/' + sequence_name + '_' + version_name + '.txt',
            'w')
        inited = False
        fp = open(init_path + '/' + sequence_name + '.txt')
        j = 0
        for imagename in imagenames:
            j = j + 1
            image = cv2.imread(sequence_path + '/' + sequence_name + '/' +
                               imagename)
            #init the tracker
            if inited is False:
                data = fp.readline()
                data = data.strip('\n')
                data = data.split(',')
                [cx, cy, w, h] = (int(data[0]) + int(data[2]) // 2,
                                  int(data[1]) + int(data[3]) // 2,
                                  int(data[2]), int(data[3]))
                #f.write(str(annos[0]['bbox'][0])+','+str(annos[0]['bbox'][1])+','+str(annos[0]['bbox'][2])+','+str(annos[0]['bbox'][3])+','+str(1.00)+'\n')
                f.write(data[0] + ',' + data[1] + ',' + data[2] + ',' +
                        data[3] + '\n')
                target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                inited = True

                cv2.rectangle(image,
                              (int(cx) - int(w) // 2, int(cy) - int(h) // 2),
                              (int(cx) + int(w) // 2, int(cy) + int(h) // 2),
                              (0, 255, 0), 3)
                cv2.putText(image, sequence_name, (50, 50), 0, 5e-3 * 200,
                            (0, 255, 0), 2)
                cv2.putText(image, 'initing...', (100, 100), 0, 5e-3 * 200,
                            (0, 255, 0), 2)
                image2 = cv2.resize(image, (960, 540))
                cv2.imshow('aa2', image2)
                cv2.waitKey(1)
            else:

                data = fp.readline()
                data = data.strip('\n')
                data = data.split(',')
                try:
                    truth = (int(data[0]),
                             int(data[1]), int(data[0]) + int(data[2]),
                             int(data[1]) + int(data[3]))
                except:
                    truth = [0, 0, 0, 0]

                #update the tracker
                #print([cx, cy, w, h])
                tic = cv2.getTickCount()
                t1 = time.time()
                state = SiamRPN_track(state, image)  # track
                #state['target_sz'] = np.array( [int(data[2]), int(data[3])] )

                toc = (cv2.getTickCount() - tic) / cv2.getTickFrequency()
                #print(1/toc)
                #mytracker.target_sz = np.array([int(truth[2]),int(truth[3])])
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]),
                              (res[0] + res[2], res[1] + res[3]),
                              (0, 255, 255), 2)

                #visualize the result

                cv2.rectangle(image, (int(truth[0]), int(truth[1])),
                              (int(truth[2]), int(truth[3])), (0, 255, 0), 2)
                #mytracker.target_sz=np.array([int(data[2]),int(data[3])])
                #cv2.putText(image, str(iou), (res[0] + res[2], res[1] + res[3]), 0, 5e-3*200, (0,255,0), 2)
            cv2.putText(image, sequence_name, (50, 50), 0, 5e-3 * 200,
                        (0, 255, 0), 2)
            image2 = cv2.resize(image, (960, 540))
            cv2.imshow('aa2', image2)
            if cv2.waitKey(1) == 97:
                break
            #if j>209:
            #cv2.waitKey(0)

        f.close()
Example #15
    net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

for obj_id in range(len(json_obj['annotations'][0]['data']['objects'])):
    # get the first frame
    frm = json_obj['annotations'][0]['data']['objects'][obj_id]['frames'][0]
    x1 = int(frm['boundingBox']['x1'] * width)
    y1 = int(frm['boundingBox']['y1'] * height)
    x2 = int(frm['boundingBox']['x2'] * width)
    y2 = int(frm['boundingBox']['y2'] * height)

    frm_idx = frm['frameNumber']
    cv2.rectangle(frames[frm_idx], (x1, y1), (x2, y2), (0, 0, 255), 2)

    target_pos = np.array([(x1 + x2) / 2, (y1 + y2) / 2])
    target_sz = np.array([x2 - x1, y2 - y1])
    state = SiamRPN_init(src_images[frm_idx], target_pos, target_sz,
                         net)  # init tracker

    color = (random.randint(1, 255), random.randint(1, 255), random.randint(1, 128))
    num_frames = len(
        json_obj['annotations'][0]['data']['objects'][obj_id]['frames'])
    for j in range(1, num_frames):
        frm = json_obj['annotations'][0]['data']['objects'][obj_id]['frames'][j]

        x1 = int(frm['boundingBox']['x1'] * width)
        y1 = int(frm['boundingBox']['y1'] * height)
        x2 = int(frm['boundingBox']['x2'] * width)
        y2 = int(frm['boundingBox']['y2'] * height)
Example #16
# for i in range(10):
#     net.temple(torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda()) # selonsy: cuda() runs the computation on the GPU; FloatTensor(1, 3, 127, 127) is a 4-D float tensor
#     net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

# start to track
handle = vot.VOT("polygon")
Polygon = handle.region() # region: send the configuration message to the client and receive the initialization region and the path of the first image; returns the initialization region
cx, cy, w, h = get_axis_aligned_bbox(Polygon) # get_axis_aligned_bbox: convert the coordinate data into the RPN format

image_file = handle.frame() # frame fetches a frame (image path) from the client
if not image_file:
    sys.exit(0)

target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_file)  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net)  # init tracker; SiamRPN_init builds the state dict and runs the template branch
# Tracking starts from the first frame, which seems odd; is the given initialization region not accurate enough?   # selonsy: a point to improve
while True: # enter the tracking loop
    image_file = handle.frame()
    if not image_file:
        break
    im = cv2.imread(image_file)  # HxWxC
    state = SiamRPN_track(state, im)  # track; SiamRPN_track runs the detection branch and updates the state variables
    res = cxy_wh_2_rect(state['target_pos'], state['target_sz']) # cxy_wh_2_rect: convert the coordinates into rectangle form

    handle.report(Rectangle(res[0], res[1], res[2], res[3])) # report: report the tracking result to the client

print(handle.result)
print(handle.frames)

del handle
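The torch.autograd.Variable wrapper used in these warm-up loops has been deprecated since PyTorch 0.4; on current PyTorch the same warm-up can be written with plain tensors under no_grad (a sketch, assuming a net loaded as above):

import torch

with torch.no_grad():
    for _ in range(10):
        net.temple(torch.zeros(1, 3, 127, 127).cuda())  # template branch
        net(torch.zeros(1, 3, 255, 255).cuda())         # detection branch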
Example #17
net.eval().cuda()

# warm up
for i in range(10):
    net.temple(torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda())
    net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

# start to track
handle = vot.VOT("polygon")
Polygon = handle.region()
cx, cy, w, h = get_axis_aligned_bbox(Polygon)

image_file = handle.frame()
if not image_file:
    sys.exit(0)

target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_file)  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net)  # init tracker
while True:
    image_file = handle.frame()
    if not image_file:
        break
    im = cv2.imread(image_file)  # HxWxC
    state = SiamRPN_track(state, im)  # track

    # convert cx, cy, w, h into rect
    res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
    handle.report(Rectangle(res[0], res[1], res[2], res[3]))

Example #18
def showImage():
    
    global x1, y1, x2, y2, drawing, init, flag, image, getim, start
    rospy.init_node('RPN', anonymous=True)
    
    flag=1
    init = False
    drawing = False
    getim = False
    start = False
    x1, x2, y1, y2 = -1, -1, -1, -1
    flag_lose = False
    count_lose = 0

    print('loading model...........')
    net = SiamRPNvot()
    net.load_state_dict(torch.load(path + 'SiamRPNVOT.model'))
    net.eval().cuda()
    z = torch.Tensor(1, 3, 127, 127)
    net.temple(z.cuda())
    x = torch.Tensor(1, 3, 271, 271)
    net(x.cuda())
    print('ready for starting!')
    
    rospy.Subscriber('/camera/rgb/image_raw', Image, callback)
    pub = rospy.Publisher('/vision/target', Pose, queue_size=10) 
    cv2.namedWindow('image')
    cv2.setMouseCallback('image', draw_circle)
    rate = rospy.Rate(30)
    i = 1
    t = time.time()
    fps = 0
    while not rospy.is_shutdown():
      
        if getim:
            t1 = time.time()
            idd = readid(image)
            
            pose = Pose()
            pose.position.z = 0
            
            if start is False and init is True:
                target_pos = np.array([int((x1+x2)/2), int((y1+y2)/2)])
                target_sz = np.array([int(x2-x1), int(y2-y1)])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                start = True
                flag_lose = False
                continue
                
            if start is True:
            
                state = SiamRPN_track(state, image)  # track              
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 2)
                pose.position.x = (state['target_pos'][0]-image.shape[1]/2) / (image.shape[1]/2)
                pose.position.y = (state['target_pos'][1]-image.shape[0]/2) / (image.shape[0]/2)
                cv2.putText(image, str(state['score']), (res[0] + res[2], res[1] + res[3]), cv2.FONT_HERSHEY_SIMPLEX , 0.5, (255,255,0), 1)
                pose.position.z = 1
                if state['score'] < 0.5:
                    count_lose = count_lose + 1
                else:
                    count_lose = 0
                if count_lose > 4:
                    flag_lose = True
                    
            if flag_lose is True:
                cv2.putText(image, 'target is lost!', (200, 200), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3)
                pose.position.z = -1
                   
            if drawing is True:              
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            
            cv2.putText(image, '#'+str(idd), (30,30), cv2.FONT_HERSHEY_SIMPLEX , 0.5, (0, 255, 255), 1)
            cx = int(image.shape[1]/2)
            cy = int(image.shape[0]/2)
            cv2.line(image,(cx-20,cy), (cx+20, cy), (255, 255, 255), 2)
            cv2.line(image,(cx, cy-20), (cx, cy+20), (255, 255, 255), 2)
            
            pub.publish(pose)
            
            if start is True:    
               
                i = i + 1
            if i > 5:
                i = 1
                fps = 5 / (time.time()-t)
                t = time.time()
            cv2.putText(image, 'fps='+str(fps), (200,30), cv2.FONT_HERSHEY_SIMPLEX , 0.5, (0, 255, 255), 1)
            
            cv2.imshow('image', image)
            cv2.waitKey(1)
            getim = False

        rate.sleep()
Example #19
            cy = toks[1] + toks[3] * 0.5
            w = toks[2]
            h = toks[3]
            target_pos, target_sz = np.array([cx, cy]), np.array([w, h])

            # use the first image as the template
            im = cv2.imread(imgpath[0])
            toks = list(map(int, toks))
            print(toks)
            cv2.rectangle(im, (toks[0], toks[1]), (toks[2], toks[3]),
                          (0, 255, 0), 3)
            cv2.imshow('res', im)
            cv2.waitKey(1)
            cv2.imwrite('../tmp/0000.jpg', im)
            #break
            state, z_crop = SiamRPN_init(im, target_pos, target_sz, net)
            bbox = []
            totalframe = len(imgpath)
            ttime = 0
            save_template_dir = os.path.join('/home/ly/chz/srpn_tmp', name)
            if not os.path.exists(save_template_dir):
                os.makedirs(save_template_dir)
            save_template_file = os.path.join('/home/ly/chz/srpn_tmp', name,
                                              '000_a_template.jpg')
            template = state['template'].astype(np.int32)
            #cv2.imwrite(save_template_file, template)
            #print('save template at {}'.format(save_template_file))
            # use the second image through the last as search images
            if len(imgpath) == 1:
                print(name + 'have only one img')
                continue
Example #20
def showImage(subscriber, camera_matrix, kcf_tracker_h):
    global x1, y1, x2, y2, drawing, init, flag, image, getim, start

    flag = 1
    init = False
    drawing = False
    getim = False
    start = False
    x1, x2, y1, y2 = -1, -1, -1, -1
    flag_lose = False
    count_lose = 0

    print('loading model...........')
    net = SiamRPNvot()
    net.load_state_dict(torch.load(path + 'SiamRPNVOT.model'))
    net.eval().cuda()
    z = torch.Tensor(1, 3, 127, 127)
    net.temple(z.cuda())
    x = torch.Tensor(1, 3, 271, 271)
    net(x.cuda())
    print('ready for starting!')

    rospy.Subscriber(subscriber, Image, callback)

    cv2.namedWindow('image')
    cv2.setMouseCallback('image', draw_circle)
    rate = rospy.Rate(50)
    while not rospy.is_shutdown():
        if getim:
            getim = False
            ## !
            d_info = DetectionInfo()
            d_info.frame = 0
            ## !

            if start is False and init is True:
                target_pos = np.array([int((x1 + x2) / 2), int((y1 + y2) / 2)])
                target_sz = np.array([int(x2 - x1), int(y2 - y1)])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                start = True
                flag_lose = False
                continue
            if start is True:
                state = SiamRPN_track(state, image)  # track
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]),
                              (res[0] + res[2], res[1] + res[3]),
                              (0, 255, 255), 2)

                ## !
                depth = kcf_tracker_h / state['target_sz'][1] * camera_matrix[1, 1]
                cx = state['target_pos'][0] - image.shape[1] / 2
                cy = state['target_pos'][1] - image.shape[0] / 2
                d_info.position[0] = depth * cx / camera_matrix[0, 0]
                d_info.position[1] = depth * cy / camera_matrix[1, 1]
                d_info.position[2] = depth
                d_info.sight_angle[0] = cx / (image.shape[1] / 2) * math.atan(
                    (image.shape[1] / 2) / camera_matrix[0, 0])
                d_info.sight_angle[1] = cy / (image.shape[0] / 2) * math.atan(
                    (image.shape[0] / 2) / camera_matrix[1, 1])
                d_info.detected = True
                ## !

                cv2.putText(image, str(state['score']),
                            (res[0] + res[2], res[1] + res[3]),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)

                if state['score'] < 0.5:
                    count_lose = count_lose + 1
                else:
                    count_lose = 0
                if count_lose > 4:
                    flag_lose = True
            if flag_lose is True:
                cv2.putText(image, 'target lost', (20, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                ## !
                d_info.detected = False
            if drawing is True:
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

            cx = int(image.shape[1] / 2)
            cy = int(image.shape[0] / 2)
            cv2.line(image, (cx - 20, cy), (cx + 20, cy), (255, 255, 255), 2)
            cv2.line(image, (cx, cy - 20), (cx, cy + 20), (255, 255, 255), 2)
            ## !
            pub.publish(d_info)
            cv2.imshow('image', image)
            cv2.waitKey(1)

        rate.sleep()
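The depth estimate in showImage is the pinhole-camera relation depth = f_y * H_real / h_pixels, with kcf_tracker_h the target's real height and camera_matrix[1, 1] the focal length in pixels; a sketch with made-up numbers:

def estimate_depth(real_height_m, pixel_height, fy):
    # pinhole model: pixel_height = fy * real_height / depth
    return fy * real_height_m / pixel_height

print(estimate_depth(0.5, 100, 600.0))  # 0.5 m target, 100 px tall, fy=600 -> 3.0 m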
Example #21
def get_object_center(q, detect_class):

    # classes:
    # 1.Aeroplanes     2.Bicycles   3.Birds       4.Boats           5.Bottles
    # 6.Buses          7.Cars       8.Cats        9.Chairs          10.Cows
    # 11.Dining tables 12.Dogs      13.Horses     14.Motorbikes     15.People
    # 16.Potted plants 17.Sheep     18.Sofas      19.Trains         20.TV/Monitors

    slim = tf.contrib.slim

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input,
        None,
        None,
        net_shape,
        data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                       is_training=False,
                                                       reuse=reuse)

    # Restore SSD model.
    # ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    ckpt_filename = '../SSD-Tensorflow/checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'

    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing routine.
    def process_image(img,
                      select_threshold=0.5,
                      nms_threshold=.45,
                      net_shape=(300, 300)):
        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})

        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions,
            rlocalisations,
            ssd_anchors,
            select_threshold=select_threshold,
            img_shape=net_shape,
            num_classes=21,
            decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses,
                                                            rscores,
                                                            rbboxes,
                                                            top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def get_bboxes(rclasses, rbboxes):
        # get center location of object

        number_classes = rclasses.shape[0]
        object_bboxes = []
        for i in range(number_classes):
            object_bbox = dict()
            object_bbox['i'] = i
            object_bbox['class'] = rclasses[i]
            object_bbox['y_min'] = rbboxes[i, 0]
            object_bbox['x_min'] = rbboxes[i, 1]
            object_bbox['y_max'] = rbboxes[i, 2]
            object_bbox['x_max'] = rbboxes[i, 3]
            object_bboxes.append(object_bbox)
        return object_bboxes

    # load net
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(
            join(realpath(dirname(__file__)),
                 '../DaSiamRPN-master/code/SiamRPNVOT.model')))

    net.eval()

    # open video capture
    video = cv2.VideoCapture(0)

    if not video.isOpened():
        print("Could not open video")
        sys.exit()

    index = True
    while index:

        # Read first frame.
        ok, frame = video.read()
        if not ok:
            print('Cannot read video file')
            sys.exit()

        # Define an initial bounding box
        height = frame.shape[0]
        width = frame.shape[1]

        rclasses, rscores, rbboxes = process_image(frame)

        bboxes = get_bboxes(rclasses, rbboxes)
        for bbox in bboxes:
            if bbox['class'] == detect_class:
                print(bbox)
                ymin = int(bbox['y_min'] * height)
                xmin = int((bbox['x_min']) * width)
                ymax = int(bbox['y_max'] * height)
                xmax = int((bbox['x_max']) * width)
                cx = (xmin + xmax) / 2
                cy = (ymin + ymax) / 2
                h = ymax - ymin
                w = xmax - xmin
                new_bbox = (cx, cy, w, h)
                print(new_bbox)
                index = False
                break

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    state = SiamRPN_init(frame, target_pos, target_sz, net)

    # tracking and visualization
    toc = 0
    count_number = 0

    while True:

        # Read a new frame
        ok, frame = video.read()
        if not ok:
            break

        # Start timer
        tic = cv2.getTickCount()

        # Update tracker
        state = SiamRPN_track(state, frame)  # track
        # print(state)

        toc += cv2.getTickCount() - tic

        if state:

            res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            res = [int(l) for l in res]

            cv2.rectangle(frame, (res[0], res[1]),
                          (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 3)

            count_number += 1
            # set object_center
            object_center = dict()
            object_center['x'] = state['target_pos'][0] / width
            object_center['y'] = state['target_pos'][1] / height
            q.put(object_center)

            if (not state) or count_number % 40 == 3:
                # Tracking failure
                cv2.putText(frame, "Tracking failure detected", (100, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
                index = True
                while index:
                    ok, frame = video.read()
                    rclasses, rscores, rbboxes = process_image(frame)
                    bboxes = get_bboxes(rclasses, rbboxes)
                    for bbox in bboxes:
                        if bbox['class'] == detect_class:
                            ymin = int(bbox['y_min'] * height)
                            xmin = int(bbox['x_min'] * width)
                            ymax = int(bbox['y_max'] * height)
                            xmax = int(bbox['x_max'] * width)
                            cx = (xmin + xmax) / 2
                            cy = (ymin + ymax) / 2
                            h = ymax - ymin
                            w = xmax - xmin
                            new_bbox = (cx, cy, w, h)
                            target_pos, target_sz = np.array(
                                [cx, cy]), np.array([w, h])
                            state = SiamRPN_init(frame, target_pos, target_sz,
                                                 net)

                            p1 = (int(xmin), int(ymin))
                            p2 = (int(xmax), int(ymax))
                            cv2.rectangle(frame, p1, p2, (0, 255, 0), 2, 1)

                            index = 0

                            break

        # resize the frame
        resized_frame = cv2.resize(frame,
                                   None,
                                   fx=0.65,
                                   fy=0.65,
                                   interpolation=cv2.INTER_AREA)
        # flip the frame horizontally (for mirrored display)
        horizontal = cv2.flip(resized_frame, 1, dst=None)

        # show the frame
        cv2.namedWindow("SSD+SiamRPN", cv2.WINDOW_NORMAL)
        cv2.imshow('SSD+SiamRPN', horizontal)

        # Exit if ESC pressed
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break

    video.release()
    cv2.destroyAllWindows()
Example #22
def track_and_predict(vid_path, net):
    """
    Returns:
        pred: Tracks the snitch, and return the class for the final
            coordinates of the snitch
        pred_gt: Uses the end point location of the snitch, projects to 2D,
            then computes the class coordinates. This is only returned as a
            sanity check, to make sure the projection etc functions are
            working reasonably -- i.e. using this output should get 100%
            accuracy.
    """
    start_coord, end_coord = get_snitch_start_end_coord(vid_path)
    start_coord_2d = proj_utils.project_3d_point(np.array([start_coord]))[0]
    end_coord_2d = proj_utils.project_3d_point(np.array([end_coord]))[0]
    cap = cv2.VideoCapture(vid_path)
    if not cap.isOpened():
        raise IOError('Unable to open video {}'.format(vid_path))
    frame_id = 0
    state = None  # State to be used by the tracker, will be init in the loop
    pred = None
    vid_out = None
    flag, frame = cap.read()
    out_vid_path = osp.join(DEBUG_STORE_VIDEO_PATH, osp.basename(vid_path))
    while flag:
        frame_id += 1
        if frame_id == 1:
            # tracker init
            h_frame, w_frame, _ = frame.shape
            cx, cy = start_coord_2d.tolist()
            cx, cy = un_normalize_img_coords(cx, cy, w_frame, h_frame)
            target_pos, target_sz = np.array([cx, cy]), np.array([30, 30])
            state = SiamRPN_init(frame, target_pos, target_sz, net)
            if DEBUG_STORE_VIDEO:
                vid_out = cv2.VideoWriter(
                    out_vid_path,
                    cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), 30,
                    (w_frame, h_frame))
        else:
            state = SiamRPN_track(state, frame)  # track
            if DEBUG_STORE_VIDEO:
                cx, cy = state['target_pos']
                w_box, h_box = state['target_sz']
                cv2.rectangle(
                    frame,
                    (int(cx - w_box/2), int(cy - h_box/2)),
                    (int(cx + w_box/2), int(cy + h_box/2)), (0, 255, 255), 3)
                # Drawing the GT rectangle as well
                cx_gt, cy_gt = un_normalize_img_coords(
                    end_coord_2d[0], end_coord_2d[1], w_frame, h_frame)
                cv2.rectangle(
                    frame,
                    (int(cx_gt - 10), int(cy_gt - 10)),
                    (int(cx_gt + 10), int(cy_gt + 10)), (255, 0, 0), 3)
                vid_out.write(frame)
        flag, frame = cap.read()
    # Converting back to -1 1 coordinates, as that will be used to project
    # back to the CATER plane
    cx, cy = state['target_pos']
    pred = proj_utils.get_class_prediction(
        (cx * 2 / w_frame - 1), (cy * 2 / h_frame - 1),
        nrows=NROWS, ncols=NCOLS)
    pred_gt = proj_utils.get_class_prediction(
        end_coord_2d[0], end_coord_2d[1],
        nrows=NROWS, ncols=NCOLS)
    cap.release()
    if vid_out is not None:
        print('Written debugging video to {}'.format(out_vid_path))
        vid_out.release()
    return pred, pred_gt
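un_normalize_img_coords is not shown, but the inverse mapping used at the end (cx * 2 / w_frame - 1) pins down what it must do: map [-1, 1] normalized coordinates back to pixels. A sketch:

def un_normalize_img_coords(cx, cy, w_frame, h_frame):
    # inverse of c -> c * 2 / extent - 1
    return (cx + 1) * w_frame / 2, (cy + 1) * h_frame / 2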
Example #23
            ymin = int(bbox['y_min'] * height)
            xmin = int((bbox['x_min']) * width)
            ymax = int(bbox['y_max'] * height)
            xmax = int((bbox['x_max']) * width)
            cx = (xmin + xmax) / 2
            cy = (ymin + ymax) / 2
            h = ymax - ymin
            w = xmax - xmin
            new_bbox = (cx, cy, w, h)
            print(new_bbox)
            index = False
            break

# tracker init
target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
state = SiamRPN_init(frame, target_pos, target_sz, net)

# tracking and visualization
toc = 0
count_number = 0

while True:
    # Read a new frame
    ok, frame = video.read()
    if not ok:
        break

    # Start timer
    tic = cv2.getTickCount()

    # Update tracker
Example #24
args = parser.parse_args()

# load net
net = eval(args.model)()
load_net('./{}.pth'.format(args.model), net)
net.eval().cuda()

# image and init box
image_files = sorted(glob.glob('./testData/*.jpg'))
init_rbox = [3641, 1778, 3810, 1778, 3810, 2313, 3641, 2313]
[cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

# tracker init
target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_files[0])  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net, args.model)

# tracking and visualization
toc = 0
for f, image_file in enumerate(image_files):
    im = cv2.imread(image_file)
    # print(im.shape)
    tic = cv2.getTickCount()
    state = SiamRPN_track(state, im)  # track
    toc += cv2.getTickCount() - tic
    res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
    res = [int(l) for l in res]
    # print(res)
    cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]),
                  (0, 255, 255), 3)
    cv2.imshow('SiamRPN', im)
    cv2.waitKey(1)
Example #25
net.cuda().eval()

# warm up
for i in range(10):
    net.temple(torch.autograd.Variable(torch.ones(1, 3, 127, 127)).cuda(),
               torch.autograd.Variable(torch.ones(1, 3, 271, 271)).cuda())
    net(torch.autograd.Variable(torch.ones(1, 3, 271, 271)).cuda())

for seq in OTB_seqs:
    _data_provider.pick_seq(seq)
    exemplar_path, exemplar_gt, cur_img_num = _data_provider.eval_pick_exemplar()

    exemplar = cv2.imread(exemplar_path)
    exemplar_pos, exemplar_sz = rect_2_cxy_wh(exemplar_gt)
    state = SiamRPN_init(exemplar, exemplar_pos, exemplar_sz, net)
    save_file = save_res_path + seq + '_ours.txt'
    tracking_res = open(save_file, 'w')

    for idx in range(cur_img_num):

        instance_path = _data_provider.eval_pick_instance()
        instance = cv2.imread(instance_path)
        state = SiamRPN_track(state, instance)
        print('seq:{}:{} , score:{}'.format(seq, idx, state['score']))
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        tracking_res.write('{},{},{},{}'.format(res[0], res[1], res[2],
                                                res[3]))
        tracking_res.write('\n')

    tracking_res.close()