Example #2
def compute_preds(vid_line):
    # Most nodes have 4 GPUs, so run on a random one
    torch.cuda.set_device(np.random.randint(4))
    # load net  <-- have to do for each as I'm running in mp
    net = SiamRPNvot()
    net.load_state_dict(model_wts)
    net.eval().cuda()
    vid_fpath, lbl = vid_line.split()
    pred, pred_gt = track_and_predict(vid_fpath, net)
    return pred, pred_gt, int(lbl), vid_fpath
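
compute_preds reloads the net on every call because, as its comments note, it runs under multiprocessing with each worker pinned to a random GPU. A minimal driver might look like the sketch below; the list file name and worker count are assumptions, and model_wts is expected to exist at module level:

import torch.multiprocessing as mp

if __name__ == '__main__':
    mp.set_start_method('spawn')  # fork can corrupt CUDA state in child processes
    with open('val_videos.txt') as f:  # hypothetical file of "video_path label" lines
        vid_lines = f.read().splitlines()
    with mp.Pool(processes=8) as pool:  # worker count is an assumption
        results = pool.map(compute_preds, vid_lines)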
Example #3
def main():
    global args, v_id
    args = parser.parse_args()

    net = SiamRPNvot()  #changed otb to vot
    net.load_state_dict(
        torch.load(join(realpath(dirname(__file__)),
                        'SiamRPNVOT.model')))  #changed OTB to VOT
    net.eval().cuda()

    dataset = load_dataset(args.dataset)
    fps_list = []
    for v_id, video in enumerate(dataset.keys()):
        fps_list.append(track_video(net, dataset[video]))
    print('Mean Running Speed {:.1f}fps'.format(np.mean(np.array(fps_list))))
Example #4
def main():

    vid_file = os.path.expanduser("~/Videos/VID_20190327_195111.mp4")

    cap = cv2.VideoCapture(vid_file)

    # load net
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model')))
    net.eval().cuda()

    # # image and init box
    # image_files = sorted(glob.glob('./bag/*.jpg'))
    init_rbox = [
        334.02, 128.36, 438.19, 188.78, 396.39, 260.83, 292.23, 200.41
    ]
    [cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    ret, im = cap.read()

    state = SiamRPN_init(im, target_pos, target_sz, net, use_gpu=True)

    toc = 0
    while True:
        # Capture frame-by-frame
        ret, im = cap.read()
        if not ret:  # stop when the video ends
            break

        tic = cv2.getTickCount()
        state = SiamRPN_track(state, im, use_gpu=True)  # track
        toc += cv2.getTickCount() - tic
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        res = [int(l) for l in res]
        cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]),
                      (0, 255, 255), 3)
        cv2.imshow('SiamRPN', im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything done, release the capture
    cap.release()
    cv2.destroyAllWindows()
Example #5
        super(SiamRPNvot_fe, self).__init__(size=1, feature_out=256)
        self.cfg = {
            'lr': 0.45,
            'window_influence': 0.44,
            'penalty_k': 0.04,
            'instance_size': 600,
            'adaptive': False
        }  # 0.355


# load net
from net import SiamRPNvot
from os.path import realpath, dirname, join
import os

net = SiamRPNvot()
net.load_state_dict(
    torch.load('/home/malick/Bureau/DaSiamRPN/code/SiamRPNVOT.model'))
net.eval()

#save and load model
fe = SiamRPNvot_fe()
fe_dict = fe.state_dict()
net_dict = net.state_dict()
new_dict = {k: v for k, v in net_dict.items() if k in fe_dict}
fe_dict.update(new_dict)
fe.load_state_dict(fe_dict)

#freeze parameters of feature extractor to avoid computing gradient

for param in fe.parameters():
    param.requires_grad = False
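
A quick check (a sketch, using the fe defined above) that the freeze took effect:

n_trainable = sum(p.numel() for p in fe.parameters() if p.requires_grad)
print('trainable params in fe:', n_trainable)  # expect 0 after freezing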
Example #6
class Video_Labeler():

	def __init__(self, vid_file=False, scale=.6, start_frame_num=0):
		self.labels = []
		self.scale = scale
		self.start_frame_num = start_frame_num
		self.frame_num = start_frame_num

		self.modes = ["ColorCut", "AdaptiveThreshold", "GrabCut"]
		self.mode_idx = 0
		self.mode = self.modes[self.mode_idx]
		self.use_grab_cut = False
		self.run_video = False
		self.vid_done = False
		self.redo_annotation = False
		self.root = Tk()      
		self.canvas = Canvas(self.root, width = 600, height = 600)      
		self.canvas.pack()      

		self.rect_pad = .1 # Amount to pad the DaSiamRPN rectangle for grabcut (relative units)
		self.threshold_content_limit = 90

		if not vid_file:
			self.vid_file = self.select_video()
		else:
			self.vid_file = vid_file
		
		self.disp_name = 'SiamRPN'
		self.create_window()
		
		self.down_point = None
		self.up_point = None
		self.sel_rect = None
		self.in_click = False
		self.x_cursor = None
		self.y_cursor = None
		cv2.setMouseCallback(self.disp_name, self.on_mouse)
		self.cap = cv2.VideoCapture(self.vid_file)
		self.init_rbox = None

		# load net
		self.net = SiamRPNvot()
		self.net.load_state_dict(torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model')))
		self.net.eval().cuda()

	def create_window(self):
		cv2.namedWindow(self.disp_name)


	def select_video(self):
		Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
		filename = askopenfilename() # show an "Open" dialog box and return the path to the selected file
		return filename

	def adaptive_cut(self,img,rect):
		img_crop = img[rect[1]:rect[1]+rect[3], 
						rect[0]:rect[0]+rect[2]]
		img_gray = cv2.cvtColor(img_crop, cv2.COLOR_BGR2GRAY)
		# ret, img_thresh =  cv2.threshold(img_gray,127,255,cv2.THRESH_BINARY_INV)
		frame_threshold = cv2.adaptiveThreshold(img_gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,11,2)
		cv2.imshow("thresh", frame_threshold)
		rect_2 = self.get_bounding_box(frame_threshold)

		return [rect[0]+rect_2[0], rect[1] + rect_2[1], rect_2[2], rect_2[3]]

	def color_cut(self, img, rect):
		frame_HSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
		hsv_cropped = frame_HSV[rect[1]:rect[1]+rect[3], 
								rect[0]:rect[0]+rect[2]]

		high_b =  np.asarray((180, 255, 120))
		low_b =    np.asarray((0, 0, 0))
		frame_threshold = cv2.inRange(hsv_cropped, low_b, high_b)
		cv2.imshow("thresh", frame_threshold)
		rect_2 = self.get_bounding_box(frame_threshold)
		return [rect[0]+rect_2[0], rect[1] + rect_2[1], rect_2[2], rect_2[3]]

	def edge_cut(self, img, rect):
		edges = cv2.Canny(img,100,200)


	def grab_cut(self, img, rect):
		mask = np.zeros(img.shape[:2],np.uint8)
		bgdModel = np.zeros((1,65),np.float64)
		fgdModel = np.zeros((1,65),np.float64)

		cv2.grabCut(img,mask,rect,bgdModel,fgdModel,5,cv2.GC_INIT_WITH_RECT)
		frame_threshold = 255*np.where((mask==2)|(mask==0),0,1).astype('uint8')
		kernel = np.ones((5,5),np.uint8)
		
		frame_threshold = cv2.erode(frame_threshold,kernel,iterations = 2)

		cv2.imshow("thresh", frame_threshold[rect[1]:rect[1]+rect[3], 
								rect[0]:rect[0]+rect[2]])
		img = img*frame_threshold[:,:,np.newaxis]

		# note: the 3-value return is the OpenCV 3.x API; OpenCV 4 returns only (contours, hierarchy)
		im, contours, hierarchy = cv2.findContours(frame_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
		if len(contours) != 0:
			contours_sorted = sorted(contours, key = self.hull_convexity_ratio)
			c = contours_sorted[0]
			x,y,w,h = cv2.boundingRect(c)
			return [x,y,w,h]
		# img += 255*(1 - mask2[:,:,np.newaxis])
		# return self.get_bounding_box(frame_threshold)

	def hull_convexity_ratio(self, cnt):
		area = cv2.contourArea(cnt)
		# guard against degenerate zero-area contours before dividing
		return cv2.contourArea(cv2.convexHull(cnt)) / area if area else float('inf')

	def get_bounding_box(self, mask):
		im,contours,hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
		if len(contours) != 0:
			#find the biggest area
			c = max(contours, key = cv2.contourArea)
			x,y,w,h = cv2.boundingRect(c)
			return [x,y,w,h]

	def check_rect(self, rect, im_size):
		# clip xywh-style rectangle to fit within image
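		# e.g. with im_size (100, 100), xywh [90, 90, 30, 30] becomes xyxy
		# [90, 90, 120, 120], is clipped to [90, 90, 100, 100], and converts
		# back to xywh [90, 90, 10, 10]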
		w, h = im_size

		rect = BoxUtils.convert(rect,
			BoxUtils.FMT_XYWH,
			BoxUtils.FMT_XYXY
			)

		rect[0] = np.clip(rect[0], 0, w)
		rect[1] = np.clip(rect[1], 0, h)
		rect[2] = np.clip(rect[2], 0, w)
		rect[3] = np.clip(rect[3], 0, h)

		rect = BoxUtils.convert(rect,
			BoxUtils.FMT_XYXY,
			BoxUtils.FMT_XYWH
			)

		return rect

	def optimize_rect(self, rect_xywh, im):
		height, width, channels = im.shape
		rect_xywh = np.int32(np.hstack([
			rect_xywh[0] - width * self.rect_pad / 2.,
			rect_xywh[1] - height * self.rect_pad / 2.,
			rect_xywh[2] + 2 * width * self.rect_pad,
			rect_xywh[3] + 2 * height * self.rect_pad]))
		rect_xywh = self.check_rect(rect_xywh, [width, height])
		if self.mode == "GrabCut":
			rect_xywh = self.grab_cut(im, tuple(np.int32(rect_xywh)))
		elif self.mode == "ColorCut":
			rect_xywh = self.color_cut(im, np.int32(rect_xywh)) # Perform color cut
		elif self.mode == "AdaptiveThreshold":
			rect_xywh = self.adaptive_cut(im, np.int32(rect_xywh))
		rect_xywh = self.check_rect(rect_xywh, (width, height))
		rect_xywh = np.int32(rect_xywh)
		return rect_xywh

	def save(self):
		print("File saved as: " + "{}_labels.p".format(os.path.splitext(ntpath.basename(self.vid_file))[0]))
		pickle.dump(self.labels, open("{}_labels.p".format(os.path.splitext(ntpath.basename(self.vid_file))[0]), "wb" ) )

	def start_labeling(self):
		self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.frame_num)
		
		ret, im = self.cap.read()
		im = cv2.resize(im,None, fx=self.scale, fy=self.scale)
		height, width, channels = im.shape
		# # image and init box
		self.draw_bbox()
		sel_rect_wh = (
			self.sel_rect[0][0], self.sel_rect[0][1],
			self.sel_rect[1][0]- self.sel_rect[0][0],
			self.sel_rect[1][1] - self.sel_rect[0][1])
		
		rect_gc = self.color_cut(im, sel_rect_wh)
		# tracker init
		target_pos, target_sz = np.array(rect_gc[0:2]) + np.array(rect_gc[2:4])/2, np.array(rect_gc[2:4])

		state = SiamRPN_init(im, target_pos, target_sz, self.net, use_gpu=True)

		state_hist = [state]
		toc = 0
		old_frame_num = self.frame_num - 1
		# self.frame_num = 0
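		# Keys: d = draw box, r = toggle redo flag, j = step back a frame,
		# m = cycle segmentation mode, v = toggle video playback,
		# k = step forward, s = save labels, q = quit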

		while True:
			if old_frame_num != self.frame_num:
				old_frame_num = self.frame_num
				self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.frame_num)
				# Capture frame-by-frame
				ret, im = self.cap.read()
				if not ret:
					self.save()
					break
				im = cv2.resize(im,None, fx=self.scale, fy=self.scale)
				if len(self.labels) <= self.frame_num - self.start_frame_num:
					state_hist.append(SiamRPN_track(state_hist[-1], im, use_gpu=True))  # track)
					state = state_hist[-1]
					rect_ccwh = np.concatenate([state['target_pos'], state['target_sz']])
					rect_xywh = BoxUtils.convert(rect_ccwh,
						BoxUtils.FMT_CCWH,
						BoxUtils.FMT_XYWH
						)

					rect_xywh = self.optimize_rect(rect_xywh, im)
					rect_xyxy = BoxUtils.convert(rect_xywh,
					BoxUtils.FMT_XYWH,
					BoxUtils.FMT_XYXY
					)
					
				else:
					state = state_hist[self.frame_num - self.start_frame_num] # Use previous frame state
					rect_xyxy = BoxUtils.unnormalize(self.labels[self.frame_num - self.start_frame_num].bounding_rect, BoxUtils.FMT_XYXY, [height, width])
					
				
				pt0, pt1 = tuple(np.asarray(rect_xyxy[:2], np.int32)), tuple(np.asarray(rect_xyxy[2:], np.int32))


				cv2.rectangle(im, pt0, pt1, (0, 255, 255), 3)

			k =  cv2.waitKey(33)

			if k & 0xFF == ord('d'):
				# Drawing Mode
				self.draw_bbox()
				rect_xyxy = [self.sel_rect[0][0],self.sel_rect[0][1], self.sel_rect[1][0], self.sel_rect[1][1]]
				# drawn_norm = 
				rect_xywh = BoxUtils.convert(
					rect_xyxy,
					BoxUtils.FMT_XYXY,
					BoxUtils.FMT_XYWH)
				# rect_xywh = self.optimize_rect(rect_xywh, im)
				# # self.labels[self.frame_num].bounding_rect = BoxUtils.normalize(rect_xywh, BoxUtils.FMT_XYWH, [height, width])
				# rect_xyxy = BoxUtils.convert(
				# 	rect_xywh,
				# 	BoxUtils.FMT_XYWH,
				# 	BoxUtils.FMT_XYXY)
				rect_ccwh = BoxUtils.convert(rect_xywh,
					BoxUtils.FMT_XYWH,
					BoxUtils.FMT_CCWH
					)

				state['target_pos'] = rect_ccwh[:2]
				state['target_sz']  = rect_ccwh[2:]
				old_frame_num = self.frame_num - 1


			if k & 0xFF == ord('r'):
				self.redo_annotation = not self.redo_annotation
				print("Annotation redo set to: ", self.redo_annotation)
			# If a new frame is being annotated, add it
			if len(self.labels) <= self.frame_num - self.start_frame_num:
				print("Normalized rect: ", BoxUtils.normalize(rect_xyxy, BoxUtils.FMT_XYXY, [height, width]))
				self.labels.append(Annotation(self.vid_file, self.frame_num, BoxUtils.normalize(rect_xyxy, BoxUtils.FMT_XYXY, [height, width]), self.redo_annotation))
				self.redo_annotation = False
			else: # Check if the cache should be used
				if k & 0xFF == ord('d'):
					self.labels[self.frame_num] = Annotation(self.vid_file, self.frame_num, BoxUtils.normalize(rect_xyxy, BoxUtils.FMT_XYXY, [height, width]), self.redo_annotation)
					self.redo_annotation = False
			cv2.imshow(self.disp_name, im)

			if k & 0xFF == ord('j'):
				if self.frame_num > 0:
					self.frame_num = self.frame_num - 1

			if k & 0xFF == ord('m'):
				self.mode_idx = self.mode_idx + 1
				self.mode = self.modes[self.mode_idx%len(self.modes)]
				print("Toggeling mode: ", self.mode)
				

			if k & 0xFF == ord('v'):
				print("Toggeling Video Mode: ", not self.run_video)
				self.run_video = not self.run_video

			if k & 0xFF == ord('k') or self.run_video:
				self.frame_num = self.frame_num + 1

			if k & 0xFF == ord('s') :
				self.save()

			if k & 0xFF == ord('q'):
				break

		# When everything done, release the capture
		self.cap.release()
		cv2.destroyAllWindows()


	def draw_bbox(self):
		# preview
		self.sel_rect = None
		self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.frame_num)
		ret, im = self.cap.read()
		im = cv2.resize(im,None, fx=self.scale, fy=self.scale)
		height, width, channels = im.shape
		while True:
			
			frame_draw = np.copy(im)
			if self.sel_rect is not None:
				cv2.rectangle(frame_draw, tuple(self.sel_rect[0]), tuple(self.sel_rect[1]), (255, 255, 255))
			if self.x_cursor is not None:
				cv2.line(frame_draw, (self.x_cursor, 0), (self.x_cursor, height), (0,0,255))
				cv2.line(frame_draw, (0, self.y_cursor), (width, self.y_cursor), (0,0,255))
			cv2.imshow(self.disp_name, frame_draw)

			if cv2.waitKey(1) & 0xFF == ord('q'):
				break

		# return [334.02,128.36,438.19,188.78,396.39,260.83,292.23,200.41]

	def on_mouse(self, event, x, y, flags, param):
		self.x_cursor = x
		self.y_cursor = y

		if event == cv2.EVENT_LBUTTONDOWN:
			self.in_click = True
			self.down_point = [x, y]            

		if event == cv2.EVENT_LBUTTONUP:
			self.in_click = False
			self.up_point = [x, y]
			if self.up_point == self.down_point:
				self.sel_rect = None
			else:
				for i in range(0,2):
					if self.down_point[i] > self.up_point[i]:
						self.down_point[i], self.up_point[i] = self.up_point[i], self.down_point[i]
			if self.down_point is not None:
				self.sel_rect = (self.down_point, self.up_point)
		

		if self.in_click:

			self.sel_rect = (self.down_point, (x, y))
Example #7
# DaSiamRPN
# Licensed under The MIT License
# Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
# --------------------------------------------------------
#!/usr/bin/python

import glob, cv2, torch
import numpy as np
from os.path import realpath, dirname, join

from net import SiamRPNvot
from run_SiamRPN import SiamRPN_init, SiamRPN_track
from utils import get_axis_aligned_bbox, cxy_wh_2_rect

# load net
net = SiamRPNvot()
net.load_state_dict(torch.load(join(realpath(dirname(__file__)), 'SOT.model')))
net.eval().cuda()

# image and init box
image_files = sorted(glob.glob('./12_video/*.png'))
image_files.reverse()
#init_rbox = [334.02,128.36,438.19,188.78,396.39,260.83,292.23,200.41]
init_rbox = [135.0, 141.0, 187.0, 141.0, 187.0, 168.0, 135.0, 168.0]
[cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

# tracker init
target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_files[0])  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net)
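
The snippet stops right after tracker initialization. A continuation in the style of the other examples on this page (a sketch, reusing image_files and state from above) steps SiamRPN_track over the remaining frames:

for image_file in image_files[1:]:
    im = cv2.imread(image_file)
    state = SiamRPN_track(state, im)  # update tracker state on each frame
    res = [int(l) for l in cxy_wh_2_rect(state['target_pos'], state['target_sz'])]
    cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 3)
    cv2.imshow('SiamRPN', im)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()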
Example #8
            'ants1', 'ants3', 'bag', 'ball1', 'ball2', 'basketball', 'birds1',
            'blanket', 'bmx', 'bolt1', 'bolt2', 'book', 'butterfly', 'car1',
            'conduction1', 'crabs1', 'crossing', 'dinosaur', 'drone_across',
            'drone_flip', 'drone1', 'fernando', 'fish1', 'fish2', 'fish3',
            'flamingo1', 'frisbee', 'girl', 'glove', 'godfather', 'graduate',
            'gymnastics1', 'gymnastics2', 'gymnastics3', 'hand', 'handball1',
            'handball2', 'helicopter', 'iceskater1', 'iceskater2', 'leaves',
            'matrix', 'motocross1', 'motocross2', 'nature', 'pedestrian1',
            'rabbit', 'racing', 'road', 'shaking', 'sheep', 'singer2',
            'singer3', 'soccer1', 'soccer2', 'soldier', 'tiger', 'traffic',
            'wiper', 'zebrafish1']
"""
vot = VOT(root)
video_names = vot.get_video_names()
# load net
net = SiamRPNvot()
net.load_state_dict(torch.load(join('/home/jianingq/bgflow/DaSiamRPN/code/', 'model','SiamRPNVOT.model')))
net.eval().cuda()

score_net = ScoreNet(6, 1)
score_net.cuda()
score_net.load_state_dict(torch.load('models/ckpt_0.pth'))

for video_name in ['gymnastics3']:

    total_iou = 0
    total_failure = 0
    warped_images = []
    video_length = vot.get_frame_length(video_name)
    #ground truth bounding box
    gts = vot.get_gts(video_name)
Example #9
        self._pick_img_pairs(index)
        self._pad_crop_resize_template()
        self._pad_crop_resize_detection()
        self._generate_pos_neg_diff()  # build the 1445x5 anchor tensor: each row is the class label plus dx, dy, dw, dh
        # self._tranform()  # PIL to Tensor
        self.count += 1
        return self.ret

    def __len__(self):
        return len(self.sub_class_dir)



if __name__ == '__main__':
    # we will do a test for dataloader
    net = SiamRPNvot()
    loader = TrainDataLoader('D:\\uav_frame\\00', net, check=True)
    #print(loader.__len__())
    index_list = range(len(loader))
    for i in range(1000):
        ret = loader.__get__(random.choice(index_list))
        label = ret['pos_neg_diff'][:, 0].reshape(-1)
        pos_index = list(np.where(label == 1)[0])
        pos_num = len(pos_index)
        print(pos_index)
        print(pos_num)
        if pos_num != 0 and pos_num != 16:
            print(pos_num)
            sys.exit(0)
        print(i)
Example #10
# --------------------------------------------------------
#!/usr/bin/python

import glob, cv2, torch
import numpy as np
from os.path import realpath, dirname, join

from net import SiamRPNvot
from run_SiamRPN import SiamRPN_init, SiamRPN_track
from utils import get_axis_aligned_bbox, cxy_wh_2_rect

# get supported device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# load net
net = SiamRPNvot()
net.load_state_dict(torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model'), map_location=device))
net.eval().to(device)

# image and init box
image_files = sorted(glob.glob('./bag/*.jpg'))
init_rbox = [334.02,128.36,438.19,188.78,396.39,260.83,292.23,200.41]
[cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

# tracker init
target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_files[0])  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net, device)

# tracking and visualization
toc = 0
Example #11
def showImage(subscriber, camera_matrix, kcf_tracker_h):
    global x1, y1, x2, y2, drawing, init, flag, image, getim, start

    flag = 1
    init = False
    drawing = False
    getim = False
    start = False
    x1, x2, y1, y2 = -1, -1, -1, -1
    flag_lose = False
    count_lose = 0

    print('loading model...........')
    net = SiamRPNvot()
    net.load_state_dict(torch.load(path + 'SiamRPNVOT.model'))
    net.eval().cuda()
    # warm up with dummy template (127x127) and detection (271x271) sized tensors
    z = torch.Tensor(1, 3, 127, 127)
    net.temple(z.cuda())
    x = torch.Tensor(1, 3, 271, 271)
    net(x.cuda())
    print('ready for starting!')

    rospy.Subscriber(subscriber, Image, callback)

    cv2.namedWindow('image')
    cv2.setMouseCallback('image', draw_circle)
    rate = rospy.Rate(50)
    while not rospy.is_shutdown():
        if getim:
            getim = False
            ## !
            d_info = DetectionInfo()
            d_info.frame = 0
            ## !

            if start is False and init is True:
                target_pos = np.array([int((x1 + x2) / 2), int((y1 + y2) / 2)])
                target_sz = np.array([int(x2 - x1), int(y2 - y1)])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                start = True
                flag_lose = False
                continue
            if start is True:
                state = SiamRPN_track(state, image)  # track
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]),
                              (res[0] + res[2], res[1] + res[3]),
                              (0, 255, 255), 2)

                ## !
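                # pinhole-camera depth estimate: Z = real_height * fy / pixel_height
                # (kcf_tracker_h is presumably the target's physical height)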
                depth = kcf_tracker_h / state['target_sz'][1] * camera_matrix[
                    1, 1]
                cx = state['target_pos'][0] - image.shape[1] / 2
                cy = state['target_pos'][1] - image.shape[0] / 2
                d_info.position[0] = depth * cx / camera_matrix[0, 0]
                d_info.position[1] = depth * cy / camera_matrix[1, 1]
                d_info.position[2] = depth
                d_info.sight_angle[0] = cx / (image.shape[1] / 2) * math.atan(
                    (image.shape[1] / 2) / camera_matrix[0, 0])
                d_info.sight_angle[1] = cy / (image.shape[0] / 2) * math.atan(
                    (image.shape[0] / 2) / camera_matrix[1, 1])
                d_info.detected = True
                ## !

                cv2.putText(image, str(state['score']),
                            (res[0] + res[2], res[1] + res[3]),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)

                if state['score'] < 0.5:
                    count_lose = count_lose + 1
                else:
                    count_lose = 0
                if count_lose > 4:
                    flag_lose = True
            if flag_lose is True:
                cv2.putText(image, 'target lost', (20, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                ## !
                d_info.detected = False
            if drawing is True:
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

            cx = int(image.shape[1] / 2)
            cy = int(image.shape[0] / 2)
            cv2.line(image, (cx - 20, cy), (cx + 20, cy), (255, 255, 255), 2)
            cv2.line(image, (cx, cy - 20), (cx, cy + 20), (255, 255, 255), 2)
            ## !
            pub.publish(d_info)
            cv2.imshow('image', image)
            cv2.waitKey(1)

        rate.sleep()
Example #12
# DaSiamRPN
# Licensed under The MIT License
# Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
# --------------------------------------------------------
#!/usr/bin/python

import glob, cv2, torch
import numpy as np
from os.path import realpath, dirname, join

from net import SiamRPNvot
from run_SiamRPN import SiamRPN_init, SiamRPN_track
from utils import get_axis_aligned_bbox, cxy_wh_2_rect

# load net
net = SiamRPNvot()
net.load_state_dict(torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model')))
net.cpu()
#net.eval().cuda()

# image and init box
image_files = sorted(glob.glob('./bag/*.jpg'))
init_rbox = [334.02,128.36,438.19,188.78,396.39,260.83,292.23,200.41]
[cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

# tracker init
target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_files[0])  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net)

# tracking and visualization
def get_object_center(q, detect_class):

    # classes:
    # 1.Aeroplanes     2.Bicycles   3.Birds       4.Boats           5.Bottles
    # 6.Buses          7.Cars       8.Cats        9.Chairs          10.Cows
    # 11.Dining tables 12.Dogs      13.Horses     14.Motorbikes     15.People
    # 16.Potted plants 17.Sheep     18.Sofas      19.Trains         20.TV/Monitors

    slim = tf.contrib.slim

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input,
        None,
        None,
        net_shape,
        data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                       is_training=False,
                                                       reuse=reuse)

    # Restore SSD model.
    # ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    ckpt_filename = '../SSD-Tensorflow/checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'

    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing routine.
    def process_image(img,
                      select_threshold=0.5,
                      nms_threshold=.45,
                      net_shape=(300, 300)):
        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})

        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions,
            rlocalisations,
            ssd_anchors,
            select_threshold=select_threshold,
            img_shape=net_shape,
            num_classes=21,
            decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses,
                                                            rscores,
                                                            rbboxes,
                                                            top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def get_bboxes(rclasses, rbboxes):
        # get center location of object

        number_classes = rclasses.shape[0]
        object_bboxes = []
        for i in range(number_classes):
            object_bbox = dict()
            object_bbox['i'] = i
            object_bbox['class'] = rclasses[i]
            object_bbox['y_min'] = rbboxes[i, 0]
            object_bbox['x_min'] = rbboxes[i, 1]
            object_bbox['y_max'] = rbboxes[i, 2]
            object_bbox['x_max'] = rbboxes[i, 3]
            object_bboxes.append(object_bbox)
        return object_bboxes

    # load net
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(
            join(realpath(dirname(__file__)),
                 '../DaSiamRPN-master/code/SiamRPNVOT.model')))

    net.eval()

    # open video capture
    video = cv2.VideoCapture(0)

    if not video.isOpened():
        print("Could not open video")
        sys.exit()

    index = True
    while index:

        # Read first frame.
        ok, frame = video.read()
        if not ok:
            print('Cannot read video file')
            sys.exit()

        # Define an initial bounding box
        height = frame.shape[0]
        width = frame.shape[1]

        rclasses, rscores, rbboxes = process_image(frame)

        bboxes = get_bboxes(rclasses, rbboxes)
        for bbox in bboxes:
            if bbox['class'] == detect_class:
                print(bbox)
                ymin = int(bbox['y_min'] * height)
                xmin = int((bbox['x_min']) * width)
                ymax = int(bbox['y_max'] * height)
                xmax = int((bbox['x_max']) * width)
                cx = (xmin + xmax) / 2
                cy = (ymin + ymax) / 2
                h = ymax - ymin
                w = xmax - xmin
                new_bbox = (cx, cy, w, h)
                print(new_bbox)
                index = False
                break

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    state = SiamRPN_init(frame, target_pos, target_sz, net)

    # tracking and visualization
    toc = 0
    count_number = 0

    while True:

        # Read a new frame
        ok, frame = video.read()
        if not ok:
            break

        # Start timer
        tic = cv2.getTickCount()

        # Update tracker
        state = SiamRPN_track(state, frame)  # track
        # print(state)

        toc += cv2.getTickCount() - tic

        if state:

            res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            res = [int(l) for l in res]

            cv2.rectangle(frame, (res[0], res[1]),
                          (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 3)

            count_number += 1
            # set object_center
            object_center = dict()
            object_center['x'] = state['target_pos'][0] / width
            object_center['y'] = state['target_pos'][1] / height
            q.put(object_center)

            if (not state) or count_number % 40 == 3:
                # Tracking failure
                cv2.putText(frame, "Tracking failure detected", (100, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
                index = True
                while index:
                    ok, frame = video.read()
                    rclasses, rscores, rbboxes = process_image(frame)
                    bboxes = get_bboxes(rclasses, rbboxes)
                    for bbox in bboxes:
                        if bbox['class'] == detect_class:
                            ymin = int(bbox['y_min'] * height)
                            xmin = int(bbox['x_min'] * width)
                            ymax = int(bbox['y_max'] * height)
                            xmax = int(bbox['x_max'] * width)
                            cx = (xmin + xmax) / 2
                            cy = (ymin + ymax) / 2
                            h = ymax - ymin
                            w = xmax - xmin
                            new_bbox = (cx, cy, w, h)
                            target_pos, target_sz = np.array(
                                [cx, cy]), np.array([w, h])
                            state = SiamRPN_init(frame, target_pos, target_sz,
                                                 net)

                            p1 = (int(xmin), int(ymin))
                            p2 = (int(xmax), int(ymax))
                            cv2.rectangle(frame, p1, p2, (0, 255, 0), 2, 1)

                            index = 0

                            break

        # resize the frame
        resized_frame = cv2.resize(frame,
                                   None,
                                   fx=0.65,
                                   fy=0.65,
                                   interpolation=cv2.INTER_AREA)
        # flip the frame horizontally (for mirrored display)
        horizontal = cv2.flip(resized_frame, 1, dst=None)

        # show the frame
        cv2.namedWindow("SSD+SiamRPN", cv2.WINDOW_NORMAL)
        cv2.imshow('SSD+SiamRPN', horizontal)

        # Exit if ESC pressed
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break

    video.release()
    cv2.destroyAllWindows()
Example #14
def showImage():
    
    global x1, y1, x2, y2, drawing, init, flag, image, getim, start
    rospy.init_node('RPN', anonymous=True)
    
    flag=1
    init = False
    drawing = False
    getim = False
    start = False
    x1, x2, y1, y2 = -1, -1, -1, -1
    flag_lose = False
    count_lose = 0

    print('loading model...........')
    net = SiamRPNvot()
    net.load_state_dict(torch.load(path + 'SiamRPNVOT.model'))
    net.eval().cuda()
    z = torch.Tensor(1, 3, 127, 127)
    net.temple(z.cuda())
    x = torch.Tensor(1, 3, 271, 271)
    net(x.cuda())
    print('ready for starting!')
    
    rospy.Subscriber('/camera/rgb/image_raw', Image, callback)
    pub = rospy.Publisher('/vision/target', Pose, queue_size=10) 
    cv2.namedWindow('image')
    cv2.setMouseCallback('image', draw_circle)
    rate = rospy.Rate(30)
    i = 1
    t = time.time()
    fps = 0
    while not rospy.is_shutdown():
      
        if getim:
            t1 = time.time()
            idd = readid(image)
            
            pose = Pose()
            pose.position.z = 0
            
            if start is False and init is True:
                target_pos = np.array([int((x1+x2)/2), int((y1+y2)/2)])
                target_sz = np.array([int(x2-x1), int(y2-y1)])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                start = True
                flag_lose = False
                continue
                
            if start is True:
            
                state = SiamRPN_track(state, image)  # track              
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 2)
                pose.position.x = (state['target_pos'][0]-image.shape[1]/2) / (image.shape[1]/2)
                pose.position.y = (state['target_pos'][1]-image.shape[0]/2) / (image.shape[0]/2)
                cv2.putText(image, str(state['score']), (res[0] + res[2], res[1] + res[3]), cv2.FONT_HERSHEY_SIMPLEX , 0.5, (255,255,0), 1)
                pose.position.z = 1
                if state['score'] < 0.5:
                    count_lose = count_lose + 1
                else:
                    count_lose = 0
                if count_lose > 4:
                    flag_lose = True
                    
            if flag_lose is True:
                    cv2.putText(image, 'target is lost!', (200,200), cv2.FONT_HERSHEY_SIMPLEX , 2, (255,0,0), 3)
                    pose.position.z = -1
                   
            if drawing is True:              
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            
            cv2.putText(image, '#'+str(idd), (30,30), cv2.FONT_HERSHEY_SIMPLEX , 0.5, (0, 255, 255), 1)
            cx = int(image.shape[1]/2)
            cy = int(image.shape[0]/2)
            cv2.line(image,(cx-20,cy), (cx+20, cy), (255, 255, 255), 2)
            cv2.line(image,(cx, cy-20), (cx, cy+20), (255, 255, 255), 2)
            
            pub.publish(pose)
            
            if start is True:    
               
                i = i + 1
            if i > 5:
                i = 1
                fps = 5 / (time.time()-t)
                t = time.time()
            cv2.putText(image, 'fps='+str(fps), (200,30), cv2.FONT_HERSHEY_SIMPLEX , 0.5, (0, 255, 255), 1)
            
            cv2.imshow('image', image)
            cv2.waitKey(1)
            getim = False

        rate.sleep()
Example #15
def test(score):

    net = SiamRPNvot()
    net.load_state_dict(
        torch.load('/home/traker_hao/code/learn/train_RPN/model/30.model'))
    net.eval().cuda()

    version_name = 'jiasu'

    sequence_path = '/media/traker_hao/data/dataset/UAV1/sequences'
    init_path = '/media/traker_hao/data/dataset/UAV1/annotations'
    result_path = '/home/traker_hao/result/visdrone/' + version_name
    if os.path.exists(result_path) is False:
        os.mkdir(result_path)

    sequence_names = os.listdir(sequence_path)
    random.shuffle(sequence_names)
    #sequence_names.sort()
    i = 0
    for sequence_name in sequence_names:
        print(sequence_name)
        #if sequence_name != 'Suv':
        #continue
        #sequence_name='uav0000054_00000_s'
        imagenames = os.listdir(sequence_path + '/' + sequence_name)
        imagenames.sort()
        print(i)
        i = i + 1
        print(sequence_path + '/' + sequence_name)
        f = open(
            result_path + '/' + sequence_name + '_' + version_name + '.txt',
            'w')
        inited = False
        fp = open(init_path + '/' + sequence_name + '.txt')
        j = 0
        for imagename in imagenames:
            j = j + 1
            image = cv2.imread(sequence_path + '/' + sequence_name + '/' +
                               imagename)
            #init the tracker
            if inited is False:
                data = fp.readline()
                data = data.strip('\n')
                data = data.split(',')
                [cx, cy, w,
                 h] = (int(data[0]) + int(data[2]) // 2,
                       int(data[1]) + int(data[3]) // 2, int(data[2]),
                       int(data[3]))
                #f.write(str(annos[0]['bbox'][0])+','+str(annos[0]['bbox'][1])+','+str(annos[0]['bbox'][2])+','+str(annos[0]['bbox'][3])+','+str(1.00)+'\n')
                f.write(data[0] + ',' + data[1] + ',' + data[2] + ',' +
                        data[3] + '\n')
                target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                inited = True

                cv2.rectangle(image,
                              (int(cx) - int(w) // 2, int(cy) - int(h) // 2),
                              (int(cx) + int(w) // 2, int(cy) + int(h) // 2),
                              (0, 255, 0), 3)
                cv2.putText(image, sequence_name, (50, 50), 0, 5e-3 * 200,
                            (0, 255, 0), 2)
                cv2.putText(image, 'initing...', (100, 100), 0, 5e-3 * 200,
                            (0, 255, 0), 2)
                image2 = cv2.resize(image, (960, 540))
                cv2.imshow('aa2', image2)
                cv2.waitKey(1)
            else:

                data = fp.readline()
                data = data.strip('\n')
                data = data.split(',')
                try:
                    truth = (int(data[0]),
                             int(data[1]), int(data[0]) + int(data[2]),
                             int(data[1]) + int(data[3]))
                except:
                    truth = [0, 0, 0, 0]

                #update the tracker
                #print([cx, cy, w, h])
                tic = cv2.getTickCount()
                t1 = time.time()
                state = SiamRPN_track(state, image)  # track
                #state['target_sz'] = np.array( [int(data[2]), int(data[3])] )

                toc = (cv2.getTickCount() - tic) / cv2.getTickFrequency()
                #print(1/toc)
                #mytracker.target_sz = np.array([int(truth[2]),int(truth[3])])
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]),
                              (res[0] + res[2], res[1] + res[3]),
                              (0, 255, 255), 2)

                #visualize the result

                cv2.rectangle(image, (int(truth[0]), int(truth[1])),
                              (int(truth[2]), int(truth[3])), (0, 255, 0), 2)
                #mytracker.target_sz=np.array([int(data[2]),int(data[3])])
                #cv2.putText(image, str(iou), (res[0] + res[2], res[1] + res[3]), 0, 5e-3*200, (0,255,0), 2)
            cv2.putText(image, sequence_name, (50, 50), 0, 5e-3 * 200,
                        (0, 255, 0), 2)
            image2 = cv2.resize(image, (960, 540))
            cv2.imshow('aa2', image2)
            if cv2.waitKey(1) == 97:
                break
            #if j>209:
            #cv2.waitKey(0)

        f.close()
Example #16
def main():
    args = parser.parse_args()
    """ compute max_batches """
    for root, dirs, files in os.walk(args.train_path):
        for dirnames in dirs:
            dir_path = os.path.join(root, dirnames)
            args.max_batches += len(os.listdir(dir_path))
    """ Model on gpu """
    model = SiamRPNvot()
    model = model.cuda()
    model.load_state_dict(
        torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model')))
    model.train().cuda()
    cudnn.benchmark = True
    """ train dataloader """
    data_loader = TrainDataLoader(args.train_path, model)
    if not os.path.exists(args.weight_dir):
        os.makedirs(args.weight_dir)
    """ loss and optimizer """
    criterion = MultiBoxLoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    """ train phase """
    closses, rlosses, tlosses = AverageMeter(), AverageMeter(), AverageMeter()
    steps = 0
    writer = SummaryWriter()
    for epoch in range(args.max_epoches):
        cur_lr = adjust_learning_rate(args.lr, optimizer, epoch, gamma=0.1)
        index_list = range(len(data_loader))  # get the dataset length
        losss = [0.0, 0.0, 0.0]

        for example in range(args.max_batches):
            ret = data_loader.__get__(random.choice(index_list))
            template = ret['temple'].cuda()
            detection = ret['detection'].cuda()
            pos_neg_diff = ret['pos_neg_diff_tensor'].cuda()
            model.temple(template)
            rout, cout = model(detection)
            cout = cout.squeeze().permute(1, 2, 0).reshape(-1, 2)
            rout = rout.squeeze().permute(1, 2, 0).reshape(-1, 4)
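            # the squeeze/permute/reshape flattens the anchor maps into
            # per-anchor rows: cout -> (N, 2) scores, rout -> (N, 4) offsets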

            predictions, targets = (cout, rout), pos_neg_diff
            closs, rloss, loss, reg_pred, reg_target, pos_index, neg_index = criterion(
                predictions, targets)
            closs_ = closs.cpu().item()

            if np.isnan(closs_):
                sys.exit(0)

            closses.update(closs.cpu().item())
            rlosses.update(rloss.cpu().item())
            tlosses.update(loss.cpu().item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            steps += 1
            losss[0] = closses.avg
            losss[1] = rlosses.avg
            losss[2] = tlosses.avg
        print("Epoch:{:04d}\tcloss:{:.4f}\trloss:{:.4f}\ttloss:{:.4f}".format(
            epoch, closses.avg, rlosses.avg, tlosses.avg))
        writer.add_scalar("closses", losss[0], epoch)
        writer.add_scalar("rlosses", losss[1], epoch)
        writer.add_scalar("tlosses", losss[2], epoch)
        if steps % 150 == 0:
            file_path = os.path.join(args.weight_dir,
                                     'weights-{:07d}.pth'.format(steps))
            state = {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(state, file_path)