def compute_preds(vid_line):
    # Most nodes have 4 GPUs; run on a random one
    torch.cuda.set_device(np.random.randint(4))
    # load net <-- have to do this per call as I'm running in multiprocessing
    net = SiamRPNvot()
    net.load_state_dict(model_wts)
    net.eval().cuda()
    vid_fpath, lbl = vid_line.split()
    pred, pred_gt = track_and_predict(vid_fpath, net)
    return pred, pred_gt, int(lbl), vid_fpath
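A minimal sketch of driving compute_preds across worker processes. It assumes model_wts and track_and_predict are defined at module level (as the comment above implies) and a hypothetical 'videos.txt' of "path label" lines; CUDA in workers requires the spawn start method.

import torch.multiprocessing as mp

if __name__ == '__main__':
    mp.set_start_method('spawn')  # CUDA tensors require spawn (or forkserver) workers
    with open('videos.txt') as f:  # hypothetical list of "path label" lines
        vid_lines = f.read().splitlines()
    with mp.Pool(processes=4) as pool:
        results = pool.map(compute_preds, vid_lines)
    for pred, pred_gt, lbl, vid_fpath in results:
        print(vid_fpath, lbl, pred, pred_gt)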
def main():
    global args, v_id
    args = parser.parse_args()

    net = SiamRPNvot()  # changed otb to vot
    net.load_state_dict(
        torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model')))  # changed OTB to VOT
    net.eval().cuda()

    dataset = load_dataset(args.dataset)
    fps_list = []
    for v_id, video in enumerate(dataset.keys()):
        fps_list.append(track_video(net, dataset[video]))
    print('Mean Running Speed {:.1f}fps'.format(np.mean(np.array(fps_list))))
def main():
    vid_file = os.path.expanduser("~/Videos/VID_20190327_195111.mp4")
    cap = cv2.VideoCapture(vid_file)

    # load net
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model')))
    net.eval().cuda()

    # # image and init box
    # image_files = sorted(glob.glob('./bag/*.jpg'))
    init_rbox = [334.02, 128.36, 438.19, 188.78,
                 396.39, 260.83, 292.23, 200.41]
    [cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    ret, im = cap.read()
    state = SiamRPN_init(im, target_pos, target_sz, net, use_gpu=True)

    toc = 0
    while True:
        # Capture frame-by-frame
        ret, im = cap.read()
        if not ret:  # guard against end of video; tracking a None frame would crash
            break
        tic = cv2.getTickCount()
        state = SiamRPN_track(state, im, use_gpu=True)  # track
        toc += cv2.getTickCount() - tic
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        res = [int(l) for l in res]
        cv2.rectangle(im, (res[0], res[1]),
                      (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 3)
        cv2.imshow('SiamRPN', im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()
# (fragment: inside SiamRPNvot_fe.__init__)
super(SiamRPNvot_fe, self).__init__(size=1, feature_out=256)
self.cfg = {
    'lr': 0.45,
    'window_influence': 0.44,
    'penalty_k': 0.04,
    'instance_size': 600,
    'adaptive': False
}  # 0.355

# load net
from net import SiamRPNvot
from os.path import realpath, dirname, join
import os

net = SiamRPNvot()
net.load_state_dict(
    torch.load('/home/malick/Bureau/DaSiamRPN/code/SiamRPNVOT.model'))
net.eval()

# save and load model
fe = SiamRPNvot_fe()
fe_dict = fe.state_dict()
net_dict = net.state_dict()
new_dict = {k: v for k, v in net_dict.items() if k in fe_dict}
fe_dict.update(new_dict)
fe.load_state_dict(fe_dict)

# freeze parameters of the feature extractor to avoid computing gradients
for param in fe.parameters():
    param.requires_grad = False
class Video_Labeler():

    def __init__(self, vid_file=False, scale=.6, start_frame_num=0):
        self.labels = []
        self.scale = scale
        self.start_frame_num = start_frame_num
        self.frame_num = start_frame_num
        self.modes = ["ColorCut", "AdaptiveThreshold", "GrabCut"]
        self.mode_idx = 0
        self.mode = self.modes[self.mode_idx]
        self.use_grab_cut = False
        self.run_video = False
        self.vid_done = False
        self.redo_annotation = False
        self.root = Tk()
        self.canvas = Canvas(self.root, width=600, height=600)
        self.canvas.pack()
        self.rect_pad = .1  # Amount to pad the DaSiam rectangle for grabcut (relative units)
        self.threshold_content_limit = 90
        if not vid_file:
            self.vid_file = self.select_video()
        else:
            self.vid_file = vid_file
        self.disp_name = 'SiamRPN'
        self.create_window()
        self.down_point = None
        self.up_point = None
        self.sel_rect = None
        self.in_click = False
        self.x_cursor = None
        self.y_cursor = None
        cv2.setMouseCallback(self.disp_name, self.on_mouse)
        self.cap = cv2.VideoCapture(self.vid_file)
        self.init_rbox = None

        # load net
        self.net = SiamRPNvot()
        self.net.load_state_dict(torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model')))
        self.net.eval().cuda()

    def create_window(self):
        cv2.namedWindow(self.disp_name)

    def select_video(self):
        Tk().withdraw()  # we don't want a full GUI, so keep the root window from appearing
        filename = askopenfilename()  # show an "Open" dialog box and return the path to the selected file
        return filename

    def adaptive_cut(self, img, rect):
        img_crop = img[rect[1]:rect[1]+rect[3], rect[0]:rect[0]+rect[2]]
        img_gray = cv2.cvtColor(img_crop, cv2.COLOR_BGR2GRAY)
        # ret, img_thresh = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY_INV)
        frame_threshold = cv2.adaptiveThreshold(img_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                                cv2.THRESH_BINARY_INV, 11, 2)
        cv2.imshow("thresh", frame_threshold)
        rect_2 = self.get_bounding_box(frame_threshold)
        return [rect[0]+rect_2[0], rect[1]+rect_2[1], rect_2[2], rect_2[3]]

    def color_cut(self, img, rect):
        frame_HSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        hsv_cropped = frame_HSV[rect[1]:rect[1]+rect[3], rect[0]:rect[0]+rect[2]]
        high_b = np.asarray((180, 255, 120))
        low_b = np.asarray((0, 0, 0))
        frame_threshold = cv2.inRange(hsv_cropped, low_b, high_b)
        cv2.imshow("thresh", frame_threshold)
        rect_2 = self.get_bounding_box(frame_threshold)
        return [rect[0]+rect_2[0], rect[1]+rect_2[1], rect_2[2], rect_2[3]]

    def edge_cut(self, img, rect):
        # unfinished stub: computes edges but does not yet return a box
        edges = cv2.Canny(img, 100, 200)

    def grab_cut(self, img, rect):
        mask = np.zeros(img.shape[:2], np.uint8)
        bgdModel = np.zeros((1, 65), np.float64)
        fgdModel = np.zeros((1, 65), np.float64)
        cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
        frame_threshold = 255*np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
        kernel = np.ones((5, 5), np.uint8)
        frame_threshold = cv2.erode(frame_threshold, kernel, iterations=2)
        cv2.imshow("thresh", frame_threshold[rect[1]:rect[1]+rect[3], rect[0]:rect[0]+rect[2]])
        img = img*frame_threshold[:, :, np.newaxis]
        # OpenCV 3.x API: findContours returns (image, contours, hierarchy)
        im, contours, hierarchy = cv2.findContours(frame_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        if len(contours) != 0:
            contours_sorted = sorted(contours, key=self.hull_convexity_ratio)
            c = contours_sorted[0]
            x, y, w, h = cv2.boundingRect(c)
            return [x, y, w, h]
        # img += 255*(1 - mask2[:, :, np.newaxis])
        # return self.get_bounding_box(frame_threshold)

    def hull_convexity_ratio(self, cnt):
        return cv2.contourArea(cv2.convexHull(cnt))/cv2.contourArea(cnt)

    def get_bounding_box(self, mask):
        # OpenCV 3.x API: findContours returns (image, contours, hierarchy)
        im, contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        if len(contours) != 0:
            # find the biggest area
            c = max(contours, key=cv2.contourArea)
            x, y, w, h = cv2.boundingRect(c)
            return [x, y, w, h]

    def check_rect(self, rect, im_size):
        # clip xywh-style rectangle to fit within the image
        w, h = im_size
        rect = BoxUtils.convert(rect, BoxUtils.FMT_XYWH, BoxUtils.FMT_XYXY)
        rect[0] = np.clip(rect[0], 0, w)
        rect[1] = np.clip(rect[1], 0, h)
        rect[2] = np.clip(rect[2], 0, w)
        rect[3] = np.clip(rect[3], 0, h)
        rect = BoxUtils.convert(rect, BoxUtils.FMT_XYXY, BoxUtils.FMT_XYWH)
        return rect

    def optimize_rect(self, rect_xywh, im):
        height, width, channels = im.shape
        rect_xywh = np.int32(np.hstack([rect_xywh[0] - width*self.rect_pad/2.,
                                        rect_xywh[1] - height*self.rect_pad/2.,
                                        rect_xywh[2] + 2*width*self.rect_pad,
                                        rect_xywh[3] + 2*height*self.rect_pad]))
        rect_xywh = self.check_rect(rect_xywh, [width, height])
        if self.mode == "GrabCut":
            rect_xywh = self.grab_cut(im, tuple(np.int32(rect_xywh)))
        elif self.mode == "ColorCut":
            rect_xywh = self.color_cut(im, np.int32(rect_xywh))  # Perform color cut
        elif self.mode == "AdaptiveThreshold":
            rect_xywh = self.adaptive_cut(im, np.int32(rect_xywh))
        rect_xywh = self.check_rect(rect_xywh, (width, height))
        rect_xywh = np.int32(rect_xywh)
        return rect_xywh

    def save(self):
        out_name = "{}_labels.p".format(os.path.splitext(ntpath.basename(self.vid_file))[0])
        print("File saved as: " + out_name)
        pickle.dump(self.labels, open(out_name, "wb"))

    def start_labeling(self):
        self.cap.set(1, self.frame_num)
        ret, im = self.cap.read()
        im = cv2.resize(im, None, fx=self.scale, fy=self.scale)
        height, width, channels = im.shape

        # image and init box
        self.draw_bbox()
        sel_rect_wh = (self.sel_rect[0][0], self.sel_rect[0][1],
                       self.sel_rect[1][0] - self.sel_rect[0][0],
                       self.sel_rect[1][1] - self.sel_rect[0][1])
        rect_gc = self.color_cut(im, sel_rect_wh)

        # tracker init
        target_pos, target_sz = np.array(rect_gc[0:2]) + np.array(rect_gc[2:4])/2, np.array(rect_gc[2:4])
        state = SiamRPN_init(im, target_pos, target_sz, self.net, use_gpu=True)
        state_hist = [state]
        toc = 0
        old_frame_num = self.frame_num - 1
        # self.frame_num = 0
        while True:
            if old_frame_num != self.frame_num:
                old_frame_num = self.frame_num
                self.cap.set(1, self.frame_num)
                # Capture frame-by-frame
                ret, im = self.cap.read()
                if not ret:
                    self.save()
                    break
                im = cv2.resize(im, None, fx=self.scale, fy=self.scale)
                if len(self.labels) <= self.frame_num - self.start_frame_num:
                    state_hist.append(SiamRPN_track(state_hist[-1], im, use_gpu=True))  # track
                    state = state_hist[-1]
                    rect_ccwh = np.concatenate([state['target_pos'], state['target_sz']])
                    rect_xywh = BoxUtils.convert(rect_ccwh, BoxUtils.FMT_CCWH, BoxUtils.FMT_XYWH)
                    rect_xywh = self.optimize_rect(rect_xywh, im)
                    rect_xyxy = BoxUtils.convert(rect_xywh, BoxUtils.FMT_XYWH, BoxUtils.FMT_XYXY)
                else:
                    state = state_hist[self.frame_num - self.start_frame_num]  # Use previous frame state
                    rect_xyxy = BoxUtils.unnormalize(self.labels[self.frame_num - self.start_frame_num].bounding_rect,
                                                     BoxUtils.FMT_XYXY, [height, width])
                pt0, pt1 = tuple(np.asarray(rect_xyxy[:2], np.int32)), tuple(np.asarray(rect_xyxy[2:], np.int32))
                cv2.rectangle(im, pt0, pt1, (0, 255, 255), 3)

            k = cv2.waitKey(33)
            if k & 0xFF == ord('d'):  # Drawing Mode
                self.draw_bbox()
                rect_xyxy = [self.sel_rect[0][0], self.sel_rect[0][1],
                             self.sel_rect[1][0], self.sel_rect[1][1]]
                # drawn_norm =
                rect_xywh = BoxUtils.convert(
                    rect_xyxy,
                    BoxUtils.FMT_XYXY,
                    BoxUtils.FMT_XYWH)
                # rect_xywh = self.optimize_rect(rect_xywh, im)
                # self.labels[self.frame_num].bounding_rect = BoxUtils.normalize(rect_xywh,
                #     BoxUtils.FMT_XYWH, [height, width])
                # rect_xyxy = BoxUtils.convert(
                #     rect_xywh,
                #     BoxUtils.FMT_XYWH,
                #     BoxUtils.FMT_XYXY)
                rect_ccwh = BoxUtils.convert(rect_xywh, BoxUtils.FMT_XYWH, BoxUtils.FMT_CCWH)
                state['target_pos'] = rect_ccwh[:2]
                state['target_sz'] = rect_ccwh[2:]
                old_frame_num = self.frame_num - 1  # force a refresh on the next loop pass
            if k & 0xFF == ord('r'):
                self.redo_annotation = not self.redo_annotation
                print("Annotation redo set to: ", self.redo_annotation)

            # If a new frame is being annotated, add it
            if len(self.labels) <= self.frame_num - self.start_frame_num:
                print("Normalized rect: ", BoxUtils.normalize(rect_xyxy, BoxUtils.FMT_XYXY, [height, width]))
                self.labels.append(Annotation(self.vid_file, self.frame_num,
                                              BoxUtils.normalize(rect_xyxy, BoxUtils.FMT_XYXY, [height, width]),
                                              self.redo_annotation))
                self.redo_annotation = False
            else:
                # Check if the cache should be used
                if k & 0xFF == ord('d'):
                    self.labels[self.frame_num] = Annotation(self.vid_file, self.frame_num,
                                                             BoxUtils.normalize(rect_xyxy, BoxUtils.FMT_XYXY, [height, width]),
                                                             self.redo_annotation)
                    self.redo_annotation = False

            cv2.imshow(self.disp_name, im)
            if k & 0xFF == ord('j'):
                if self.frame_num > 0:
                    self.frame_num = self.frame_num - 1
            if k & 0xFF == ord('m'):
                self.mode_idx = self.mode_idx + 1
                self.mode = self.modes[self.mode_idx % len(self.modes)]
                print("Toggling mode: ", self.mode)
            if k & 0xFF == ord('v'):
                print("Toggling Video Mode: ", not self.run_video)
                self.run_video = not self.run_video
            if k & 0xFF == ord('k') or self.run_video:
                self.frame_num = self.frame_num + 1
            if k & 0xFF == ord('s'):
                self.save()
            if k & 0xFF == ord('q'):
                break

        # When everything is done, release the capture
        self.cap.release()
        cv2.destroyAllWindows()

    def draw_bbox(self):
        # preview
        self.sel_rect = None
        self.cap.set(1, self.frame_num)
        ret, im = self.cap.read()
        im = cv2.resize(im, None, fx=self.scale, fy=self.scale)
        height, width, channels = im.shape
        while True:
            frame_draw = np.copy(im)
            if self.sel_rect is not None:
                cv2.rectangle(frame_draw, tuple(self.sel_rect[0]), tuple(self.sel_rect[1]), (255, 255, 255))
            if self.x_cursor is not None:
                cv2.line(frame_draw, (self.x_cursor, 0), (self.x_cursor, height), (0, 0, 255))
                cv2.line(frame_draw, (0, self.y_cursor), (width, self.y_cursor), (0, 0, 255))
            cv2.imshow(self.disp_name, frame_draw)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        # return [334.02, 128.36, 438.19, 188.78, 396.39, 260.83, 292.23, 200.41]

    def on_mouse(self, event, x, y, flags, param):
        self.x_cursor = x
        self.y_cursor = y
        if event == cv2.EVENT_LBUTTONDOWN:
            self.in_click = True
            self.down_point = [x, y]
        if event == cv2.EVENT_LBUTTONUP:
            self.in_click = False
            self.up_point = [x, y]
            if self.up_point == self.down_point:
                self.sel_rect = None
            else:
                for i in range(0, 2):
                    if self.down_point[i] > self.up_point[i]:
                        self.down_point[i], self.up_point[i] = self.up_point[i], self.down_point[i]
                if self.down_point is not None:
                    self.sel_rect = (self.down_point, self.up_point)
        if self.in_click:
            self.sel_rect = (self.down_point, (x, y))
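A minimal way to drive the labeler, assuming the module is run as a script: with no vid_file argument, select_video() opens a file-selection dialog.

if __name__ == '__main__':
    # vid_file defaults to False, so a file dialog pops up for the video
    labeler = Video_Labeler(scale=.6, start_frame_num=0)
    labeler.start_labeling()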
# DaSiamRPN
# Licensed under The MIT License
# Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
# --------------------------------------------------------
#!/usr/bin/python

import glob, cv2, torch
import numpy as np
from os.path import realpath, dirname, join

from net import SiamRPNvot
from run_SiamRPN import SiamRPN_init, SiamRPN_track
from utils import get_axis_aligned_bbox, cxy_wh_2_rect

# load net
net = SiamRPNvot()
net.load_state_dict(torch.load(join(realpath(dirname(__file__)), 'SOT.model')))
net.eval().cuda()

# image and init box
image_files = sorted(glob.glob('./12_video/*.png'))
image_files.reverse()
# init_rbox = [334.02, 128.36, 438.19, 188.78, 396.39, 260.83, 292.23, 200.41]
init_rbox = [135.0, 141.0, 187.0, 141.0, 187.0, 168.0, 135.0, 168.0]
[cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

# tracker init
target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_files[0])  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net)
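The snippet stops after tracker init. A minimal sketch of the tracking loop that would follow, mirroring the pattern of the other demos in this section (the 'SiamRPN' window name is an assumption):

# iterate over the remaining (reversed) frames and draw the tracked box
for image_file in image_files[1:]:
    im = cv2.imread(image_file)
    state = SiamRPN_track(state, im)  # track
    res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
    res = [int(l) for l in res]
    cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 3)
    cv2.imshow('SiamRPN', im)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()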
    'ants1', 'ants3', 'bag', 'ball1', 'ball2', 'basketball', 'birds1',
    'blanket', 'bmx', 'bolt1', 'bolt2', 'book', 'butterfly', 'car1',
    'conduction1', 'crabs1', 'crossing', 'dinosaur', 'drone_across',
    'drone_flip', 'drone1', 'fernando', 'fish1', 'fish2', 'fish3',
    'flamingo1', 'frisbee', 'girl', 'glove', 'godfather', 'graduate',
    'gymnastics1', 'gymnastics2', 'gymnastics3', 'hand', 'handball1',
    'handball2', 'helicopter', 'iceskater1', 'iceskater2', 'leaves',
    'matrix', 'motocross1', 'motocross2', 'nature', 'pedestrian1',
    'rabbit', 'racing', 'road', 'shaking', 'sheep', 'singer2', 'singer3',
    'soccer1', 'soccer2', 'soldier', 'tiger', 'traffic', 'wiper', 'zebrafish1']
"""

vot = VOT(root)
video_names = vot.get_video_names()

# load net
net = SiamRPNvot()
net.load_state_dict(torch.load(join('/home/jianingq/bgflow/DaSiamRPN/code/',
                                    'model', 'SiamRPNVOT.model')))
net.eval().cuda()

score_net = ScoreNet(6, 1)
score_net.cuda()
score_net.load_state_dict(torch.load('models/ckpt_0.pth'))

for video_name in ['gymnastics3']:
    total_iou = 0
    total_failure = 0
    warped_images = []
    video_length = vot.get_frame_length(video_name)
    # ground truth bounding boxes
    gts = vot.get_gts(video_name)
        self._pick_img_pairs(index)
        self._pad_crop_resize_template()
        self._pad_crop_resize_detection()
        self._generate_pos_neg_diff()  # build a 1445x5 tensor: each anchor's class label plus dx, dy, dw, dh
        # self._tranform()  # PIL to Tensor
        self.count += 1
        return self.ret

    def __len__(self):
        return len(self.sub_class_dir)


if __name__ == '__main__':
    # we will do a test for the dataloader
    net = SiamRPNvot()
    loader = TrainDataLoader('D:\\uav_frame\\00', net, check=True)
    # print(loader.__len__())
    index_list = range(loader.__len__())
    for i in range(1000):
        ret = loader.__get__(random.choice(index_list))
        label = ret['pos_neg_diff'][:, 0].reshape(-1)
        pos_index = list(np.where(label == 1)[0])
        pos_num = len(pos_index)
        print(pos_index)
        print(pos_num)
        if pos_num != 0 and pos_num != 16:
            print(pos_num)
            sys.exit(0)
        print(i)
# --------------------------------------------------------
#!/usr/bin/python

import glob, cv2, torch
import numpy as np
from os.path import realpath, dirname, join

from net import SiamRPNvot
from run_SiamRPN import SiamRPN_init, SiamRPN_track
from utils import get_axis_aligned_bbox, cxy_wh_2_rect

# get supported device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# load net
net = SiamRPNvot()
net.load_state_dict(torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model'),
                               map_location=device))
net.eval().to(device)

# image and init box
image_files = sorted(glob.glob('./bag/*.jpg'))
init_rbox = [334.02, 128.36, 438.19, 188.78, 396.39, 260.83, 292.23, 200.41]
[cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

# tracker init
target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_files[0])  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net, device)

# tracking and visualization
toc = 0
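The snippet ends at the timer init. A minimal sketch of the rest of the loop; passing device to SiamRPN_track mirrors the SiamRPN_init call above but is an assumption about this fork's signature:

for image_file in image_files[1:]:
    im = cv2.imread(image_file)
    tic = cv2.getTickCount()
    state = SiamRPN_track(state, im, device)  # track (device argument assumed)
    toc += cv2.getTickCount() - tic
    res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
    res = [int(l) for l in res]
    cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 3)
    cv2.imshow('SiamRPN', im)
    cv2.waitKey(1)
print('Tracking Speed {:.1f}fps'.format((len(image_files) - 1) / (toc / cv2.getTickFrequency())))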
def showImage(subscriber, camera_matrix, kcf_tracker_h):
    global x1, y1, x2, y2, drawing, init, flag, image, getim, start
    flag = 1
    init = False
    drawing = False
    getim = False
    start = False
    x1, x2, y1, y2 = -1, -1, -1, -1
    flag_lose = False
    count_lose = 0

    print('loading model...........')
    net = SiamRPNvot()
    net.load_state_dict(torch.load(path + 'SiamRPNVOT.model'))
    net.eval().cuda()
    # warm up the network with dummy template and detection inputs
    z = torch.Tensor(1, 3, 127, 127)
    net.temple(z.cuda())
    x = torch.Tensor(1, 3, 271, 271)
    net(x.cuda())
    print('ready for starting!')

    rospy.Subscriber(subscriber, Image, callback)
    cv2.namedWindow('image')
    cv2.setMouseCallback('image', draw_circle)
    rate = rospy.Rate(50)
    while not rospy.is_shutdown():
        if getim:
            getim = False
            ## !
            d_info = DetectionInfo()
            d_info.frame = 0
            ## !
            if start is False and init is True:
                target_pos = np.array([int((x1 + x2) / 2), int((y1 + y2) / 2)])
                target_sz = np.array([int(x2 - x1), int(y2 - y1)])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                start = True
                flag_lose = False
                continue
            if start is True:
                state = SiamRPN_track(state, image)  # track
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]),
                              (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 2)
                ## !
                # pinhole model: depth = real target height / pixel height * fy
                depth = kcf_tracker_h / state['target_sz'][1] * camera_matrix[1, 1]
                cx = state['target_pos'][0] - image.shape[1] / 2
                cy = state['target_pos'][1] - image.shape[0] / 2
                d_info.position[0] = depth * cx / camera_matrix[0, 0]
                d_info.position[1] = depth * cy / camera_matrix[1, 1]
                d_info.position[2] = depth
                d_info.sight_angle[0] = cx / (image.shape[1] / 2) * math.atan((image.shape[1] / 2) / camera_matrix[0, 0])
                d_info.sight_angle[1] = cy / (image.shape[0] / 2) * math.atan((image.shape[0] / 2) / camera_matrix[1, 1])
                d_info.detected = True
                ## !
                cv2.putText(image, str(state['score']), (res[0] + res[2], res[1] + res[3]),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)
                if state['score'] < 0.5:
                    count_lose = count_lose + 1
                else:
                    count_lose = 0
                if count_lose > 4:
                    flag_lose = True
            if flag_lose is True:
                cv2.putText(image, 'target lost', (20, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                ## !
                d_info.detected = False
            if drawing is True:
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cx = int(image.shape[1] / 2)
            cy = int(image.shape[0] / 2)
            cv2.line(image, (cx - 20, cy), (cx + 20, cy), (255, 255, 255), 2)
            cv2.line(image, (cx, cy - 20), (cx, cy + 20), (255, 255, 255), 2)
            ## !
            pub.publish(d_info)
            cv2.imshow('image', image)
            cv2.waitKey(1)
        rate.sleep()
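A hypothetical invocation of showImage, assuming rospy.init_node and the global pub publisher it uses are set up elsewhere; the intrinsics and target height below are placeholders:

rospy.init_node('tracker', anonymous=True)
K = np.array([[525.0, 0.0, 319.5],   # fx,  0, cx (placeholder intrinsics)
              [0.0, 525.0, 239.5],   #  0, fy, cy
              [0.0, 0.0, 1.0]])
showImage('/camera/rgb/image_raw', K, kcf_tracker_h=0.5)  # assumed target height in meters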
# DaSiamRPN
# Licensed under The MIT License
# Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
# --------------------------------------------------------
#!/usr/bin/python

import glob, cv2, torch
import numpy as np
from os.path import realpath, dirname, join

from net import SiamRPNvot
from run_SiamRPN import SiamRPN_init, SiamRPN_track
from utils import get_axis_aligned_bbox, cxy_wh_2_rect

# load net
net = SiamRPNvot()
net.load_state_dict(torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model')))
net.cpu()  # note: the replaced GPU line also called eval(); consider net.eval() on this CPU path too
# net.eval().cuda()

# image and init box
image_files = sorted(glob.glob('./bag/*.jpg'))
init_rbox = [334.02, 128.36, 438.19, 188.78, 396.39, 260.83, 292.23, 200.41]
[cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

# tracker init
target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_files[0])  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net)

# tracking and visualization
def get_object_center(q, detect_class):
    # classes:
    # 1.Aeroplanes 2.Bicycles 3.Birds 4.Boats 5.Bottles
    # 6.Buses 7.Cars 8.Cats 9.Chairs 10.Cows
    # 11.Dining tables 12.Dogs 13.Horses 14.Motorbikes 15.People
    # 16.Potted plants 17.Sheep 18.Sofas 19.Trains 20.TV/Monitors
    slim = tf.contrib.slim

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

    # Restore SSD model.
    # ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    ckpt_filename = '../SSD-Tensorflow/checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing routine.
    def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})
        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape,
            num_classes=21, decode=True)
        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def get_bboxes(rclasses, rbboxes):
        # get the bounding box of each detected object
        number_classes = rclasses.shape[0]
        object_bboxes = []
        for i in range(number_classes):
            object_bbox = dict()
            object_bbox['i'] = i
            object_bbox['class'] = rclasses[i]
            object_bbox['y_min'] = rbboxes[i, 0]
            object_bbox['x_min'] = rbboxes[i, 1]
            object_bbox['y_max'] = rbboxes[i, 2]
            object_bbox['x_max'] = rbboxes[i, 3]
            object_bboxes.append(object_bbox)
        return object_bboxes

    # load net
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(join(realpath(dirname(__file__)),
                        '../DaSiamRPN-master/code/SiamRPNVOT.model')))
    net.eval()

    # open video capture
    video = cv2.VideoCapture(0)
    if not video.isOpened():
        print("Could not open video")
        sys.exit()

    index = True
    while index:
        # Read first frame.
        ok, frame = video.read()
        if not ok:
            print('Cannot read video file')
            sys.exit()
        # Define an initial bounding box
        height = frame.shape[0]
        width = frame.shape[1]
        rclasses, rscores, rbboxes = process_image(frame)
        bboxes = get_bboxes(rclasses, rbboxes)
        for bbox in bboxes:
            if bbox['class'] == detect_class:
                print(bbox)
                ymin = int(bbox['y_min'] * height)
                xmin = int(bbox['x_min'] * width)
                ymax = int(bbox['y_max'] * height)
                xmax = int(bbox['x_max'] * width)
                cx = (xmin + xmax) / 2
                cy = (ymin + ymax) / 2
                h = ymax - ymin
                w = xmax - xmin
                new_bbox = (cx, cy, w, h)
                print(new_bbox)
                index = False
                break

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    state = SiamRPN_init(frame, target_pos, target_sz, net)

    # tracking and visualization
    toc = 0
    count_number = 0
    while True:
        # Read a new frame
        ok, frame = video.read()
        if not ok:
            break
        # Start timer
        tic = cv2.getTickCount()
        # Update tracker
        state = SiamRPN_track(state, frame)  # track
        # print(state)
        toc += cv2.getTickCount() - tic
        if state:
            res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            res = [int(l) for l in res]
            cv2.rectangle(frame, (res[0], res[1]),
                          (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 3)
            count_number += 1
            # set object_center
            object_center = dict()
            object_center['x'] = state['target_pos'][0] / width
            object_center['y'] = state['target_pos'][1] / height
            q.put(object_center)
        if (not state) or count_number % 40 == 3:
            # Tracking failure: re-detect with SSD and re-init the tracker
            cv2.putText(frame, "Tracking failure detected", (100, 80),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
            index = True
            while index:
                ok, frame = video.read()
                rclasses, rscores, rbboxes = process_image(frame)
                bboxes = get_bboxes(rclasses, rbboxes)
                for bbox in bboxes:
                    if bbox['class'] == detect_class:
                        ymin = int(bbox['y_min'] * height)
                        xmin = int(bbox['x_min'] * width)
                        ymax = int(bbox['y_max'] * height)
                        xmax = int(bbox['x_max'] * width)
                        cx = (xmin + xmax) / 2
                        cy = (ymin + ymax) / 2
                        h = ymax - ymin
                        w = xmax - xmin
                        new_bbox = (cx, cy, w, h)
                        target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
                        state = SiamRPN_init(frame, target_pos, target_sz, net)
                        p1 = (int(xmin), int(ymin))
                        p2 = (int(xmax), int(ymax))
                        cv2.rectangle(frame, p1, p2, (0, 255, 0), 2, 1)
                        index = 0
                        break

        # resize the frame
        resized_frame = cv2.resize(frame, None, fx=0.65, fy=0.65, interpolation=cv2.INTER_AREA)
        # flip horizontally (for a mirror view)
        horizontal = cv2.flip(resized_frame, 1, dst=None)
        # show the frame
        cv2.namedWindow("SSD+SiamRPN", cv2.WINDOW_NORMAL)
        cv2.imshow('SSD+SiamRPN', horizontal)

        # Exit if ESC pressed
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break

    video.release()
    cv2.destroyAllWindows()
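get_object_center loops until exit, so a natural way to consume its queue is from another process. A minimal sketch, using class id 15 (People) from the class list above:

from multiprocessing import Process, Queue

if __name__ == '__main__':
    q = Queue()
    p = Process(target=get_object_center, args=(q, 15))  # 15 = People
    p.start()
    while True:
        center = q.get()  # normalized target center, x and y in [0, 1]
        print(center['x'], center['y'])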
def showImage():
    global x1, y1, x2, y2, drawing, init, flag, image, getim, start
    rospy.init_node('RPN', anonymous=True)
    flag = 1
    init = False
    drawing = False
    getim = False
    start = False
    x1, x2, y1, y2 = -1, -1, -1, -1
    flag_lose = False
    count_lose = 0

    print('loading model...........')
    net = SiamRPNvot()
    net.load_state_dict(torch.load(path + 'SiamRPNVOT.model'))
    net.eval().cuda()
    # warm up the network with dummy template and detection inputs
    z = torch.Tensor(1, 3, 127, 127)
    net.temple(z.cuda())
    x = torch.Tensor(1, 3, 271, 271)
    net(x.cuda())
    print('ready for starting!')

    rospy.Subscriber('/camera/rgb/image_raw', Image, callback)
    pub = rospy.Publisher('/vision/target', Pose, queue_size=10)
    cv2.namedWindow('image')
    cv2.setMouseCallback('image', draw_circle)
    rate = rospy.Rate(30)
    i = 1
    t = time.time()
    fps = 0
    while not rospy.is_shutdown():
        if getim:
            t1 = time.time()
            idd = readid(image)
            pose = Pose()
            pose.position.z = 0

            if start is False and init is True:
                target_pos = np.array([int((x1 + x2) / 2), int((y1 + y2) / 2)])
                target_sz = np.array([int(x2 - x1), int(y2 - y1)])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                start = True
                flag_lose = False
                continue
            if start is True:
                state = SiamRPN_track(state, image)  # track
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]),
                              (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 2)
                # normalized offset of the target from the image center, in [-1, 1]
                pose.position.x = (state['target_pos'][0] - image.shape[1] / 2) / (image.shape[1] / 2)
                pose.position.y = (state['target_pos'][1] - image.shape[0] / 2) / (image.shape[0] / 2)
                cv2.putText(image, str(state['score']), (res[0] + res[2], res[1] + res[3]),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)
                pose.position.z = 1
                if state['score'] < 0.5:
                    count_lose = count_lose + 1
                else:
                    count_lose = 0
                if count_lose > 4:
                    flag_lose = True
            if flag_lose is True:
                cv2.putText(image, 'target is lost!', (200, 200),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3)
                pose.position.z = -1
            if drawing is True:
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(image, '#' + str(idd), (30, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
            cx = int(image.shape[1] / 2)
            cy = int(image.shape[0] / 2)
            cv2.line(image, (cx - 20, cy), (cx + 20, cy), (255, 255, 255), 2)
            cv2.line(image, (cx, cy - 20), (cx, cy + 20), (255, 255, 255), 2)
            pub.publish(pose)
            if start is True:
                i = i + 1
                if i > 5:
                    i = 1
                    fps = 5 / (time.time() - t)
                    t = time.time()
                cv2.putText(image, 'fps=' + str(fps), (200, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
            cv2.imshow('image', image)
            cv2.waitKey(1)
            getim = False
        rate.sleep()
def test(score):
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load('/home/traker_hao/code/learn/train_RPN/model/30.model'))
    net.eval().cuda()

    version_name = 'jiasu'
    sequence_path = '/media/traker_hao/data/dataset/UAV1/sequences'
    init_path = '/media/traker_hao/data/dataset/UAV1/annotations'
    result_path = '/home/traker_hao/result/visdrone/' + version_name
    if os.path.exists(result_path) is False:
        os.mkdir(result_path)
    sequence_names = os.listdir(sequence_path)
    random.shuffle(sequence_names)
    # sequence_names.sort()
    i = 0
    for sequence_name in sequence_names:
        print(sequence_name)
        # if sequence_name != 'Suv':
        #     continue
        # sequence_name = 'uav0000054_00000_s'
        imagenames = os.listdir(sequence_path + '/' + sequence_name)
        imagenames.sort()
        print(i)
        i = i + 1
        print(sequence_path + '/' + sequence_name)
        f = open(result_path + '/' + sequence_name + '_' + version_name + '.txt', 'w')
        inited = False
        fp = open(init_path + '/' + sequence_name + '.txt')
        j = 0
        for imagename in imagenames:
            j = j + 1
            image = cv2.imread(sequence_path + '/' + sequence_name + '/' + imagename)
            # init the tracker
            if inited is False:
                data = fp.readline()
                data = data.strip('\n')
                data = data.split(',')
                [cx, cy, w, h] = (int(data[0]) + int(data[2]) // 2,
                                  int(data[1]) + int(data[3]) // 2,
                                  int(data[2]), int(data[3]))
                # f.write(str(annos[0]['bbox'][0]) + ',' + str(annos[0]['bbox'][1]) + ',' + str(annos[0]['bbox'][2]) + ',' + str(annos[0]['bbox'][3]) + ',' + str(1.00) + '\n')
                f.write(data[0] + ',' + data[1] + ',' + data[2] + ',' + data[3] + '\n')
                target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                inited = True
                cv2.rectangle(image,
                              (int(cx) - int(w) // 2, int(cy) - int(h) // 2),
                              (int(cx) + int(w) // 2, int(cy) + int(h) // 2),
                              (0, 255, 0), 3)
                cv2.putText(image, sequence_name, (50, 50), 0, 5e-3 * 200, (0, 255, 0), 2)
                cv2.putText(image, 'initing...', (100, 100), 0, 5e-3 * 200, (0, 255, 0), 2)
                image2 = cv2.resize(image, (960, 540))
                cv2.imshow('aa2', image2)
                cv2.waitKey(1)
            else:
                data = fp.readline()
                data = data.strip('\n')
                data = data.split(',')
                try:
                    truth = (int(data[0]), int(data[1]),
                             int(data[0]) + int(data[2]),
                             int(data[1]) + int(data[3]))
                except:
                    truth = [0, 0, 0, 0]
                # update the tracker
                # print([cx, cy, w, h])
                tic = cv2.getTickCount()
                t1 = time.time()
                state = SiamRPN_track(state, image)  # track
                # state['target_sz'] = np.array([int(data[2]), int(data[3])])
                toc = (cv2.getTickCount() - tic) / cv2.getTickFrequency()
                # print(1/toc)
                # mytracker.target_sz = np.array([int(truth[2]), int(truth[3])])
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]),
                              (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 2)
                # visualize the result
                cv2.rectangle(image, (int(truth[0]), int(truth[1])),
                              (int(truth[2]), int(truth[3])), (0, 255, 0), 2)
                # mytracker.target_sz = np.array([int(data[2]), int(data[3])])
                # cv2.putText(image, str(iou), (res[0] + res[2], res[1] + res[3]), 0, 5e-3*200, (0, 255, 0), 2)
                cv2.putText(image, sequence_name, (50, 50), 0, 5e-3 * 200, (0, 255, 0), 2)
                image2 = cv2.resize(image, (960, 540))
                cv2.imshow('aa2', image2)
                if cv2.waitKey(1) == 97:
                    break
                # if j > 209:
                #     cv2.waitKey(0)
        f.close()
def main():
    args = parser.parse_args()

    """ compute max_batches """
    for root, dirs, files in os.walk(args.train_path):
        for dirnames in dirs:
            dir_path = os.path.join(root, dirnames)
            args.max_batches += len(os.listdir(dir_path))

    """ Model on gpu """
    model = SiamRPNvot()
    model = model.cuda()
    model.load_state_dict(
        torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model')))
    model.train().cuda()
    cudnn.benchmark = True

    """ train dataloader """
    data_loader = TrainDataLoader(args.train_path, model)
    if not os.path.exists(args.weight_dir):
        os.makedirs(args.weight_dir)

    """ loss and optimizer """
    criterion = MultiBoxLoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    """ train phase """
    closses, rlosses, tlosses = AverageMeter(), AverageMeter(), AverageMeter()
    steps = 0
    writer = SummaryWriter()
    for epoch in range(args.max_epoches):
        cur_lr = adjust_learning_rate(args.lr, optimizer, epoch, gamma=0.1)
        index_list = range(data_loader.__len__())  # get the length of the dataset
        losss = [0.0, 0.0, 0.0]
        for example in range(args.max_batches):
            ret = data_loader.__get__(random.choice(index_list))
            template = ret['temple'].cuda()
            detection = ret['detection'].cuda()
            pos_neg_diff = ret['pos_neg_diff_tensor'].cuda()

            model.temple(template)
            rout, cout = model(detection)
            cout = cout.squeeze().permute(1, 2, 0).reshape(-1, 2)
            rout = rout.squeeze().permute(1, 2, 0).reshape(-1, 4)
            predictions, targets = (cout, rout), pos_neg_diff
            closs, rloss, loss, reg_pred, reg_target, pos_index, neg_index = criterion(predictions, targets)
            closs_ = closs.cpu().item()
            if np.isnan(closs_):
                sys.exit(0)

            closses.update(closs.cpu().item())
            rlosses.update(rloss.cpu().item())
            tlosses.update(loss.cpu().item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            steps += 1

        losss[0] = closses.avg
        losss[1] = rlosses.avg
        losss[2] = tlosses.avg
        print("Epoch:{:04d}\tcloss:{:.4f}\trloss:{:.4f}\ttloss:{:.4f}".format(
            epoch, closses.avg, rlosses.avg, tlosses.avg))
        writer.add_scalar("closses", losss[0], epoch)
        writer.add_scalar("rlosses", losss[1], epoch)
        writer.add_scalar("tlosses", losss[2], epoch)

        if steps % 150 == 0:
            file_path = os.path.join(args.weight_dir, 'weights-{:07d}.pth'.format(steps))
            state = {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(state, file_path)
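A minimal sketch for resuming from one of the checkpoints saved above, relying only on the keys written by torch.save; the file name and learning rate are placeholders:

checkpoint = torch.load('weights/weights-0000150.pth')  # hypothetical checkpoint path
model = SiamRPNvot().cuda()
model.load_state_dict(checkpoint['state_dict'])
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # lr placeholder; restored state overrides it
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint['epoch']  # continue training from here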