def track_video(model, video):
    toc, regions = 0, []
    image_files, gt = video['image_files'], video['gt']
    for f, image_file in enumerate(image_files):
        im = cv2.imread(image_file)  # TODO: batch load
        tic = cv2.getTickCount()
        if f == 0:  # init
            target_pos, target_sz = rect_2_cxy_wh(gt[f])
            state = SiamRPN_init(im, target_pos, target_sz, model)  # init tracker
            location = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            regions.append(gt[f])
        else:  # tracking
            state = SiamRPN_track(state, im)  # track
            location = cxy_wh_2_rect(state['target_pos'] + 1, state['target_sz'])
            regions.append(location)
        toc += cv2.getTickCount() - tic

        if args.visualization and f >= 0:  # visualization
            if f == 0:
                cv2.destroyAllWindows()
            if len(gt[f]) == 8:
                cv2.polylines(im, [np.array(gt[f], int).reshape((-1, 1, 2))],
                              True, (0, 255, 0), 3)
            else:
                cv2.rectangle(im, (gt[f, 0], gt[f, 1]),
                              (gt[f, 0] + gt[f, 2], gt[f, 1] + gt[f, 3]),
                              (0, 255, 0), 3)
            if len(location) == 8:
                cv2.polylines(im, [location.reshape((-1, 1, 2))], True,
                              (0, 255, 255), 3)
            else:
                location = [int(l) for l in location]
                cv2.rectangle(im, (location[0], location[1]),
                              (location[0] + location[2], location[1] + location[3]),
                              (0, 255, 255), 3)
            cv2.putText(im, str(f), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 255), 2)
            cv2.imshow(video['name'], im)
            cv2.waitKey(1)
    toc /= cv2.getTickFrequency()

    # save result
    video_path = join('../test', args.dataset, 'SiamRPN_AlexNet_OTB2015')
    if not isdir(video_path):
        makedirs(video_path)
    result_path = join(video_path, '{:s}.txt'.format(video['name']))
    with open(result_path, 'w') as fout:
        for x in regions:
            fout.write(','.join([str(i) for i in x]) + '\n')

    # v_id and args come from the enclosing script scope
    print('({:d}) Video: {:12s} Time: {:02.1f}s Speed: {:3.1f}fps'.format(
        v_id, video['name'], toc, f / toc))
    return f / toc
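# The rect_2_cxy_wh / cxy_wh_2_rect helpers used throughout these snippets come
# from the DaSiamRPN utils. A minimal sketch of the conversions they perform,
# assuming the "top-left x y,w,h <--> center x y,w,h" convention noted in the
# comments of main(imagedir, gtdir) below:
import numpy as np

def rect_2_cxy_wh(rect):
    # [x, y, w, h] with top-left corner -> (center position, size)
    return (np.array([rect[0] + rect[2] / 2, rect[1] + rect[3] / 2]),
            np.array([rect[2], rect[3]]))

def cxy_wh_2_rect(pos, sz):
    # (center position, size) -> [x, y, w, h] with top-left corner
    return np.array([pos[0] - sz[0] / 2, pos[1] - sz[1] / 2, sz[0], sz[1]])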
def on_tracking(self):
    # warm up
    for i in range(10):
        self.net.template(
            torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda())
        self.net(
            torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

    i = 1
    pred_bbx = self.gt_first
    print("{}th frame: {} {} {} {}".format(i, pred_bbx[0], pred_bbx[1],
                                           pred_bbx[2], pred_bbx[3]))
    # top-left x, y, w, h -> center x, y, w, h
    cx = pred_bbx[0] + pred_bbx[2] / 2.0
    cy = pred_bbx[1] + pred_bbx[3] / 2.0
    w, h = pred_bbx[2], pred_bbx[3]
    i += 1

    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    im = cv2.imread(self.path_seq + '/imgs/0001.jpg')  # HxWxC
    state = SiamRPN_init(im, target_pos, target_sz, self.net)  # init tracker
    while i <= self.num_frames:
        self.index_frame = i
        im = cv2.imread(self.path_seq + '/imgs/' + str(i).zfill(4) + '.jpg')
        state = SiamRPN_track(state, im)
        # convert cx, cy, w, h into rect
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        print(f"{i}th frame: ", res)
        i += 1
def main():
    # load net
    net_file = join(realpath(dirname(__file__)), 'SiamRPNBIG.model')
    net = SiamRPNBIG()
    net.load_state_dict(torch.load(net_file))
    net.eval().cuda()

    # warm up
    for i in range(10):
        net.temple(
            torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda())
        net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

    # start to track
    handle = vot.VOT("polygon")
    polygon = handle.region()
    cx, cy, w, h = get_axis_aligned_bbox(polygon)

    image_file = handle.frame()
    if not image_file:
        sys.exit(0)

    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    im = cv2.imread(image_file)  # HxWxC
    state = SiamRPN_init(im, target_pos, target_sz, net)  # init tracker
    while True:
        image_file = handle.frame()
        if not image_file:
            break
        im = cv2.imread(image_file)  # HxWxC
        state = SiamRPN_track(state, im)  # track
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        handle.report(Rectangle(res[0], res[1], res[2], res[3]))
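# get_axis_aligned_bbox turns the VOT polygon region into a center-based
# axis-aligned box. A sketch of the idea, assuming the region is a flat list of
# eight corner coordinates (as the init_rbox literals below pass it): use the
# corner mean as the center and scale the min/max extents so the axis-aligned
# box keeps roughly the polygon's area. The shipped helper may differ in detail.
import numpy as np

def get_axis_aligned_bbox(region):
    # region: [x1, y1, x2, y2, x3, y3, x4, y4] polygon corners
    region = np.asarray(region, dtype=float)
    xs, ys = region[0::2], region[1::2]
    cx, cy = xs.mean(), ys.mean()
    # area of the polygon (product of two adjacent side lengths) vs. its
    # axis-aligned bounding rectangle
    A_poly = (np.linalg.norm(region[0:2] - region[2:4]) *
              np.linalg.norm(region[2:4] - region[4:6]))
    A_rect = (xs.max() - xs.min()) * (ys.max() - ys.min())
    s = np.sqrt(A_poly / A_rect)
    w, h = s * (xs.max() - xs.min()), s * (ys.max() - ys.min())
    return cx, cy, w, h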
def siam_track(image, img_name, txt_path, states, labels, roi):
    x_min, y_min = roi[0], roi[1]
    x_max, y_max = roi[2], roi[3]
    # img_h, img_w = image.shape[0], image.shape[1]
    result_state = []
    for state in states:
        state_o = SiamRPN_track(state, image)  # track
        result_state.append(state_o)

    txt = os.path.join(txt_path, img_name.replace('.jpg', '.txt'))
    mess = ''
    del_list = []
    for i, state in enumerate(result_state):
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        res = [int(l) for l in res]
        # drop trackers whose box has left the region of interest
        if (res[0] + res[2]) > x_max or (res[1] + res[3]) > y_max \
                or res[0] < x_min or res[1] < y_min:
            del_list.append(i)
        else:
            cv2.rectangle(image, (res[0], res[1]),
                          (res[0] + res[2], res[1] + res[3]), (0, 0, 255), 2)
            cv2.putText(image, labels[i], (res[0], res[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            mess += f'{res[0]} {res[1]} {res[0] + res[2]} {res[1] + res[3]} {labels[i]} \n'
    with open(txt, 'w') as file_handle:
        file_handle.write(mess)
    for i in reversed(del_list):
        del result_state[i]
        del labels[i]
    return result_state, image, labels
def track(self, image_file):
    image = cv2.imread(image_file)
    self.state = SiamRPN_track(self.state, image)  # track
    center = self.state["target_pos"] + 1  # 1-indexed coordinates
    target_sz = self.state["target_sz"]
    box = cxy_wh_2_rect(center, target_sz)
    return box
def main(imagedir, gtdir):
    # load net
    net_file = join(realpath(dirname(__file__)), 'SiamRPNBIG.model')
    net = SiamRPNBIG()
    net.load_state_dict(torch.load(net_file))
    net.eval().cuda()

    # warm up
    for i in range(10):
        net.temple(
            torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda())
        net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

    # start to track
    # get the first-frame groundtruth
    gt_file = os.path.join(gtdir, 'gt.txt')
    with open(gt_file, 'r') as f:
        lines = f.readlines()
    gt = []
    for line in lines:
        line = line.split(' ')
        gt.append([int(float(x)) for x in line])
    init_bbox = gt[0]  # top-left x y,w,h
    target_pos, target_sz = rect_2_cxy_wh(init_bbox)  # top-left x y,w,h --> center x y,w,h

    image_list = glob.glob(os.path.join(imagedir, '*.jpg'))
    image_list.sort()
    im = cv2.imread(image_list[0])  # HxWxC
    state = SiamRPN_init(im, target_pos, target_sz, net)  # init tracker

    bboxes = []
    for i in range(1, len(gt)):
        im = cv2.imread(image_list[i])  # HxWxC
        state = SiamRPN_track(state, im)  # track
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])  # center x y,w,h --> top-left x y,w,h
        bboxes.append(res.tolist())

    _, precision, precision_auc, iou = _compile_results(gt[1:], bboxes)
    print(' -- Precision (20 px): %.2f -- Precision AUC: %.2f -- IOU: %.2f --'
          % (precision, precision_auc, iou))

    isSavebbox = True
    if isSavebbox:
        print('saving bbox...')
        res_bbox_file = os.path.join('results_bbox.json')
        json.dump(bboxes, open(res_bbox_file, 'w'), indent=2)

    isSavevideo = True
    if isSavevideo:
        print('saving video...')
        save_video(image_list, bboxes)
    print('done')
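# _compile_results is external to this snippet. A plausible minimal sketch of
# the three reported metrics -- center-error precision at a 20 px threshold,
# precision averaged over thresholds as an AUC, and mean IoU -- assuming both
# inputs are lists of [x, y, w, h] boxes; the first return value, the threshold
# range, and the percentage scaling are assumptions, not the author's code.
import numpy as np

def _compile_results(gt, bboxes, dist_threshold=20):
    gt, bboxes = np.asarray(gt, float), np.asarray(bboxes, float)
    # per-frame distance between groundtruth and predicted box centers
    gt_c = gt[:, :2] + gt[:, 2:] / 2
    pr_c = bboxes[:, :2] + bboxes[:, 2:] / 2
    dist = np.linalg.norm(gt_c - pr_c, axis=1)
    precision = 100 * (dist <= dist_threshold).mean()
    # AUC: precision averaged over thresholds 0..50 px
    thresholds = np.arange(0, 51)
    precision_auc = 100 * np.mean([(dist <= t).mean() for t in thresholds])
    # mean IoU of the corner-format boxes
    x1 = np.maximum(gt[:, 0], bboxes[:, 0])
    y1 = np.maximum(gt[:, 1], bboxes[:, 1])
    x2 = np.minimum(gt[:, 0] + gt[:, 2], bboxes[:, 0] + bboxes[:, 2])
    y2 = np.minimum(gt[:, 1] + gt[:, 3], bboxes[:, 1] + bboxes[:, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    union = gt[:, 2] * gt[:, 3] + bboxes[:, 2] * bboxes[:, 3] - inter
    iou = 100 * (inter / union).mean()
    return dist, precision, precision_auc, iou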
def tracking(im, state, cap):
    """Return cx, cy"""
    state = SiamRPN_track(state, im)  # track
    res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
    cx, cy = res[0] + res[2] / 2, res[1] + res[3] / 2
    res = [int(l) for l in res]
    cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]),
                  (0, 255, 255), 3)
    cv2.imshow('SiamRPN', im)
    cap.write(im)
    cv2.waitKey(1)
    return cx, cy
def main():
    vid_file = os.path.expanduser("~/Videos/VID_20190327_195111.mp4")
    cap = cv2.VideoCapture(vid_file)

    # load net
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(join(realpath(dirname(__file__)), 'SiamRPNVOT.model')))
    net.eval().cuda()

    # # image and init box
    # image_files = sorted(glob.glob('./bag/*.jpg'))
    init_rbox = [334.02, 128.36, 438.19, 188.78, 396.39, 260.83, 292.23, 200.41]
    [cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    ret, im = cap.read()
    state = SiamRPN_init(im, target_pos, target_sz, net, use_gpu=True)

    toc = 0
    while True:
        # capture frame-by-frame
        ret, im = cap.read()
        if not ret:  # end of stream
            break
        tic = cv2.getTickCount()
        state = SiamRPN_track(state, im, use_gpu=True)  # track
        toc += cv2.getTickCount() - tic
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        res = [int(l) for l in res]
        cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]),
                      (0, 255, 255), 3)
        cv2.imshow('SiamRPN', im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # when everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()
net.eval().cuda()

# image and init box
image_files = sorted(glob.glob('./testData/*.jpg'))
init_rbox = [3641, 1778, 3810, 1778, 3810, 2313, 3641, 2313]
[cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

# tracker init
target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_files[0])  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net, args.model)

# tracking and visualization
toc = 0
for f, image_file in enumerate(image_files):
    im = cv2.imread(image_file)
    # print(im.shape)
    tic = cv2.getTickCount()
    state = SiamRPN_track(state, im)  # track
    toc += cv2.getTickCount() - tic
    res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
    res = [int(l) for l in res]
    # print(res)
    cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]),
                  (0, 255, 255), 3)
    cv2.imshow('SiamRPN', im)
    cv2.waitKey(1)

print('Tracking Speed {:.1f}fps'.format(
    (len(image_files) - 1) / (toc / cv2.getTickFrequency())))
def process_video(net, groundtruth_path, image_path, out_video):
    print('processing sequence', out_video)
    with open(groundtruth_path) as f:
        groundtruth = f.readlines()
    groundtruth = [x.rstrip() for x in groundtruth]

    image_filenames = [
        y for x in walk(image_path) for y in glob(join(x[0], '*.jpg'))
    ]
    image_filenames.sort()
    assert len(image_filenames) == len(groundtruth)

    image = cv2.imread(image_filenames[0])
    height, width = image.shape[:2]
    writer = cv2.VideoWriter(out_video,
                             cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), 15,
                             (width, height))
    if not writer.isOpened():
        print('Failed to open video')
        return

    # VOT sequence
    # polygon_ = parse_region(groundtruth[0])
    # cx, cy, w, h = get_axis_aligned_bbox(polygon_)
    # target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    polygon = [float(x) for x in groundtruth[0].split(',')]
    target_pos = np.array(
        [polygon[0] + polygon[2] / 2, polygon[1] + polygon[3] / 2])
    target_sz = np.array([polygon[2], polygon[3]])
    state = SiamRPN_init(image, target_pos, target_sz, net)  # init tracker

    for i in range(len(image_filenames)):
        image = cv2.imread(image_filenames[i])
        polygon = [float(x) for x in groundtruth[i].split(',')]
        polygon = [int(x) for x in polygon]
        # VOT sequence
        # cv2.line(image, (polygon[0], polygon[1]), (polygon[2], polygon[3]), (0, 0, 255), 2)
        # cv2.line(image, (polygon[2], polygon[3]), (polygon[4], polygon[5]), (0, 0, 255), 2)
        # cv2.line(image, (polygon[4], polygon[5]), (polygon[6], polygon[7]), (0, 0, 255), 2)
        # cv2.line(image, (polygon[6], polygon[7]), (polygon[0], polygon[1]), (0, 0, 255), 2)
        cv2.rectangle(image, (polygon[0], polygon[1]),
                      (polygon[0] + polygon[2], polygon[1] + polygon[3]),
                      (0, 0, 255), 2)

        # Start timer
        timer = cv2.getTickCount()
        state = SiamRPN_track(state, image)  # track
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        res = [int(x) for x in res]
        cv2.rectangle(image, (res[0], res[1]),
                      (res[0] + res[2], res[1] + res[3]), (255, 0, 0), 2)
        # Calculate Frames per second (FPS)
        fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer)
        # Display tracker type on frame
        cv2.putText(image, "SiamRPN", (50, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75,
                    (230, 170, 50), 2)
        # Display FPS on frame
        cv2.putText(image, "FPS : " + str(int(fps)), (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (230, 170, 50), 2)
        writer.write(image)
    writer.release()
def showImage(subscriber, camera_matrix, kcf_tracker_h):
    global x1, y1, x2, y2, drawing, init, flag, image, getim, start
    flag = 1
    init = False
    drawing = False
    getim = False
    start = False
    x1, x2, y1, y2 = -1, -1, -1, -1
    flag_lose = False
    count_lose = 0

    print('loading model...........')
    net = SiamRPNvot()
    net.load_state_dict(torch.load(path + 'SiamRPNVOT.model'))
    net.eval().cuda()
    # warm up with dummy template and detection inputs
    z = torch.Tensor(1, 3, 127, 127)
    net.temple(z.cuda())
    x = torch.Tensor(1, 3, 271, 271)
    net(x.cuda())
    print('ready for starting!')

    rospy.Subscriber(subscriber, Image, callback)
    cv2.namedWindow('image')
    cv2.setMouseCallback('image', draw_circle)
    rate = rospy.Rate(50)
    while not rospy.is_shutdown():
        if getim:
            getim = False
            ## !
            d_info = DetectionInfo()
            d_info.frame = 0
            ## !
            if start is False and init is True:
                target_pos = np.array([int((x1 + x2) / 2), int((y1 + y2) / 2)])
                target_sz = np.array([int(x2 - x1), int(y2 - y1)])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                start = True
                flag_lose = False
                continue
            if start is True:
                state = SiamRPN_track(state, image)  # track
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]),
                              (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 2)
                ## !
                # pinhole model: depth from the known target height and focal length fy
                depth = kcf_tracker_h / state['target_sz'][1] * camera_matrix[1, 1]
                cx = state['target_pos'][0] - image.shape[1] / 2
                cy = state['target_pos'][1] - image.shape[0] / 2
                d_info.position[0] = depth * cx / camera_matrix[0, 0]
                d_info.position[1] = depth * cy / camera_matrix[1, 1]
                d_info.position[2] = depth
                d_info.sight_angle[0] = cx / (image.shape[1] / 2) * math.atan(
                    (image.shape[1] / 2) / camera_matrix[0, 0])
                d_info.sight_angle[1] = cy / (image.shape[0] / 2) * math.atan(
                    (image.shape[0] / 2) / camera_matrix[1, 1])
                d_info.detected = True
                ## !
                cv2.putText(image, str(state['score']),
                            (res[0] + res[2], res[1] + res[3]),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)
                if state['score'] < 0.5:
                    count_lose = count_lose + 1
                else:
                    count_lose = 0
                if count_lose > 4:
                    flag_lose = True
            if flag_lose is True:
                cv2.putText(image, 'target lost', (20, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                ## !
                d_info.detected = False
            if drawing is True:
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

            cx = int(image.shape[1] / 2)
            cy = int(image.shape[0] / 2)
            cv2.line(image, (cx - 20, cy), (cx + 20, cy), (255, 255, 255), 2)
            cv2.line(image, (cx, cy - 20), (cx, cy + 20), (255, 255, 255), 2)
            ## !
            pub.publish(d_info)
            cv2.imshow('image', image)
            cv2.waitKey(1)
        rate.sleep()
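# The depth estimate above follows the pinhole camera model: an object of real
# height H that appears h pixels tall under focal length fy sits at depth
# Z = H * fy / h, and the lateral offsets back-project as X = Z * (u - u0) / fx,
# Y = Z * (v - v0) / fy. A standalone sketch of that back-projection; the
# function and argument names are illustrative, not from the ROS node (which
# also measures offsets from the image center rather than the principal point):
import numpy as np

def back_project(center_uv, box_h_px, real_h_m, camera_matrix):
    """Recover a 3-D camera-frame position from a tracked box (pinhole model)."""
    fx, fy = camera_matrix[0, 0], camera_matrix[1, 1]
    u0, v0 = camera_matrix[0, 2], camera_matrix[1, 2]
    depth = real_h_m * fy / box_h_px          # Z = H * fy / h
    x = depth * (center_uv[0] - u0) / fx      # X = Z * (u - u0) / fx
    y = depth * (center_uv[1] - v0) / fy      # Y = Z * (v - v0) / fy
    return np.array([x, y, depth])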
def tracker_eval_record_data(net, x_crop, target_pos, target_sz, window,
                             scale_z, p, im, next_mask, conf_mask, index_1,
                             index_2, frame_num, data_dir, gtbbox):
    delta, score = net(x_crop)

    delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1).data.cpu().numpy()
    score = F.softmax(score.permute(1, 2, 3, 0).contiguous().view(2, -1),
                      dim=0).data[1, :].cpu().numpy()

    # decode anchor offsets into boxes in the crop's coordinate frame
    delta[0, :] = delta[0, :] * p.anchor[:, 2] + p.anchor[:, 0]
    delta[1, :] = delta[1, :] * p.anchor[:, 3] + p.anchor[:, 1]
    delta[2, :] = np.exp(delta[2, :]) * p.anchor[:, 2]
    delta[3, :] = np.exp(delta[3, :]) * p.anchor[:, 3]

    def change(r):
        return np.maximum(r, 1. / r)

    def sz(w, h):
        pad = (w + h) * 0.5
        sz2 = (w + pad) * (h + pad)
        return np.sqrt(sz2)

    def sz_wh(wh):
        pad = (wh[0] + wh[1]) * 0.5
        sz2 = (wh[0] + pad) * (wh[1] + pad)
        return np.sqrt(sz2)

    # size penalty
    s_c = change(sz(delta[2, :], delta[3, :]) / (sz_wh(target_sz)))
    # ratio penalty
    r_c = change((target_sz[0] / target_sz[1]) / (delta[2, :] / delta[3, :]))
    penalty = np.exp(-(r_c * s_c - 1.) * p.penalty_k)
    pscore = penalty * score

    # window float
    pscore = pscore * (1 - p.window_influence) + window * p.window_influence

    # background penalty
    inspect_num = 100
    top_score = score.argsort()[-inspect_num:][::-1]
    temp_result = np.zeros((inspect_num, 4), dtype=int)
    fg_result = np.zeros(inspect_num)
    score_result = np.zeros(inspect_num)
    original_score = np.zeros(inspect_num)
    all_scores = []
    for i in range(0, inspect_num):
        target = delta[:, top_score[i]] / scale_z
        res_x = target[0] + target_pos[0]
        res_y = target[1] + target_pos[1]
        res_w = target[2]
        res_h = target[3]
        res = cxy_wh_2_rect(np.array([res_x, res_y]), np.array([res_w, res_h]))
        res = [int(l) for l in res]
        temp = [res[0], res[1], (res[0] + res[2]), (res[1] + res[3])]
        res = [np.clip(temp[0], 0, next_mask.shape[1] - 1),
               np.clip(temp[1], 0, next_mask.shape[0] - 1),
               np.clip(temp[2], 0, next_mask.shape[1] - 1),
               np.clip(temp[3], 0, next_mask.shape[0] - 1)]
        res[2] = res[2] - res[0]
        res[3] = res[3] - res[1]

        # IOU with groundtruth
        iou = calculate_iou([res[0], res[1], res[0] + res[2], res[1] + res[3]],
                            bbox_format(gtbbox, 'tlxy_wh_2_rect'))

        # foreground score: fraction of the box covered by the mask
        bbox_mask = np.zeros(next_mask.shape)
        bbox_mask[res[1]:res[1] + res[3], res[0]:res[0] + res[2]] = 1
        intersection = np.count_nonzero(np.logical_and(next_mask, bbox_mask))
        fg_result[i] = float(intersection) / float(res[3] * res[2])

        # conf score
        conf_intersection = np.count_nonzero(np.logical_and(conf_mask, bbox_mask))
        conf_score = float(conf_intersection) / float(res[3] * res[2])

        # size penalty
        size_penalty_score = np.exp(r_c[top_score[i]])
        # scale penalty
        scale_penalty_score = np.exp(s_c[top_score[i]])
        # cosine score
        cosine_score = window[top_score[i]]

        temp_result[i, :] = res
        original_score[i] = score[top_score[i]]
        score_result[i] = np.exp(-(r_c[top_score[i]] * s_c[top_score[i]] - 1.)
                                 * p.penalty_k)
        scores = [original_score[i], fg_result[i], conf_score,
                  size_penalty_score, scale_penalty_score, cosine_score]
        all_scores.append(scores)
        # this is probably the input to your network:
        # original_score[i], fg_result[i], conf_score,
        # size_penalty_score, scale_penalty_score, cosine_score
        # iou is the groundtruth IOU

    x = Variable(torch.from_numpy(np.array(all_scores)).float().cuda())
    y = net(x).data.cpu().numpy()
    # pick the bounding box with the largest network score

    # visualize top ten boxes
    """
    for j in range(9, -1, -1):
        cv2.rectangle(im, (box x0, box y0), (box x1, box y1), (255, 255, 0), 3)
    target_pos = np.array([box x0 + ((box x1 - box x0) / 2),
                           box y0 + ((box y1 - box y0) / 2)])
    target_sz = np.array([(box x1 - box x0), (box y1 - box y0)])
    alternative = []
    return target_pos, target_sz, score_result[top_ids[0]], alternative
    """
    # score_result = score_result * score[top_score]
    # # score_result = 0.1248*original_score + 0.4754*fg_result + 0.1267*score_result
    # score_result = score_result * (1 - p.window_influence) + window[top_score] * p.window_influence
    # top_ids = score_result.argsort()[-10:][::-1]
    # font = cv2.FONT_HERSHEY_SIMPLEX
    # best = []
    # for j in range(9, -1, -1):
    #     cv2.putText(im, "%.2f" % fg_result[top_ids[j]], (20, 20 + j * 10), font, 0.4, (255, 255, 255), 1, cv2.LINE_AA)
    #     cv2.putText(im, "%.2f" % score_result[top_ids[j]], (50, 20 + j * 10), font, 0.4, (255, 255, 255), 1, cv2.LINE_AA)
    #     cv2.putText(im, "%.2f" % original_score[top_ids[j]], (220, 20 + j * 10), font, 0.4, (0, 0, 0), 1, cv2.LINE_AA)
    #     res = temp_result[top_ids[j]]
    #     if original_score[top_ids[j]] > 0.75:
    #         cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]), (255, 255, 0), 3)
    #     else:
    #         cv2.rectangle(im, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]), (255, 0, 0), 3)

    chosen_id = np.argmax(y)
    target_pos = np.array([
        temp_result[chosen_id, 0] + (temp_result[chosen_id, 2] / 2),
        temp_result[chosen_id, 1] + (temp_result[chosen_id, 3] / 2)
    ])
    target_sz = np.array([temp_result[chosen_id, 2], temp_result[chosen_id, 3]])
    alternative = []
    return target_pos, target_sz, score_result[chosen_id], alternative
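# calculate_iou and bbox_format are external helpers. A minimal IoU sketch
# under the assumption that both boxes arrive in corner format [x1, y1, x2, y2],
# matching how the call above builds its first argument; bbox_format is assumed
# to convert the groundtruth's top-left x, y, w, h into the same corner format.
def calculate_iou(box_a, box_b):
    # boxes are [x1, y1, x2, y2]
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0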
def get_object_center(q, detect_class):
    # classes:
    #  1. Aeroplanes     2. Bicycles    3. Birds    4. Boats       5. Bottles
    #  6. Buses          7. Cars        8. Cats     9. Chairs     10. Cows
    # 11. Dining tables 12. Dogs       13. Horses  14. Motorbikes 15. People
    # 16. Potted plants 17. Sheep      18. Sofas   19. Trains     20. TV/Monitors
    slim = tf.contrib.slim

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # input placeholder
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # evaluation pre-processing: resize to the SSD net shape
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # define the SSD model
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                       is_training=False,
                                                       reuse=reuse)

    # restore the SSD model
    # ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    ckpt_filename = '../SSD-Tensorflow/checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes
    ssd_anchors = ssd_net.anchors(net_shape)

    # main image processing routine
    def process_image(img, select_threshold=0.5, nms_threshold=.45,
                      net_shape=(300, 300)):
        # run the SSD network
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})
        # get classes and bboxes from the net outputs
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape,
            num_classes=21, decode=True)
        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(
            rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # resize bboxes to the original image shape; note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def get_bboxes(rclasses, rbboxes):
        # collect detections as dicts of normalized corner coordinates
        number_classes = rclasses.shape[0]
        object_bboxes = []
        for i in range(number_classes):
            object_bbox = dict()
            object_bbox['i'] = i
            object_bbox['class'] = rclasses[i]
            object_bbox['y_min'] = rbboxes[i, 0]
            object_bbox['x_min'] = rbboxes[i, 1]
            object_bbox['y_max'] = rbboxes[i, 2]
            object_bbox['x_max'] = rbboxes[i, 3]
            object_bboxes.append(object_bbox)
        return object_bboxes

    # load net
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(join(realpath(dirname(__file__)),
                        '../DaSiamRPN-master/code/SiamRPNVOT.model')))
    net.eval()

    # open video capture
    video = cv2.VideoCapture(0)
    if not video.isOpened():
        print("Could not open video")
        sys.exit()

    index = True
    while index:
        # read first frame
        ok, frame = video.read()
        if not ok:
            print('Cannot read video file')
            sys.exit()

        # define an initial bounding box from the first matching detection
        height = frame.shape[0]
        width = frame.shape[1]
        rclasses, rscores, rbboxes = process_image(frame)
        bboxes = get_bboxes(rclasses, rbboxes)
        for bbox in bboxes:
            if bbox['class'] == detect_class:
                print(bbox)
                ymin = int(bbox['y_min'] * height)
                xmin = int(bbox['x_min'] * width)
                ymax = int(bbox['y_max'] * height)
                xmax = int(bbox['x_max'] * width)
                cx = (xmin + xmax) / 2
                cy = (ymin + ymax) / 2
                h = ymax - ymin
                w = xmax - xmin
                new_bbox = (cx, cy, w, h)
                print(new_bbox)
                index = False
                break

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    state = SiamRPN_init(frame, target_pos, target_sz, net)

    # tracking and visualization
    toc = 0
    count_number = 0
    while True:
        # read a new frame
        ok, frame = video.read()
        if not ok:
            break

        # start timer
        tic = cv2.getTickCount()
        # update tracker
        state = SiamRPN_track(state, frame)  # track
        # print(state)
        toc += cv2.getTickCount() - tic
        if state:
            res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            res = [int(l) for l in res]
            cv2.rectangle(frame, (res[0], res[1]),
                          (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 3)
            count_number += 1
            # set object_center (normalized to [0, 1])
            object_center = dict()
            object_center['x'] = state['target_pos'][0] / width
            object_center['y'] = state['target_pos'][1] / height
            q.put(object_center)
        if (not state) or count_number % 40 == 3:
            # tracking failure: re-detect with SSD and re-init the tracker
            cv2.putText(frame, "Tracking failure detected", (100, 80),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
            index = True
            while index:
                ok, frame = video.read()
                rclasses, rscores, rbboxes = process_image(frame)
                bboxes = get_bboxes(rclasses, rbboxes)
                for bbox in bboxes:
                    if bbox['class'] == detect_class:
                        ymin = int(bbox['y_min'] * height)
                        xmin = int(bbox['x_min'] * width)
                        ymax = int(bbox['y_max'] * height)
                        xmax = int(bbox['x_max'] * width)
                        cx = (xmin + xmax) / 2
                        cy = (ymin + ymax) / 2
                        h = ymax - ymin
                        w = xmax - xmin
                        new_bbox = (cx, cy, w, h)
                        target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
                        state = SiamRPN_init(frame, target_pos, target_sz, net)
                        p1 = (int(xmin), int(ymin))
                        p2 = (int(xmax), int(ymax))
                        cv2.rectangle(frame, p1, p2, (0, 255, 0), 2, 1)
                        index = False
                        break

        # resize the frame
        resized_frame = cv2.resize(frame, None, fx=0.65, fy=0.65,
                                   interpolation=cv2.INTER_AREA)
        # flip horizontally for a mirrored display
        horizontal = cv2.flip(resized_frame, 1, dst=None)
        # show the frame
        cv2.namedWindow("SSD+SiamRPN", cv2.WINDOW_NORMAL)
        cv2.imshow('SSD+SiamRPN', horizontal)

        # exit if ESC pressed
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break

    video.release()
    cv2.destroyAllWindows()
def showImage():
    global x1, y1, x2, y2, drawing, init, flag, image, getim, start
    rospy.init_node('RPN', anonymous=True)
    flag = 1
    init = False
    drawing = False
    getim = False
    start = False
    x1, x2, y1, y2 = -1, -1, -1, -1
    flag_lose = False
    count_lose = 0

    print('loading model...........')
    net = SiamRPNvot()
    net.load_state_dict(torch.load(path + 'SiamRPNVOT.model'))
    net.eval().cuda()
    # warm up with dummy template and detection inputs
    z = torch.Tensor(1, 3, 127, 127)
    net.temple(z.cuda())
    x = torch.Tensor(1, 3, 271, 271)
    net(x.cuda())
    print('ready for starting!')

    rospy.Subscriber('/camera/rgb/image_raw', Image, callback)
    pub = rospy.Publisher('/vision/target', Pose, queue_size=10)
    cv2.namedWindow('image')
    cv2.setMouseCallback('image', draw_circle)
    rate = rospy.Rate(30)

    i = 1
    t = time.time()
    fps = 0
    while not rospy.is_shutdown():
        if getim:
            t1 = time.time()
            idd = readid(image)
            pose = Pose()
            pose.position.z = 0
            if start is False and init is True:
                target_pos = np.array([int((x1 + x2) / 2), int((y1 + y2) / 2)])
                target_sz = np.array([int(x2 - x1), int(y2 - y1)])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                start = True
                flag_lose = False
                continue
            if start is True:
                state = SiamRPN_track(state, image)  # track
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]),
                              (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 2)
                # normalized offset of the target from the image center, in [-1, 1]
                pose.position.x = (state['target_pos'][0] - image.shape[1] / 2) / (image.shape[1] / 2)
                pose.position.y = (state['target_pos'][1] - image.shape[0] / 2) / (image.shape[0] / 2)
                cv2.putText(image, str(state['score']),
                            (res[0] + res[2], res[1] + res[3]),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)
                pose.position.z = 1
                if state['score'] < 0.5:
                    count_lose = count_lose + 1
                else:
                    count_lose = 0
                if count_lose > 4:
                    flag_lose = True
            if flag_lose is True:
                cv2.putText(image, 'target is lost!', (200, 200),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3)
                pose.position.z = -1
            if drawing is True:
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

            cv2.putText(image, '#' + str(idd), (30, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
            cx = int(image.shape[1] / 2)
            cy = int(image.shape[0] / 2)
            cv2.line(image, (cx - 20, cy), (cx + 20, cy), (255, 255, 255), 2)
            cv2.line(image, (cx, cy - 20), (cx, cy + 20), (255, 255, 255), 2)
            pub.publish(pose)

            if start is True:
                i = i + 1
                if i > 5:
                    i = 1
                    fps = 5 / (time.time() - t)
                    t = time.time()
            cv2.putText(image, 'fps=' + str(fps), (200, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
            cv2.imshow('image', image)
            cv2.waitKey(1)
            getim = False
        rate.sleep()
def test(score):
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load('/home/traker_hao/code/learn/train_RPN/model/30.model'))
    net.eval().cuda()

    version_name = 'jiasu'
    sequence_path = '/media/traker_hao/data/dataset/UAV1/sequences'
    init_path = '/media/traker_hao/data/dataset/UAV1/annotations'
    result_path = '/home/traker_hao/result/visdrone/' + version_name
    if os.path.exists(result_path) is False:
        os.mkdir(result_path)

    sequence_names = os.listdir(sequence_path)
    random.shuffle(sequence_names)
    # sequence_names.sort()
    i = 0
    for sequence_name in sequence_names:
        print(sequence_name)
        # if sequence_name != 'Suv':
        #     continue
        # sequence_name = 'uav0000054_00000_s'
        imagenames = os.listdir(sequence_path + '/' + sequence_name)
        imagenames.sort()
        print(i)
        i = i + 1
        print(sequence_path + '/' + sequence_name)
        f = open(result_path + '/' + sequence_name + '_' + version_name + '.txt', 'w')
        inited = False
        fp = open(init_path + '/' + sequence_name + '.txt')
        j = 0
        for imagename in imagenames:
            j = j + 1
            image = cv2.imread(sequence_path + '/' + sequence_name + '/' + imagename)
            if inited is False:
                # init the tracker from the first annotation line
                data = fp.readline()
                data = data.strip('\n')
                data = data.split(',')
                [cx, cy, w, h] = (int(data[0]) + int(data[2]) // 2,
                                  int(data[1]) + int(data[3]) // 2,
                                  int(data[2]), int(data[3]))
                f.write(data[0] + ',' + data[1] + ',' + data[2] + ',' + data[3] + '\n')
                target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
                state = SiamRPN_init(image, target_pos, target_sz, net)
                inited = True
                cv2.rectangle(image,
                              (int(cx) - int(w) // 2, int(cy) - int(h) // 2),
                              (int(cx) + int(w) // 2, int(cy) + int(h) // 2),
                              (0, 255, 0), 3)
                cv2.putText(image, sequence_name, (50, 50), 0, 5e-3 * 200,
                            (0, 255, 0), 2)
                cv2.putText(image, 'initing...', (100, 100), 0, 5e-3 * 200,
                            (0, 255, 0), 2)
                image2 = cv2.resize(image, (960, 540))
                cv2.imshow('aa2', image2)
                cv2.waitKey(1)
            else:
                data = fp.readline()
                data = data.strip('\n')
                data = data.split(',')
                try:
                    truth = (int(data[0]), int(data[1]),
                             int(data[0]) + int(data[2]),
                             int(data[1]) + int(data[3]))
                except:
                    truth = [0, 0, 0, 0]

                # update the tracker
                tic = cv2.getTickCount()
                t1 = time.time()
                state = SiamRPN_track(state, image)  # track
                # state['target_sz'] = np.array([int(data[2]), int(data[3])])
                toc = (cv2.getTickCount() - tic) / cv2.getTickFrequency()
                res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
                res = [int(l) for l in res]
                cv2.rectangle(image, (res[0], res[1]),
                              (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 2)
                # visualize the groundtruth
                cv2.rectangle(image, (int(truth[0]), int(truth[1])),
                              (int(truth[2]), int(truth[3])), (0, 255, 0), 2)
                cv2.putText(image, sequence_name, (50, 50), 0, 5e-3 * 200,
                            (0, 255, 0), 2)
                image2 = cv2.resize(image, (960, 540))
                cv2.imshow('aa2', image2)
                if cv2.waitKey(1) == 97:
                    break
        f.close()
# for i in range(10):
#     net.temple(torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda())
#     # selonsy: .cuda() runs the computation on the GPU; FloatTensor(1, 3, 127, 127) is a 4-D float tensor
#     net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

# start to track
handle = vot.VOT("polygon")
Polygon = handle.region()  # region(): sends the configuration message to the client and receives the initialization region and the path of the first frame; returns the initialization region
cx, cy, w, h = get_axis_aligned_bbox(Polygon)  # get_axis_aligned_bbox: convert the coordinates into the RPN format

image_file = handle.frame()  # frame(): fetches a frame (image path) from the client
if not image_file:
    sys.exit(0)

target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = cv2.imread(image_file)  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net)  # init tracker; SiamRPN_init builds the state dict and runs the template branch
# tracking starts from the first given frame, which seems odd -- is the provided region not accurate enough?
# selonsy: possible improvement point
while True:  # enter the tracking loop
    image_file = handle.frame()
    if not image_file:
        break
    im = cv2.imread(image_file)  # HxWxC
    state = SiamRPN_track(state, im)  # track; SiamRPN_track runs the detection branch and updates the state
    res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])  # cxy_wh_2_rect: convert center coordinates to a rectangle
    handle.report(Rectangle(res[0], res[1], res[2], res[3]))  # report: send the tracking result back to the client

print(handle.result)
print(handle.frames)
del handle