def __init__(self):
    """Prepare the SiamMCF tracker: parameters, graph, session and weights.

    Parameter groups are read through ``parse_arguments`` using the root
    directory from ``path_config``; the tracking graph is built once; a
    TensorFlow session with on-demand GPU memory growth is opened; and all
    global variables whose name does not contain ``"postnorm"`` are
    restored from the checkpoint named by ``path_config.SIAMMCF_MODEL``.
    """
    super(SiamMCF, self).__init__("SiamMCF")

    root_dir = path_config.SIAMMCF_ROOT_DIR
    parsed = parse_arguments(root_dir)
    self.hp, self.evaluation, self.env, self.design = parsed

    # Side length of the upsampled score map.
    upsampled = self.hp.response_up * (self.design.score_sz - 1)
    self.final_score_sz = upsampled + 1

    # The graph is built a single time and reused for every sequence.
    graph_nodes = siam.build_tracking_graph(
        root_dir, self.final_score_sz, self.design, self.env, self.hp
    )
    (
        self.filename,
        self.image,
        self.templates_x,
        self.templates_z,
        self.scores_list,
    ) = graph_nodes

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=session_config)
    tf.global_variables_initializer().run(session=self.sess)

    # Variables with "postnorm" in their name keep their initial value;
    # everything else comes from the checkpoint.
    restorable = [
        var for var in tf.global_variables() if "postnorm" not in var.name
    ]
    checkpoint = path_config.SIAMMCF_MODEL
    tf.train.Saver(restorable).restore(self.sess, checkpoint)
def main():
    """Track a user-selected target through a recorded video.

    Opens ``<env.root_sequences>/<sys.argv[1]>.mp4``, shows the first frame
    so the initial bounding box can be drawn with the mouse, waits for the
    ``q`` key and then hands the capture over to ``tracker``. When
    ``run.save_video`` is set, a ``<name>_out.avi`` writer is created and
    released at the end; otherwise ``None`` is passed as the writer.

    Returns early, after printing an error, when the first frame cannot be
    read.
    """
    # Avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    # --- Parse arguments from JSON file ---
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # --- Start Streaming from Video ---
    sequence_base = env.root_sequences + '/' + sys.argv[1]
    cap = cv2.VideoCapture(sequence_base + '.mp4')
    ret, frame = cap.read()
    if not ret:
        # Without a first frame there is nothing to size the writer with or
        # to draw the box on, so stop here instead of failing further down.
        print("Error opening video sequence")
        cap.release()
        return

    # --- Save Video (Optional) ---
    # Always bound so the tracker call below works when saving is disabled.
    vid_write = None
    if run.save_video:
        vid_write = cv2.VideoWriter(
            sequence_base + '_out.avi',
            cv2.VideoWriter_fourcc(*'MJPG'), 25,
            (frame.shape[1], frame.shape[0]), True)

    # --- Define Initial Bounding Box ---
    BB = click_and_crop(frame, design.window_name)
    cv2.namedWindow(design.window_name)
    cv2.startWindowThread()
    cv2.setMouseCallback(design.window_name, BB.callback)
    cv2.imshow(design.window_name, frame)
    cv2.waitKey(0)
    # Keep polling the keyboard until the selection is confirmed with 'q'.
    # NOTE(review): loop extent reconstructed from a whitespace-collapsed
    # source; confirm that only the key poll belongs inside it.
    while True:
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # ----- Define Initial Bounding Box Params & Template -----
    first_corner, second_corner = BB.refPt[0], BB.refPt[1]
    pos_x = int((first_corner[0] + second_corner[0]) / 2)  # Template centre x
    pos_y = int((first_corner[1] + second_corner[1]) / 2)  # Template centre y
    target_w = int(abs(second_corner[0] - first_corner[0]))  # Full box width
    target_h = int(abs(second_corner[1] - first_corner[1]))  # Full box height

    # ----- Begin Tracking -----
    tracker(hp, run, design, pos_x, pos_y, target_w, target_h,
            final_score_sz, templates_z, scores, cap, vid_write, frame)

    cap.release()
    cv2.destroyAllWindows()
    if vid_write is not None:
        vid_write.release()
def InitSiamNetwork(self):
    """Build the siamese tracking graph and return its nodes by name.

    The graph is built into TensorFlow's default graph, which is then
    stored on ``self.graph_siam``.

    Returns:
        dict with the keys ``filename``, ``image``, ``templates_z``,
        ``templates_x``, ``scores`` and ``scores_original``, holding the six
        values returned by ``siam.build_tracking_graph`` in that order.
    """
    graph_nodes = siam.build_tracking_graph(
        self.final_score_sz, self.design, self.env)
    # Unpacking into exactly six names keeps the arity check.
    (filename, image, templates_z,
     templates_x, scores, scores_original) = graph_nodes

    siam_params = dict(
        filename=filename,
        image=image,
        templates_z=templates_z,
        templates_x=templates_x,
        scores=scores,
        scores_original=scores_original,
    )

    # A dedicated tf.Graph() was tried here at some point (left commented
    # out in the original); the default graph is what is recorded.
    self.graph_siam = tf.get_default_graph()
    return siam_params
def main(process, queue, box, video):
    """Run the tracker on ``video`` starting from the detection ``box``.

    ``box`` is rewritten in place: entries 2 and 3 have entries 0 and 1
    subtracted from them, then entries 0 and 1 are shifted by half of the
    new entries 2 and 3. Presumably this turns ``[x1, y1, x2, y2]`` corners
    into ``[cx, cy, w, h]`` -- confirm against the caller. The caller's
    object is modified.

    Args:
        process: forwarded unchanged to ``tracker``.
        queue: forwarded unchanged to ``tracker``.
        box: mutable 4-element sequence, converted in place as described.
        video: forwarded unchanged to ``tracker`` as the frame source.
    """
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()

    # Size for tf.image.resize_images with align_corners=True, e.g.
    #   [1 4 7] => [1 2 3 4 5 6 7]       (length 3*(3-1)+1)
    # rather than
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]   (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build TF graph once for all
    graph_nodes = siam.build_tracking_graph(final_score_sz, design, env)
    image, templates_z, scores = graph_nodes

    # An earlier variant read raw frames from an ffmpeg TCP pipe and got the
    # box from an object detector (DetectorAPI); both are now supplied by
    # the caller through ``video`` and ``box``.

    # In-place conversion; the statement order matters because the second
    # pair of updates uses the results of the first pair.
    box[2] -= box[0]
    box[3] -= box[1]
    box[0] += box[2]/2
    box[1] += box[3]/2
    print ('box', box)

    pos_x = box[0]
    pos_y = box[1]
    target_w = box[2]
    target_h = box[3]

    tracker(hp, run, design, video, pos_x, pos_y, target_w, target_h,
            final_score_sz, image, templates_z, scores, process, queue)
    print ('done')
def main():
    """Evaluate the tracker; for a single video, refine it with optical flow.

    With ``evaluation.video == 'all'`` every video in the dataset folder is
    tracked over ``evaluation.n_subseq`` sub-sequences and per-video plus
    frame-weighted overall statistics are printed.

    Otherwise only ``evaluation.video`` is tracked. For each frame index
    ``i`` in ``1 .. len(images_arr) - 2`` the tracked box centre is then
    propagated from frame ``i`` to frame ``i + 1`` with pyramidal
    Lucas-Kanade optical flow, the motion is plotted, and the propagated
    centre (with the tracked width/height) is what gets scored.
    """
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        n_runs = nv * evaluation.n_subseq
        speed = np.zeros(n_runs)
        precisions = np.zeros(n_runs)
        precisions_auc = np.zeros(n_runs)
        ious = np.zeros(n_runs)
        lengths = np.zeros(n_runs)
        for i in range(nv):
            _, gt, frame_name_list, _, n_frames = _init_video(
                env, evaluation, videos_list[i])
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(
                    hp, run, design, frame_name_list_, pos_x, pos_y,
                    target_w, target_h, final_score_sz, filename, image,
                    templates_z, scores, start_frame)
                # Score against the ground truth trimmed to the same start
                # frame as the boxes, with the same helper the single-video
                # branch uses.
                (lengths[idx], precisions[idx], precisions_auc[idx],
                 ious[idx]) = _compile_results(
                     gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')

        # Overall figures weight every run by its number of frames.
        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold +
              ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')

    else:
        images_arr, gt, frame_name_list, _, _ = _init_video(
            env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(
            hp, run, design, frame_name_list, pos_x, pos_y, target_w,
            target_h, final_score_sz, filename, image, templates_z, scores,
            evaluation.start_frame)

        num_frames = np.size(frame_name_list)
        lk_params = dict(
            winSize=(5, 5),
            maxLevel=2,
            criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                      1, 0.03))
        # Rows 0 and the last one are never written below and stay zero.
        # NOTE(review): confirm that scoring those rows as zeros is intended.
        bboxes_final = np.zeros((num_frames, 4))

        for i in range(1, len(images_arr) - 1):
            frame = images_arr[i + 1]
            old_frame = images_arr[i]
            old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # The single point being tracked is the first two entries of the
            # tracker's box for frame i.
            bbox_i = bboxes[i]
            c, r = int(bbox_i[0]), int(bbox_i[1])
            w, h = int(bbox_i[2]), int(bbox_i[3])
            p0 = np.zeros((1, 1, 2), dtype=np.float32)
            p0[0, 0, 0] = c
            p0[0, 0, 1] = r

            p1, st, err = cv2.calcOpticalFlowPyrLK(
                old_gray, frame_gray, p0, None, **lk_params)
            good_new = p1[st == 1]
            good_old = p0[st == 1]
            bboxes_final[i, :] = p1[0][0][0], p1[0][0][1], w, h

            # Plot the motion of every successfully tracked point. The loop
            # uses its own names so the frame index ``i`` is not clobbered.
            for new_pt, old_pt in zip(good_new, good_old):
                new_x, new_y = new_pt.ravel()
                old_x, old_y = old_pt.ravel()
                fig = plt.figure(1)
                ax = fig.add_subplot(111)
                r1 = patches.ConnectionPatch(
                    (new_x, new_y), (old_x, old_y), 'data', 'data',
                    arrowstyle="-|>")
                r2 = patches.Circle((new_x, new_y), 5, color='r')
                ax.imshow(np.uint8(frame))
                ax.add_patch(r2)
                ax.add_patch(r1)
                plt.ion()
                plt.show()
                plt.pause(0.001)
                plt.clf()

        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes_final, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold +
              ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
def main():
    """Run the tracker on one video or a whole dataset and print metrics.

    With ``evaluation.video == 'all'`` every entry of the dataset folder is
    tracked over ``evaluation.n_subseq`` sub-sequences; per-run results and
    frame-weighted overall statistics are printed. Otherwise only
    ``evaluation.video`` is tracked from ``evaluation.start_frame``.

    All output goes through single-argument ``print(...)`` calls, which
    produce the same text on Python 2 and Python 3.
    """
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # Load hyperparameter (hp), evaluation, run, environment (env) and
    # design parameters from the parameters/ directory.
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        n_runs = nv * evaluation.n_subseq
        speed = np.zeros(n_runs)
        precisions = np.zeros(n_runs)
        precisions_auc = np.zeros(n_runs)
        ious = np.zeros(n_runs)
        lengths = np.zeros(n_runs)
        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            # Evenly spaced start frames; the last linspace point (the final
            # frame) is dropped so exactly n_subseq starts remain.
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]

            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(
                    hp, run, design, frame_name_list_, pos_x, pos_y,
                    target_w, target_h, final_score_sz, filename, image,
                    templates_z, scores, start_frame)
                (lengths[idx], precisions[idx], precisions_auc[idx],
                 ious[idx]) = _compile_results(
                     gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
                # Blank separator line; print('') rather than print() so
                # Python 2 does not print an empty tuple.
                print('')

        # Overall figures weight every run by its number of frames.
        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold +
              ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
        print('')

    else:
        gt, frame_name_list, _, _ = _init_video(
            env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(
            hp, run, design, frame_name_list, pos_x, pos_y, target_w,
            target_h, final_score_sz, filename, image, templates_z, scores,
            evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold +
              ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
        print('')
def __init__(self, image_path, region):
    """Initialise the tracker state from the first frame and its region.

    Args:
        image_path: path fed to the graph's ``filename`` placeholder to
            load the first frame.
        region: object exposing ``x``, ``y``, ``width`` and ``height``;
            ``x``/``y`` are treated as one corner of the box, since the
            centre is computed as corner plus half the size.
    """
    # Parse the arguments
    parsed = parse_arguments(mode='siamese')
    self.hp, self.evaluation, self.run, self.env, self.design = parsed

    # Target centre and size from the supplied region
    self.region = region
    self.target_w = region.width
    self.target_h = region.height
    self.pos_x = region.x + region.width / 2
    self.pos_y = region.y + region.height / 2
    self.bbox = (self.pos_x - self.target_w / 2,
                 self.pos_y - self.target_h / 2,
                 self.target_w,
                 self.target_h)

    # Side length of the upsampled score map
    self.final_score_sz = (
        self.hp.response_up * (self.design.score_sz - 1) + 1)

    # Build the network graph
    graph_nodes = siam.build_tracking_graph(
        self.final_score_sz, self.design, self.env)
    (self.filename, self.image, self.templates_z,
     self.templates_x, self.scores, self.scores_original) = graph_nodes

    # Scale factors: scale_step raised to scale_num evenly spaced exponents.
    # NOTE(review): ``scale_num / 2`` is integer division on Python 2 but
    # true division on Python 3, which changes the exponent range for odd
    # scale_num -- confirm which interpreter this targets.
    half_span = np.ceil(self.hp.scale_num / 2)
    exponents = np.linspace(-half_span, half_span, self.hp.scale_num)
    self.scale_factors = self.hp.scale_step**exponents

    # Cosine window to penalize large displacements, normalised to sum 1
    hann_row = np.expand_dims(np.hanning(self.final_score_sz), axis=0)
    window = np.transpose(hann_row) * hann_row
    self.penalty = window / np.sum(window)

    # Exemplar (z) and search (x) patch sizes, with context padding
    context = self.design.context * (self.target_w + self.target_h)
    padded_area = (self.target_w + context) * (self.target_h + context)
    self.z_sz = np.sqrt(np.prod(padded_area))
    size_ratio = float(self.design.search_sz) / self.design.exemplar_sz
    self.x_sz = size_ratio * self.z_sz

    # TensorFlow session on GPU "1", capped at 90% of its memory
    session_config = tf.ConfigProto()
    session_config.gpu_options.visible_device_list = "1"
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    self.sess = tf.Session(config=session_config)

    with self.sess.as_default():
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        self.coord = tf.train.Coordinator()
        self.threads = tf.train.start_queue_runners(coord=self.coord)

    self.run_opts = {}

    # Template for the given region; the decoded image itself is discarded
    template_feed = {
        siam.pos_x_ph: self.pos_x,
        siam.pos_y_ph: self.pos_y,
        siam.z_sz_ph: self.z_sz,
        self.filename: image_path,
    }
    _, self.templates_z_ = self.sess.run(
        [self.image, self.templates_z], feed_dict=template_feed)
def main():
    """Track a user-selected target in a growing (live) video file.

    The stream file is reopened repeatedly so that the newest frames are
    shown, re-projected from equirectangular to stereographic, while the
    user draws the bounding box. The ``w``/``s`` keys change the latitude
    and ``d``/``a`` the roll of the projection; ``q`` or a finished
    selection (``BB.ready``) starts tracking through ``live_tracker``.

    Returns early, after printing an error, when the first frame cannot be
    read. The output video writer is always released at the end.
    """
    # Avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # --- Parse arguments from JSON file ---
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # --- Start Streaming from Live Video ---
    stream_path = "/home/hugogermain/stream.flv"
    cap = cv2.VideoCapture(stream_path)
    start_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)  # Start at last frame
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame - 15)
    ret, frame = cap.read()
    if not ret:
        # No frame means no frame size for the writer and nothing to show.
        print("Error opening video sequence")
        cap.release()
        return

    # --- Save Video ---
    # The writer is created regardless of run.save_video.
    vid_write = cv2.VideoWriter(
        env.root_sequences + '/stream_out.avi',
        cv2.VideoWriter_fourcc(*'MJPG'), 25,
        (frame.shape[1], frame.shape[0]), True)

    # --- Initialize projection maps ---
    e2s = equirect2stereograph(-2.5, frame, 0, 0)

    # ===================================
    # --- Define Initial Bounding Box ---
    # ===================================
    BB = click_and_crop(e2s.project(frame), design.window_name)
    cv2.namedWindow(design.window_name)
    cv2.startWindowThread()
    cv2.setMouseCallback(design.window_name, BB.callback)
    cv2.imshow(design.window_name, e2s.project(frame))
    cv2.waitKey(1)

    # NOTE(review): nesting below was reconstructed from a
    # whitespace-collapsed source; confirm against the original file.
    while True:
        ret, frame = cap.read()
        cv2.waitKey(1)
        if ret:
            # --- Equirectangular to Stereographic Projection ---
            BB.img = e2s.project(frame)
            BB.refresh()
            # --- Reset to last frame to avoid cumulative lagging ---
            cap.release()
            cap = cv2.VideoCapture(stream_path)
            start_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)
            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame - 10)
            cv2.waitKey(1)
            start_frame += 1
        else:
            # --- Reached end of file, wait for new frames ---
            cap.release()
            cap = cv2.VideoCapture(stream_path)
            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame - 10)
            cv2.waitKey(1)

        # ---- Rotate Camera Viewpoint ---
        k = cv2.waitKey(33)
        if k == ord('w'):
            e2s.set_lat(e2s.lat + 10)
        elif k == ord('s'):
            e2s.set_lat(e2s.lat - 10)
        elif k == ord('d'):
            e2s.set_roll(e2s.roll + 10)
        elif k == ord('a'):
            e2s.set_roll(e2s.roll - 10)

        # ---- Selection is done ----
        if k == ord('q') or BB.ready:
            break

    print("[INFO]: Bounding Box Selection: Done")

    # ----- Define Initial Bounding Box Params & Template -----
    first_corner, second_corner = BB.refPt[0], BB.refPt[1]
    pos_x = int((first_corner[0] + second_corner[0]) / 2)  # Template centre x
    pos_y = int((first_corner[1] + second_corner[1]) / 2)  # Template centre y
    target_w = int(abs(second_corner[0] - first_corner[0]))  # Full box width
    target_h = int(abs(second_corner[1] - first_corner[1]))  # Full box height

    # ==========================
    # ----- Begin Tracking -----
    # ==========================
    live_tracker(hp, run, design, pos_x, pos_y, target_w, target_h,
                 final_score_sz, templates_z, scores, cap, vid_write,
                 frame, stream_path, e2s)

    cap.release()
    cv2.destroyAllWindows()
    # Created unconditionally above, so released unconditionally here.
    vid_write.release()
def main(argv):
    """Evaluate the tracker on every video folder of a benchmark dataset.

    The dataset is chosen by the ``dataset_name`` command-line argument,
    which must contain one of ``otb13``, ``otb15``, ``vot16`` or ``vot17``.
    Each video is tracked over ``evaluation.n_subseq`` sub-sequences;
    per-run results and frame-weighted overall statistics are printed.

    Args:
        argv: accepted for the entry-point signature; not read here.

    Raises:
        ValueError: if the dataset name contains none of the known tags.
    """
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # ``root_dir`` is not defined in this function; it is expected to be
    # available at module level.
    hp, evaluation, env, design = parse_arguments(root_dir)
    cmd_args = parse_command_line_arguments()

    # First matching tag wins, in this order.
    dataset_type = None
    for tag in ('otb13', 'otb15', 'vot16', 'vot17'):
        if tag in cmd_args.dataset_name:
            dataset_type = tag
            break
    if dataset_type is None:
        # Fail here with a clear message rather than with an unbound-name
        # error once the first video is initialised.
        raise ValueError(
            "Unsupported dataset name %r: expected it to contain one of "
            "'otb13', 'otb15', 'vot16', 'vot17'" % (cmd_args.dataset_name,))

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_x, templates_z, scores_list = \
        siam.build_tracking_graph(root_dir, final_score_sz, design, env, hp)

    # iterate through all videos of dataset_name
    videos_folder = os.path.join(root_dir, env.root_dataset,
                                 cmd_args.dataset_name)
    videos_list = [
        v for v in os.listdir(videos_folder)
        if os.path.isdir(os.path.join(videos_folder, v))
    ]
    videos_list.sort()
    nv = np.size(videos_list)
    n_runs = nv * evaluation.n_subseq
    speed = np.zeros(n_runs)
    precisions = np.zeros(n_runs)
    precisions_auc = np.zeros(n_runs)
    ious = np.zeros(n_runs)
    lengths = np.zeros(n_runs)
    successes = np.zeros(n_runs)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        # Restore everything except variables with "postnorm" in the name.
        vars_to_load = [
            v for v in tf.global_variables() if 'postnorm' not in v.name
        ]
        siam_ckpt_name = 'pretrained/siam_mcf.ckpt-50000'
        siam_saver = tf.train.Saver(vars_to_load)
        siam_saver.restore(sess, siam_ckpt_name)

        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                videos_list[i], videos_folder, dataset_type)
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = track_one_sequence(
                    hp, design, frame_name_list_, pos_x, pos_y, target_w,
                    target_h, final_score_sz, filename, image, templates_x,
                    templates_z, scores_list, videos_list[i], dataset_type,
                    sess, cmd_args.visualize, cmd_args.save_images,
                    cmd_args.save_bboxes, vot_handle=None, gt=gt_)
                (lengths[idx], precisions[idx], precisions_auc[idx],
                 ious[idx], successes[idx]) = _compile_results(
                     gt_, bboxes, evaluation.dist_threshold)
                # NOTE(review): the '[email protected]' label below looks like
                # an e-mail-obfuscation artefact of a 'name@value' label;
                # restore it from the original file if so.
                print(
                    str(i) + ' -- ' + videos_list[i] +
                    ' -- Precision: ' + "%.2f" % precisions[idx] +
                    ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                    ' -- IOU: ' + "%.2f" % ious[idx] +
                    ' -- [email protected]: ' + "%.2f" % successes[idx] +
                    ' -- Speed: ' + "%.2f" % speed[idx] + ' --')

    # Overall figures weight every run by its number of frames.
    tot_frames = np.sum(lengths)
    mean_precision = np.sum(precisions * lengths) / tot_frames
    mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
    mean_iou = np.sum(ious * lengths) / tot_frames
    mean_speed = np.sum(speed * lengths) / tot_frames
    mean_success = np.sum(successes * lengths) / tot_frames
    print('-- Overall stats (averaged per frame) on ' + str(nv) +
          ' videos (' + str(tot_frames) + ' frames) --')
    print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold +
          ': ' + '%.2f' % mean_precision +
          ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
          ' -- IOU: ' + "%.2f" % mean_iou +
          ' -- [email protected]: ' + "%.2f" % mean_success +
          ' -- Speed: ' + "%.2f" % mean_speed + ' --')
def main():
    """Evaluate the tracker on videos whose ground truth comes from XML.

    With ``evaluation.video == 'all'`` every entry of the dataset folder is
    processed unless ``data/result/<video>`` already exists or
    ``_init_video`` reports a frame/annotation mismatch. Only the first
    sub-sequence of each video is tracked. Otherwise only
    ``evaluation.video`` is tracked; a mismatch there ends the program.

    All output goes through single-argument ``print(...)`` calls, which
    produce the same text on Python 2 and Python 3.
    """
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]    (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]  (length 3*3)
    # The score map is smaller than the image, so it is upscaled here to
    # obtain a score for each image position.
    # Original author's note: "final" here corresponds to search_sz in
    # design (not verifiable from this function).
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env, hp)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        n_runs = nv * evaluation.n_subseq
        speed = np.zeros(n_runs)
        precisions = np.zeros(n_runs)
        precisions_auc = np.zeros(n_runs)
        ious = np.zeros(n_runs)
        lengths = np.zeros(n_runs)
        for i in range(nv):
            # Skip videos that already have a result folder.
            if os.path.exists(os.path.join('data/result', videos_list[i])):
                continue
            (gt, frame_name_list, frame_sz, n_frames,
             video_folder, equal) = _init_video(
                 env, evaluation, videos_list[i])
            if not equal:
                # One string, so Python 2 prints text rather than a tuple.
                print('The .jpg and .xml is not equal in ' +
                      str(video_folder))
                continue
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            # Only the first sub-sequence is evaluated; the remaining slots
            # of the result arrays keep length 0 and so carry no weight in
            # the frame-weighted means below.
            for j in range(1):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = xml_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(
                    hp, run, design, frame_name_list_, pos_x, pos_y,
                    target_w, target_h, final_score_sz, filename, image,
                    templates_z, scores, start_frame)
                (lengths[idx], precisions[idx], precisions_auc[idx],
                 ious[idx]) = _compile_results(
                     gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
                # print('') rather than print() so Python 2 does not print
                # an empty tuple.
                print('')

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('')
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold +
              ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
        print('')

    else:
        gt, frame_name_list, _, _, video_folder, equal = _init_video(
            env, evaluation, evaluation.video)
        if not equal:
            print("The .jpg and .xml is not equal in " + str(video_folder))
            # NOTE(review): exits with status 0 although this is an error
            # path; kept as in the original.
            exit(0)
        pos_x, pos_y, target_w, target_h = xml_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(
            hp, run, design, frame_name_list, pos_x, pos_y, target_w,
            target_h, final_score_sz, filename, image, templates_z, scores,
            evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold +
              ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
        print('')
def main():
    """Evaluate the "conv2" tracker variant and log the overall results.

    With ``evaluation.video == 'all'`` every entry of the dataset folder is
    tracked over ``evaluation.n_subseq`` sub-sequences; unweighted means
    over all runs are printed and appended to ``log_test.txt`` together
    with the current hyper-parameters. Otherwise only ``evaluation.video``
    is tracked and its metrics printed.

    All console output goes through single-argument ``print(...)`` calls,
    which produce the same text on Python 2 and Python 3.
    """
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments(mode="conv2")
    # NOTE(review): other variants of this script use
    # response_up * (score_sz - 1) + 1 (align_corners=True sizing); this
    # one uses the plain product. Confirm that is intended for conv2 mode.
    final_score_sz = hp.response_up * design.score_sz
    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        n_runs = nv * evaluation.n_subseq
        speed = np.zeros(n_runs)
        precisions = np.zeros(n_runs)
        precisions_auc = np.zeros(n_runs)
        ious = np.zeros(n_runs)
        success_auc = np.zeros(n_runs)
        lengths = np.zeros(n_runs)
        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames, img_mode = \
                _init_video_OTB(env, evaluation, videos_list[i])
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]

            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(
                    hp, run, design, frame_name_list_, pos_x, pos_y,
                    target_w, target_h, final_score_sz, image, templates_z,
                    scores, start_frame)
                (lengths[idx], precisions[idx], precisions_auc[idx],
                 ious[idx], success_auc[idx]) = _compile_results(
                     gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Success AUC: ' + "%.2f" % success_auc[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
                # print('') rather than print() so Python 2 does not print
                # an empty tuple.
                print('')

        # Plain means over runs. ``tot_frames`` is therefore the mean run
        # length, although the printed label says "averaged per frame".
        tot_frames = np.mean(lengths)
        mean_precision = np.mean(precisions)
        mean_precision_auc = np.mean(precisions_auc)
        mean_iou = np.mean(ious)
        mean_success_auc = np.mean(success_auc)
        mean_speed = np.mean(speed)

        # Shared by the console output and the log file.
        settings_line = (
            'data set ' + evaluation.dataset +
            ' z_lr %f scale_step %f scale_penalty %f scale_lr %f window_influence %f' % (
                hp.z_lr, hp.scale_step, hp.scale_penalty, hp.scale_lr,
                hp.window_influence))
        header_line = ('-- Overall stats (averaged per frame) on ' +
                       str(nv) + ' videos (' + str(tot_frames) +
                       ' frames) --')

        print(settings_line)
        print(header_line)
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold +
              ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Success AUC: ' + "%.2f" % mean_success_auc +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
        print('')

        with open('log_test.txt', 'a+') as f:
            f.write(time.asctime(time.localtime(time.time())) + '\r\n')
            f.write(settings_line + ' \r\n')
            f.write(header_line + '\r\n')
            # The log uses a shorter label and three decimals for the
            # success AUC, unlike the console line above.
            f.write(' -- Precision ' +
                    "(%d px)" % evaluation.dist_threshold +
                    ': ' + "%.2f" % mean_precision +
                    ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
                    ' -- IOU: ' + "%.2f" % mean_iou +
                    ' -- AUC: ' + "%.3f" % mean_success_auc +
                    ' -- Speed: ' + "%.2f" % mean_speed + ' --\r\n')
            f.write('\r\n')

    else:
        # Only the first two returned values are needed; indexing avoids
        # depending on how many extra values _init_video returns.
        video_info = _init_video(env, evaluation, evaluation.video)
        gt, frame_name_list = video_info[0], video_info[1]
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        # Same argument list as the dataset branch: this graph has no
        # ``filename`` node to pass.
        bboxes, speed = tracker(
            hp, run, design, frame_name_list, pos_x, pos_y, target_w,
            target_h, final_score_sz, image, templates_z, scores,
            evaluation.start_frame)
        # _compile_results returns (length, precision, precision_auc, iou,
        # success_auc) in the dataset branch; take the middle three.
        results = _compile_results(gt, bboxes, evaluation.dist_threshold)
        precision, precision_auc, iou = results[1], results[2], results[3]
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold +
              ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
        print('')
def main():
    """Command-line entry point of the WIND pipeline (YOLO + SiamFC).

    Parses the command line, optionally clears ``CameraData``, validates
    the data source (folder of image folders, or a camera), loads the YOLO
    network, builds the SiamFC graph and then either records from the
    camera and renders the results, or runs YOLO / SiamFC over the given
    folders. Two summary videos, ``Yolov3Vid.avi`` and ``SiamfcVid.avi``,
    are written in the working directory.

    Sets the module globals ``cameraNumber``, ``doYolo``, ``doSiamfc``,
    ``genVideos``, ``refreshRate`` and ``liveFeed`` from the arguments.
    Returns early, after printing a message, when the folder or camera is
    not usable.
    """
    # Command Line Arguments
    parser = argparse.ArgumentParser(description="Run WIND Project")
    parser.add_argument('-c', '--clear', action='store_true', default=False,
                        help='Delete old camera data')
    parser.add_argument('-ny', '--noYolo', action='store_true',
                        default=False, help='Do not use Yolo')
    parser.add_argument('-ns', '--noSiamfc', action='store_true',
                        default=False, help='Do not use Simafc')
    parser.add_argument('-nv', '--noVideo', action='store_true',
                        default=False, help='Do not generate video')
    parser.add_argument('-dl', '--dataLocation',
                        choices=['fromFile', 'fromCamera'],
                        help='Use live camera or folder of images')
    parser.add_argument('-cn', '--cameraNumber', default=0,
                        help='Camera number to use')
    parser.add_argument('-ff', '--filesFolder', default="")
    parser.add_argument('-rf', '--refreshRate', default=10,
                        help='Refresh rate for siamfc')
    parser.add_argument('-t', '--timeRecording', default=3,
                        help='Seconds to record from camera')

    # Assign command line arguments to global variables
    global cameraNumber
    global doYolo
    global doSiamfc
    global genVideos
    global refreshRate
    global liveFeed
    args = parser.parse_args()
    cameraNumber = int(args.cameraNumber)
    doYolo = not args.noYolo
    doSiamfc = not args.noSiamfc
    genVideos = not args.noVideo
    refreshRate = int(args.refreshRate)
    liveFeed = args.dataLocation == 'fromCamera'
    VideoLength = int(args.timeRecording)

    # Clearing out old data in the CameraData folder. Only one level of
    # sub-folders is handled: files inside each sub-folder are removed and
    # the sub-folder is then deleted.
    if args.clear:
        for oldData in os.listdir('CameraData'):
            filep = os.path.join('CameraData', oldData)
            if os.path.isfile(filep):
                os.remove(filep)
                print('Deleted: ' + filep)
            elif os.path.isdir(filep):
                for reallyOldData in os.listdir(filep):
                    newfilep = os.path.join(filep, reallyOldData)
                    if os.path.isfile(newfilep):
                        os.remove(newfilep)
                os.rmdir(filep)
                print('Deleted: ' + filep + '/')
        print('Done Deleting')

    # Checking if file folder is valid; the first image of the first
    # sub-folder provides the frame size for the video writers.
    if args.dataLocation == 'fromFile':
        if os.path.isdir(args.filesFolder):
            dirList = [
                os.path.join(args.filesFolder, d)
                for d in os.listdir(args.filesFolder)
                if os.path.isdir(os.path.join(args.filesFolder, d))
            ]
            dirList.sort()
            testImage = Image.open(
                os.path.join(dirList[0], os.listdir(dirList[0])[0]))
            testWidth, testHeight = testImage.size
        else:
            if args.filesFolder == "":
                print("No folder locatoin was given")
            else:
                print(args.filesFolder + " is not a valid file location")
            return
    # Check if the camera is valid
    # NOTE(review): this branch is also taken when --dataLocation is
    # omitted, but the processing below then takes the folder branch, where
    # ``dirList`` is unbound. Confirm the intended default.
    else:
        try:
            cam = cv2.VideoCapture(cameraNumber)
            try:
                _, testImage = cam.read()
                # A failed read yields None, so this line raises and is
                # reported by the handler below.
                testHeight, testWidth = testImage.shape[:2]
            finally:
                # Give the device back whether or not the probe worked.
                cam.release()
        except Exception:
            # Exception rather than a bare except, so Ctrl-C and
            # SystemExit still propagate.
            print("Camera number given is not valid or connected")
            return

    # Initialize object detector
    database.net = load_net(b"YoloConfig/yolov3-tiny.cfg",
                            b"YoloConfig/yolov3-tiny.weights", 0)
    database.meta = load_meta(b"YoloConfig/coco.data")

    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores, graph, scfg = \
        siam.build_tracking_graph(final_score_sz, design, env)

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    plt.xticks([])
    plt.yticks([])
    YoloVid = cv2.VideoWriter('Yolov3Vid.avi', fourcc, 10,
                              (testWidth, testHeight))
    SiamfcVid = cv2.VideoWriter('SiamfcVid.avi', fourcc, 10,
                                (testWidth, testHeight))

    i = 0
    notDone = True
    now = datetime.datetime.now()
    if args.dataLocation == 'fromCamera':
        # Recording runs on its own thread but is joined immediately, so
        # rendering starts only after recording has finished.
        haha = threading.Thread(
            target=getImages,
            args=('CameraData/', 10, VideoLength, database, now, graph,
                  scfg))
        haha.start()
        haha.join()
        frame_name_list = _init_video(
            'CameraData/%d_%d_%d/' % (now.hour, now.minute, now.second))
        deadline = now + datetime.timedelta(seconds=20 + VideoLength)
        while (notDone and i < VideoLength
               and datetime.datetime.now() < deadline and genVideos):
            try:
                # NOTE(review): ``now.second + i`` can exceed 59; confirm
                # this matches how getImages names its folders.
                secondFolder = os.path.join(
                    'CameraData',
                    '%d_%d_%d' % (now.hour, now.minute, now.second + i))
                if os.path.isdir(secondFolder):
                    showYoloResult(secondFolder, YoloVid, False)
                    showSiamFCResult(secondFolder, SiamfcVid, True)
                    i = i + 1
                else:
                    # Folder not there yet; wait briefly and retry.
                    time.sleep(.2)
            except (KeyboardInterrupt, SystemExit):
                notDone = False
    else:
        # NOTE(review): the nesting of the ``genVideos`` checks was
        # reconstructed from a whitespace-collapsed source; confirm.
        for dirName in dirList:
            if doYolo:
                runYolo(dirName, database, fourcc, testWidth, testHeight,
                        graph, scfg)
                if genVideos:
                    showYoloResult(dirName, YoloVid, False)
            # SiamFC is only run on the first folder.
            if doSiamfc and dirName == dirList[0]:
                runSiamfc(dirName, fourcc, testWidth, testHeight, graph,
                          scfg)
                if genVideos:
                    showSiamFCResult(dirName, SiamfcVid, True)

    YoloVid.release()
    SiamfcVid.release()
    return
def runSiamfc(folderPath, fourcc, testWidth, testHeight, graph, scfg):
    """Track the YOLO detections of one frame folder with SiamFC.

    Reads ``YoloBoxes.txt`` from ``folderPath`` one line per frame. Each
    non-empty line holds ``:``-separated boxes written as
    ``label,pos_x,pos_y,width,height``. On every frame whose index is a
    multiple of the module-level ``refreshRate`` the tracker is (re)started
    from that frame's boxes over ``frame_name_list[i:i + refreshRate - 1]``.

    For every image returned by the tracker the function reads one integer
    from ``nuclearAngles.txt``, calls ``calcProbs`` (which receives the
    ``SiamfcBoxes.txt`` handle), draws an angle marker, appends the image to
    ``SiamfcVid.avi`` and saves it next to the source frame as
    ``*_siamfc.png``.

    Args:
        folderPath: Directory containing the frames and the text files above.
        fourcc: Codec handed to ``cv2.VideoWriter``.
        testWidth: Frame width used for the video writer and angle marker.
        testHeight: Frame height used for the video writer and angle marker.
        graph: Unused here; a fresh tracking graph is built inside.
        scfg: Unused here; see ``graph``.

    Returns:
        None.
    """
    print('Running Siamfc: ' + folderPath)
    frame_name_list = _init_video(folderPath)
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    filename, image, templates_z, scores, graph1, scfg1 = siam.build_tracking_graph(
        final_score_sz, design, env)
    Allbboxes = []
    SiamfcVid = cv2.VideoWriter(
        os.path.join(folderPath, 'SiamfcVid.avi'), fourcc, 10,
        (testWidth, testHeight))
    print(refreshRate)
    try:
        # Context managers guarantee all three files are closed, also when
        # the tracker or a parse step raises.
        with open(os.path.join(folderPath, 'YoloBoxes.txt')) as fp, \
                open(folderPath + "/SiamfcBoxes.txt", "w+") as f, \
                open(os.path.join(folderPath, "nuclearAngles.txt"), "r") as nucAngles:
            for i in range(len(frame_name_list)):
                line = fp.readline()
                # Skip frames without detections (blank line or EOF) and
                # frames that are not at a refresh boundary.
                if not line.strip() or i % refreshRate != 0:
                    continue

                finalImages = []
                # Strip only the newline, so a last line without a trailing
                # newline does not lose its final digit.
                boxes = line.rstrip('\n').split(':')

                # Existing tracks are only padded for this window; the
                # re-tracking of old boxes was disabled by the author.
                for tracked in Allbboxes:
                    tracked.padafter(refreshRate)

                for boxNr, entry in enumerate(boxes):
                    fields = entry.split(',')
                    label = fields[0]
                    # A real list (not a lazy ``map``) so this also works on
                    # Python 3.
                    pos_x, pos_y, target_w, target_h = [
                        int(v) for v in fields[1:5]]
                    color = colors[boxNr % len(colors)]
                    print(
                        'In folder %s Image %d has a box at %d,%d,%d,%d with label %s'
                        % (folderPath, i, pos_x, pos_y, target_w, target_h,
                           label))
                    print('Pos_x: %d, Pos_y:%d, width:%d, height:%d' %
                          (pos_x, pos_y, target_w, target_h))
                    bboxes, _, finalImages = tracker(
                        graph1, scfg1, hp, run, design,
                        frame_name_list[i:i + refreshRate - 1], pos_x, pos_y,
                        target_w, target_h, final_score_sz, filename, image,
                        templates_z, scores, label, 0, color, 0,
                        refreshRate - 1, finalImages, 0)
                    newBox = sfc_bbox(color, label, bboxes, 0)
                    newBox.padfront(i)
                    Allbboxes.append(newBox)
                    print(bboxes)

                # In live mode the previous capture folder is the same path
                # with its third '_'-separated field decremented by one.
                oldFolderPath = ""
                if liveFeed:
                    try:
                        parts = folderPath.split('_')
                        parts[2] = str(int(parts[2]) - 1)
                        oldFolderPath = '_'.join(parts)
                    except (IndexError, ValueError):
                        print("Could not load old probabilies")
                if os.path.isfile(oldFolderPath + "/SiamfcBoxes.txt"):
                    print('Extracting old probs')
                    old_probs = getOldProbs(
                        oldFolderPath, Allbboxes,
                        [item.split(',')[0] for item in boxes])
                    print(old_probs)

                for j in range(len(finalImages)):
                    angle = int(nucAngles.readline())
                    calcProbs(finalImages[j], angle, Allbboxes, i + j, f)
                    cv2.circle(finalImages[j],
                               (int(float(1.0 - angle / 180.0) * testWidth),
                                int(testHeight / 2)), 10, (0, 0, 225), -1)
                    SiamfcVid.write(finalImages[j])
                    # finalImages[j] belongs to frame i + j of the sequence
                    # (the same index calcProbs receives), so save it next to
                    # that frame rather than next to frame j.
                    cv2.imwrite(frame_name_list[i + j][0:-4] + '_siamfc.png',
                                finalImages[j])
    finally:
        SiamfcVid.release()
    return
def main():
    """Run the tracker on the configured evaluation video(s) and print metrics.

    When ``evaluation.video == 'all'`` every entry of the dataset folder is
    tracked from ``evaluation.n_subseq`` different start frames and one
    result line is printed per run. Otherwise only ``evaluation.video`` is
    tracked, starting at ``evaluation.start_frame``.
    """
    # Avoid printing TF debugging information (only error logs are shown).
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]       (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]   (length 3*3)
    # Author's note: design.score_sz=33, hp.response_up=8.
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # Build the TF graph once for all videos. filename, image, templates_z
    # and scores are only graph operations; actual values are produced when
    # a session runs them (done inside tracker).
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # Iterate through all videos of the evaluation dataset.
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        # os.listdir returns the files and folders under the given path.
        videos_list = sorted(os.listdir(dataset_folder))
        nv = len(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        # Loop over the different video samples.
        for i in range(nv):
            # frame_name_list: each image of a video sequence.
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            # np.rint rounds to the nearest integer but keeps the float
            # dtype. The sequence is cut at n_subseq + 1 evenly spaced
            # points and the first n_subseq of them are used as start
            # frames. (Fixed: this line referenced the undefined name
            # ``n_frame``.)
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                # Everything from start_frame onwards forms this run.
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(
                    hp, run, design, env, evaluation, frame_name_list_,
                    pos_x, pos_y, target_w, target_h, final_score_sz,
                    filename, image, templates_z, scores, start_frame)
                # gt_: ground truth; bboxes: the result of tracking.
                lengths[idx], precisions[idx], precisions_auc[idx], ious[
                    idx] = _compile_results(gt_, bboxes,
                                            evaluation.dist_threshold)
                print(
                    str(i) + '--' + videos_list[i] + '--Precision: ' +
                    "%.2f" % precisions[idx] + '--Precisions AUC: ' +
                    "%.2f" % precisions_auc[idx] + '--IOU: ' +
                    "%.2f" % ious[idx] + '--Speed: ' + "%.2f" % speed[idx] +
                    '--')
    else:
        # A single video was requested by name.
        print(evaluation.video)
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, env, evaluation,
                                frame_name_list, pos_x, pos_y, target_w,
                                target_h, final_score_sz, filename, image,
                                templates_z, scores, evaluation.start_frame)
        _, precision, precisions_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video + '--Precision: ' +
              "(%d px)" % evaluation.dist_threshold + ': ' +
              "%.2f" % precision + '--Precisions AUC: ' +
              "%.2f" % precisions_auc + '--IOU: ' + "%.2f" % iou +
              '--Speed: ' + "%.2f" % speed + '--')
def main():
    """Track the configured video(s) and report precision, IOU and speed.

    With ``evaluation.video == 'all'`` every entry of the dataset folder is
    tracked from ``evaluation.n_subseq`` start frames; a line is printed per
    run, followed by averages weighted by the length of each run. Otherwise
    the single named video is tracked, with one target per ground-truth row
    when ``evaluation.multi_object`` is set and a single target if not.
    """
    # Keep TensorFlow's debug output off the console.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Size for tf.image.resize_images with align_corners=True, e.g.
    #   [1 4 7] => [1 2 3 4 5 6 7]       (length 3*(3-1)+1)
    # rather than
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]   (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # The TF graph is built a single time and shared by every run below.
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    if evaluation.video != 'all':
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        # Number of objects to track: every ground-truth row in
        # multi-object mode, otherwise only the first one.
        n_targets = len(gt) if evaluation.multi_object else 1
        objects = np.zeros((n_targets, 4))
        for k in range(n_targets):
            objects[k, :] = region_to_bbox(gt[k])

        print("Tracking started!")
        bboxes, speed = tracker(hp, run, design, frame_name_list, objects,
                                final_score_sz, filename, image, templates_z,
                                scores, evaluation.start_frame)
        speed_txt = ' -- Speed: ' + "%.2f" % speed + ' --'
        if not evaluation.multi_object:
            _, precision, precision_auc, iou = _compile_results(
                gt, bboxes, evaluation.dist_threshold)
            print(evaluation.video +
                  ' -- Precision ' + "(%d px)" % evaluation.dist_threshold +
                  ': ' + "%.2f" % precision +
                  ' -- Precision AUC: ' + "%.2f" % precision_auc +
                  ' -- IOU: ' + "%.2f" % iou +
                  speed_txt)
        else:
            print('No Ground Truth available for multi object, just printing speed result....\n' +
                  evaluation.video +
                  speed_txt)
    else:
        # Walk over every video found in the evaluation dataset folder.
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        n_runs = nv * evaluation.n_subseq
        speed = np.zeros(n_runs)
        precisions = np.zeros(n_runs)
        precisions_auc = np.zeros(n_runs)
        ious = np.zeros(n_runs)
        lengths = np.zeros(n_runs)

        for i in range(nv):
            video = videos_list[i]
            gt, frame_name_list, _, n_frames = _init_video(
                env, evaluation, video)
            # Evenly spaced start frames; the last cut point is dropped.
            cuts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = cuts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                # Run the tracker on the selected sub-sequence.
                print("Tracking started!")
                bboxes, speed[idx] = tracker(
                    hp, run, design, frame_name_list_, pos_x, pos_y,
                    target_w, target_h, final_score_sz, filename, image,
                    templates_z, scores, start_frame)
                (lengths[idx], precisions[idx], precisions_auc[idx],
                 ious[idx]) = _compile_results(gt_, bboxes,
                                               evaluation.dist_threshold)
                run_report = (str(i) + ' -- ' + video +
                              ' -- Precision: ' + "%.2f" % precisions[idx] +
                              ' -- Precisions AUC: ' +
                              "%.2f" % precisions_auc[idx] +
                              ' -- IOU: ' + "%.2f" % ious[idx] +
                              ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
                print(run_report)

        # Per-frame averages: each run is weighted by its length.
        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold +
              ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')

    print("Tracking finished!")
def main():
    """Track every video of the configured dataset and write result files.

    The dataset is created from the module-level ``args_dataset`` and
    ``dataset_root``. For each video the tracker is initialised from the
    first ground-truth box, and the resulting boxes are written to
    ``<result_output>/<video>/<video>_001.txt``. When the module-level
    ``visualize`` flag is set, each frame is shown with the tracked box
    (green) and the ground-truth box (black), waiting for a key press per
    frame.
    """
    # Avoid printing TF debugging information.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]       (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]   (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # Build the TF graph once for all videos.
    filename, image, templates_z, templates_x, scores, scores_original = siam.build_tracking_graph(
        final_score_sz, design, env)

    dataset = DatasetFactory.create_dataset(name=args_dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    # Sort the video names once instead of re-sorting on every iteration.
    videos_list = sorted(dataset.videos.keys())
    for current_key in videos_list:
        gt, frame_name_list, frame_sz, n_frames = _init_video(
            dataset, current_key)
        # A single run per video, always starting at the first frame.
        start_frame = 0
        gt_ = gt[start_frame:, :]
        frame_name_list_ = frame_name_list[start_frame:]
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
        bboxes, _ = tracker(current_key, hp, run, design, frame_name_list_,
                            pos_x, pos_y, target_w, target_h, final_score_sz,
                            filename, image, templates_z, templates_x, scores,
                            scores_original, start_frame)

        if visualize:
            for bbox, groundt, frame_name in zip(bboxes, gt_,
                                                 frame_name_list_):
                # Use a separate name for the loaded pixels: rebinding
                # ``image`` here would replace the graph handle that the
                # next video's tracker call needs.
                frame = cv2.imread(frame_name)
                bbox_pt1, bbox_pt2 = get_bbox_cv(bbox)
                bbox_gt1, bbox_gt2 = get_gt_bbox_cv(groundt)
                # Draw result.
                cv2.rectangle(frame, bbox_pt1, bbox_pt2, (0, 255, 0))
                # Draw ground truth.
                cv2.rectangle(frame, bbox_gt1, bbox_gt2, (0, 0, 0))
                cv2.imshow("Results:", frame)
                cv2.waitKey()

        bboxes = bboxes.tolist()
        # The first entry is replaced by a lone 1, written below as the
        # line "1" (presumably the benchmark's marker for the
        # initialisation frame -- TODO confirm).
        bboxes[0] = [1]

        target_dir = os.path.join(result_output, current_key)
        # makedirs also creates result_output itself when it is missing.
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        results_file = current_key + "_" + "{:03d}".format(1) + ".txt"
        results_abs_file = os.path.join(target_dir, results_file)
        with open(results_abs_file, "w") as f:
            for bbox in bboxes:
                if len(bbox) == 1:
                    f.write('%d\n' % (bbox[0]))
                else:
                    f.write('%.2f, %.2f, %.2f, %.2f\n' %
                            (bbox[0], bbox[1], bbox[2], bbox[3]))