def evaluate():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]     (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7] (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build TF graph once for all
    gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
    frame_sz = list(cv2.imread(frame_name_list[0]).shape)
    siamNet = siam.Siamese(batch_size=1)
    image, z_crops, x_crops, templates_z, scores, loss, _, distance_to_gt, summary, \
        templates_x, max_pos_x, max_pos_y = siamNet.build_tracking_graph_train(
            final_score_sz, design, env, hp, frame_sz)

    pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])
    bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                            target_w, target_h, final_score_sz, image,
                            templates_z, scores, evaluation.start_frame,
                            path_ckpt=os.path.join(design.saver_folder,
                                                   design.path_ckpt),
                            siamNet=siamNet)
    _, precision, precision_auc, iou = _compile_results(gt, bboxes,
                                                        evaluation.dist_threshold)
    print(evaluation.video +
          ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
          ' -- Precision AUC: ' + "%.2f" % precision_auc +
          ' -- IOU: ' + "%.2f" % iou +
          ' -- Speed: ' + "%.2f" % speed + ' --')
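# A numpy-only sketch (no TF needed) of why final_score_sz above is
# response_up * (score_sz - 1) + 1: with align_corners=True the endpoints are
# preserved and only the gaps between samples are subdivided. The values here
# are illustrative, not taken from the config files.
import numpy as np

score = np.array([1.0, 4.0, 7.0])            # a 1-D "score map" of size 3
response_up = 3
up_sz = response_up * (len(score) - 1) + 1   # 3*(3-1)+1 = 7

# linear interpolation with both endpoints pinned, as align_corners=True does
upsampled = np.interp(np.linspace(0, len(score) - 1, up_sz),
                      np.arange(len(score)), score)
print(upsampled)  # [1. 2. 3. 4. 5. 6. 7.]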
def main(im, bbox):
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    siam = SiameseNet(env.root_pretrained, design.net)

    with Image.fromarray(im) as img:
        # PIL's Image.size is (width, height); swap to (height, width)
        frame_sz = np.asarray(img.size)
        frame_sz[1], frame_sz[0] = frame_sz[0], frame_sz[1]

    im = Image.fromarray(im)
    torch.save(siam.state_dict(),
               '/home/nvidia/jlaplaza/siamfc_pytorch_test/siamfc_pretrained.pt')

    # gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
    pos_x, pos_y, target_w, target_h = region_to_bbox(bbox)
    print(target_w, target_h)

    # bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
    #                         target_w, target_h, final_score_sz, filename,
    #                         image, templates_z, scores, evaluation.start_frame)
    tracker(hp, run, design, im, pos_x, pos_y, target_w, target_h,
            final_score_sz, siam, evaluation.start_frame)
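# Why the (width, height) -> (height, width) swap in main(im, bbox) above is
# needed: PIL reports size as (W, H), while numpy arrays are indexed (H, W).
# A minimal self-contained sketch with a dummy 3x5 frame standing in for a
# real image:
import numpy as np
from PIL import Image

dummy = np.zeros((3, 5, 3), dtype=np.uint8)  # H=3, W=5
img = Image.fromarray(dummy)
print(img.size)               # (5, 3), i.e. (W, H)
print(np.asarray(img).shape)  # (3, 5, 3), i.e. (H, W, C)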
def _compile_results(gt, bboxes, dist_threshold):
    l = np.size(bboxes, 0)
    gt4 = np.zeros((l, 4))
    new_distances = np.zeros(l)
    new_ious = np.zeros(l)
    n_thresholds = 50
    precisions_ths = np.zeros(n_thresholds)

    for i in range(l):
        gt4[i, :] = region_to_bbox(gt[i, :], center=False)
        new_distances[i] = _compute_distance(bboxes[i, :], gt4[i, :])
        new_ious[i] = _compute_iou(bboxes[i, :], gt4[i, :])

    precision = sum(new_distances < dist_threshold) / np.size(new_distances) * 100

    # find above result for many thresholds, then report the AUC
    thresholds = np.linspace(0, 25, n_thresholds + 1)
    thresholds = thresholds[-n_thresholds:]
    # reverse it so that higher values of precision go at the beginning
    thresholds = thresholds[::-1]
    for i in range(n_thresholds):
        precisions_ths[i] = sum(new_distances < thresholds[i]) / np.size(new_distances)

    precision_auc = np.trapz(precisions_ths)
    iou = np.mean(new_ious) * 100

    return l, precision, precision_auc, iou
def _compile_results(gt, bboxes, dist_threshold):
    l = np.size(bboxes, 0)
    gt4 = np.zeros((l, 4))
    new_distances = np.zeros(l)
    new_ious = np.zeros(l)
    n_thresholds = 50
    precisions_ths = np.zeros(n_thresholds)

    for i in range(l):
        gt4[i, :] = region_to_bbox(gt[i, :], center=False)
        new_distances[i] = _compute_distance(bboxes[i, :], gt4[i, :])
        new_ious[i] = _compute_iou(bboxes[i, :], gt4[i, :])

    # what's the percentage of frames in which center displacement is inferior
    # to the given threshold? (OTB metric)
    precision = sum(new_distances < dist_threshold) / np.size(new_distances) * 100

    # find above result for many thresholds, then report the AUC
    thresholds = np.linspace(0, 25, n_thresholds + 1)
    thresholds = thresholds[-n_thresholds:]
    # reverse it so that higher values of precision go at the beginning
    thresholds = thresholds[::-1]
    for i in range(n_thresholds):
        precisions_ths[i] = sum(new_distances < thresholds[i]) / np.size(new_distances)

    # integrate over the thresholds
    precision_auc = np.trapz(precisions_ths)

    # per-frame averaged intersection over union (OTB metric)
    iou = np.mean(new_ious) * 100

    return l, precision, precision_auc, iou
def train_siam_net(design, hp, frame_name_list, num_frames, gt, filename,
                   conv_W, conv_b, siam_net_z, loss, train_op):
    # -------------------------------------------------------------------------
    # index_z: the index of the template frame in frame_name_list
    # -------------------------------------------------------------------------
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # coordinate the loading of image files
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # TensorBoard
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter('/tmp/tensorlogs/siamtf', sess.graph)

        for i in range(0, num_frames - 1):
            pos_x, pos_y, target_w, target_h = region_to_bbox(gt[i])

            # add the context margin to get the sizes of the x and z crops
            t_sz = (target_w + target_h) * design.context_amount
            w_crop_z = target_w + t_sz
            h_crop_z = target_h + t_sz
            sz_z = np.sqrt(float(w_crop_z) * float(h_crop_z))
            # (sic: "instacneSize" is the attribute name used by the design params)
            sz_x = float(design.instacneSize) / float(design.exemplarSize) * sz_z

            # forward pass: compute the exemplar (template) features for frame i
            siam_net_z_ = sess.run([siam_net_z], feed_dict={
                siam.pos_x: pos_x,
                siam.pos_y: pos_y,
                siam.z_size: sz_z,
                filename: frame_name_list[i]})

            # train on frame i+1, paired with the exemplar from frame i;
            # back-propagation is handled by train_op (built from loss)
            result, train_op_ = sess.run([merged, train_op], feed_dict={
                siam.pos_x: pos_x,
                siam.pos_y: pos_y,
                siam.z_size: sz_z,
                siam.x_size: float(sz_x),
                siam_net_z: siam_net_z_[0],
                filename: frame_name_list[i + 1]})
            writer.add_summary(result, i)

        coord.request_stop()
        coord.join(threads)
def _compile_results(gt, bboxes, dist_threshold):
    """
    Computes the results for one sequence based on the tracking bounding
    boxes.

    Args:
        gt: Nx4 array: ground truth bounding boxes.
        bboxes: Nx4 array: predicted bounding boxes.
        dist_threshold: int: threshold in pixels to calculate the precision.

    Returns:
        int: number of boxes/frames in the sequence.
        float: precision of the results.
        float: precision AuC of the results.
        float: IoU of the results.
        float: success rate of the results.
    """
    l = np.size(bboxes, 0)
    gt4 = np.zeros((l, 4))
    new_distances = np.zeros(l)
    new_ious = np.zeros(l)
    n_thresholds = 50
    precisions_ths = np.zeros(n_thresholds)

    for i in range(l):
        gt4[i, :] = region_to_bbox(gt[i, :], center=False)
        new_distances[i] = _compute_distance(bboxes[i, :], gt4[i, :])
        new_ious[i] = _compute_iou(bboxes[i, :], gt4[i, :])

    # what's the percentage of frames in which center displacement is inferior
    # to the given threshold? (OTB metric)
    precision = (sum(new_distances < dist_threshold) /
                 np.size(new_distances) * 100)
    success = sum(new_ious > 0.5) / np.size(new_ious) * 100

    # find above result for many thresholds, then report the AUC
    thresholds = np.linspace(0, 25, n_thresholds + 1)
    thresholds = thresholds[-n_thresholds:]
    # reverse it so that higher values of precision go at the beginning
    thresholds = thresholds[::-1]
    for i in range(n_thresholds):
        precisions_ths[i] = (sum(new_distances < thresholds[i]) /
                             np.size(new_distances))

    # integrate over the thresholds
    precision_auc = np.trapz(precisions_ths)

    # per frame averaged intersection over union (OTB metric)
    iou = np.mean(new_ious) * 100

    return l, precision, precision_auc, iou, success
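# Hedged, self-contained sketch of the precision / precision-AUC computation
# in _compile_results above, using synthetic center distances instead of real
# tracking output (numpy only, no repo helpers needed).
import numpy as np

distances = np.array([2.0, 6.0, 11.0, 30.0])  # per-frame center errors (px)
dist_threshold = 20

# OTB precision: fraction of frames whose center error is below the threshold
precision = np.sum(distances < dist_threshold) / distances.size * 100
print(precision)  # 75.0

# precision curve over thresholds in (0, 25], then area under that curve
n_thresholds = 50
thresholds = np.linspace(0, 25, n_thresholds + 1)[1:][::-1]
precisions_ths = [np.sum(distances < th) / distances.size for th in thresholds]
print(np.trapz(precisions_ths))  # unnormalized AUC, as in _compile_results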
def main():
    # avoid printing TF debugging information (only show the error log)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    hp, evaluation, run, env, design = parse_arguments()

    # build TF graph in siamese once for all
    # siam.init_create_net()
    filename, siam_net_z, loss, train_op = siam.make_siameseFC(env, design, hp)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        # the path of the folder containing all training videos
        train_data_folder = os.path.join(env.root_train_dataset,
                                         evaluation.dataset)
        videos_list = [v for v in os.listdir(train_data_folder)]
        videos_list.sort()
        num_v = len(videos_list)
        for i in range(num_v):
            gt, frame_name_list, frame_sz, n_frames = _init_train_video(
                env, evaluation, videos_list[i])
            start_frame = evaluation.start_frame
            # not sure
            # gt_ = gt[start_frame:, :]
            gt_ = gt[start_frame:]
            frame_name_list_ = frame_name_list[start_frame:]
            num_frames = np.size(frame_name_list_)
            for j in range(num_frames - 1):
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[j])
                # train_siam_net(design, hp, frame_name_list, z_index, pos_x,
                #                pos_y, target_w, target_h, filename,
                #                siam_net_z, loss)
                train_siam_net(design, hp, frame_name_list, j, pos_x, pos_y,
                               target_w, target_h, filename, siam_net_z, loss,
                               train_op)
    else:
        gt, frame_name_list, _, _ = _init_train_video(env, evaluation,
                                                      evaluation.video)
        start_frame = evaluation.start_frame
        gt_ = gt[start_frame:]
        frame_name_list_ = frame_name_list[start_frame:]
        num_frames = np.size(frame_name_list_)
        train_siam_net(design, hp, frame_name_list, num_frames, gt, filename,
                       siam_net_z, loss, train_op)
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]     (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7] (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph_2(
        final_score_sz, design, env)

    # read the ground truth of the chosen video
    gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video)
    pos_x, pos_y, target_w, target_h = region_to_bbox(
        gt[evaluation.start_frame])

    track_cam(hp, run, design, final_score_sz, image, templates_z, scores,
              evaluation.start_frame)
def _compile_results(gt, bboxes, dist_threshold):
    l = np.size(bboxes, 0)
    gt4 = np.zeros((l, 4))
    new_distances = np.zeros(l)
    new_ious = np.zeros(l)
    n_thresholds = 50
    precisions_ths = np.zeros(n_thresholds)

    for i in range(l):
        gt4[i, :] = region_to_bbox(gt[i, :], center=False)
        new_distances[i] = _compute_distance(bboxes[i, :], gt4[i, :])
        # compute the overlap ratio
        new_ious[i] = _compute_iou(bboxes[i, :], gt4[i, :])

    # what's the percentage of frames in which center displacement is inferior
    # to the given threshold? (OTB metric)
    # sum(new_distances < dist_threshold) counts the frames below the threshold
    precision = sum(new_distances < dist_threshold) / np.size(new_distances) * 100

    # find above result for many thresholds, then report the AUC
    thresholds = np.linspace(0, 25, n_thresholds + 1)
    # drop the leading 0 threshold
    thresholds = thresholds[-n_thresholds:]
    # reverse it so that higher values of precision go at the beginning
    thresholds = thresholds[::-1]
    for i in range(n_thresholds):
        precisions_ths[i] = sum(new_distances < thresholds[i]) / np.size(new_distances)

    # integrate over the thresholds: AUC is the area under the precision curve
    precision_auc = np.trapz(precisions_ths)

    # per-frame averaged intersection over union (OTB metric)
    iou = np.mean(new_ious) * 100

    return l, precision, precision_auc, iou
def initialize(self, image_file, box):
    pos_x, pos_y, target_w, target_h = region_to_bbox(box)
    self.tracker = SiamMcfTracker(
        self.design.context, self.design.exemplar_sz, self.design.search_sz,
        self.hp.scale_step, self.hp.scale_num, self.hp.scale_penalty,
        self.hp.scale_lr, self.hp.window_influence, self.design.tot_stride,
        self.hp.response_up, self.final_score_sz, pos_x, pos_y, target_w,
        target_h, image_file, self.sess, self.templates_z, self.filename)
def evaluate():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]     (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7] (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build the computational graph of the Siamese fully-convolutional network
    siamNet = siam.Siamese(batch_size=1)
    # get tensors that will be used during tracking
    image, z_crops, x_crops, templates_z, scores, loss, _, distance_to_gt, \
        summary = siamNet.build_tracking_graph_train(final_score_sz, design,
                                                     env, hp)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv)
        precisions = np.zeros(nv)
        precisions_auc = np.zeros(nv)
        ious = np.zeros(nv)
        lengths = np.zeros(nv)
        for i in range(nv):
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            gt_ = gt[0:, :]
            frame_name_list_ = frame_name_list[0:]
            # the gt coordinate is the bottom-left point of the bbox
            pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
            idx = i
            bboxes, speed[idx] = tracker(hp, run, design, frame_name_list,
                                         pos_x, pos_y, target_w, target_h,
                                         final_score_sz, image, templates_z,
                                         scores,
                                         path_ckpt=os.path.join(
                                             design.saver_folder,
                                             design.path_ckpt),
                                         siamNet=siamNet)
            lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = \
                _compile_results(gt_, bboxes, evaluation.dist_threshold)
            print(str(i) + ' -- ' + videos_list[i] +
                  ' -- Precision: ' + "%.2f" % precisions[idx] +
                  ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                  ' -- IOU: ' + "%.2f" % ious[idx] +
                  ' -- Speed: ' + "%.2f" % speed[idx] + ' --')

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
    # evaluate only one video
    else:
        gt, frame_name_list, frame_sz, n_frames = _init_video(
            env, evaluation, evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(gt[0])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz, image,
                                templates_z, scores,
                                path_ckpt=os.path.join(design.saver_folder,
                                                       design.path_ckpt),
                                siamNet=siamNet)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')

    # NOTE: these are only defined in the single-video branch
    return precision, precision_auc, iou, speed
def main(): # avoid printing TF debugging information os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # TODO: allow parameters from command line or leave everything in json files? hp, evaluation, run, env, design = parse_arguments() # Set size for use with tf.image.resize_images with align_corners=True. # For example, # [1 4 7] => [1 2 3 4 5 6 7] (length 3*(3-1)+1) # instead of # [1 4 7] => [1 1 2 3 4 5 6 7 7] (length 3*3) final_score_sz = hp.response_up * (design.score_sz - 1) + 1 # build TF graph once for all filename, image, templates_z, scores = siam.build_tracking_graph( final_score_sz, design, env) # iterate through all videos of evaluation.dataset if evaluation.video == 'all': dataset_folder = os.path.join(env.root_dataset, evaluation.dataset) videos_list = [v for v in os.listdir(dataset_folder)] videos_list.sort() nv = np.size(videos_list) speed = np.zeros(nv * evaluation.n_subseq) precisions = np.zeros(nv * evaluation.n_subseq) precisions_auc = np.zeros(nv * evaluation.n_subseq) ious = np.zeros(nv * evaluation.n_subseq) lengths = np.zeros(nv * evaluation.n_subseq) for i in range(nv): gt, frame_name_list, frame_sz, n_frames = _init_video( env, evaluation, videos_list[i]) starts = np.rint( np.linspace(0, n_frames - 1, evaluation.n_subseq + 1)) starts = starts[0:evaluation.n_subseq] for j in range(evaluation.n_subseq): start_frame = int(starts[j]) gt_ = gt[start_frame:, :] frame_name_list_ = frame_name_list[start_frame:] pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0]) idx = i * evaluation.n_subseq + j # Call Tracker for the selected sequence print("Tracking started!") bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image, templates_z, scores, start_frame) lengths[idx], precisions[idx], precisions_auc[idx], ious[ idx] = _compile_results(gt_, bboxes, evaluation.dist_threshold) print( str(i) + ' -- ' + videos_list[i] + \ ' -- Precision: ' + "%.2f" % precisions[idx] + \ ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \ ' -- IOU: ' + "%.2f" % ious[idx] + \ ' -- Speed: ' + "%.2f" % speed[idx] + ' --' ) tot_frames = np.sum(lengths) mean_precision = np.sum(precisions * lengths) / tot_frames mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames mean_iou = np.sum(ious * lengths) / tot_frames mean_speed = np.sum(speed * lengths) / tot_frames print('-- Overall stats (averaged per frame) on ' + str(nv) + ' videos (' + str(tot_frames) + ' frames) --') print( ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +\ ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\ ' -- IOU: ' + "%.2f" % mean_iou +\ ' -- Speed: ' + "%.2f" % mean_speed + ' --' ) else: gt, frame_name_list, _, n_frames = _init_video(env, evaluation, evaluation.video) #pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame]) # np.size(frame_name_list) = Amount of frames # ott = amount of Objects To Track ott = len(gt) if evaluation.multi_object else 1 objects = np.zeros((ott, 4)) for i in range(ott): objects[i, :] = region_to_bbox(gt[i]) # Call Tracker for the selected sequence. 
print("Tracking started!") bboxes, speed = tracker(hp, run, design, frame_name_list, objects, final_score_sz, filename, image, templates_z, scores, evaluation.start_frame) if evaluation.multi_object: print('No Ground Truth available for multi object, just printing speed result....\n' + \ evaluation.video + \ ' -- Speed: ' + "%.2f" % speed + ' --' ) else: _, precision, precision_auc, iou = _compile_results( gt, bboxes, evaluation.dist_threshold) print( evaluation.video + \ ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\ ' -- Precision AUC: ' + "%.2f" % precision_auc + \ ' -- IOU: ' + "%.2f" % iou + \ ' -- Speed: ' + "%.2f" % speed + ' --' ) print("Tracking finished!")
def main(): # TODO: allow parameters from command line or leave everything in json files? hp, evaluation, run, env, design = parse_arguments() final_score_sz = hp.response_up * (design.score_sz - 1) + 1 siam = SiameseNet(env.root_pretrained, design.net) # iterate through all videos of evaluation.dataset if evaluation.video == 'all': dataset_folder = os.path.join(env.root_dataset, evaluation.dataset) videos_list = [v for v in os.listdir(dataset_folder) if not v[0] == '.'] videos_list.sort() nv = np.size(videos_list) speed = np.zeros(nv * evaluation.n_subseq) precisions = np.zeros(nv * evaluation.n_subseq) precisions_auc = np.zeros(nv * evaluation.n_subseq) ious = np.zeros(nv * evaluation.n_subseq) lengths = np.zeros(nv * evaluation.n_subseq) for i in range(nv): # -- Iterate through all videos print('video: %d' % (i + 1)) gt, frame_name_list, frame_sz, n_frames = _init_video(env, evaluation, videos_list[i]) starts = np.rint(np.linspace(0, n_frames - 1, evaluation.n_subseq + 1)) starts = starts[0:evaluation.n_subseq] for j in range(evaluation.n_subseq): # -- Iterate through a single video start_frame = int(starts[j]) gt_ = gt[start_frame:, :] frame_name_list_ = frame_name_list[start_frame:] pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0]) idx = i * evaluation.n_subseq + j # bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y, # target_w, target_h, final_score_sz, filename, # image, templates_z, scores, start_frame) bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y, target_w, target_h, final_score_sz, siam, start_frame) # -- here is where tracker.py is called lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = _compile_results(gt_, bboxes, evaluation.dist_threshold) print(str(i) + ' -- ' + videos_list[i] + \ ' -- Precision: ' + "%.2f" % precisions[idx] + \ ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \ ' -- IOU: ' + "%.2f" % ious[idx] + \ ' -- Speed: ' + "%.2f" % speed[idx] + ' --\n') tot_frames = np.sum(lengths) mean_precision = np.sum(precisions * lengths) / tot_frames mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames mean_iou = np.sum(ious * lengths) / tot_frames mean_speed = np.sum(speed * lengths) / tot_frames print('-- Overall stats (averaged per frame) on ' + str(nv) + ' videos (' + str(tot_frames) + ' frames) --') print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +\ ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\ ' -- IOU: ' + "%.2f" % mean_iou +\ ' -- Speed: ' + "%.2f" % mean_speed + ' --\n') else: gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video) pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame]) # bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz, # filename, image, templates_z, scores, evaluation.start_frame) bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz, siam, evaluation.start_frame) _, precision, precision_auc, iou = _compile_results(gt, bboxes, evaluation.dist_threshold) print(evaluation.video + \ ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\ ' -- Precision AUC: ' + "%.2f" % precision_auc + \ ' -- IOU: ' + "%.2f" % iou + \ ' -- Speed: ' + "%.2f" % speed + ' --\n')
def main():
    # avoid printing TF debugging information (only show the error log)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()

    # gt_, frame_name_list_, _, _ = _init_video(env, evaluation, evaluation.video)
    # pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])

    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]     (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7] (length 3*3)
    # design.score_sz = 33, hp.response_up = 8
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1

    # build TF graph once for all
    # filename, image, templates_z, scores are graph tensors only; the actual
    # values are produced by sess.run inside tracker()
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        # os.listdir(path) returns the files and folders under the given path
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        # loop over the video sequences
        for i in range(nv):
            # frame_name_list: the images of one video sequence
            gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            # np.rint() rounds to the nearest integer, keeping the float dtype;
            # split the sequence into n_subseq + 1 evenly spaced start points
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                # keep the frames from start_frame onwards
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(
                    hp, run, design, env, evaluation, frame_name_list_, pos_x,
                    pos_y, target_w, target_h, final_score_sz, filename, image,
                    templates_z, scores, start_frame)
                # gt_: ground truth; bboxes: the tracking results
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = \
                    _compile_results(gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')
    # evaluate a single video
    else:
        print(evaluation.video)
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        # evaluation.start_frame = 0
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, env, evaluation,
                                frame_name_list, pos_x, pos_y, target_w,
                                target_h, final_score_sz, filename, image,
                                templates_z, scores, evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
def main(argv): # avoid printing TF debugging information os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' hp, evaluation, env, design = parse_arguments(root_dir) cmd_args = parse_command_line_arguments() if 'otb13' in cmd_args.dataset_name: dataset_type = 'otb13' elif 'otb15' in cmd_args.dataset_name: dataset_type = 'otb15' elif 'vot16' in cmd_args.dataset_name: dataset_type = 'vot16' elif 'vot17' in cmd_args.dataset_name: dataset_type = 'vot17' # Set size for use with tf.image.resize_images with align_corners=True. # For example, # [1 4 7] => [1 2 3 4 5 6 7] (length 3*(3-1)+1) # instead of # [1 4 7] => [1 1 2 3 4 5 6 7 7] (length 3*3) final_score_sz = hp.response_up * (design.score_sz - 1) + 1 # build TF graph once for all filename, image, templates_x, templates_z, scores_list =\ siam.build_tracking_graph( root_dir, final_score_sz, design, env, hp) # iterate through all videos of dataset_name videos_folder = os.path.join(root_dir, env.root_dataset, cmd_args.dataset_name) videos_list = [ v for v in os.listdir(videos_folder) if os.path.isdir(os.path.join(videos_folder, v)) ] videos_list.sort() nv = np.size(videos_list) speed = np.zeros(nv * evaluation.n_subseq) precisions = np.zeros(nv * evaluation.n_subseq) precisions_auc = np.zeros(nv * evaluation.n_subseq) ious = np.zeros(nv * evaluation.n_subseq) lengths = np.zeros(nv * evaluation.n_subseq) successes = np.zeros(nv * evaluation.n_subseq) config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: tf.global_variables_initializer().run() vars_to_load = [] for v in tf.global_variables(): if 'postnorm' not in v.name: vars_to_load.append(v) siam_ckpt_name = 'pretrained/siam_mcf.ckpt-50000' siam_saver = tf.train.Saver(vars_to_load) siam_saver.restore(sess, siam_ckpt_name) for i in range(nv): gt, frame_name_list, frame_sz, n_frames = _init_video( videos_list[i], videos_folder, dataset_type) starts = np.rint( np.linspace(0, n_frames - 1, evaluation.n_subseq + 1)) starts = starts[0:evaluation.n_subseq] for j in range(evaluation.n_subseq): start_frame = int(starts[j]) gt_ = gt[start_frame:, :] frame_name_list_ = frame_name_list[start_frame:] pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0]) idx = i * evaluation.n_subseq + j bboxes, speed[idx] = track_one_sequence(hp, design, frame_name_list_, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image, templates_x, templates_z, scores_list, videos_list[i], dataset_type, sess, cmd_args.visualize, cmd_args.save_images, cmd_args.save_bboxes, vot_handle=None, gt=gt_) (lengths[idx], precisions[idx], precisions_auc[idx], ious[idx], successes[idx]) = _compile_results(gt_, bboxes, evaluation.dist_threshold) print( str(i) + ' -- ' + videos_list[i] + ' -- Precision: ' + "%.2f" % precisions[idx] + ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + ' -- IOU: ' + "%.2f" % ious[idx] + ' -- [email protected]: ' + "%.2f" % successes[idx] + ' -- Speed: ' + "%.2f" % speed[idx] + ' --') tot_frames = np.sum(lengths) mean_precision = np.sum(precisions * lengths) / tot_frames mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames mean_iou = np.sum(ious * lengths) / tot_frames mean_speed = np.sum(speed * lengths) / tot_frames mean_success = np.sum(successes * lengths) / tot_frames print('-- Overall stats (averaged per frame) on ' + str(nv) + ' videos (' + str(tot_frames) + ' frames) --') print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + '%.2f' % mean_precision + ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc + ' -- IOU: ' + 
"%.2f" % mean_iou + ' -- [email protected]: ' + "%.2f" % mean_success + ' -- Speed: ' + "%.2f" % mean_speed + ' --')
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments(mode="conv2")
    # NOTE: unlike the other variants, this one upsamples the score map to
    # response_up * score_sz; it does not use the align_corners=True sizing
    # response_up * (score_sz - 1) + 1
    final_score_sz = hp.response_up * design.score_sz
    # build TF graph once for all
    image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        # videos_list = videos_list[91:][:]  # only use vot 2016
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        success_auc = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            # gt, frame_name_list, frame_sz, n_frames, img_mode = _init_video(env, evaluation, videos_list[i])
            gt, frame_name_list, frame_sz, n_frames, img_mode = _init_video_OTB(
                env, evaluation, videos_list[i])
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, image,
                                             templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx], \
                    success_auc[idx] = _compile_results(
                        gt_, bboxes, evaluation.dist_threshold)
                print str(i) + ' -- ' + videos_list[i] + \
                    ' -- Precision: ' + "%.2f" % precisions[idx] + \
                    ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \
                    ' -- IOU: ' + "%.2f" % ious[idx] + \
                    ' -- Success AUC: ' + "%.2f" % success_auc[idx] + \
                    ' -- Speed: ' + "%.2f" % speed[idx] + ' --'
                print

        tot_frames = np.sum(lengths)
        # per-video (unweighted) averages
        mean_precision = np.mean(precisions)
        mean_precision_auc = np.mean(precisions_auc)
        mean_iou = np.mean(ious)
        mean_success_auc = np.mean(success_auc)
        mean_speed = np.mean(speed)
        print 'data set ' + evaluation.dataset + \
            ' z_lr %f scale_step %f scale_penalty %f scale_lr %f window_influence %f' % (
                hp.z_lr, hp.scale_step, hp.scale_penalty, hp.scale_lr,
                hp.window_influence)
        print '-- Overall stats (averaged per video) on ' + str(nv) + \
            ' videos (' + str(tot_frames) + ' frames) --'
        print ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision + \
            ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc + \
            ' -- IOU: ' + "%.2f" % mean_iou + \
            ' -- Success AUC: ' + "%.2f" % mean_success_auc + \
            ' -- Speed: ' + "%.2f" % mean_speed + ' --'
        print

        with open('log_test.txt', 'a+') as f:
            f.write(time.asctime(time.localtime(time.time())) + '\r\n')
            f.write('data set ' + evaluation.dataset +
                    ' z_lr %f scale_step %f scale_penalty %f scale_lr %f window_influence %f \r\n' % (
                        hp.z_lr, hp.scale_step, hp.scale_penalty, hp.scale_lr,
                        hp.window_influence))
            f.write('-- Overall stats (averaged per video) on ' + str(nv) +
                    ' videos (' + str(tot_frames) + ' frames) --\r\n')
            f.write(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
                    ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
                    ' -- IOU: ' + "%.2f" % mean_iou +
                    ' -- AUC: ' + "%.3f" % mean_success_auc +
                    ' -- Speed: ' + "%.2f" % mean_speed + ' --\r\n')
            f.write('\r\n')
    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        # NOTE: this graph has no filename placeholder, so none is passed on
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz, image,
                                templates_z, scores, evaluation.start_frame)
        _, precision, precision_auc, iou, _ = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print evaluation.video + \
            ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision + \
            ' -- Precision AUC: ' + "%.2f" % precision_auc + \
            ' -- IOU: ' + "%.2f" % iou + \
            ' -- Speed: ' + "%.2f" % speed + ' --'
        print
def track_one_sequence(hp, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image, templates_x, templates_z, scores_list, vid_name, dataset_type, sess, visualize_results, save_images, save_bboxes, vot_handle, gt=None): """ Handles tracking for one whole sequence. Inputs are fed to the network and the results are collected and can be shown on the screen and saved to the disk. Args: hp: namespace: hyperparameters. design: namespace: design parameters. frame_name_list: string list: list of sorted image paths to be read. pos_x: int: horizontal center of the target. pos_y: int: vertical center of the target. target_w: int: target width. target_h: int: target height. final_score_sz: int: size of the score map after upsampling. filename: string tensor: placeholder for the image path to be read. image: 3D tensor: the image read from the path. templates_x: 4D tensor: instance features from one or more layers concatenated by channels. See siam_mcf_net.inference comments for more details. templates_z: 4D tensor: exemplar features from one or more layers concatenated by channels. See siam_mcf_net.inference comments for more details. scores_list: 5D tensor: batch of score heatmaps for each of the selected layers. vid_name: string: name of this sequence (only for saving purposes). dataset_type: string: name of this dataset (only for saving purposes). sess: an open tf.Session to execute the graph. visualize_results: boolean: whether to show the results on the screen. save_images: boolean: whether to save image results to the disk. save_bboxes: boolean: whether to save bounding boxes to the disk. vot_handle: vot handle for running the VOT toolkit. gt: Nx4 array: optional ground truth bounding boxes (only for visualization purposes). Returns: Nx4 array: the resulting bounding boxes from the tracking. float: the tracking speed in frames per second. 
""" num_frames = np.size(frame_name_list) # stores tracker's output for evaluation bboxes = np.zeros((num_frames, 4)) if save_images: res_dir = 'results/%s/frames/%s' % ( dataset_type, vid_name) if not os.path.exists(res_dir): os.makedirs(res_dir) if save_bboxes: bb_res_dir = 'results/%s/bboxes' % (dataset_type) if not os.path.exists(bb_res_dir): os.makedirs(bb_res_dir) # save first frame position (from ground-truth) bboxes[0, :] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h if vot_handle is not None: frame_path = vot_handle.frame() else: frame_path = frame_name_list[0] tracker = SiamMcfTracker( design.context, design.exemplar_sz, design.search_sz, hp.scale_step, hp.scale_num, hp.scale_penalty, hp.scale_lr, hp.window_influence, design.tot_stride, hp.response_up, final_score_sz, pos_x, pos_y, target_w, target_h, frame_path, sess, templates_z, filename) t_start = time.time() # Get an image from the queue for i in range(1, num_frames): if vot_handle is not None: frame_path = vot_handle.frame() else: frame_path = frame_name_list[i] if save_images or visualize_results: image_ = sess.run(image, feed_dict={filename: frame_path}) bbox = tracker.track( frame_path, sess, templates_z, templates_x, scores_list, filename) # convert <cx,cy,w,h> to <x,y,w,h> and save output bboxes[i, :] = bbox if vot_handle is not None: vot_rect = vot.Rectangle(bbox[0], bbox[1], bbox[2], bbox[3]) vot_handle.report(vot_rect) if visualize_results: show_frame(image_, bboxes[i, :], 1) if save_images: out_img = Image.fromarray(image_.copy().astype(np.uint8)) out_draw = ImageDraw.Draw(out_img) if gt is not None: gt_rect = np.array(region_to_bbox(gt[i, :], False)).astype( np.int32) gt_rect[2:] = gt_rect[:2] + gt_rect[2:] rect = bboxes[i].copy() rect[2:] = rect[:2] + rect[2:] rect = rect.astype(np.int32) pillow_version = [int(x) for x in PIL.__version__.split('.')] if (pillow_version[0] > 5 or (pillow_version[0] == 5 and pillow_version[1] >= 3)): if gt is not None: out_draw.rectangle( [tuple(gt_rect[:2]), tuple(gt_rect[2:])], outline=(0, 0, 255), width=2) out_draw.rectangle( [tuple(rect[:2]), tuple(rect[2:])], outline=(255, 0, 0), width=3) else: if gt is not None: out_draw.rectangle( [tuple(gt_rect[:2]), tuple(gt_rect[2:])], outline=(0, 0, 255)) out_draw.rectangle( [tuple(rect[:2]), tuple(rect[2:])], outline=(255, 0, 0)) out_img.save(os.path.join(res_dir, '%05d.jpg' % (i + 1))) t_elapsed = time.time() - t_start speed = num_frames/t_elapsed if save_bboxes: with open(os.path.join(bb_res_dir, vid_name+'.txt'), 'w') as f: for bb in bboxes: f.write('%.02f,%.02f,%.02f,%.02f\n' % tuple(bb)) return bboxes, speed
def main():
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    siam = SiameseNet(env.root_pretrained, design.net)
    if torch.cuda.is_available():
        siam = siam.cuda()

    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        video_list = [v for v in os.listdir(dataset_folder) if not v[0] == '.']
        video_list.sort()
        nv = np.size(video_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precision_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            print('video: %d' % (i + 1))
            gt, frame_name_list, frame_sz, n_frame = _init_video(
                env, evaluation, video_list[i])
            starts = np.rint(
                np.linspace(0, n_frame - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                starts_frame = int(starts[j])
                gt_ = gt[starts_frame:, :]
                # slice into a new name so the full list stays intact for the
                # next subsequences
                frame_name_list_ = frame_name_list[starts_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, siam, starts_frame)
                lengths[idx], precisions[idx], precision_auc[idx], ious[idx] = \
                    _compile_results(gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + video_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precision_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --\n')

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precision_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print(' -- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --\n')
    else:
        gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz, siam,
                                evaluation.start_frame)
        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --\n')
def get_gt_bbox_cv(bbox):
    x, y, width, height = region_to_bbox(bbox, False)
    return (int(x), int(y)), (int(x + width), int(y + height))
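# Hedged usage sketch for get_gt_bbox_cv above: the corner pair it returns
# plugs straight into cv2.rectangle. The region_to_bbox call is stubbed out
# here with a plain x, y, w, h box so the snippet is self-contained.
import numpy as np
import cv2

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy image
x, y, w, h = 100.0, 50.0, 150.0, 150.0           # stand-in for region_to_bbox output
pt1, pt2 = (int(x), int(y)), (int(x + w), int(y + h))
cv2.rectangle(frame, pt1, pt2, (0, 0, 0))        # same call pattern as in main()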
def main(): # avoid printing TF debugging information os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Load hyperparameter (hp), evaluation, run, environment (env) and design parameters from # parameters/ directory. hp, evaluation, run, env, design = parse_arguments() # Set size for use with tf.image.resize_images with align_corners=True. # For example, # [1 4 7] => [1 2 3 4 5 6 7] (length 3*(3-1)+1) # instead of # [1 4 7] => [1 1 2 3 4 5 6 7 7] (length 3*3) final_score_sz = hp.response_up * (design.score_sz - 1) + 1 # build TF graph once for all filename, image, templates_z, scores = siam.build_tracking_graph( final_score_sz, design, env) # iterate through all videos of evaluation.dataset if evaluation.video == 'all': dataset_folder = os.path.join(env.root_dataset, evaluation.dataset) videos_list = [v for v in os.listdir(dataset_folder)] videos_list.sort() nv = np.size(videos_list) speed = np.zeros(nv * evaluation.n_subseq) precisions = np.zeros(nv * evaluation.n_subseq) precisions_auc = np.zeros(nv * evaluation.n_subseq) ious = np.zeros(nv * evaluation.n_subseq) lengths = np.zeros(nv * evaluation.n_subseq) for i in range(nv): gt, frame_name_list, frame_sz, n_frames = _init_video( env, evaluation, videos_list[i]) starts = np.rint( np.linspace(0, n_frames - 1, evaluation.n_subseq + 1)) starts = starts[0:evaluation.n_subseq] for j in range(evaluation.n_subseq): start_frame = int(starts[j]) gt_ = gt[start_frame:, :] frame_name_list_ = frame_name_list[start_frame:] pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0]) idx = i * evaluation.n_subseq + j bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image, templates_z, scores, start_frame) lengths[idx], precisions[idx], precisions_auc[idx], ious[ idx] = _compile_results(gt_, bboxes, evaluation.dist_threshold) print str(i) + ' -- ' + videos_list[i] + \ ' -- Precision: ' + "%.2f" % precisions[idx] + \ ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] + \ ' -- IOU: ' + "%.2f" % ious[idx] + \ ' -- Speed: ' + "%.2f" % speed[idx] + ' --' print tot_frames = np.sum(lengths) mean_precision = np.sum(precisions * lengths) / tot_frames mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames mean_iou = np.sum(ious * lengths) / tot_frames mean_speed = np.sum(speed * lengths) / tot_frames print '-- Overall stats (averaged per frame) on ' + str( nv) + ' videos (' + str(tot_frames) + ' frames) --' print ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +\ ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +\ ' -- IOU: ' + "%.2f" % mean_iou +\ ' -- Speed: ' + "%.2f" % mean_speed + ' --' print else: gt, frame_name_list, _, _ = _init_video(env, evaluation, evaluation.video) pos_x, pos_y, target_w, target_h = region_to_bbox( gt[evaluation.start_frame]) bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image, templates_z, scores, evaluation.start_frame) _, precision, precision_auc, iou = _compile_results( gt, bboxes, evaluation.dist_threshold) print evaluation.video + \ ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +\ ' -- Precision AUC: ' + "%.2f" % precision_auc + \ ' -- IOU: ' + "%.2f" % iou + \ ' -- Speed: ' + "%.2f" % speed + ' --' print
def main(): # avoid printing TF debugging information os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # TODO: allow parameters from command line or leave everything in json files? # hp, evaluation, run, env, design = parse_arguments() # Set size for use with tf.image.resize_images with align_corners=True. # For example, # [1 4 7] => [1 2 3 4 5 6 7] (length 3*(3-1)+1) # instead of # [1 4 7] => [1 1 2 3 4 5 6 7 7] (length 3*3) # final_score_sz = hp.response_up * (design.score_sz - 1) + 1 # build TF graph once for all # filename, image, templates_z, templates_x, scores, scores_original = siam.build_tracking_graph(final_score_sz, design, env) # create dataset dataset = DatasetFactory.create_dataset(name=args_dataset, dataset_root=dataset_root, load_img=False) # iterate through all videos of evaluation.dataset videos_list = list(dataset.videos.keys()) videos_list.sort() nv = np.size(videos_list) tracker = None for i in range(nv): current_key = sorted(list(dataset.videos.keys()))[i] gt, frame_name_list, frame_sz, n_frames = _init_video( dataset, current_key) rect = region_to_bbox(gt[0], False) # tracker = SiamFCTracker(frame_name_list[0], vot.Rectangle(rect[0],rect[1],rect[2],rect[3])) # tracker = ColorizationTracker(frame_name_list[0], vot.Rectangle(rect[0], rect[1], rect[2], rect[3])) if tracker is None: tracker = HybridTracker( frame_name_list[0], vot.Rectangle(rect[0], rect[1], rect[2], rect[3])) else: tracker.HotInit(frame_name_list[0], vot.Rectangle(rect[0], rect[1], rect[2], rect[3])) bboxes = [] for i in range(0, n_frames): bbox, confidence = tracker.track(frame_name_list[i]) bboxes.append(bbox) #Visualize if visualize: for i, (bbox, groundt, frame_name) in enumerate(zip(bboxes, gt, frame_name_list)): image = cv2.imread(frame_name) bbox_pt1, bbox_pt2 = get_bbox_rect(bbox) bbox_gt1, bbox_gt2 = get_gt_bbox_cv(groundt) #Draw result cv2.rectangle(image, bbox_pt1, bbox_pt2, (0, 255, 0)) #Draw ground truth cv2.rectangle(image, bbox_gt1, bbox_gt2, (0, 0, 0)) cv2.imshow("Results:", image) if save_vis: cv2.imwrite(os.path.join(vis_output, str(i) + ".jpg"), image) cv2.waitKey() #Reset the tracker, get prepared for next subset tracker.reset() # Write the results to disc for evaluation bboxes_n = [] for bbox in bboxes: bboxes_n.append([bbox.x, bbox.y, bbox.width, bbox.height]) bboxes_n[0] = [1] target_dir = os.path.join(result_output, current_key) if not os.path.exists(target_dir): os.mkdir(target_dir) results_file = current_key + "_" + "{:03d}".format(1) + ".txt" results_abs_file = os.path.join(target_dir, results_file) with open(results_abs_file, "w") as f: for bbox in bboxes_n: if len(bbox) == 1: f.write('%d\n' % (bbox[0])) else: f.write('%.2f, %.2f, %.2f, %.2f\n' % (bbox[0], bbox[1], bbox[2], bbox[3]))
def main():
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example,
    #   [1 4 7] => [1 2 3 4 5 6 7]     (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7] (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build TF graph once for all
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)

    # iterate through all videos of evaluation.dataset
    if evaluation.video == 'all':
        dataset_folder = os.path.join(env.root_dataset, evaluation.dataset)
        videos_list = [v for v in os.listdir(dataset_folder)]
        videos_list.sort()
        nv = np.size(videos_list)
        speed = np.zeros(nv * evaluation.n_subseq)
        precisions = np.zeros(nv * evaluation.n_subseq)
        precisions_auc = np.zeros(nv * evaluation.n_subseq)
        ious = np.zeros(nv * evaluation.n_subseq)
        lengths = np.zeros(nv * evaluation.n_subseq)
        for i in range(nv):
            images_arr, gt, frame_name_list, frame_sz, n_frames = _init_video(
                env, evaluation, videos_list[i])
            starts = np.rint(
                np.linspace(0, n_frames - 1, evaluation.n_subseq + 1))
            starts = starts[0:evaluation.n_subseq]
            for j in range(evaluation.n_subseq):
                start_frame = int(starts[j])
                gt_ = gt[start_frame:, :]
                frame_name_list_ = frame_name_list[start_frame:]
                pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0])
                idx = i * evaluation.n_subseq + j
                bboxes, speed[idx] = tracker(hp, run, design, frame_name_list_,
                                             pos_x, pos_y, target_w, target_h,
                                             final_score_sz, filename, image,
                                             templates_z, scores, start_frame)
                lengths[idx], precisions[idx], precisions_auc[idx], ious[idx] = \
                    _compile_results(gt_, bboxes, evaluation.dist_threshold)
                print(str(i) + ' -- ' + videos_list[i] +
                      ' -- Precision: ' + "%.2f" % precisions[idx] +
                      ' -- Precisions AUC: ' + "%.2f" % precisions_auc[idx] +
                      ' -- IOU: ' + "%.2f" % ious[idx] +
                      ' -- Speed: ' + "%.2f" % speed[idx] + ' --')

        tot_frames = np.sum(lengths)
        mean_precision = np.sum(precisions * lengths) / tot_frames
        mean_precision_auc = np.sum(precisions_auc * lengths) / tot_frames
        mean_iou = np.sum(ious * lengths) / tot_frames
        mean_speed = np.sum(speed * lengths) / tot_frames
        print('-- Overall stats (averaged per frame) on ' + str(nv) +
              ' videos (' + str(tot_frames) + ' frames) --')
        print(' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % mean_precision +
              ' -- Precisions AUC: ' + "%.2f" % mean_precision_auc +
              ' -- IOU: ' + "%.2f" % mean_iou +
              ' -- Speed: ' + "%.2f" % mean_speed + ' --')
    else:
        images_arr, gt, frame_name_list, _, _ = _init_video(env, evaluation,
                                                            evaluation.video)
        pos_x, pos_y, target_w, target_h = region_to_bbox(
            gt[evaluation.start_frame])
        bboxes, speed = tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                                target_w, target_h, final_score_sz, filename,
                                image, templates_z, scores,
                                evaluation.start_frame)

        num_frames = np.size(frame_name_list)
        bboxes_final = np.zeros((num_frames, 4))
        lk_params = dict(winSize=(5, 5),
                         maxLevel=2,
                         criteria=(cv2.TERM_CRITERIA_EPS |
                                   cv2.TERM_CRITERIA_COUNT, 1, 0.03))
        for i in range(1, len(images_arr) - 1):
            # take two consecutive frames and refine the box corner with
            # Lucas-Kanade optical flow
            frame = images_arr[i + 1]
            old_frame = images_arr[i]
            old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
            p0 = np.zeros((1, 1, 2), dtype=np.float32)
            bbox_i = bboxes[i]
            c, r, w, h = (int(bbox_i[0]), int(bbox_i[1]), int(bbox_i[2]),
                          int(bbox_i[3]))
            p0[0, 0, 0] = c
            p0[0, 0, 1] = r
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0,
                                                   None, **lk_params)
            good_new = p1[st == 1]
            bboxes_final[i, :] = p1[0][0][0], p1[0][0][1], w, h
            good_old = p0[st == 1]

            # visualize the tracked point and its displacement
            for new, old in zip(good_new, good_old):
                a, b = new.ravel()
                c, d = old.ravel()
                fig = plt.figure(1)
                ax = fig.add_subplot(111)
                r1 = patches.ConnectionPatch((a, b), (c, d), 'data', 'data',
                                             arrowstyle="-|>")
                r2 = patches.Circle((a, b), 5, color='r')
                ax.imshow(np.uint8(frame))
                ax.add_patch(r2)
                ax.add_patch(r1)
                plt.ion()
                plt.show()
                plt.pause(0.001)
                plt.clf()

            old_gray = frame_gray.copy()
            p0 = good_new.reshape(-1, 1, 2)

        _, precision, precision_auc, iou = _compile_results(
            gt, bboxes_final, evaluation.dist_threshold)
        print(evaluation.video +
              ' -- Precision ' + "(%d px)" % evaluation.dist_threshold + ': ' + "%.2f" % precision +
              ' -- Precision AUC: ' + "%.2f" % precision_auc +
              ' -- IOU: ' + "%.2f" % iou +
              ' -- Speed: ' + "%.2f" % speed + ' --')
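# Minimal self-contained sketch of the pyramidal Lucas-Kanade step used in the
# main() above: track one corner point between two synthetic frames where a
# bright square moves 3 px right and 2 px down. The window size and criteria
# here are illustrative, not the values from lk_params.
import numpy as np
import cv2

old_gray = np.zeros((100, 100), dtype=np.uint8)
frame_gray = np.zeros((100, 100), dtype=np.uint8)
old_gray[40:60, 40:60] = 255
frame_gray[42:62, 43:63] = 255

p0 = np.array([[[40.0, 40.0]]], dtype=np.float32)  # top-left corner of the square
p1, st, err = cv2.calcOpticalFlowPyrLK(
    old_gray, frame_gray, p0, None,
    winSize=(15, 15), maxLevel=2,
    criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
print(p1[st == 1])  # approximately [[43. 42.]]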
def cozmo_program(robot: cozmo.robot.Robot): global angle angle = 25. robot.set_head_angle(degrees(angle)).wait_for_completed() robot.set_lift_height(0.0).wait_for_completed() robot.camera.image_stream_enabled = True robot.camera.color_image_enabled = True robot.camera.enable_auto_exposure = True os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' frame = os.path.join(directory, "current.jpeg") print("Starting Tensorflow...") with tf.Session() as sess: print("Session successfully started") model = load_model('modelv1.07-0.96.hdf5') while True: global X, Y, W, H global result X = 245. Y = 165. W = 150. H = 150. gt = [X, Y, W, H] pos_x, pos_y, target_w, target_h = region_to_bbox(gt) frame = os.path.join(directory, "current.jpeg") result = 0 dog_counter = 0 cat_counter = 0 background_counter = 0 next_state = 0 current_state = 0 #Background: 0, Cat:1, Dog:2 while True: latest_img = robot.world.latest_image if latest_img is not None: pilImage = latest_img.raw_image pilImage.resize((640,480), Image.ANTIALIAS).save(os.path.join(directory, "current.jpeg"), "JPEG") show_frame(np.asarray(Image.open(frame)), [900.,900.,900.,900.], 1) img = load_image(frame) [result,out_relu,global_average_pooling2d] = sess.run([model.outputs,model.get_layer('out_relu').output\ ,model.get_layer('global_average_pooling2d').output ], feed_dict={model.input.name:img}) next_state = np.argmax(result) print('Arg max: ',next_state) # Initial Current State is Background if current_state == 0: print('Background') if next_state == 1: # Detected a Cat current_state = 1 # Transition to Cat State background_counter = 0 cat_counter = 1 dog_counter = 0 elif next_state == 2: # Detected a Dog current_state = 2 # Transition to Dog state background_counter = 0 cat_counter = 0 dog_counter = 1 # Current State is Cat elif current_state == 1: print('\t\t\t\t\t\tCat') if next_state == 0: # Detected Background background_counter += 1 if background_counter >= 6: # Transition to Background only if Background appeared for more than 6 times background_counter = 0 current_state = 0 cat_counter = 0 elif next_state == 1: # Detected Cat itself cat_counter +=1 if cat_counter >= 30: print('Cozmo sees a cat') dense = model.get_layer('dense').get_weights() weights = dense[0].T testing_counter = 0 detected_centroid = 0 xmin_avg = 0 xmax_avg = 0 ymin_avg = 0 ymax_avg = 0 frame_average = 2 frame_count = 0 while True: latest_img = robot.world.latest_image if latest_img is not None: pilImage = latest_img.raw_image pilImage.resize((640,480), Image.ANTIALIAS).save(os.path.join(directory, "current.jpeg"), "JPEG") img = load_image(frame) [result,out_relu,global_average_pooling2d] = sess.run([model.outputs,model.get_layer('out_relu').output\ ,model.get_layer('global_average_pooling2d').output ], feed_dict={model.input.name:img}) kernels = out_relu.reshape(7,7,1280) final = np.dot(kernels,weights[result[0].argmax()]) final1 = array_to_img(final.reshape(7,7,1)) final1 = final1.resize((224,224), Image.ANTIALIAS) box = img_to_array(final1).reshape(224,224) #box = cv2.blur(box,(30,30)) temp = (box > box.max()*.8) *1 temp_adjusted = np.ndarray(shape=np.shape(temp), dtype=np.dtype(np.uint8)) temp_adjusted[:,:] = np.asarray(temp)*255 contours, hierarchy = cv2.findContours(temp_adjusted, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:] contours = np.array(contours) max_area = [0,0] # contours index and area for index, contour in enumerate(contours): if(max_area[1]< len(contour)): max_area = [index,len(contour)] contours_adjusted = contours[max_area[0]].squeeze(axis=1).T xmin = 
contours_adjusted[0].min() * (640./224.) ymin = contours_adjusted[1].min() * (480./224.) xmax = contours_adjusted[0].max() * (640./224.) ymax = contours_adjusted[1].max() * (480./224.) if result[0].argmax() == 1: # Frame smoothing frame_count = frame_count + 1 xmin_avg = xmin_avg + xmin xmax_avg = xmax_avg + xmax ymin_avg = ymin_avg + ymin ymax_avg = ymax_avg + ymax if frame_count % frame_average == 0: frame_count = 0 xmin_avg = xmin_avg/frame_average xmax_avg = xmax_avg/frame_average ymin_avg = ymin_avg/frame_average ymax_avg = ymax_avg/frame_average print(xmin_avg, end=",") print(ymin_avg, end=",") print(xmax_avg, end=",") print(ymax_avg, end="\n") ymin_avg = ymin_avg + (ymax_avg - ymin_avg)/2. - H/2. xmin_avg = xmin_avg + (xmax_avg - xmin_avg)/2. - W/2. print("150: ",xmin_avg, end=",") print("150: ",ymin_avg, end="\n") gt = [xmin_avg, ymin_avg, W, H] xmin_avg = 0 xmax_avg = 0 ymin_avg = 0 ymax_avg = 0 pos_x, pos_y, target_w, target_h = region_to_bbox(gt) bboxes = np.zeros((1, 4)) #bboxes[0,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h bboxes[0,:] = pos_x-W/2, pos_y-H/2, W, H print(len(contours)) testing_counter = testing_counter + 1 print("Testing_counter: ",testing_counter) show_frame(np.asarray(Image.open(frame)), gt, 1) print("Cat is detected") print("Starting the tracker ...") if (bboxes[0,1] + bboxes[0,3]/2) < (Y + H/2 - 40): print("Command: Raise the head") angle = angle + 0.5 if angle > 44.5: angle = 44.5 elif (bboxes[0,1] + bboxes[0,3]/2) > (Y + H/2 + 40): print("Command: Lower the head") angle = angle - 0.5 if angle < 0: angle = 0 else: pass set_head_angle_action = robot.set_head_angle(degrees(angle), max_speed=20, in_parallel=True) if straight(bboxes[0,:])[0] != 0 and turn(bboxes[0,:])[0] != 0: robot.drive_wheel_motors(straight(bboxes[0,:])[0] + turn(bboxes[0,:])[0], straight(bboxes[0,:])[1] + turn(bboxes[0,:])[1]) detected_centroid = 0 elif straight(bboxes[0,:])[0] == 0 and turn(bboxes[0,:])[0] == 0: robot.stop_all_motors() detected_centroid = detected_centroid + 1 elif straight(bboxes[0,:])[0] == 0: robot.drive_wheel_motors(turn(bboxes[0,:])[0], turn(bboxes[0,:])[1]) detected_centroid = 0 elif turn(bboxes[0,:])[0] == 0: robot.drive_wheel_motors(straight(bboxes[0,:])[0], straight(bboxes[0,:])[1]) detected_centroid = 0 else: robot.stop_all_motors() detected_centroid = detected_centroid + 1 if detected_centroid > 20//frame_average: detected_centroid = 0 print("Reached a stable state.........\t\t\t\t\t\t\t\t STABLE") # Go near the object set_head_angle_action.wait_for_completed() robot.abort_all_actions(log_abort_messages=True) robot.wait_for_all_actions_completed() robot.set_head_angle(degrees(0.5)).wait_for_completed() print("Robot's head angle: ",robot.head_angle) target_frame_count = 1 while True: latest_img = None while latest_img is None: latest_img = robot.world.latest_image target_frame1 = latest_img.raw_image target_frame1 = target_frame1.resize((640,480), Image.ANTIALIAS) #target_frame1 = target_frame1.convert('L') target_frame1 = np.asarray(target_frame1) #orb1 = cv2.ORB_create(500) #kp1 = orb1.detect(target_frame1,None) #kp1, des1 = orb1.compute(target_frame1, kp1) #features_img1 = cv2.drawKeypoints(target_frame1, kp1, None, color=(255,0,0), flags=0) #plt.imsave("target_frame1_"+str(target_frame_count)+".jpeg",features_img1) plt.imsave("target_frame1_"+str(target_frame_count)+".jpeg",target_frame1) drive_straight_action = robot.drive_straight(distance=cozmo.util.distance_mm(distance_mm=10),speed=cozmo.util.speed_mmps(10), in_parallel=True) 
drive_straight_action.wait_for_completed() robot.set_head_angle(degrees(0.5)).wait_for_completed() print("Robot's head angle: ",robot.head_angle) latest_img = None while latest_img is None: latest_img = robot.world.latest_image target_frame2 = latest_img.raw_image target_frame2 = target_frame2.resize((640,480), Image.ANTIALIAS) #target_frame2 = target_frame2.convert('L') target_frame2 = np.asarray(target_frame2) #orb2 = cv2.ORB_create(500) #kp2 = orb2.detect(target_frame2,None) #kp2, des2 = orb2.compute(target_frame2, kp2) #features_img2 = cv2.drawKeypoints(target_frame2, kp2, None, color=(255,0,0), flags=0) #plt.imsave("target_frame2_"+str(target_frame_count)+".jpeg",features_img2) plt.imsave("target_frame2_"+str(target_frame_count)+".jpeg",target_frame2) target_frame_count = target_frame_count + 1 ''' matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING) matches = matcher.match(des1, des2, None) matches.sort(key=lambda x: x.distance, reverse=False) matches = matches[:10] imMatches = cv2.drawMatches(target_frame1, kp1, target_frame2, kp2, matches, None) cv2.imwrite("matches_tf1_tf2.jpg", imMatches) points1 = np.zeros((len(matches), 2), dtype=np.float32) points2 = np.zeros((len(matches), 2), dtype=np.float32) for i, match in enumerate(matches): points1[i, :] = kp1[match.queryIdx].pt points2[i, :] = kp2[match.trainIdx].pt print("Points1 [{}]: {}".format(i,points1[i][0]), points1[i][1],"\tPoints2: ",points2[i][0], points2[i][1]) index = None dist1_x = [] dist2_x = [] for index in range(len(points1)): dist1_x.append((W/2.)-points1[index][0]) # Extract only the x-coordinate dist2_x.append((W/2.)-points2[index][0]) # Extract only the x-coordinate fw_x = 1./((1./np.array(dist2_x)) - (1./np.array(dist1_x))) # Calculate the image plane to obj plane mapping in x direction pt1_x = [] pt2_x = [] for index in range(len(points1)): pt1_x.append(fw_x[index]/(W/2. - points1[index][0])) pt2_x.append(fw_x[index]/(W/2. - points2[index][0])) print("Approx. distance[{}]: {}".format(index, pt1_x[index])) if len(pt2_x) < 10: break ''' sys.exit(0) else: # Detected Dog dog_counter += 1 if dog_counter >= 6: # Transition to Dog only if Dog appeared for more than 6 times cat_counter = 0 current_state = 2 # Current State is Dog elif current_state == 2: print('\t\t\t\t\t\t\t\t\t\t\t\tDog') if next_state == 0: # Detected Background background_counter += 1 if background_counter >= 6: # Transition to Background only if Background appeared for more than 6 times background_counter = 0 current_state = 0 dog_counter = 0 elif next_state == 2: # Detected Dog itself dog_counter +=1 if dog_counter >= 30: print('Cozmo sees a Dog') robot.drive_wheels(-50, -50) time.sleep(3) robot.drive_wheels(70, -70) time.sleep(2.8) robot.drive_wheels(0, 0) break else: # Detected Cat cat_counter += 1 if cat_counter >= 6: # Transition to Cat only if Cat appeared for more than 6 times dog_counter = 0 current_state = 1
def main(): hp, evaluation, run, env, design = parse_arguments() final_score_sz = hp.response_up * (design.score_sz - 1) + 1 print("final_score_sz is:%d" % (final_score_sz)) gt, frame_name_list, frame_sz, n_frames = _init_video( env, evaluation, videos_path) num_frames = np.size(frame_name_list) scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num / 2), np.ceil(hp.scale_num / 2), hp.scale_num) # cosine window to penalize large displacements hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0) penalty = np.transpose(hann_1d) * hann_1d penalty = penalty / np.sum(penalty) pos_x, pos_y, target_w, target_h = region_to_bbox(gt[start_frame]) context = design.context * (target_w + target_h) z_sz = np.sqrt(np.prod((target_w + context) * (target_h + context))) x_sz = float(design.search_sz) / design.exemplar_sz * z_sz scaled_exemplar = z_sz * scale_factors scaled_search_area = x_sz * scale_factors scaled_target_w = target_w * scale_factors scaled_target_h = target_h * scale_factors # thresholds to saturate patches shrinking/growing min_z = hp.scale_min * z_sz max_z = hp.scale_max * z_sz min_x = hp.scale_min * x_sz max_x = hp.scale_max * x_sz #search size x_sz0_ph = scaled_search_area[0] x_sz1_ph = scaled_search_area[1] x_sz2_ph = scaled_search_area[2] image = Image.open(frame_name_list[0]) image.show() image = np.array(image) # used to pad the crops if design.pad_with_image_mean: avg_chan = np.mean(image, axis=(0, 1)) else: avg_chan = None # pad with if necessary frame_padded_z, npad_z = pad_frame_numpy(image, frame_sz, pos_y, pos_x, z_sz, avg_chan) # extract tensor of z_crops # print type(design.exemplar_sz) z_crops = extract_crops_z_numpy(frame_padded_z, npad_z, pos_y, pos_x, z_sz, design.exemplar_sz) print 'the shape of the img z_crops is :' + ' ' + str(np.shape(z_crops)) z_crops = np.squeeze(z_crops) img = Image.fromarray(z_crops.astype('uint8'), 'RGB') img.show() frame_padded_x, npad_x = pad_frame_numpy(image, frame_sz, pos_y, pos_x, x_sz2_ph, avg_chan) # extract tensor of x_crops (3 scales) x_crops = extract_crops_x_numpy(frame_padded_x, npad_x, pos_y, pos_x, x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz) print 'the shape of the img x_crops is :' + ' ' + str(np.shape(x_crops)) x_crops_1 = np.squeeze(x_crops[0, :, :]) img_1 = Image.fromarray(x_crops_1.astype('uint8'), 'RGB') img_1.show() x_crops_2 = np.squeeze(x_crops[1, :, :]) img_2 = Image.fromarray(x_crops_2.astype('uint8'), 'RGB') img_2.show() x_crops_3 = np.squeeze(x_crops[2, :, :]) img_3 = Image.fromarray(x_crops_3.astype('uint8'), 'RGB') img_3.show()
def main(): # avoid printing TF debugging information os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # TODO: allow parameters from command line or leave everything in json files? hp, evaluation, run, env, design = parse_arguments() # Set size for use with tf.image.resize_images with align_corners=True. # For example, # [1 4 7] => [1 2 3 4 5 6 7] (length 3*(3-1)+1) # instead of # [1 4 7] => [1 1 2 3 4 5 6 7 7] (length 3*3) final_score_sz = hp.response_up * (design.score_sz - 1) + 1 # build TF graph once for all filename, image, templates_z, templates_x, scores, scores_original = siam.build_tracking_graph( final_score_sz, design, env) # create dataset dataset = DatasetFactory.create_dataset(name=args_dataset, dataset_root=dataset_root, load_img=False) # iterate through all videos of evaluation.dataset videos_list = list(dataset.videos.keys()) videos_list.sort() nv = np.size(videos_list) for i in range(nv): current_key = sorted(list(dataset.videos.keys()))[i] gt, frame_name_list, frame_sz, n_frames = _init_video( dataset, current_key) for j in range(1): start_frame = 0 gt_ = gt[start_frame:, :] frame_name_list_ = frame_name_list[start_frame:] pos_x, pos_y, target_w, target_h = region_to_bbox(gt_[0]) bboxes, _ = tracker(videos_list[i], hp, run, design, frame_name_list_, pos_x, pos_y, target_w, target_h, final_score_sz, filename, image, templates_z, templates_x, scores, scores_original, start_frame) #Visualize if visualize: for bbox, groundt, frame_name in zip(bboxes, gt_, frame_name_list_): image = cv2.imread(frame_name) bbox_pt1, bbox_pt2 = get_bbox_cv(bbox) bbox_gt1, bbox_gt2 = get_gt_bbox_cv(groundt) #Draw result cv2.rectangle(image, bbox_pt1, bbox_pt2, (0, 255, 0)) #Draw ground truth cv2.rectangle(image, bbox_gt1, bbox_gt2, (0, 0, 0)) cv2.imshow("Results:", image) cv2.waitKey() bboxes = bboxes.tolist() bboxes[0] = [1] target_dir = os.path.join(result_output, current_key) if not os.path.exists(target_dir): os.mkdir(target_dir) results_file = current_key + "_" + "{:03d}".format(1) + ".txt" results_abs_file = os.path.join(target_dir, results_file) with open(results_abs_file, "w") as f: for bbox in bboxes: if len(bbox) == 1: f.write('%d\n' % (bbox[0])) else: f.write('%.2f, %.2f, %.2f, %.2f\n' % (bbox[0], bbox[1], bbox[2], bbox[3]))