def track_tune(tracker, net, video, config):
    """Run ``tracker`` over one VOT video with the hyper-parameters in ``config``.

    The output directory name encodes the benchmark, the checkpoint stem and
    every tuned hyper-parameter, so concurrent tuning workers can use an empty
    result file as an "already taken" marker for a video/hp combination.

    Args:
        tracker: object exposing ``init(im, target_pos, target_sz, net, hp=hp)``
            and ``track(state, im)``; the state returned by ``track`` must
            provide ``'target_pos'`` and ``'target_sz'``.
        net: model handed to ``tracker.init`` unchanged.
        video: mapping with ``'name'``, ``'image_files'`` and ``'gt'``
            (``gt`` is indexed by frame number).
        config: mapping with ``'benchmark'``, ``'resume'`` (checkpoint path)
            and ``'hp'`` (``small_sz``, ``big_sz``, ``penalty_k``,
            ``window_influence``, ``lr``).

    Returns:
        ``0`` when the result file already exists (another run owns this
        video), otherwise the list of per-frame regions: ``[1.0]`` for an
        init frame, ``[2.0]`` for a lost frame, ``[0.0]`` for a skipped frame,
        or the rectangle produced by ``cxy_wh_2_rect``.

    Raises:
        ValueError: if the benchmark is not a VOT benchmark.
        IOError: if a frame cannot be read from disk.
    """
    benchmark_name = config['benchmark']
    resume = config['resume']
    hp = config['hp']  # penalty_k, scale_lr, window_influence, adaptive size

    checkpoint_stem = resume.split('/')[-1].split('.')[0]
    run_name = (benchmark_name + checkpoint_stem +
                '_small_size_{:.4f}'.format(hp['small_sz']) +
                '_big_size_{:.4f}'.format(hp['big_sz']) +
                '_penalty_k_{:.4f}'.format(hp['penalty_k']) +
                '_w_influence_{:.4f}'.format(hp['window_influence']) +
                '_scale_lr_{:.4f}'.format(hp['lr'])).replace('.', '_')  # no "." in dir names
    tracker_path = os.path.join('test', run_name)
    # exist_ok avoids the check-then-create race between parallel workers.
    os.makedirs(tracker_path, exist_ok=True)

    if 'VOT' not in benchmark_name:
        raise ValueError('Only VOT is supported')
    video_path = os.path.join(tracker_path, 'baseline', video['name'])
    os.makedirs(video_path, exist_ok=True)
    result_path = os.path.join(video_path, video['name'] + '_001.txt')

    # Occupy the result file for parallel running. Mode 'x' creates the file
    # atomically, so exactly one worker can claim a given video.
    try:
        with open(result_path, 'x'):
            pass
    except FileExistsError:
        if benchmark_name.startswith('VOT'):
            return 0
        raise ValueError('Only VOT is supported') from None

    start_frame = 0
    regions = []  # result and states [1 init / 2 lost / 0 skip]
    image_files, gt = video['image_files'], video['gt']

    for f, image_file in enumerate(image_files):
        im = cv2.imread(image_file)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Failed to read image: {}'.format(image_file))
        if len(im.shape) == 2:
            im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)

        if f == start_frame:  # (re-)initialise from the ground truth
            cx, cy, w, h = get_axis_aligned_bbox(gt[f])
            state = tracker.init(im, np.array([cx, cy]), np.array([w, h]),
                                 net, hp=hp)
            regions.append([float(1)])
        elif f > start_frame:  # tracking
            state = tracker.track(state, im)
            location = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            if poly_iou(gt[f], location) > 0:
                regions.append(location)
            else:
                # No overlap with the ground truth: mark as lost and
                # re-initialise after skipping 5 frames.
                regions.append([float(2)])
                start_frame = f + 5
        else:  # skipped frame between a failure and the re-init
            regions.append([float(0)])

    if benchmark_name.startswith('VOT'):
        return regions
    raise ValueError('Only VOT is supported')
def track(tracker, net, video, args):
    """Track one video, write the result file and return the failure count.

    Args:
        tracker: object exposing ``init(im, target_pos, target_sz, net)`` and
            ``track(state, im)``; the state returned by ``track`` must provide
            ``'target_pos'`` and ``'target_sz'``.
        net: model handed to ``tracker.init`` unchanged.
        video: mapping with ``'name'``, ``'image_files'`` and ``'gt'``
            (``gt`` is indexed by frame number).
        args: namespace with ``epoch_test``, ``resume``, ``dataset`` and
            ``arch`` attributes.

    Returns:
        ``0`` if the result file already exists (the video is skipped, which
        lets several processes share one dataset), otherwise the number of
        tracking failures. Failures can only occur for VOT datasets, because
        the overlap is fixed to 1 for every other dataset.

    Raises:
        IOError: if a frame cannot be read from disk.
    """
    start_frame, lost_times, toc = 0, 0, 0

    # Save results under result/<dataset>/<arch>[<checkpoint stem>].
    if args.epoch_test:
        suffix = args.resume.split('/')[-1].split('.')[0]
        tracker_path = os.path.join('result', args.dataset, args.arch + suffix)
    else:
        tracker_path = os.path.join('result', args.dataset, args.arch)
    # exist_ok avoids the check-then-create race in multi-GPU testing.
    os.makedirs(tracker_path, exist_ok=True)

    is_vot = 'VOT' in args.dataset
    if is_vot:
        video_path = os.path.join(tracker_path, 'baseline', video['name'])
        os.makedirs(video_path, exist_ok=True)
        result_path = os.path.join(video_path, video['name'] + '_001.txt')
    else:
        result_path = os.path.join(tracker_path, '{:s}.txt'.format(video['name']))

    if os.path.exists(result_path):
        return 0  # for multi-GPU testing: someone else already did this video

    regions = []  # result and states [1 init / 2 lost / 0 skip]
    image_files, gt = video['image_files'], video['gt']

    f = 0  # keeps the summary line valid when the video has no frames
    for f, image_file in enumerate(image_files):
        im = cv2.imread(image_file)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Failed to read image: {}'.format(image_file))
        if len(im.shape) == 2:
            im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)

        tic = cv2.getTickCount()
        if f == start_frame:  # (re-)initialise from the ground truth
            cx, cy, w, h = get_axis_aligned_bbox(gt[f])
            state = tracker.init(im, np.array([cx, cy]), np.array([w, h]), net)
            regions.append(1 if is_vot else gt[f])
        elif f > start_frame:  # tracking
            state = tracker.track(state, im)
            location = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            b_overlap = poly_iou(gt[f], location) if is_vot else 1
            if b_overlap > 0:
                regions.append(location)
            else:
                regions.append(2)
                lost_times += 1
                start_frame = f + 5  # skip 5 frames before re-initialising
        else:  # skipped frame between a failure and the re-init
            regions.append(0)
        toc += cv2.getTickCount() - tic

    toc /= cv2.getTickFrequency()

    with open(result_path, "w") as fin:
        if is_vot:
            for region in regions:
                if isinstance(region, int):
                    # Bare state code: 1 init / 2 lost / 0 skip.
                    fin.write("{:d}\n".format(region))
                else:
                    fin.write(','.join([str(i) for i in region]) + '\n')
        else:
            for region in regions:
                # The first two values (x, y) are shifted by one; presumably a
                # 0-based to 1-based coordinate conversion - TODO confirm.
                fin.write(','.join([
                    str(i + 1) if idx == 0 or idx == 1 else str(i)
                    for idx, i in enumerate(region)
                ]) + '\n')

    # Guard against a zero elapsed time (e.g. an empty video).
    speed = f / toc if toc > 0 else 0.0
    print('Video: {:12s} Time: {:2.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
        video['name'], toc, speed, lost_times))

    return lost_times
def track_tune(tracker, net, video, config):
    """Run ``tracker`` over one video with the hyper-parameters in ``config``.

    The output directory name encodes the benchmark, the checkpoint stem and
    every tuned hyper-parameter. An empty result file is created up front so
    parallel tuning workers skip videos that are already taken.

    Args:
        tracker: object exposing ``init(im, target_pos, target_sz, net, hp=hp)``
            and ``track(state, im)``; the state returned by ``track`` must
            provide ``'target_pos'`` and ``'target_sz'``.
        net: model handed to ``tracker.init`` unchanged.
        video: mapping with ``'name'``, ``'image_files'`` and ``'gt'``
            (``gt`` is indexed by frame number).
        config: mapping with ``'benchmark'``, ``'resume'`` (checkpoint path)
            and ``'hp'`` (``scale_step``, ``scale_penalty``, ``w_influence``,
            ``scale_lr``).

    Returns:
        The output directory path when the result file already exists, or
        after results were written for an OTB benchmark. For a VOT benchmark,
        the list of per-frame regions (``[1.0]`` init, ``[2.0]`` lost,
        ``[0.0]`` skipped, or a rectangle); nothing is written to the file in
        that case.

    Raises:
        ValueError: after tracking, if the benchmark is neither VOT nor OTB.
        IOError: if a frame cannot be read from disk.
    """
    benchmark_name = config['benchmark']
    resume = config['resume']
    hp = config['hp']  # scale_step, scale_penalty, scale_lr, window_influence

    checkpoint_stem = resume.split('/')[-1].split('.')[0]
    run_name = (benchmark_name + checkpoint_stem +
                '_step_{:.4f}'.format(hp['scale_step']) +
                '_penalty_s_{:.4f}'.format(hp['scale_penalty']) +
                '_w_influence_{:.4f}'.format(hp['w_influence']) +
                '_scale_lr_{:.4f}'.format(hp['scale_lr'])).replace('.', '_')  # no "." in dir names
    tracker_path = os.path.join('test', run_name)
    # exist_ok avoids the check-then-create race between parallel workers.
    os.makedirs(tracker_path, exist_ok=True)

    result_path = os.path.join(tracker_path, '{:s}.txt'.format(video['name']))

    # Occupy the result file for parallel running. Mode 'x' creates the file
    # atomically, so exactly one worker can claim a given video.
    try:
        with open(result_path, 'x'):
            pass
    except FileExistsError:
        return tracker_path

    is_vot = 'VOT' in benchmark_name
    start_frame = 0
    regions = []  # result and states [1 init / 2 lost / 0 skip]
    image_files, gt = video['image_files'], video['gt']

    for f, image_file in enumerate(image_files):
        im = cv2.imread(image_file)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Failed to read image: {}'.format(image_file))
        if len(im.shape) == 2:
            im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)

        if f == start_frame:  # (re-)initialise from the ground truth
            cx, cy, w, h = get_axis_aligned_bbox(gt[f])
            state = tracker.init(im, np.array([cx, cy]), np.array([w, h]),
                                 net, hp=hp)
            regions.append([float(1)] if is_vot else gt[f])
        elif f > start_frame:  # tracking
            state = tracker.track(state, im)
            location = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            # Only VOT re-initialises on failure; elsewhere overlap is fixed to 1.
            b_overlap = poly_iou(gt[f], location) if is_vot else 1
            if b_overlap > 0:
                regions.append(location)
            else:
                regions.append([float(2)])
                start_frame = f + 5  # skip 5 frames before re-initialising
        else:  # skipped frame between a failure and the re-init
            regions.append([float(0)])

    if benchmark_name.startswith('VOT'):
        return regions
    if benchmark_name.startswith('OTB'):
        with open(result_path, "w") as fin:
            for region in regions:
                # The first two values (x, y) are shifted by one; presumably a
                # 0-based to 1-based coordinate conversion - TODO confirm.
                fin.write(','.join([
                    str(i + 1) if idx == 0 or idx == 1 else str(i)
                    for idx, i in enumerate(region)
                ]) + '\n')
        return tracker_path
    raise ValueError('not supported')
def track(siam_tracker, online_tracker, siam_net, video, args):
    """Track one video with the siamese (and optional online) tracker.

    Writes one result file per video and prints a timing summary. If the
    result file already exists the video is skipped, which lets several
    processes share one dataset.

    Args:
        siam_tracker: object exposing ``init(im, target_pos, target_sz, net)``
            and ``track(state, im)``.
        online_tracker: only used when ``args.online`` is true; must expose
            ``init(...)`` and ``track(im, rgb_im, siam_tracker, state)``.
        siam_net: model handed to both trackers' ``init`` unchanged.
        video: mapping with ``'name'``, ``'image_files'`` and ``'gt'``
            (``gt`` is indexed by frame number).
        args: namespace with ``epoch_test``, ``resume``, ``dataset``,
            ``arch`` and ``online`` attributes.

    Returns:
        None.

    Raises:
        IOError: if a frame cannot be read from disk.
    """
    start_frame, toc = 0, 0

    # Save results under result/<dataset>/<arch>[<checkpoint stem>].
    if args.epoch_test:
        suffix = args.resume.split('/')[-1].split('.')[0]
        tracker_path = os.path.join('result', args.dataset, args.arch + suffix)
    else:
        tracker_path = os.path.join('result', args.dataset, args.arch)
    # exist_ok avoids the check-then-create race in multi-GPU testing.
    os.makedirs(tracker_path, exist_ok=True)

    is_vot = 'VOT' in args.dataset
    if is_vot:
        video_path = os.path.join(tracker_path, 'baseline', video['name'])
        os.makedirs(video_path, exist_ok=True)
        result_path = os.path.join(video_path, video['name'] + '_001.txt')
    else:
        result_path = os.path.join(tracker_path, '{:s}.txt'.format(video['name']))

    if os.path.exists(result_path):
        return  # for multi-GPU testing: someone else already did this video

    regions = []  # result and states [1 init / 2 lost / 0 skip]
    lost = 0
    image_files, gt = video['image_files'], video['gt']

    f = 0  # keeps the summary line valid when the video has no frames
    for f, image_file in enumerate(image_files):
        im = cv2.imread(image_file)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Failed to read image: {}'.format(image_file))
        if len(im.shape) == 2:
            im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)  # align with training
        # The RGB copy is consumed only by the online tracker, so build it
        # only when needed and only after the grayscale fix-up above.
        rgb_im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) if args.online else None

        tic = cv2.getTickCount()
        if f == start_frame:  # (re-)initialise from the ground truth
            cx, cy, w, h = get_axis_aligned_bbox(gt[f])
            target_pos = np.array([cx, cy])
            target_sz = np.array([w, h])
            state = siam_tracker.init(im, target_pos, target_sz, siam_net)
            if args.online:
                online_tracker.init(im, rgb_im, siam_net, target_pos, target_sz,
                                    True, dataname=args.dataset,
                                    resume=args.resume)
            regions.append(1 if is_vot else gt[f])
        elif f > start_frame:  # tracking
            if args.online:
                state = online_tracker.track(im, rgb_im, siam_tracker, state)
            else:
                state = siam_tracker.track(state, im)
            location = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            # Only VOT re-initialises on failure; elsewhere overlap is fixed to 1.
            b_overlap = poly_iou(gt[f], location) if is_vot else 1
            if b_overlap > 0:
                regions.append(location)
            else:
                regions.append(2)
                start_frame = f + 5  # skip 5 frames before re-initialising
                lost += 1
        else:  # skipped frame between a failure and the re-init
            regions.append(0)
        toc += cv2.getTickCount() - tic

    # NOTE: a dataset matching none of the branches below still gets an
    # (empty) result file, exactly as before.
    with open(result_path, "w") as fin:
        if is_vot:
            for region in regions:
                if isinstance(region, int):
                    # Bare state code: 1 init / 2 lost / 0 skip.
                    fin.write("{:d}\n".format(region))
                else:
                    fin.write(','.join([str(i) for i in region]) + '\n')
        elif 'OTB' in args.dataset or 'LASOT' in args.dataset:
            for region in regions:
                # The first two values (x, y) are shifted by one; presumably a
                # 0-based to 1-based coordinate conversion - TODO confirm.
                fin.write(','.join([
                    str(i + 1) if idx == 0 or idx == 1 else str(i)
                    for idx, i in enumerate(region)
                ]) + '\n')
        elif ('VISDRONE' in args.dataset or 'GOT10K' in args.dataset
              or 'TN' in args.dataset):
            for region in regions:
                fin.write(','.join([str(i) for i in region]) + '\n')

    toc /= cv2.getTickFrequency()
    # Guard against a zero elapsed time (e.g. an empty video).
    speed = f / toc if toc > 0 else 0.0
    print('Video: {:12s} Time: {:2.1f}s Speed: {:3.1f}fps Lost {}'.format(
        video['name'], toc, speed, lost))
def track(video, args):
    """Track one RGB-T video with ``AASTracker`` and write its result file.

    Frames are read pairwise from the infrared and visible image lists. If the
    result file already exists the video is skipped, which lets several
    processes share one dataset.

    NOTE: regions are only written for datasets whose name contains ``'VOT'``;
    any other dataset ends up with an empty result file.

    Args:
        video: mapping with ``'name'``, ``'infrared_imgs'``,
            ``'visiable_imgs'`` and ``'gt'`` (``gt`` is indexed by frame
            number).
        args: namespace with a ``dataset`` attribute.

    Returns:
        None.

    Raises:
        IOError: if a frame cannot be read from disk.
    """
    start_frame, toc = 0, 0

    # Save results under result/<dataset>.
    tracker_path = os.path.join('result', args.dataset)
    # exist_ok avoids the check-then-create race in multi-GPU testing.
    os.makedirs(tracker_path, exist_ok=True)

    is_vot = 'VOT' in args.dataset
    if is_vot:
        video_path = os.path.join(tracker_path, 'baseline', video['name'])
        os.makedirs(video_path, exist_ok=True)
        result_path = os.path.join(video_path, video['name'] + '_001.txt')
    else:
        result_path = os.path.join(tracker_path, '{:s}.txt'.format(video['name']))

    if os.path.exists(result_path):
        return  # for multi-GPU testing: someone else already did this video

    regions = []  # result and states [1 init / 2 lost / 0 skip]
    lost = 0

    # for rgbt split test
    in_image_files = video['infrared_imgs']
    rgb_image_files = video['visiable_imgs']
    gt = video['gt']

    f = 0  # keeps the summary line valid when the video has no frames
    for f, in_f in enumerate(in_image_files):
        in_im = cv2.imread(in_f)
        if in_im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Failed to read image: {}'.format(in_f))
        in_im = cv2.cvtColor(in_im, cv2.COLOR_BGR2RGB)  # align with training

        rgb_im = cv2.imread(rgb_image_files[f])
        if rgb_im is None:
            raise IOError('Failed to read image: {}'.format(rgb_image_files[f]))
        rgb_im = cv2.cvtColor(rgb_im, cv2.COLOR_BGR2RGB)  # align with training

        tic = cv2.getTickCount()
        if f == start_frame:  # (re-)initialise from the ground truth
            print('===============> init tracker')
            tracker = AASTracker(rgb_im, in_im, gt[f])
            regions.append(1)
        elif f > start_frame:  # tracking
            state = tracker.track(rgb_im, in_im)
            # The first four entries of state are used as centre x/y and
            # width/height when building the output rectangle.
            pos = np.array([state[0], state[1]])
            sz = np.array([state[2], state[3]])
            location = cxy_wh_2_rect(pos, sz)
            # Only VOT re-initialises on failure; elsewhere overlap is fixed to 1.
            b_overlap = poly_iou(gt[f], location) if is_vot else 1
            if b_overlap > 0:
                regions.append(location)
            else:
                regions.append(2)
                start_frame = f + 5  # skip 5 frames before re-initialising
                lost += 1
        else:  # skipped frame between a failure and the re-init
            regions.append(0)
        toc += cv2.getTickCount() - tic

    with open(result_path, "w") as fin:
        if is_vot:
            for region in regions:
                if isinstance(region, int):
                    # Bare state code: 1 init / 2 lost / 0 skip.
                    fin.write("{:d}\n".format(region))
                else:
                    fin.write(','.join([str(i) for i in region]) + '\n')

    toc /= cv2.getTickFrequency()
    # Guard against a zero elapsed time (e.g. an empty video).
    speed = f / toc if toc > 0 else 0.0
    print('Video: {:12s} Time: {:2.1f}s Speed: {:3.1f}fps Lost {}'.format(
        video['name'], toc, speed, lost))
def track_tune(tracker, net, video, config):
    """Run ``tracker`` over one video with the hyper-parameters in ``config``.

    The output directory name encodes the benchmark, the checkpoint stem and
    the tuned hyper-parameters. An empty result file is created up front so
    parallel tuning workers skip videos that are already taken.

    Args:
        tracker: object exposing ``init(im, target_pos, target_sz, net,
            online=..., mask=..., debug=..., hp=...)`` and
            ``track(state, im)``; the state returned by ``track`` must provide
            ``'target_pos'``, ``'target_sz'``, ``'polygon'`` (four points or
            ``None``) and ``'choose_thr'``.
        net: model handed to ``tracker.init`` unchanged.
        video: mapping with ``'name'``, ``'image_files'`` and ``'gt'``
            (``gt`` is indexed by frame number).
        config: mapping with ``'benchmark'``, ``'resume'`` (checkpoint path)
            and ``'hp'`` (``small_sz``, ``big_sz``, ``choose_thr``,
            ``penalty_k``, ``window_influence``, ``lr``).

    Returns:
        If the result file already exists: the output directory for OTB,
        ``0`` for VOT/GOT10K, ``None`` otherwise. After tracking: the output
        directory when the benchmark name contains OTB, VIS, VOT or GOT10K,
        else ``None`` (a message is printed).

    Raises:
        IOError: if a frame cannot be read from disk.
    """
    benchmark_name = config['benchmark']
    resume = config['resume']
    hp = config['hp']

    checkpoint_stem = resume.split('/')[-1].split('.')[0]
    # NOTE(review): the lambda_u / lambda_s / cyclic_thr fields are all filled
    # from hp['choose_thr']; this looks like a copy-paste slip, but it is kept
    # because it determines the on-disk directory names - confirm before
    # changing.
    run_name = (benchmark_name + checkpoint_stem +
                '_small_size_{:.4f}'.format(hp['small_sz']) +
                '_big_size_{:.4f}'.format(hp['big_sz']) +
                '_lambda_u_{:.4f}'.format(hp['choose_thr']) +
                '_lambda_s_{:.4f}'.format(hp['choose_thr']) +
                '_cyclic_thr_{:.4f}'.format(hp['choose_thr']) +
                '_choose_thr_{:.4f}'.format(hp['choose_thr']) +
                '_penalty_k_{:.4f}'.format(hp['penalty_k']) +
                '_w_influence_{:.4f}'.format(hp['window_influence']) +
                '_scale_lr_{:.4f}'.format(hp['lr'])).replace('.', '_')  # no "." in dir names
    tracker_path = os.path.join('test', run_name)
    # exist_ok avoids the check-then-create race between parallel workers.
    os.makedirs(tracker_path, exist_ok=True)

    is_vot = 'VOT' in benchmark_name
    if is_vot:
        video_path = os.path.join(tracker_path, 'baseline', video['name'])
        os.makedirs(video_path, exist_ok=True)
        result_path = os.path.join(video_path, video['name'] + '_001.txt')
    elif 'GOT10K' in benchmark_name:
        re_video_path = os.path.join(tracker_path, video['name'])
        os.makedirs(re_video_path, exist_ok=True)
        result_path = os.path.join(re_video_path, '{:s}.txt'.format(video['name']))
    else:
        result_path = os.path.join(tracker_path, '{:s}.txt'.format(video['name']))

    # Occupy the result file for parallel running. Mode 'x' creates the file
    # atomically, so exactly one worker can claim a given video.
    try:
        with open(result_path, 'x'):
            pass
    except FileExistsError:
        if benchmark_name.startswith('OTB'):
            return tracker_path
        if benchmark_name.startswith(('VOT', 'GOT10K')):
            return 0
        print('benchmark not supported now')
        return None

    start_frame = 0
    regions = []  # result and states [1 init / 2 lost / 0 skip]
    image_files, gt = video['image_files'], video['gt']

    for f, image_file in enumerate(image_files):
        im = cv2.imread(image_file)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Failed to read image: {}'.format(image_file))
        if len(im.shape) == 2:
            im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)

        if f == start_frame:  # (re-)initialise from the ground truth
            cx, cy, w, h = get_axis_aligned_bbox(gt[f])
            state = tracker.init(im, np.array([cx, cy]), np.array([w, h]), net,
                                 online=False, mask=None, debug=False, hp=hp)
            regions.append([float(1)] if is_vot else gt[f])
        elif f > start_frame:  # tracking
            state = tracker.track(state, im)
            location = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            # Only VOT re-initialises on failure; elsewhere overlap is fixed to 1.
            b_overlap = poly_iou(gt[f], location) if is_vot else 1

            # Corner polygon of the predicted box; this is the fallback record.
            x1, y1, w, h = location
            x2, y2 = x1 + w, y1 + h
            record = np.array([x1, y1, x2, y1, x2, y2, x1, y2])

            # Prefer the tracker's own polygon when it agrees with the box by
            # more than the tracker-provided threshold.
            polygon = state['polygon']
            if polygon is not None:
                flat_polygon = np.array([
                    polygon[0][0], polygon[0][1], polygon[1][0], polygon[1][1],
                    polygon[2][0], polygon[2][1], polygon[3][0], polygon[3][1],
                ])
                if poly_iou(np.array(location), flat_polygon) > state['choose_thr']:
                    record = flat_polygon

            if not is_vot:
                # Change polygon to [x, y, w, h] using corners 0 and 2.
                x1, y1, x2, y2 = record[0], record[1], record[4], record[5]
                record = np.array([x1, y1, x2 - x1 + 1, y2 - y1 + 1])

            if b_overlap > 0:
                regions.append(record)
            else:
                regions.append([float(2)])
                start_frame = f + 5  # skip 5 frames before re-initialising
        else:  # skipped frame between a failure and the re-init
            regions.append([float(0)])

    # Save results in the layout each benchmark family uses.
    if 'OTB' in benchmark_name or 'LASOT' in benchmark_name:
        with open(result_path, "w") as fin:
            for region in regions:
                # The first two values (x, y) are shifted by one; presumably a
                # 0-based to 1-based coordinate conversion - TODO confirm.
                fin.write(','.join([
                    str(i + 1) if idx == 0 or idx == 1 else str(i)
                    for idx, i in enumerate(region)
                ]) + '\n')
    elif 'VISDRONE' in benchmark_name or 'GOT10K' in benchmark_name:
        with open(result_path, "w") as fin:
            for region in regions:
                fin.write(','.join([str(i) for i in region]) + '\n')
    elif is_vot:
        with open(result_path, "w") as fin:
            for region in regions:
                if isinstance(region, int):
                    fin.write("{:d}\n".format(region))
                else:
                    fin.write(','.join([str(i) for i in region]) + '\n')

    # NOTE(review): LASOT results are written above but LASOT is not in this
    # list, so the function prints the message and returns None for it.
    if any(tag in benchmark_name for tag in ('OTB', 'VIS', 'VOT', 'GOT10K')):
        return tracker_path
    print('benchmark not supported now')
    return None
def track_box(siam_tracker, online_tracker, siam_net, video, args):
    """Track a benchmark video that has box annotations only.

    Attention: not for benchmark evaluation, just a demo. Unlike the
    evaluation drivers, nothing is recorded for init frames or for the frames
    skipped after a failure.

    Args:
        siam_tracker: object exposing ``init(im, target_pos, target_sz, net,
            online=..., mask=..., debug=...)`` and
            ``track(state, im, name=...)``; the state returned by tracking
            must provide ``'mask'``, ``'target_pos'``, ``'target_sz'``,
            ``'polygon'`` (four points or ``None``) and ``'choose_thr'``.
        online_tracker: only used when ``args.online`` is true; must expose
            ``init(...)`` and ``track(im, rgb_im, siam_tracker, state)``.
        siam_net: model handed to both trackers' ``init`` unchanged.
        video: mapping with ``'name'``, ``'image_files'`` and ``'gt'``
            (``gt`` is indexed by frame number).
        args: namespace with ``dataset``, ``arch``, ``online``, ``debug``,
            ``vis`` and ``resume`` attributes.

    Returns:
        None. Returns early if the result file already exists.

    Raises:
        IOError: if a frame cannot be read from disk.
    """
    tracker_path = os.path.join('result', args.dataset, args.arch)
    is_vot = 'VOT' in args.dataset

    if is_vot:
        video_path = os.path.join(tracker_path, 'baseline', video['name'])
        os.makedirs(video_path, exist_ok=True)
        result_path = os.path.join(video_path, video['name'] + '_001.txt')
    else:
        # The directory must exist before the result file can be opened below.
        os.makedirs(tracker_path, exist_ok=True)
        result_path = os.path.join(tracker_path, '{:s}.txt'.format(video['name']))

    if os.path.exists(result_path):
        return  # for multi-GPU testing: someone else already did this video

    regions = []
    start_frame, toc = 0, 0
    image_files, gt = video['image_files'], video['gt']

    f = 0  # keeps the summary line valid when the video has no frames
    for f, image_file in enumerate(image_files):
        im = cv2.imread(image_file)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Failed to read image: {}'.format(image_file))
        if args.online:
            rgb_im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        if len(im.shape) == 2:
            im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)  # align with training

        tic = cv2.getTickCount()
        if f == start_frame:  # (re-)initialise from the ground truth
            cx, cy, w, h = get_axis_aligned_bbox(gt[f])
            target_pos = np.array([cx, cy])
            target_sz = np.array([w, h])
            state = siam_tracker.init(im, target_pos, target_sz, siam_net,
                                      online=args.online, mask=None,
                                      debug=args.debug)  # init siamese tracker
            if args.online:
                online_tracker.init(im, rgb_im, siam_net, target_pos, target_sz,
                                    True, dataname=args.dataset,
                                    resume=args.resume)
        elif f > start_frame:  # tracking
            if args.online:
                state = online_tracker.track(im, rgb_im, siam_tracker, state)
            else:
                state = siam_tracker.track(state, im, name=image_file)

            mask = state['mask']
            location = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            # Only VOT re-initialises on failure; elsewhere overlap is fixed to 1.
            b_overlap = poly_iou(gt[f], location) if is_vot else 1

            # Corner polygon of the predicted box; this is the fallback record.
            x1, y1, w, h = location
            x2, y2 = x1 + w, y1 + h
            record = np.array([x1, y1, x2, y1, x2, y2, x1, y2])

            # Prefer the tracker's own polygon when it agrees with the box by
            # more than the tracker-provided threshold.
            polygon = state['polygon']
            if polygon is not None:
                flat_polygon = np.array([
                    polygon[0][0], polygon[0][1], polygon[1][0], polygon[1][1],
                    polygon[2][0], polygon[2][1], polygon[3][0], polygon[3][1],
                ])
                if poly_iou(np.array(location), flat_polygon) > state['choose_thr']:
                    record = flat_polygon

            # The original tested an undefined `benchmark_name` here, which
            # raised NameError; the dataset name lives on `args`.
            if not is_vot:
                # Change polygon to [x, y, w, h] using corners 0 and 2.
                x1, y1, x2, y2 = record[0], record[1], record[4], record[5]
                record = np.array([x1, y1, x2 - x1 + 1, y2 - y1 + 1])

            if b_overlap > 0:
                regions.append(record)
            else:
                regions.append(2)
                start_frame = f + 5  # skip 5 frames before re-initialising

            if args.vis:
                # Index a two-row colour table (black + one random colour)
                # with the mask, then blend the result over the frame.
                COLORS = np.random.randint(128, 255, size=(1, 3), dtype="uint8")
                COLORS = np.vstack([[0, 0, 0], COLORS]).astype("uint8")
                mask = COLORS[mask]
                output = ((0.4 * im) + (0.6 * mask)).astype("uint8")
                cv2.imshow("mask", output)
                cv2.waitKey(1)

        toc += cv2.getTickCount() - tic

    with open(result_path, "w") as fin:
        if is_vot:
            for region in regions:
                if isinstance(region, int):
                    fin.write("{:d}\n".format(region))
                else:
                    fin.write(','.join([str(i) for i in region]) + '\n')
        elif 'OTB' in args.dataset or 'LASOT' in args.dataset:
            for region in regions:
                # The first two values (x, y) are shifted by one; presumably a
                # 0-based to 1-based coordinate conversion - TODO confirm.
                fin.write(','.join([
                    str(i + 1) if idx == 0 or idx == 1 else str(i)
                    for idx, i in enumerate(region)
                ]) + '\n')
        elif 'VISDRONE' in args.dataset or 'GOT10K' in args.dataset:
            for region in regions:
                fin.write(','.join([str(i) for i in region]) + '\n')

    toc /= cv2.getTickFrequency()
    # Guard against a zero elapsed time (e.g. an empty video).
    speed = f / toc if toc > 0 else 0.0
    print('Video: {:12s} Time: {:2.1f}s Speed: {:3.1f}fps'.format(
        video['name'], toc, speed))