class SiamFC(BaseTracker):
    """Adapter that drives ``TrackerSiamFC`` through the ``BaseTracker`` API.

    Frames are passed in as image file paths; each one is opened with PIL
    and converted to RGB before it is handed to the wrapped tracker.
    """

    def __init__(self):
        super(SiamFC, self).__init__("SiamFC")
        # TODO: edit this path
        self.net_file = path_config.SIAMFC_MODEL
        self.tracker = TrackerSiamFC(net_path=self.net_file)

    @staticmethod
    def _read_rgb(image_file):
        """Open ``image_file`` and return it converted to RGB."""
        return Image.open(image_file).convert("RGB")

    def initialize(self, image_file, box):
        """Initialise the wrapped tracker on the first frame with ``box``."""
        first_frame = self._read_rgb(image_file)
        self.tracker.init(first_frame, box)

    def track(self, image_file):
        """Return whatever the wrapped tracker's ``update`` yields for the frame."""
        frame = self._read_rgb(image_file)
        return self.tracker.update(frame)
def __init__(self, siamese_model_path, unet_path, dataset_path, use_cuda,
             new_w, new_h):
    """Store the configuration and build the tracker and segmentation net.

    Args:
        siamese_model_path: forwarded to ``TrackerSiamFC`` as ``net_path``.
        unet_path: path of the segmentation model; ``None`` skips loading it.
        dataset_path: stored on the instance for later use.
        use_cuda: forwarded to ``TrackerSiamFC``.
        new_w, new_h: stored on the instance (read by ``load_unet``).
    """
    # Plain configuration first: ``load_unet`` reads these attributes.
    self.dataset_path = dataset_path
    self.unet_path = unet_path
    self.new_w = new_w
    self.new_h = new_h

    self.tracker = TrackerSiamFC(net_path=siamese_model_path,
                                 use_cuda=use_cuda)

    if unet_path is None:
        print("Did not load pretrained model")
        self.seg_net = None
    else:
        print("Loading pretrained model")
        self.seg_net = self.load_unet()

    # Bookkeeping that is filled in while tracking.
    self.train_images = None
    self.tracks = {}
    self.track_count = 0
    self.result = []
    self.set_01 = None
    self.set_02 = None
def main(args):
    """Produce a segmentation for every frame, then run Siamese tracking.

    The per-frame segmentation comes from one of three sources, chosen by
    ``args``:

    * ``get_initial_seg_from_lux``: run ``predict_dataset`` /
      ``predict_dataset_2`` and read the ``mask*`` images they write;
    * ``load_segs_from_file is None``: either read stored model predictions
      (``load_model_preds_from_file``) or run ``get_segmentation``;
    * otherwise: read ready-made segmentations from ``load_segs_from_file``.

    When ``save_segs_to_file`` is set the frames are written to disk and the
    process exits without tracking.
    """
    print("Initialising Tracker and Segmentation network")
    img_shape = get_img_size(args.dataset_path, args.sequence)
    track_model = TrackerSiamFC(net_path=args.siamese_path,
                                use_cuda=args.use_cuda)
    seg_model = unet_model(img_shape, args.unet_path)
    imgs = load_images(args.dataset_path, args.sequence, args.img_extension)
    sorted_img_indx = sorted(imgs.keys(), key=natural_keys)

    # Debug modes shrink the sequence: three frames from the middle, or the
    # first two.
    if args.debug or args.debug_v2:
        if args.debug:
            middle = int(len(sorted_img_indx) / 2)
            sorted_img_indx = sorted_img_indx[middle:middle + 3]
        else:
            sorted_img_indx = sorted_img_indx[:2]
        print("Debugging conditions set")

    frames = dict()

    if args.save_model_preds_to_file is not None:
        # The message shows the directory name as given, before it is joined
        # with the dataset path.
        print("Creating '{}' to save model predictions".format(
            args.save_model_preds_to_file))
        args.save_model_preds_to_file = os.path.join(
            args.dataset_path, args.save_model_preds_to_file)
        prepare_dir(args.save_model_preds_to_file)

    if args.load_model_preds_from_file is not None:
        args.load_model_preds_from_file = os.path.join(
            args.dataset_path, args.load_model_preds_from_file)

    print("Getting segmentation")
    if args.get_initial_seg_from_lux:
        # Last path component, tolerating one trailing slash.
        path_parts = args.dataset_path.split('/')
        if args.dataset_path[-1] == '/':
            dataset_name = path_parts[-2]
        else:
            dataset_name = path_parts[-1]
        print("Getting initial segmentation for {}".format(dataset_name))

        lux_dir = "mulux_0{}".format(args.sequence)
        predict_dataset(
            name=dataset_name,
            sequence="0{}".format(args.sequence),
            model_path='model_weights/unet_{}.h5'.format(dataset_name),
            output_path=lux_dir)
        predict_dataset_2(path=lux_dir, output_path=lux_dir)

        mask_paths = sorted(
            glob.glob("mulux_0{}/mask*.{}".format(args.sequence,
                                                  args.img_extension)),
            key=natural_keys)
        for img_indx_id, img_name in enumerate(mask_paths):
            mask = cv2.imread(img_name, cv2.IMREAD_ANYDEPTH)
            frames[sorted_img_indx[img_indx_id]] = get_frame(mask)
        # shutil.rmtree("mulux_0{}".format(args.sequence), ignore_errors=True)
    elif args.load_segs_from_file is None:
        for img_indx in sorted_img_indx:
            if args.load_model_preds_from_file is not None:
                img_path = os.path.join(
                    args.load_model_preds_from_file,
                    "{}{}.tif".format(args.preds_extension, img_indx))
                pred = cv2.imread(img_path, cv2.IMREAD_ANYDEPTH)
                # Report unreadable or all-zero predictions; only an
                # unreadable one is fatal.
                if pred is None or np.sum(pred) == 0:
                    print(img_path)
                assert pred is not None
            else:
                pred = get_segmentation(imgs[img_indx], seg_model)

            if args.load_preds_as_are:
                frames[img_indx] = pred
            else:
                frames[img_indx] = get_frame(get_labels(pred))

            if args.save_model_preds_to_file is not None:
                img_path_name = os.path.join(args.save_model_preds_to_file,
                                             "{}.tif".format(img_indx))
                cv2.imwrite(img_path_name, pred.astype(np.uint8))
    else:
        load_segs_dir = os.path.join(args.dataset_path,
                                     args.load_segs_from_file)
        seg_names = make_list_of_imgs_only(os.listdir(load_segs_dir),
                                           args.img_extension)
        imgs_list = sorted(seg_names, key=natural_keys)
        print("Reading segs from: {}".format(load_segs_dir))
        for img_indx_id, img_name in enumerate(imgs_list):
            seg = cv2.imread(os.path.join(load_segs_dir, img_name),
                             cv2.IMREAD_ANYDEPTH)
            frames[sorted_img_indx[img_indx_id]] = get_frame(seg)

    if args.save_segs_to_file is not None:
        save_segs_dir = os.path.join(args.dataset_path,
                                     args.save_segs_to_file)
        prepare_dir(save_segs_dir)
        print("Saving segs to: {}".format(save_segs_dir))
        for img_indx in sorted_img_indx:
            img_path_name = os.path.join(
                save_segs_dir, "{}.{}".format(img_indx, args.img_extension))
            cv2.imwrite(img_path_name, frames[img_indx].astype(np.uint8))
        # Export-only run: stop before tracking.
        exit()

    do_siamese_tracking(frames, imgs, sorted_img_indx, track_model, args)
def main(mode='IR', visulization=False):
    """Track every video under ``dataset/test-dev`` and report the scores.

    For each video directory the tracker is initialised on the first
    ground-truth box of ``<mode>_label.json``, run over ``<mode>.mp4``, and
    the predicted boxes are written as JSON to
    ``results/<tracker.name>/<video>_<mode>.txt``. The per-video score comes
    from the module's ``eval`` helper; the mean over all videos is printed
    at the end.

    Args:
        mode: ``'IR'`` or ``'RGB'``; selects the video and label files.
        visulization: when true, show every frame with the ground truth
            (if the target exists) and the predicted box drawn on it.
    """
    assert mode in ['IR', 'RGB'], 'Only Support IR or RGB to evalute'

    green, red, yellow = (0, 255, 0), (0, 0, 255), (0, 255, 255)

    # setup tracker
    net_path = 'model.pth'
    tracker = TrackerSiamFC(net_path=net_path)

    # setup experiments
    video_paths = glob.glob(os.path.join('dataset', 'test-dev', '*'))
    video_num = len(video_paths)
    output_dir = os.path.join('results', tracker.name)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    overall_performance = []

    # run tracking experiments and report performance
    for video_id, video_path in enumerate(video_paths, start=1):
        video_name = os.path.basename(video_path)
        video_file = os.path.join(video_path, '%s.mp4' % mode)
        res_file = os.path.join(video_path, '%s_label.json' % mode)
        with open(res_file, 'r') as f:
            label_res = json.load(f)

        init_rect = label_res['gt_rect'][0]
        capture = cv2.VideoCapture(video_file)

        out_res = []
        frame_id = 0
        ret, frame = capture.read()
        while ret:
            if frame_id == 0:
                # initialization
                tracker.init(frame, init_rect)
                out = init_rect
                out_res.append(init_rect)
            else:
                # tracking
                out = tracker.update(frame)
                out_res.append(out.tolist())

            if visulization:
                _gt = label_res['gt_rect'][frame_id]
                _exist = label_res['exist'][frame_id]
                if _exist:
                    gt_top_left = (int(_gt[0]), int(_gt[1]))
                    gt_bottom_right = (int(_gt[0] + _gt[2]),
                                       int(_gt[1] + _gt[3]))
                    cv2.rectangle(frame, gt_top_left, gt_bottom_right, green)
                cv2.putText(frame, 'exist' if _exist else 'not exist',
                            (frame.shape[1] // 2 - 20, 30), 1, 2,
                            green if _exist else red, 2)
                out_top_left = (int(out[0]), int(out[1]))
                out_bottom_right = (int(out[0] + out[2]),
                                    int(out[1] + out[3]))
                cv2.rectangle(frame, out_top_left, out_bottom_right, yellow)
                cv2.imshow(video_name, frame)
                cv2.waitKey(1)

            frame_id += 1
            ret, frame = capture.read()
        capture.release()

        if visulization:
            cv2.destroyAllWindows()

        # save result
        output_file = os.path.join(output_dir,
                                   '%s_%s.txt' % (video_name, mode))
        with open(output_file, 'w') as f:
            json.dump({'res': out_res}, f)

        # ``eval`` is the project's scoring function (shadows the builtin).
        mixed_measure = eval(out_res, label_res)
        overall_performance.append(mixed_measure)
        print('[%03d/%03d] %20s %5s Fixed Measure: %.03f' %
              (video_id, video_num, video_name, mode, mixed_measure))

    print('[Overall] %5s Mixed Measure: %.03f\n' %
          (mode, np.mean(overall_performance)))
from __future__ import absolute_import import os os.environ["CUDA_VISIBLE_DEVICES"] = "0" import glob import numpy as np import torch from siamfc import TrackerSiamFC if __name__ == '__main__': print(torch.cuda.is_available()) seq_dir = os.path.expanduser('D:\Dataset\OTB100\Crossing\\') #seq_dir='F:\\data\\OTB100\\Crossing\img' img_files = sorted(glob.glob(seq_dir + 'img/*.jpg')) #img_files是视频序列 glob.glob获取指定目录下的所有jpg文件,再进行排序 #img_files[0]='F:\\data\\OTB100\\Crossing\\img\\0001.jpg' 依次类推 # img_files = sorted(glob.glob('F:\data\OTB100\Crossing\img/*.jpg')) #这个跟上面一样 anno = np.loadtxt(seq_dir + 'groundtruth_rect.txt', delimiter=',') #anno[0]=array([205.,151.,17.,50.])就是第一帧中的groundtruth,依此类推 net_path = 'pretrained/siamfc_alexnet_e42.pth' tracker = TrackerSiamFC(net_path=net_path) tracker.track(img_files, anno[0], visualize=True) #传入120帧图片以及第一个groundtruth,进行可视化
'Bolt2', 'Boy', 'Car2', 'Car24', 'Coke', 'Coupon', 'Crossing', 'Dancer', 'Dancer2', 'David2', 'David3', 'Dog', 'Dog1', 'Doll', 'FaceOcc1', 'FaceOcc2', 'Fish', 'FleetFace', 'Football1', 'Freeman1', 'Freeman3', 'Girl2', 'Gym', 'Human2', 'Human5', 'Human7', 'Human8', 'Jogging', 'KiteSurf', 'Lemming', 'Man', 'Mhyang', 'MountainBike', 'Rubik', 'Singer1', 'Skater', 'Skater2', 'Subway', 'Suv', 'Tiger1', 'Toy', 'Trans', 'Twinnings', 'Vase' ] if __name__ == '__main__': nbins_iou = 21 nbins_ce = 51 video_path = 'E:\\xxx\\OTB2015\\Bolt' img_files = sorted(glob.glob(os.path.join(video_path, 'img/*.jpg'))) anno_files = glob.glob(os.path.join(video_path, 'groundtruth_rect*.txt')) with open(anno_files[0], 'r') as f: anno = np.loadtxt(io.StringIO(f.read().replace(',', ' '))) net_path = './pretrained/model.pth' tracker = TrackerSiamFC(net_path=net_path) boxes, _, fps = tracker.track(img_files, anno[0, :], visualize=True, debug=False, gt=anno) ious, center_errors = _calc_metrics(boxes, anno) succ_curve, prec_curve = _calc_curves(ious, center_errors) print('OP is {:.3f},DP is {:.3f},AUC is {:.3f},fps is {:.3f}'.format( len(ious[ious > 0.5]) / len(ious), prec_curve[20], np.mean(succ_curve), fps))
from __future__ import absolute_import import os import glob import numpy as np from siamfc import TrackerSiamFC if __name__ == '__main__': seq_dir = os.path.expanduser('E:/Datasets/OTB100/hongwai/') img_files = sorted(glob.glob(seq_dir + 'img/*.jpg')) anno = np.loadtxt(seq_dir + 'groundtruth_rect.txt', delimiter=',') net_path = 'C:/Users/however/Desktop/siamfc-pytorch-master/pretrained/siamfc_alexnet_e49.pth' tracker = TrackerSiamFC(net_path=net_path) tracker.track(img_files, anno[0], visualize=True)
from __future__ import absolute_import

import os
from got10k.experiments import *

from siamfc import TrackerSiamFC


if __name__ == '__main__':
    # Checkpoint evaluated by this script.
    net_path = 'models/siamfc_alexnet_e50.pth'#
    tracker = TrackerSiamFC(net_path=net_path)  # initialise a tracker

    # Exactly one benchmark is active at a time; the commented pairs below
    # are the alternatives.
    # root_dir = os.path.abspath('datasets/OTB')
    # e = ExperimentOTB(root_dir, version=2013)

    root_dir = os.path.abspath('datasets/OTB')
    e = ExperimentOTB(root_dir, version=2015)

    # root_dir = os.path.abspath('datasets/UAV123')
    # e = ExperimentUAV123(root_dir, version='UAV123')

    # root_dir = os.path.abspath('datasets/UAV123')
    # e = ExperimentUAV123(root_dir, version='UAV20L')

    # root_dir = os.path.abspath('datasets/DTB70')
    # e = ExperimentDTB70(root_dir)

    # root_dir = os.path.abspath('datasets/UAVDT')
    # e = ExperimentUAVDT(root_dir)

    # root_dir = os.path.abspath('datasets/VisDrone')
pair_dataset = Pairwise(seq_got_dataset) + Pairwise(seq_vid_dataset) print(len(pair_dataset)) # setup data loader cuda = torch.cuda.is_available() loader = DataLoader(pair_dataset, batch_size = config.batch_size, shuffle = True, pin_memory = cuda, drop_last = True, num_workers= config.num_workers) # setup tracker net_path = 'model2/model_e13.pth' tracker = TrackerSiamFC() # training loop for epoch in range(config.epoch_num): train_loss = [] for step, batch in enumerate(tqdm(loader)): # loss = tracker.step(batch, # backward=True, # update_lr=(step == 0)) # train_loss.append(loss) # sys.stdout.flush() test = step # save checkpoint
seq_dataset = ImageNetVID(root_dir, subset=('train', 'val')) else: raise NotImplementedError pair_dataset = Pairwise(seq_dataset) # setup data loader loader = DataLoader(pair_dataset, batch_size=opt.batch_size, shuffle=True, pin_memory=opt.cuda, drop_last=True, num_workers=opt.num_workers) # setup tracker tracker = TrackerSiamFC(name=opt.name, weight=opt.weight, device=opt.device) # training loop itr = 0 num_itrs = int((opt.num_epochs * len(loader)) / opt.print_freq) + 1 loss_logger = Logger(os.path.join(opt.log_dir, 'loss.csv'), num_itrs) loss_meter = AverageMeter() for epoch in range(opt.num_epochs): for step, batch in enumerate(loader): loss = tracker.step(batch, backward=True, update_lr=(step == 0)) itr += 1 loss_meter.update(loss) if itr % opt.print_freq == 0: print('Epoch [{}/{}] itr [{}]: Loss: {:.5f}'.format(
subset=('train', 'val')) pair_dataset = Pairwise(seq_got_dataset) + Pairwise(seq_vid_dataset) print(len(pair_dataset)) # setup data loader cuda = torch.cuda.is_available() loader = DataLoader(pair_dataset, batch_size=config.batch_size, shuffle=True, pin_memory=cuda, drop_last=True, num_workers=config.num_workers) # setup tracker tracker = TrackerSiamFC() # training loop for epoch in range(config.epoch_num): for step, batch in enumerate(loader): loss = tracker.step(batch, backward=True, update_lr=(step == 0)) if step % config.show_step == 0: print('Epoch [{}][{}/{}]: Loss: {:.3f}'.format( epoch + 1, step + 1, len(loader), loss)) sys.stdout.flush() # save checkpoint net_path = os.path.join('model', 'model_e%d.pth' % (epoch + 1)) torch.save(tracker.net.state_dict(), net_path)
def main(dataset, data_time, detector):
    """Detect pedestrians in a video, track them briefly and flag groups.

    Every ``frame_skip``-th frame is run through a Faster R-CNN detector.
    Each accepted person box is projected to world coordinates. When a
    SiamFC tracker is configured for the dataset, the box is also followed
    over a short window of frames. For every pair of tracks the change of
    their mutual distance is regressed against time; a p-value of at least
    0.05 marks the pair as walking together.

    Per-frame results are pickled to
    ``results/<data_time>_<detector>/<dataset>/statistic_data.p``, and the
    early frames are also rendered to PNG files in that directory.

    Args:
        dataset: one of ``'oxford_town'``, ``'oxford_town_group'``,
            ``'mall'`` or ``'grand_central'``.
        data_time: label used in the result directory name.
        detector: label used in the result directory name.

    Raises:
        Exception: if ``dataset`` is not one of the supported names.
    """
    n_track_frames = 4   # frames handed to the tracker per detection
    track_stride = 5     # distance (in frames) between those frames
    last_frame = 7450    # stop after this frame index
    last_vis_frame = 3000  # render figures only up to this frame index

    path_result = os.path.join('results', data_time + '_' + detector, dataset)
    os.makedirs(path_result, exist_ok=True)

    # initialize detector
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)
    # model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    model.to(device=device)
    model.eval()

    # load transformation matrix
    transform_cam2world = np.loadtxt('calibration/' + dataset +
                                     '_matrix_cam2world.txt')

    # Tracking is only configured for 'oxford_town_group'. The defaults keep
    # the other datasets from hitting undefined names.
    tracker = None
    frame_images = []

    # open video of dataset
    if dataset == 'oxford_town':
        cap = cv2.VideoCapture(os.path.join('datasets', 'TownCentreXVID.avi'))
        frame_skip = 10  # oxford town dataset has fps of 25
        thr_score = 0.9
    elif dataset == 'oxford_town_group':
        cap = cv2.VideoCapture(os.path.join('datasets', 'TownCentreXVID.avi'))
        path_track_frames = os.path.join(os.getcwd(), 'datasets',
                                         'dataset_tracks', 'TownCentre',
                                         'img1')
        # os.path.join replaces the hand-written backslash separators, which
        # were invalid escape sequences and only worked on Windows.
        frame_images = sorted(
            glob.glob(os.path.join(path_track_frames, '*.jpg')))
        net_path = os.path.join(os.getcwd(), 'tracker', 'siamfc_pytorch',
                                'tools', 'pretrained',
                                'siamfc_alexnet_e50.pth')
        tracker = TrackerSiamFC(net_path=net_path)
        frame_skip = 10  # oxford town dataset has fps of 25
        thr_score = 0.9
    elif dataset == 'mall':
        cap = cv2.VideoCapture(os.path.join('datasets', 'mall.mp4'))
        frame_skip = 1
        thr_score = 0.9
    elif dataset == 'grand_central':
        cap = cv2.VideoCapture(os.path.join('datasets', 'grandcentral.avi'))
        frame_skip = 25  # grand central dataset has fps of 25
        thr_score = 0.5
    else:
        raise Exception('Invalid Dataset')

    statistic_data = []
    i_frame = 0
    while cap.isOpened() and i_frame <= last_frame:
        ret, img = cap.read()
        print("at frame " + str(i_frame) + "------")
        if ret is False:
            break

        # Only run the social distancing system every ``frame_skip`` frames.
        if i_frame % frame_skip != 0:
            i_frame += 1
            continue

        vis = i_frame <= last_vis_frame

        # counting process time
        t0 = time.time()

        # convert image from OpenCV format to PyTorch tensor format
        img_t = np.moveaxis(img, -1, 0) / 255
        img_t = torch.tensor(img_t, device=device).float()

        # pedestrian detection (inference only, so no autograd graph)
        with torch.no_grad():
            predictions = model([img_t])
        boxes = predictions[0]['boxes'].cpu().data.numpy()
        classIDs = predictions[0]['labels'].cpu().data.numpy()
        scores = predictions[0]['scores'].cpu().data.numpy()

        # The tracker needs a full window of frames starting at the current
        # one. This depends only on ``i_frame``, so within one frame either
        # every accepted detection gets a track or none does. That keeps
        # track indices aligned with ``pts_world`` indices.
        track_images = [
            frame_images[i_frame + track_stride * z]
            for z in range(n_track_frames)
            if i_frame + track_stride * z < len(frame_images)
        ]
        can_track = (tracker is not None
                     and len(track_images) == n_track_frames)

        # get positions and plot on raw image
        pts_world = []
        iter_tracks = []
        for i in range(len(boxes)):
            # keep only confident detections of class 1 (person)
            if not (classIDs[i] == 1 and scores[i] > thr_score):
                continue

            # the detector gives corner coordinates x1, y1, x2, y2
            (x1, y1) = (boxes[i][0], boxes[i][1])
            (x2, y2) = (boxes[i][2], boxes[i][3])

            if can_track:
                # the tracker expects [x1, y1, width, height]
                track_box_in = [x1, y1, x2 - x1, y2 - y1]
                iter_tracks.append(
                    tracker.track(track_images, track_box_in))

            if vis:
                # draw a bounding box rectangle and label on the image;
                # OpenCV needs integer pixel coordinates
                cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)),
                              [0, 0, 255], 2)
                text = "{}: {:.2f}".format(LABELS[classIDs[i]], scores[i])
                cv2.putText(img, text, (int(x1), int(y1) - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255, 0], 2)

            # find the bottom center position and convert it to world
            # coordinate
            p_c = np.array([[(x1 + x2) / 2], [y2], [1]])
            p_w = transform_cam2world @ p_c
            p_w = p_w / p_w[2]
            pts_world.append([p_w[0][0], p_w[1][0]])

        # Convert the bottom centre of every tracked box to world
        # coordinates, following the same steps as for the detections.
        n_tracks = len(iter_tracks)
        track_world = np.zeros((n_tracks, n_track_frames, 3))
        for w, track in enumerate(iter_tracks):
            # NOTE(review): element 0 of the tracker result is indexed as the
            # per-frame [x, y, width, height] boxes — confirm against
            # TrackerSiamFC.track.
            track_boxes = track[0]
            for u in range(n_track_frames):
                box = track_boxes[u]
                btm_cntr = np.array([(box[0] + box[0] + box[2]) / 2,
                                     box[1] + box[3],
                                     1.0])
                world = transform_cam2world @ btm_cntr
                track_world[w][u] = world / world[2]

        # Difference between every pair of tracks. (j, i) is just the
        # negative of (i, j), so only i < j is stored.
        track_differences = {}
        for w in range(n_tracks - 1):
            for u in range(w + 1, n_tracks):
                track_differences[w, u] = (track_world[w, :, :2] -
                                           track_world[u, :, :2])

        # Outcome per detection pair (i, j): True if they form a group.
        # Pairs without tracking evidence default to "not a group".
        # NOTE(review): assumes find_violation looks pairs up by detection
        # index — confirm.
        n_dets = len(pts_world)
        track_regression_out = {(w, u): False
                                for w in range(n_dets - 1)
                                for u in range(w + 1, n_dets)}

        # Regress the change in pair distance against time. If p >= 0.05 we
        # fail to reject the null hypothesis of no change, i.e. the two
        # tracks are walking together.
        x_sample = list(range(1, n_track_frames + 1))
        for pair, diff in track_differences.items():
            dist = np.linalg.norm(diff, axis=1)
            pair_norm = [dist[0] * 0.001]
            pair_norm.extend(dist[j] - dist[0]
                             for j in range(1, n_track_frames))
            p_value = sm.OLS(pair_norm, x_sample).fit().pvalues
            track_regression_out[pair] = not (p_value[0] < 0.05)

        t1 = time.time()

        # reshape keeps the array two-dimensional when nothing was detected
        pts_world = np.array(pts_world).reshape(-1, 2)
        if dataset in ('oxford_town', 'oxford_town_group'):
            # these datasets store world x and y swapped
            pts_world[:, [0, 1]] = pts_world[:, [1, 0]]

        statistic_data.append(
            (i_frame, t1 - t0, pts_world, track_regression_out))

        # visualize
        if vis:
            violation_pairs = find_violation(pts_world, track_regression_out)
            pts_roi_world, pts_roi_cam = get_roi_pts(
                dataset=dataset,
                roi_raw=ROIs[dataset],
                matrix_c2w=transform_cam2world)
            fig = plot_frame_one_row(dataset=dataset,
                                     img_raw=img,
                                     pts_roi_cam=pts_roi_cam,
                                     pts_roi_world=pts_roi_world,
                                     pts_w=pts_world,
                                     pairs=violation_pairs)
            fig.savefig(os.path.join(path_result, 'frame%04d.png' % i_frame))
            plt.close(fig)

        # update loop info
        print('Frame %d - Inference Time: %.2f' % (i_frame, t1 - t0))
        print('=======================')
        i_frame += 1

    if cap.isOpened():
        cap.release()

    # save statistics
    with open(os.path.join(path_result, 'statistic_data.p'), 'wb') as f:
        pickle.dump(statistic_data, f)
from __future__ import absolute_import #?? import os from got10k.datasets import * from siamfc import TrackerSiamFC import multiprocessing multiprocessing.set_start_method('spawn',True) if __name__ == '__main__': root_dir = os.path.abspath('data/GOT-10k')#获取当前工作目录 seqs = GOT10k(root_dir, subset='train', return_meta=True) tracker = TrackerSiamFC(net_path=None) #优化器,GPU,损失函数,网络模型 tracker.train_over(seqs)
from __future__ import absolute_import import os import glob import numpy as np from siamfc import TrackerSiamFC import cv2 as cv if __name__ == '__main__': #X:\F_public\public\workspace\exchange\pedestrian\sampleclips\bsd\project_tracking\London\correct tracker = TrackerSiamFC(net_path=net_path) filePath = '/home/streamx/workspace/train4/' # '/home/streamx/workspace/tracking' for videoPath in os.listdir(filePath): if '.avi' in videoPath: tracker.track_video(filePath+videoPath)
from siamfc import TrackerSiamFC
from config import config
# ExperimentOTB was used below without being imported.
from got10k.experiments import ExperimentOTB
import os
import glob
import numpy as np


if __name__ == '__main__':
    # Evaluate every checkpoint found in ``model/`` on OTB-2015. Each
    # checkpoint writes to its own results and reports sub-directory.
    folder_path = 'model'
    results = 'results'
    reports = 'reports'

    # sorted() keeps the paths as plain ``str`` objects in lexicographic
    # order.
    model = sorted(glob.glob(os.path.join(folder_path, "*.pth")))

    for model_path in model:
        model_name = os.path.splitext(os.path.basename(model_path))[0]
        results_path_bbox = os.path.join(results, model_name)
        reports_path_graph = os.path.join(reports, model_name)

        # setup the tracker to access the pre-trained model
        tracker_test = TrackerSiamFC(net_path=model_path)
        experiments = ExperimentOTB(config.OTB_dataset_directoty,
                                    version=2015,
                                    result_dir=results_path_bbox,
                                    report_dir=reports_path_graph)

        # run the experiments for tracking to report the performance
        experiments.run(tracker_test, visualize=False)
        experiments.report([tracker_test.name])
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 28 15:47:29 2020

@author: Xiang Li
"""
import os
import glob
import numpy as np
from siamfc import TrackerSiamFC


if __name__ == '__main__':
    # Track one OTB sequence and report the entropy and peak statistics
    # returned by ``track_with_entropy``.
    seq_dir = os.path.expanduser(
        'C:/Users/xw/Desktop/Siamese-based-object-tracking/data/OTB/Walking2')
    img_files = sorted(glob.glob(seq_dir + '/img/*.jpg'))
    # This sequence's annotation file is tab-separated; others use commas:
    # anno = np.loadtxt(seq_dir + '/groundtruth_rect.txt', delimiter=',')
    anno = np.loadtxt(seq_dir + '/groundtruth_rect.txt', delimiter='\t')

    net_path = 'C:/Users/xw/Desktop/Siamese-based-object-tracking/nets/siamfc_alexnetv1_2000_e50.pth'
    # net_path = 'C:/Users/xw/Desktop/Siamese-based-object-tracking/nets/siamfc_alexnetV2_2000_e50.pth'
    tracker = TrackerSiamFC(net_path=net_path)

    # ``visualize`` is a flag: pass a real boolean. The previous string
    # 'True' only worked because any non-empty string is truthy ('False'
    # would have enabled visualisation too).
    _, _, entropy, peak = tracker.track_with_entropy(img_files, anno[0],
                                                     visualize=True)
    print('average entropy of the feature maps: ', entropy)
    print('peak value', peak)
from __future__ import absolute_import

from got10k.experiments import *

from siamfc import TrackerSiamFC
from options import TestOptions


if __name__ == '__main__':
    opt = TestOptions().parse()

    # setup tracker
    net_path = 'pretrained/siamfc/model.pth'
    tracker = TrackerSiamFC(name=opt.name, weight=opt.weight,
                            device=opt.device)

    # setup experiments: map every requested benchmark name to its
    # experiment object
    experiments = []
    for exp_name in opt.exps:
        if exp_name in ['otb2013', 'OTB2013', 'OTB-2013']:
            experiments.append(ExperimentOTB('data/OTB', version=2013))
        elif exp_name in ['otb2015', 'OTB2015', 'OTB-2015']:
            experiments.append(ExperimentOTB('data/OTB', version=2015))
        elif exp_name in ['vot2018', 'VOT2018', 'VOT-2018']:
            experiments.append(ExperimentVOT('data/vot2018', version=2018))
        elif exp_name in ['got10k', 'GOT10k', 'GOT-10k']:
            experiments.append(ExperimentGOT10k('data/GOT-10k',
                                                subset='test'))
        else:
            # The misspelt ``NotImplementederror`` raised a NameError here
            # instead of the intended exception.
            raise NotImplementedError
from __future__ import absolute_import import os from got10k.datasets import * from siamfc import TrackerSiamFC if __name__ == '__main__': root_dir = os.path.expanduser('~/data/GOT-10k') seqs = GOT10k(root_dir, subset='train', return_meta=True) tracker = TrackerSiamFC() tracker.train_over(seqs)
parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file") parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file") parser.add_argument("--conf_thres", type=float, default=0.1, help="object confidence threshold") parser.add_argument("--nms_thres", type=float, default=0.4, help="iou thresshold for non-maximum suppression") parser.add_argument("--one_scale_thres", type=float, default=3, help="one scale change thresshold for filtering detected candidates") parser.add_argument("--two_scale_thres", type=float, default=1.4, help="two scale change thresshold for filtering detected candidates") parser.add_argument("--batch_size", type=int, default=1, help="size of the batches") parser.add_argument("--n_cpu", type=int, default=0, help="number of cpu threads to use during batch generation") parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") parser.add_argument("--checkpoint_model", type=str, help="path to checkpoint model") if __name__ == '__main__': opt = parser.parse_args() # setup tracker tracker = TrackerSiamFC() dataset_name='VOT2018' seq_name='gymnastics1' gt_boxes=[] if dataset_name=='VOT2018': img_path = BASE_PATH + dataset_name + '/' + seq_name gt_file=BASE_PATH+dataset_name+'/'+seq_name+'/groundtruth.txt' with open(gt_file, 'r') as f: lines = f.readlines() for line in lines: x1, y1, x2, y2, x3, y3, x4, y4 = np.array(line.strip().split(',')).astype('float') x = np.min((x1, x4)) y = np.min((y1, y2)) w = np.max((x2, x3)) - np.min((x1, x4))
from __future__ import absolute_import import os from got10k.experiments import * import sys sys.path.append('.') from siamfc import TrackerSiamFC if __name__ == '__main__': net_path = 'pretrained/siamfc_alexnet_pruning_e50.pth' tracker = TrackerSiamFC(net_path=net_path) root_dir = os.path.expanduser('~/dataset/otb100') e = ExperimentOTB(root_dir, version=2015) e.run(tracker) e.report([tracker.name])
class CellTracker:
    """Segment cells in an image sequence and link them into tracks.

    A segmentation network (``seg_net``) produces per-frame predictions, a
    ``TrackerSiamFC`` instance predicts where a cell's bounding box moves
    between two frames, and ``run_test`` combines both to build ``CellTrack``
    objects, relabel the segmentation per track and export the results.
    """

    def __init__(self, siamese_model_path, unet_path, dataset_path, use_cuda,
                 new_w, new_h):
        """Create the SiamFC tracker and, if a path is given, the U-Net.

        Args:
            siamese_model_path: passed to ``TrackerSiamFC`` as ``net_path``.
            unet_path: passed to ``create_model``; ``None`` skips loading and
                leaves ``seg_net`` as ``None``.
            dataset_path: root directory used for reading frames and writing
                results.
            use_cuda: forwarded to ``TrackerSiamFC``.
            new_w: size of the first padded image axis used by ``predict_seg``.
            new_h: size of the second padded image axis used by
                ``predict_seg``.
        """
        self.dataset_path = dataset_path
        self.unet_path = unet_path
        self.new_w = new_w
        self.new_h = new_h
        self.tracker = TrackerSiamFC(net_path=siamese_model_path,
                                     use_cuda=use_cuda)
        if unet_path is not None:
            print("Loading pretrained model")
            self.seg_net = self.load_unet()
        else:
            print("Did not load pretrained model")
            self.seg_net = None
        self.train_images = None
        self.tracks = {}
        self.track_count = 0
        self.result = []
        self.set_01, self.set_02 = None, None

    def load_unet(self):
        """Build the segmentation model via ``create_model`` and return it."""
        return create_model(self.unet_path, self.new_w, self.new_h)

    @staticmethod
    def _frame_name(img_path, extension):
        """Return the file name of *img_path* without one leading "t" and
        without everything from *extension* onwards.

        Working on the base name keeps the result independent of the path
        separator and of any "/t" occurring in a parent directory.
        """
        base = os.path.basename(img_path)
        if base.startswith("t"):
            base = base[1:]
        return base.split(extension)[0]

    def load_evaluation_images(self, sequence, extension=".tif"):
        """Load every frame of a sequence as a ``CellImage``.

        Args:
            sequence: sequence number; frames are read from
                ``<dataset_path>/0<sequence>``.
            extension: file extension of the frames to load.

        Returns:
            List of ``CellImage`` objects sorted by ``image_name``.
        """
        seg_dir = "/0{}".format(sequence)
        pattern = os.path.join(self.dataset_path + seg_dir, "*" + extension)
        print("Loading test images from {}".format(pattern))
        result = []
        for img_path in glob.glob(pattern):
            name = self._frame_name(img_path, extension)
            img = cv2.imread(img_path, cv2.IMREAD_ANYDEPTH)
            # No segmentation is attached at load time.
            result.append(
                CellImage(img, name, self.dataset_path, sequence, None))
        return sorted(result, key=lambda x: x.image_name)

    # predicts binary segmentation for input image using the unet
    def predict_seg(self, input_img, thr_markers=240, thr_cell_mask=230):
        """Run the segmentation network on one image.

        The image is clipped at 255, histogram-equalised, scaled to
        [-0.5, 0.5] and, when ``new_w`` or ``new_h`` is positive, zero-padded
        into a ``(1, new_w, new_h, 1)`` array before prediction.

        Args:
            input_img: 2-D image array.
            thr_markers: currently unused; kept for interface compatibility.
            thr_cell_mask: currently unused; kept for interface compatibility.

        Returns:
            The raw output of ``seg_net.predict`` (no post-processing is
            applied here).
        """
        dim0 = np.shape(input_img)[0]
        dim1 = np.shape(input_img)[1]
        img = cv2.equalizeHist(
            np.minimum(input_img, 255).astype(np.uint8)) / 255
        img = img.reshape((1, dim0, dim1, 1)) - .5
        if self.new_w > 0 or self.new_h > 0:
            padded = np.zeros((1, self.new_w, self.new_h, 1),
                              dtype=np.float32)
            padded[:, :dim0, :dim1, :] = img
            img = padded
        return self.seg_net.predict(img, batch_size=1)

    # writes the frames with cell locations to a video file
    def store_footage(self, sequence, fps: int = 3):
        """Write ``self.result`` to ``<dataset_path>/result_0<sequence>.avi``.

        Args:
            sequence: sequence number used in the output file name.
            fps: frame rate of the written video.

        Does nothing (apart from a message) when there are no frames, e.g.
        for a single-frame sequence.
        """
        if not self.result:
            print("No frames to write; skipping video export")
            return
        first = self.result[0]
        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(
            "{}/result_0{}.avi".format(self.dataset_path, sequence), fourcc,
            float(fps), (first.shape[1], first.shape[0]))
        try:
            for frame in self.result:
                out.write(frame.astype(np.uint8))
        finally:
            # Release the writer even if a frame fails to convert or write.
            out.release()

    # predict the new location of a cell located in frame1:
    def predict_cell_location(self, frame1, frame2, cell):
        """Predict the bounding box of *cell* (seen in *frame1*) in *frame2*.

        The tracker is initialised on *frame1* with the cell's
        ``[min_col, min_row, width, height]`` box and updated on *frame2*.

        Returns:
            ``[x_min, y_min, x_max, y_max]`` as ints (truncated).
        """
        self.tracker.init(
            frame1, [cell.min_col, cell.min_row, cell.width, cell.height])
        x, y, w, h = self.tracker.update(frame2)
        return [int(x), int(y), int(x + w), int(y + h)]

    # uses random walker with markers from previous frame to predict a new
    # segmentation for collided cells
    @staticmethod
    def resegmentation(initial_segmentation, local_maxi):
        """Split a merged region with ``random_walker``.

        Args:
            initial_segmentation: mask of the merged region; it is inverted
                with ``~`` so it is presumably boolean — TODO confirm.
            local_maxi: seed image; its connected components become markers.

        Returns:
            Label image in which pixels outside the mask are 0.
        """
        markers = measure.label(local_maxi)
        # -1 marks pixels outside the region for the random walker.
        markers[~initial_segmentation] = -1
        labels = random_walker(initial_segmentation, markers)
        labels[labels == -1] = 0
        return labels

    # stores the tracks' descriptions in the right format
    def store_track(self, filename, sequence):
        """Write ``str(track)`` for every track, ordered by key, one per line.

        The file is written to ``<dataset_path>/0<sequence>_RES/<filename>``;
        the directory must already exist.
        """
        store_path = os.path.join(self.dataset_path,
                                  "0{}_RES/".format(sequence), filename)
        print("Storing track at {}".format(store_path))
        with open(store_path, 'w', encoding='utf-8') as file:
            for key in sorted(self.tracks):
                file.write(str(self.tracks[key]) + "\n")

    # get the backward matches and resegment when a collision is detected:
    def get_new_detections_dict(self, previous_frame, prev_img, current_frame,
                                track_dict, alt=True):
        """Match cells of the current frame backwards to the living tracks.

        For every detected cell the tracker predicts its box in the previous
        frame; all tracks whose current centroid falls inside are candidate
        matches. A cell with several candidates is re-checked forwards, and if
        more than one candidate survives the cell is treated as a collision:
        it is re-segmented with seeds at the colliding tracks' centroids, the
        frame is relabelled and matching restarts.

        Args:
            previous_frame: frame object providing ``seg_output``.
            prev_img: 3-channel image of the previous frame.
            current_frame: frame object whose ``seg_output`` may be rewritten.
            track_dict: dict keyed by the living ``CellTrack`` objects.
            alt: enables the matching/collision pass; when false the cells are
                returned with empty match lists.

        Returns:
            ``(seg_output, new_detections_dict, cur_img)`` where the dict maps
            each cell of the current frame to its matched tracks.
        """
        # Debug counter defined at module level; counts re-segmentations.
        global bla_count

        cur_img = np.stack((current_frame.image.astype(np.int16), ) * 3,
                           axis=-1)
        new_detections_dict = {
            c: [] for c in current_frame.get_cell_locations()
        }
        prev_num_cells = len(new_detections_dict)
        cur_num_cells = 0

        # Repeat while the last pass re-segmented something and the number of
        # cells changed compared to the pass before.
        while alt and prev_num_cells != cur_num_cells:
            prev_num_cells = cur_num_cells
            alt = False
            # cell detection backward pass
            for dest_cell in new_detections_dict:
                # get the predicted location of the cell in the previous frame
                tl_x, tl_y, br_x, br_y = self.predict_cell_location(
                    cur_img, prev_img, deepcopy(dest_cell))
                # match all cells located in that area
                for cell_track in track_dict:
                    c = cell_track.current_cell
                    if tl_x < c.centroid_x < br_x and tl_y < c.centroid_y < br_y:
                        try:
                            new_detections_dict[dest_cell].append(cell_track)
                        except Exception:
                            # Dump the lookup state before re-raising.
                            print("new_detections_dict.keys(): {}".format(
                                new_detections_dict.keys()))
                            print("dest_cell: {}".format(dest_cell))
                            print(new_detections_dict[dest_cell])
                            print("\n\n")
                            print(new_detections_dict)
                            raise

                # forward checking:
                if len(new_detections_dict[dest_cell]) > 1:
                    final = []
                    for c_t in new_detections_dict[dest_cell]:
                        tl_x, tl_y, br_x, br_y = self.predict_cell_location(
                            prev_img, cur_img, c_t.current_cell)
                        pred_center_x = int((br_x + tl_x) / 2)
                        pred_center_y = int((br_y + tl_y) / 2)
                        centroid_in_prediction = (
                            tl_x < dest_cell.centroid_x < br_x
                            and tl_y < dest_cell.centroid_y < br_y)
                        prediction_in_cell = (
                            dest_cell.min_col < pred_center_x < dest_cell.max_col
                            and dest_cell.min_row < pred_center_y < dest_cell.max_row)
                        if centroid_in_prediction or prediction_in_cell:
                            final.append(c_t)
                    new_detections_dict[dest_cell] = final

                # if two or more cells are matching, a collision has occurred
                # and the frame has to be resegmented
                matches = new_detections_dict[dest_cell]
                if len(matches) > 1:
                    alt = True
                    tl_x = min(t.current_cell.min_col for t in matches)
                    tl_y = min(t.current_cell.min_row for t in matches)
                    br_x = max(t.current_cell.max_col for t in matches)
                    br_y = max(t.current_cell.max_row for t in matches)
                    frame1_seg = previous_frame.seg_output[tl_y:br_y,
                                                           tl_x:br_x]
                    distance = np.zeros(
                        (int(frame1_seg.shape[0]), int(frame1_seg.shape[1])))
                    for color, t in enumerate(matches):
                        a = max(0, int(t.current_cell.centroid_y) - tl_y - 1)
                        b = max(0, int(t.current_cell.centroid_x) - tl_x - 1)
                        # Scalar assignment stamps the 2x2 seed and still
                        # works when the slice is clipped at the crop border.
                        distance[a:a + 2, b:b + 2] = color + 1
                    distance = cv2.resize(
                        distance,
                        (int(dest_cell.width), int(dest_cell.height)),
                        interpolation=cv2.INTER_NEAREST)
                    # Clear the merged cell before writing the split labels.
                    current_frame.seg_output[current_frame.seg_output ==
                                             dest_cell.color] = 0
                    new_seg = self.resegmentation(dest_cell.segmentation,
                                                  distance)
                    bla_count += 1
                    rows = slice(dest_cell.min_row, dest_cell.max_row)
                    cols = slice(dest_cell.min_col, dest_cell.max_col)
                    current_frame.seg_output[rows, cols] = np.maximum(
                        current_frame.seg_output[rows, cols], new_seg)
                    # relabel the frame and redetect the cells
                    current_frame.seg_output = measure.label(
                        current_frame.seg_output)
                    new_detections_dict = {
                        c: [] for c in current_frame.get_cell_locations()
                    }
                    cur_num_cells = len(new_detections_dict)
                    # The cell set changed, so restart the backward pass.
                    break

        return current_frame.seg_output, new_detections_dict, cur_img

    @staticmethod
    def multiplot(image_list):
        """Save the given images side by side to ``segres.png``.

        Panels are titled "A", "B", "C", ... in order. An empty list is a
        no-op. The figure is closed after saving so repeated calls do not
        accumulate open figures.
        """
        if len(image_list) == 0:
            return
        fig, _ = plt.subplots(nrows=1, ncols=len(image_list),
                              figsize=(10, 5))
        for num, image in enumerate(image_list):
            plt.subplot(1, len(image_list), num + 1)
            plt.title(chr(ord("A") + num), fontsize=25)
            plt.axis('off')
            plt.imshow(image)
        fig.savefig("segres.png", bbox_inches='tight')
        plt.close(fig)

    # makes sure that all cells in a given track have the same pixel value in
    # segmentation
    @staticmethod
    def propagate_labels(frame, living_tracks, track, cell):
        """Relabel *cell* in ``frame.seg_output`` with ``track.cell_id``.

        If another living track's current cell already uses that label, that
        cell is first moved to a fresh label (current maximum + 1).

        Args:
            frame: object with a ``seg_output`` label array.
            living_tracks: list of ``(track_id, track)`` pairs.
            track: the track that owns *cell*.
            cell: the track's cell in this frame; its ``color`` is updated.

        Returns:
            ``(frame, living_tracks)``.
        """
        free_label = max(np.unique(frame.seg_output)) + 1
        if track.cell_id != cell.color:
            for swap_id, swap_track in living_tracks:
                other = swap_track.current_cell
                if swap_id != track.cell_id and other.color == track.cell_id:
                    frame.seg_output = np.where(
                        frame.seg_output == other.color, free_label,
                        frame.seg_output)
                    other.color = free_label
                    break
            frame.seg_output = np.where(frame.seg_output == cell.color,
                                        track.cell_id, frame.seg_output)
            cell.color = track.cell_id
        return frame, living_tracks

    # runs the tracking algorithm and exports results for evaluation
    def run_test(self, sequence, collision_detection=True, store_footage=0,
                 load_segs_from_file=None):
        """Segment, track and export one sequence.

        Args:
            sequence: sequence number to process.
            collision_detection: forwarded to ``get_new_detections_dict`` as
                its ``alt`` flag.
            store_footage: when truthy, also write the annotated video.
            load_segs_from_file: directory with precomputed ``tif``
                segmentations, indexed by the integer frame name; when
                ``None`` the network is used instead.
        """
        set_01 = self.load_evaluation_images(sequence)
        self.result = []

        print("Segmenting footage:")
        # apply initial segmentation to footage:
        if load_segs_from_file is None:
            for frame in tqdm(set_01, position=0):
                frame.binary_seg = self.predict_seg(frame.image)
                frame.binary_seg_to_output()
        else:
            imgs_list = sorted(make_list_of_imgs_only(
                os.listdir(load_segs_from_file), 'tif'), key=natural_keys)
            for frame in tqdm(set_01, position=0):
                img_indx = int(frame.image_name)
                img_path = os.path.join(load_segs_from_file,
                                        imgs_list[img_indx])
                print("reading img: {} at {}".format(img_indx, img_path))
                frame.binary_seg = mpimg.imread(img_path)
                frame.binary_seg_to_output()

        # load first frame
        previous_frame = set_01[0]
        prev_img = np.stack((previous_frame.image.astype(np.int16), ) * 3,
                            axis=-1)

        # init tracks from detected cells in first frame
        self.tracks = {
            c_id + 1: CellTrack(c, 0, c_id + 1, 0)
            for c_id, c in enumerate(previous_frame.get_cell_locations())
        }
        self.track_count = len(self.tracks) + 1

        print("Tracking:")
        # frame_no is the index of current_frame within set_01.
        for frame_no, current_frame in enumerate(tqdm(set_01[1:]), start=1):
            track_dict = {t: [] for t in self.tracks.values() if t.alive}

            # solve collisions in segmentation and locate all cells in the
            # new frame:
            current_frame.seg_output, new_detections_dict, cur_img = \
                self.get_new_detections_dict(previous_frame, prev_img,
                                             current_frame, track_dict,
                                             collision_detection)

            # image for video visualisation
            cur_copy = cur_img.copy()

            # cell detection forward pass:
            for cell_track in track_dict:
                tl_x, tl_y, br_x, br_y = self.predict_cell_location(
                    prev_img, cur_img, cell_track.current_cell)
                pred_center_x = int((br_x + tl_x) / 2)
                pred_center_y = int((br_y + tl_y) / 2)
                for c in new_detections_dict:
                    prediction_in_cell = (
                        c.min_col < pred_center_x < c.max_col
                        and c.min_row < pred_center_y < c.max_row)
                    centroid_in_prediction = (
                        tl_x < c.centroid_x < br_x
                        and tl_y < c.centroid_y < br_y)
                    if prediction_in_cell or centroid_in_prediction:
                        track_dict[cell_track].append(c)

            # match cells from the previous frame to newly located cells:
            new_tracks = {}
            for track_id, cell_track in self.tracks.items():
                # dead tracks are carried over unchanged
                if not cell_track.alive:
                    new_tracks[track_id] = cell_track
                    continue

                forward_match = track_dict[cell_track]
                # case 1->_
                if not forward_match:
                    matched = False
                    # check if 1<-1:
                    for dest_cell, t in new_detections_dict.items():
                        if cell_track in t:
                            cell_track.add_cell(dest_cell)
                            matched = True
                            break
                    if matched:
                        # remove the cell from the free new detections
                        del new_detections_dict[cell_track.current_cell]
                    else:
                        # the cell has no match and has died
                        cell_track.alive = False
                    new_tracks[track_id] = cell_track
                # case 1->1
                elif len(forward_match) == 1:
                    dest_cell = forward_match[0]
                    if dest_cell not in new_detections_dict:
                        # should not occur
                        cell_track.alive = False
                    elif (not new_detections_dict[dest_cell]
                          or cell_track in new_detections_dict[dest_cell]):
                        # 1<-1 or _<-1 match
                        cell_track.add_cell(dest_cell)
                        del new_detections_dict[dest_cell]
                    else:
                        # 2<-1 death
                        cell_track.alive = False
                    new_tracks[track_id] = cell_track
                # case 1->1,2 (mitosis)
                else:
                    available = [
                        c for c in forward_match if c in new_detections_dict
                    ]
                    if not available or len(available) > 1:
                        # The parent ends; every free match starts a child
                        # track that records the parent's id.
                        cell_track.alive = False
                        for dest_cell in available:
                            del new_detections_dict[dest_cell]
                            new_tracks[self.track_count] = CellTrack(
                                dest_cell, frame_no, self.track_count,
                                cell_track.cell_id)
                            self.track_count += 1
                    else:
                        del new_detections_dict[available[0]]
                        cell_track.add_cell(available[0])
                    new_tracks[track_id] = cell_track

            # create new tracks for the unmatched cells in the new frame:
            for dest_cell in new_detections_dict:
                new_tracks[self.track_count] = CellTrack(
                    dest_cell, frame_no, self.track_count, 0)
                self.track_count += 1

            living_tracks = [(track_id, track)
                             for track_id, track in new_tracks.items()
                             if track.last_frame == frame_no]

            # displaying cell locations on frame for visual evaluation:
            for track_id, track in living_tracks:
                c = track.current_cell
                current_frame, living_tracks = self.propagate_labels(
                    current_frame, living_tracks, track, c)
                cv2.rectangle(cur_copy, c.tl, c.br, track.display_color, 3)
                x, y = int(c.centroid_x), int(c.centroid_y)
                cv2.putText(cur_copy, str(track_id), (x, y),
                            cv2.FONT_HERSHEY_SIMPLEX, .4,
                            track.display_color, 1, cv2.LINE_AA)
            cv2.putText(cur_copy, str(frame_no), (5, 25),
                        cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 0, 0), 1,
                        cv2.LINE_AA)

            self.result.append(cur_copy)
            self.tracks = new_tracks
            previous_frame = current_frame
            prev_img = cur_img

        print("Saving results")
        for frame in tqdm(set_01):
            frame.store()
        self.store_track("res_track.txt", sequence)
        if store_footage:
            print("Creating video")
            self.store_footage(sequence=sequence, fps=3)
if __name__ == '__main__':
    # setup dataset: sequence dataset (train subset) wrapped by Pairwise
    pair_dataset = Pairwise(got10k('data/GOT-10k', subset='train'))

    # setup data loader
    loader = DataLoader(
        pair_dataset,
        batch_size=8,
        shuffle=True,
        drop_last=True,
        num_workers=2,
    )

    # setup tracker
    tracker = TrackerSiamFC()

    # path for saving checkpoints (created on first run)
    net_dir = 'pretrained/siamfc_new'
    if not os.path.exists(net_dir):
        os.makedirs(net_dir)

    # training loop
    epoch_num = 50
    log_template = 'Epoch [{}][{}/{}]: Loss: {:.3f}'
    for epoch in range(epoch_num):
        for step, batch in enumerate(loader):
            # update_lr is requested only on the first step of each epoch
            first_step = step == 0
            loss = tracker.step(batch, backward=True, update_lr=first_step)
            if step % 20 != 0:
                continue
            # progress line every 20 steps, flushed immediately
            print(log_template.format(epoch + 1, step + 1, len(loader), loss))
            sys.stdout.flush()
def __init__(self):
    """Initialise the base class with the name "SiamFC" and build the tracker.

    The weights path is read from ``path_config.SIAMFC_MODEL``, stored on
    ``self.net_file`` and handed to ``TrackerSiamFC`` as ``net_path``.
    """
    super(SiamFC, self).__init__("SiamFC")
    # TODO: edit this path
    weights_path = path_config.SIAMFC_MODEL
    self.net_file = weights_path
    self.tracker = TrackerSiamFC(net_path=weights_path)