lasot_dir = lasot_dir + "/*/"
folders = glob.glob(lasot_dir)
# print(folders[0].split('\\')[-2])

tracker = Tracking(
    config='tracking/experiments/siamrpn_r50_l234_dwxcorr/config.yaml',
    snapshot='tracking/experiments/siamrpn_r50_l234_dwxcorr/model.pth')
detector = Detection(
    config="./detectron2/configs/COCO-InstanceSegmentation/small.yaml",
    model="detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
)
reid_module = REID(model=REID_BACKBONE)
tracklet = Tracklet(TRACKLET_SIZE)
running_stats = RunningStats()


def reid_rescore(reid_module, frame, template_features, bboxes, scores):
    # Rescore detection and tracking results with the REID module and sort the results.
    batch = []
    for bbox in bboxes:
        target = frame[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
        # print(target.shape)
        target = cv2.resize(target, (128, 128))
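        # --- The original snippet is truncated at this point. ---
        # The lines below are a hypothetical completion sketch, NOT the author's
        # code: they assume numpy is available as np and that the REID wrapper
        # exposes an extract_features(images) method returning L2-normalised
        # embeddings (one row per crop). Each box is then rescored by blending
        # its original score with the cosine similarity to the template features.
        batch.append(target)
    feats = reid_module.extract_features(np.stack(batch))        # assumed API
    sims = feats @ np.asarray(template_features).reshape(-1, 1)  # cosine similarity if rows are normalised
    rescored = 0.5 * np.asarray(scores) + 0.5 * sims.ravel()     # simple blend (assumption)
    order = np.argsort(rescored)[::-1]                           # highest score first
    return [bboxes[i] for i in order], rescored[order]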
import sys
import os

import cv2
import torch
import numpy as np
from glob import glob

sys.path.append('./tracking/')
print(sys.path)

from tracking.sot import Tracking
from reid import REID
from detection import Detection

tracker = Tracking(
    config='tracking/experiments/siamrpn_r50_l234_dwxcorr/config.yaml',
    snapshot='tracking/experiments/siamrpn_r50_l234_dwxcorr/model.pth')
detector = Detection(
    config="./detectron2/configs/COCO-InstanceSegmentation/small.yaml",
    model="detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
)
reid_module = REID(model='resnet18')
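# Hypothetical usage sketch (not in the original snippets): iterate over the LaSOT
# sequence folders matched by the glob above and load frames plus ground-truth
# boxes. It assumes the standard LaSOT layout, where each sequence directory
# contains an img/ folder with the frames and a groundtruth.txt file with one
# "x,y,w,h" line per frame; lasot_root below is a placeholder path to one LaSOT
# category folder.
lasot_root = '/path/to/LaSOT/person'
for folder in sorted(glob(os.path.join(lasot_root, '*/'))):
    seq_name = os.path.basename(os.path.normpath(folder))
    frame_paths = sorted(glob(os.path.join(folder, 'img', '*.jpg')))
    gt_boxes = np.loadtxt(os.path.join(folder, 'groundtruth.txt'), delimiter=',')
    print(seq_name, len(frame_paths), gt_boxes.shape)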
def main(yolo):
    print('Using {} model'.format(yolo))

    # Definition of the parameters
    max_cosine_distance = 0.2
    nn_budget = None
    nms_max_overlap = 0.4

    # deep_sort
    model_filename = 'model_data/models/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)  # used to extract appearance features
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, max_age=100)

    output_frames = []
    output_rectanger = []
    output_areas = []
    output_wh_ratio = []

    is_vis = True
    out_dir = 'videos/output/'
    print('The output folder is', out_dir)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    all_frames = []
    for video in args.videos:
        loadvideo = LoadVideo(video)
        video_capture, frame_rate, w, h = loadvideo.get_VideoLabels()
        while True:
            ret, frame = video_capture.read()
            if ret != True:
                video_capture.release()
                break
            all_frames.append(frame)

    frame_nums = len(all_frames)
    tracking_path = out_dir + 'tracking' + '.avi'
    combined_path = out_dir + 'allVideos' + '.avi'
    if is_vis:
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(tracking_path, fourcc, frame_rate, (w, h))
        out2 = cv2.VideoWriter(combined_path, fourcc, frame_rate, (w, h))
        # Combine all videos
        for frame in all_frames:
            out2.write(frame)
        out2.release()

    # Initialize the tracking file
    filename = out_dir + '/tracking.txt'
    open(filename, 'w')

    fps = 0.0
    frame_cnt = 0
    t1 = time.time()

    track_cnt = dict()
    images_by_id = dict()
    ids_per_frame = []
    for frame in all_frames:
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)  # n * [topleft_x, topleft_y, w, h]
        features = encoder(frame, boxs)  # n * 128
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]  # length = n
        text_scale, text_thickness, line_thickness = get_FrameLabels(frame)

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.delete_overlap_box(boxes, nms_max_overlap, scores)
        # indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]  # length = len(indices)

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        tmp_ids = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            area = (int(bbox[2]) - int(bbox[0])) * (int(bbox[3]) - int(bbox[1]))
            if bbox[0] >= 0 and bbox[1] >= 0 and bbox[3] < h and bbox[2] < w:
                tmp_ids.append(track.track_id)
                if track.track_id not in track_cnt:
                    track_cnt[track.track_id] = [[frame_cnt, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), area]]
                    images_by_id[track.track_id] = [frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]]
                else:
                    track_cnt[track.track_id].append([frame_cnt, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), area])
                    images_by_id[track.track_id].append(frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])])
            cv2_addBox(track.track_id, frame, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), line_thickness, text_thickness, text_scale)
            write_results(filename, 'mot', frame_cnt + 1, str(track.track_id), int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), w, h)
        ids_per_frame.append(set(tmp_ids))

        # Save a frame
        if is_vis:
            out.write(frame)
        t2 = time.time()

        frame_cnt += 1
        print(frame_cnt, '/', frame_nums)

    if is_vis:
        out.release()
    print('Tracking finished in {} seconds'.format(int(time.time() - t1)))
    print('Tracked video : {}'.format(tracking_path))
    print('Combined video : {}'.format(combined_path))

    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
    reid = REID()
    threshold = 320
    exist_ids = set()
    final_fuse_id = dict()

    print('Total IDs = ', len(images_by_id))
    feats = dict()
    for i in images_by_id:
        print('ID number {} -> Number of frames {}'.format(i, len(images_by_id[i])))
        feats[i] = reid._features(images_by_id[i])  # reid._features(images_by_id[i][:min(len(images_by_id[i]), 100)])

    ids_per_frame2 = copy.deepcopy(ids_per_frame)
    for f in ids_per_frame:
        if f:
            if len(exist_ids) == 0:
                for i in f:
                    final_fuse_id[i] = [i]
                exist_ids = exist_ids | f
            else:
                new_ids = f - exist_ids
                for nid in new_ids:
                    dis = []
                    if len(images_by_id[nid]) < 10:
                        exist_ids.add(nid)
                        continue
                    unpickable = []
                    for i in f:
                        for key, item in final_fuse_id.items():
                            if i in item:
                                unpickable += final_fuse_id[key]
                    print('exist_ids {} unpickable {}'.format(exist_ids, unpickable))
                    for oid in (exist_ids - set(unpickable)) & set(final_fuse_id.keys()):
                        tmp = np.mean(reid.compute_distance(feats[nid], feats[oid]))
                        print('nid {}, oid {}, tmp {}'.format(nid, oid, tmp))
                        dis.append([oid, tmp])
                    exist_ids.add(nid)
                    if not dis:
                        final_fuse_id[nid] = [nid]
                        continue
                    dis.sort(key=operator.itemgetter(1))
                    if dis[0][1] < threshold:
                        combined_id = dis[0][0]
                        images_by_id[combined_id] += images_by_id[nid]
                        final_fuse_id[combined_id].append(nid)
                    else:
                        final_fuse_id[nid] = [nid]
    print('Final ids and their sub-ids:', final_fuse_id)
    print('MOT took {} seconds'.format(int(time.time() - t1)))
    t2 = time.time()

    # To generate a MOT video for each person, set 'is_vis' to True
    is_vis = False
    if is_vis:
        print('Writing videos for each ID...')
        output_dir = 'videos/output/tracklets/'
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        loadvideo = LoadVideo(combined_path)
        video_capture, frame_rate, w, h = loadvideo.get_VideoLabels()
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        for idx in final_fuse_id:
            tracking_path = os.path.join(output_dir, str(idx) + '.avi')
            out = cv2.VideoWriter(tracking_path, fourcc, frame_rate, (w, h))
            for i in final_fuse_id[idx]:
                for f in track_cnt[i]:
                    video_capture.set(cv2.CAP_PROP_POS_FRAMES, f[0])
                    _, frame = video_capture.read()
                    text_scale, text_thickness, line_thickness = get_FrameLabels(frame)
                    cv2_addBox(idx, frame, f[1], f[2], f[3], f[4], line_thickness, text_thickness, text_scale)
                    out.write(frame)
            out.release()
        video_capture.release()

    # Generate a single video with the complete MOT/ReID result
    if args.all:
        loadvideo = LoadVideo(combined_path)
        video_capture, frame_rate, w, h = loadvideo.get_VideoLabels()
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        complete_path = out_dir + '/Complete' + '.avi'
        out = cv2.VideoWriter(complete_path, fourcc, frame_rate, (w, h))

        for frame in range(len(all_frames)):
            frame2 = all_frames[frame]
            video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame)
            _, frame2 = video_capture.read()
            for idx in final_fuse_id:
                for i in final_fuse_id[idx]:
                    for f in track_cnt[i]:
                        # print('frame {} f0 {}'.format(frame, f[0]))
                        if frame == f[0]:
                            text_scale, text_thickness, line_thickness = get_FrameLabels(frame2)
                            cv2_addBox(idx, frame2, f[1], f[2], f[3], f[4], line_thickness, text_thickness, text_scale)
            out.write(frame2)
        out.release()
        video_capture.release()
        os.remove(combined_path)
        print('\nWriting videos took {} seconds'.format(int(time.time() - t2)))
        print('Final video at {}'.format(complete_path))
    print('Total: {} seconds'.format(int(time.time() - t1)))
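# The helpers get_FrameLabels, cv2_addBox and write_results are not shown in this
# snippet. The function below is only a hypothetical sketch of write_results,
# assuming a 'mot'-style row follows the MOTChallenge convention
# "frame,id,x,y,w,h,conf,-1,-1,-1", with the box converted from corner coordinates
# (x1, y1, x2, y2) to top-left/width/height; the real helper may differ.
def write_results(filename, data_type, frame_id, track_id, x1, y1, x2, y2, w, h):
    if data_type != 'mot':
        raise ValueError('Unsupported data type: {}'.format(data_type))
    # The frame size arguments (w, h) are accepted but unused in this sketch.
    row = '{},{},{},{},{},{},1,-1,-1,-1\n'.format(frame_id, track_id, x1, y1, x2 - x1, y2 - y1)
    with open(filename, 'a') as f:
        f.write(row)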
def main(args):
    cudnn.benchmark = True
    cudnn.enabled = True

    # get all the labeled and unlabeled data for training
    dataset_all = datasets.create(args.dataset, osp.join(args.data_dir, args.dataset))
    l_data, u_data = get_one_shot_in_cam1(
        dataset_all,
        load_path="./examples/oneshot_{}_used_in_paper.pickle".format(dataset_all.name))
    mv_num = math.ceil(len(u_data) / args.total_step)  # the last round may move fewer than mv_num samples

    # computation of the total number of training steps
    # total_step = math.ceil(math.pow((100 / args.EF), (1 / args.q)))  # take the ceiling here, or +2 for one extra one-shot training round  # EUG baseline sampling strategy
    # total_step = math.ceil((2 * NN * args.step_s + args.yita + len(u_data)) / (args.yita + NN + len(l_data))) + 2  # "big start" strategy

    # experiment info
    print(
        "{}/{} is training with {}, the max_frames is {}, and will be saved to {}"
        .format(args.exp_name, args.exp_order, args.dataset, args.max_frames, args.logs_dir))
    # parameter info
    print("parameters are set as follows:")
    print("\ttotal_step:\t{}".format(args.total_step))
    # print("\tEF:\t{}".format(args.EF))
    # print("\tq:\t{}".format(args.q))
    # print("\ttrain_tagper_step:\t{}".format(args.train_tagper_step))
    print("\tepoch:\t{}".format(args.epoch))
    print("\tstep_size:\t{}".format(args.step_size))
    print("\tbatch_size:\t{}".format(args.batch_size))
    print("\tmv_num:\t{}".format(mv_num))

    # specify the output files
    # (part three should explain how the key parameters are set)
    sys.stdout = Logger(
        osp.join(args.logs_dir, args.dataset, args.exp_name, args.exp_order,
                 'log' + time.strftime(".%m_%d_%H-%M-%S") + '.txt'))
    data_file = codecs.open(
        osp.join(args.logs_dir, args.dataset, args.exp_name, args.exp_order, 'data.txt'), mode='a')
    time_file = codecs.open(
        osp.join(args.logs_dir, args.dataset, args.exp_name, args.exp_order, 'time.txt'), mode='a')
    save_path = osp.join(args.logs_dir, args.dataset, args.exp_name, args.exp_order)

    resume_step, ckpt_file = -1, ''
    if args.resume:  # used when resuming a previous run
        resume_step, ckpt_file = resume(args)

    # initialize the EUG algorithm
    reid = REID(model_name=args.arch, batch_size=args.batch_size, mode=args.mode,
                num_classes=dataset_all.num_train_ids, data_dir=dataset_all.images_dir,
                l_data=l_data, u_data=u_data, save_path=save_path, max_frames=args.max_frames)

    select_data = []
    # record the start time
    exp_start = time.time()
    for step in range(args.total_step + 1):
        # while (not isout):
        print('-' * 20 + 'training step:{}/{}'.format(step + 1, args.total_step + 1) + '-' * 20)

        # training
        train_start = time.time()
        train_data = l_data + select_data
        reid.train(train_data, step, epochs=args.epoch, step_size=args.step_size, init_lr=0.1)

        # evaluation
        evaluate_start = time.time()
        # mAP, top1, top5, top10, top20 = 0, 0, 0, 0, 0
        mAP, top1, top5, top10, top20 = reid.evaluate(dataset_all.query, dataset_all.gallery)

        # label estimation
        estimate_start = time.time()
        # pred_y, pred_score, label_pre, id_num = 0, 0, 0, 0
        pred_y, pred_score, label_pre = reid.estimate_label_FSM(step)  # step in [0, total_step]
        estimate_end = time.time()

        selected_idx = reid.select_top_data(pred_score, min(mv_num * (step + 1), len(u_data)))
        select_data, select_pre = reid.generate_new_train_data(selected_idx, pred_y)

        # log the results of this step
        data_file.write(
            "step:{} mAP:{:.2%} top1:{:.2%} top5:{:.2%} top10:{:.2%} top20:{:.2%} len(l_data):{} label_pre:{:.2%} select_pre:{:.2%}\n"
            .format(int(step + 1), mAP, top1, top5, top10, top20, len(select_data), label_pre, select_pre))
        print(
            "reid step:{} mAP:{:.2%} top1:{:.2%} top5:{:.2%} top10:{:.2%} top20:{:.2%} len(l_data):{} label_pre:{:.2%} select_pre:{:.2%} \n"
            .format(int(step + 1), mAP, top1, top5, top10, top20, len(select_data), label_pre, select_pre))

        if args.clock:
            train_time = evaluate_start - train_start
            evaluate_time = estimate_start - evaluate_start
            estimate_time = estimate_end - estimate_start
            epoch_time = estimate_end - train_start  # total wall-clock time of this step
            time_file.write(
                "step:{} train:{} evaluate:{} estimate:{} epoch:{}\n".format(
                    int(step + 1), train_time, evaluate_time, estimate_time, epoch_time))

    data_file.close()
    if args.clock:
        exp_end = time.time()
        exp_time = exp_end - exp_start
        h, m, s = changetoHSM(exp_time)
        print("experiment is over, cost %02d:%02d:%02.6f" % (h, m, s))
        time_file.close()
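# changetoHSM is used above but not defined in this snippet. A minimal sketch,
# assuming it simply splits a duration in seconds into hours, minutes and
# (fractional) seconds for the final report:
def changetoHSM(seconds):
    hours, rest = divmod(seconds, 3600)
    minutes, secs = divmod(rest, 60)
    return int(hours), int(minutes), secs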