Example #1
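# lasot_dir is assumed to point at the LaSOT dataset root (defined earlier in the original script);
# build a glob pattern that matches every sequence folder under it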
lasot_dir = lasot_dir + "/*/"
folders = glob.glob(lasot_dir)
# print(folders[0].split('\\')[-2])

tracker = Tracking(
    config='tracking/experiments/siamrpn_r50_l234_dwxcorr/config.yaml',
    snapshot='tracking/experiments/siamrpn_r50_l234_dwxcorr/model.pth')

detector = Detection(
    config="./detectron2/configs/COCO-InstanceSegmentation/small.yaml",
    model="detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl")

# REID_BACKBONE and TRACKLET_SIZE are assumed to be constants defined earlier in the original script
# (Example #2 below instantiates REID with model='resnet18')
reid_module = REID(model=REID_BACKBONE)

tracklet = Tracklet(TRACKLET_SIZE)

running_stats = RunningStats()


def reid_rescore(reid_module, frame, template_features, bboxes, scores):
    """Re-score detection and tracking results with the ReID module and sort the results."""
    batch = []
    for bbox in bboxes:

        # crop the candidate box [x1, y1, x2, y2] from the frame and resize to the ReID input size
        target = frame[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
        target = cv2.resize(target, (128, 128))
Example #2
import sys
import os
import cv2
import torch
import numpy as np
from glob import glob

sys.path.append('./tracking/')
print(sys.path)
from tracking.sot import Tracking
from reid import REID
from detection import Detection

tracker = Tracking(
    config='tracking/experiments/siamrpn_r50_l234_dwxcorr/config.yaml',
    snapshot='tracking/experiments/siamrpn_r50_l234_dwxcorr/model.pth')

detector = Detection(
    config="./detectron2/configs/COCO-InstanceSegmentation/small.yaml",
    model="detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl")

reid_module = REID(model='resnet18')
Example #3
def main(yolo):

    print('Using {} model'.format(yolo))
       
    # Definition of the parameters
    max_cosine_distance = 0.2
    nn_budget = None
    nms_max_overlap = 0.4

    # deep_sort
    model_filename = 'model_data/models/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)  # used to extract appearance features
    
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, max_age=100)

    output_frames = []
    output_rectangles = []
    output_areas = []
    output_wh_ratio = []

    is_vis = True
    out_dir = 'videos/output/'
    print('The output folder is', out_dir)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    all_frames = []
    for video in args.videos:
        loadvideo = LoadVideo(video)
        video_capture, frame_rate, w, h = loadvideo.get_VideoLabels()
        while True:
            ret, frame = video_capture.read()
            if not ret:
                video_capture.release()
                break
            all_frames.append(frame)

    frame_nums = len(all_frames)
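    # output paths: the per-frame tracking visualization and the concatenation of all input videos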
    tracking_path = out_dir + 'tracking.avi'
    combined_path = out_dir + 'allVideos.avi'
    if is_vis:
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(tracking_path, fourcc, frame_rate, (w, h))
        out2 = cv2.VideoWriter(combined_path, fourcc, frame_rate, (w, h))
        # Combine all videos
        for frame in all_frames:
            out2.write(frame)
        out2.release()
        
    # Initialize (truncate) the tracking results file
    filename = out_dir + 'tracking.txt'
    open(filename, 'w').close()
    
    fps = 0.0
    frame_cnt = 0
    t1 = time.time()
    
    track_cnt = dict()
    images_by_id = dict()
    ids_per_frame = []
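    # main per-frame loop: detect with YOLO, extract appearance features, and update the DeepSORT tracker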
    for frame in all_frames:
        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB
        boxs = yolo.detect_image(image)  # n * [topleft_x, topleft_y, w, h]
        features = encoder(frame, boxs)  # n * 128
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)] # length = n
        text_scale, text_thickness, line_thickness = get_FrameLabels(frame)

        
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.delete_overlap_box(boxes, nms_max_overlap, scores) #preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices] # length = len(indices)

        # Call the tracker 
        tracker.predict()
        tracker.update(detections)
        tmp_ids = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue 
            
            bbox = track.to_tlbr()
            area = (int(bbox[2]) - int(bbox[0])) * (int(bbox[3]) - int(bbox[1]))
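            # only record tracks whose box lies fully inside the frame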
            if bbox[0] >= 0 and bbox[1] >= 0 and bbox[3] < h and bbox[2] < w:
                tmp_ids.append(track.track_id)
                if track.track_id not in track_cnt:
                    track_cnt[track.track_id] = [[frame_cnt, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), area]]
                    images_by_id[track.track_id] = [frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]]
                else:
                    track_cnt[track.track_id].append([frame_cnt, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), area])
                    images_by_id[track.track_id].append(frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])])
            cv2_addBox(track.track_id, frame, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), line_thickness, text_thickness, text_scale)
            write_results(filename, 'mot', frame_cnt + 1, str(track.track_id), int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), w, h)
        ids_per_frame.append(set(tmp_ids))

        # save a frame               
        if is_vis:
            out.write(frame)
        t2 = time.time()
        
        frame_cnt += 1
        print(frame_cnt, '/', frame_nums)

    if is_vis:
        out.release()
    print('Tracking finished in {} seconds'.format(int(time.time() - t1)))
    print('Tracked video : {}'.format(tracking_path))
    print('Combined video : {}'.format(combined_path))

    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
    reid = REID()
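    # ReID feature-distance threshold for merging two track IDs (value taken from the original code)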
    threshold = 320
    exist_ids = set()
    final_fuse_id = dict()

    print('Total IDs =', len(images_by_id))
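    # compute ReID features for every track ID from its collected image crops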
    feats = dict()
    for i in images_by_id:
        print('ID number {} -> Number of frames {}'.format(i, len(images_by_id[i])))
        feats[i] = reid._features(images_by_id[i]) #reid._features(images_by_id[i][:min(len(images_by_id[i]),100)])
    
    ids_per_frame2 = copy.deepcopy(ids_per_frame)
    
    for f in ids_per_frame:
        if f:
            if len(exist_ids) == 0:
                for i in f:
                    final_fuse_id[i] = [i]
                exist_ids = exist_ids | f
            else:
                new_ids = f - exist_ids
                for nid in new_ids:
                    dis = []
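                    # skip IDs with fewer than 10 crops: too few samples for a reliable ReID comparison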
                    if len(images_by_id[nid]) < 10:
                        exist_ids.add(nid)
                        continue
                    unpickable = []
                    for i in f:
                        for key, item in final_fuse_id.items():
                            if i in item:
                                unpickable += final_fuse_id[key]
                    print('exist_ids {} unpickable {}'.format(exist_ids, unpickable))
                    for oid in (exist_ids - set(unpickable)) & set(final_fuse_id.keys()):
                        tmp = np.mean(reid.compute_distance(feats[nid], feats[oid]))
                        print('nid {}, oid {}, tmp {}'.format(nid, oid, tmp))
                        dis.append([oid, tmp])
                    exist_ids.add(nid)
                    if not dis:
                        final_fuse_id[nid] = [nid]
                        continue
                    dis.sort(key=operator.itemgetter(1))
                    if dis[0][1] < threshold:
                        combined_id = dis[0][0]
                        images_by_id[combined_id] += images_by_id[nid]
                        final_fuse_id[combined_id].append(nid)
                    else:
                        final_fuse_id[nid] = [nid]
    print('Final ids and their sub-ids:', final_fuse_id)
    print('MOT took {} seconds'.format(int(time.time() - t1)))
    t2 = time.time()

    # To generate a separate MOT video for each person, set 'is_vis' to True
    is_vis = False
    if is_vis:
        print('Writing videos for each ID...')
        output_dir = 'videos/output/tracklets/'
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        loadvideo = LoadVideo(combined_path)
        video_capture, frame_rate, w, h = loadvideo.get_VideoLabels()
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        for idx in final_fuse_id:
            tracking_path = os.path.join(output_dir, str(idx)+'.avi')
            out = cv2.VideoWriter(tracking_path, fourcc, frame_rate, (w, h))
            for i in final_fuse_id[idx]:
                for f in track_cnt[i]:
                    video_capture.set(cv2.CAP_PROP_POS_FRAMES, f[0])
                    _, frame = video_capture.read()
                    text_scale, text_thickness, line_thickness = get_FrameLabels(frame)
                    cv2_addBox(idx, frame, f[1], f[2], f[3], f[4], line_thickness, text_thickness, text_scale)
                    out.write(frame)
            out.release()
        video_capture.release()

    # Generate a single video with complete MOT/ReID              
    if args.all:
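        # re-read the combined video and draw every fused ID's boxes onto the corresponding frames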
        loadvideo = LoadVideo(combined_path)
        video_capture, frame_rate, w, h = loadvideo.get_VideoLabels()
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        complete_path = out_dir + 'Complete.avi'
        out = cv2.VideoWriter(complete_path, fourcc, frame_rate, (w, h))
        
        for frame in range(len(all_frames)):
            frame2 = all_frames[frame]
            video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame)
            _, frame2 = video_capture.read()
            for idx in final_fuse_id:
                for i in final_fuse_id[idx]:
                    for f in track_cnt[i]:
                        #print('frame {} f0 {}'.format(frame,f[0]))
                        if frame == f[0]:
                            text_scale, text_thickness, line_thickness = get_FrameLabels(frame2)
                            cv2_addBox(idx, frame2, f[1], f[2], f[3], f[4], line_thickness, text_thickness, text_scale)
            out.write(frame2)
        out.release()
        video_capture.release()

    os.remove(combined_path)
    print('\nWriting videos took {} seconds'.format(int(time.time() - t2)))
    if args.all:
        print('Final video at {}'.format(complete_path))
    print('Total: {} seconds'.format(int(time.time() - t1)))
Example #4
def main(args):

    cudnn.benchmark = True
    cudnn.enabled = True

    # get all the labeled and unlabeled data for training
    dataset_all = datasets.create(args.dataset,
                                  osp.join(args.data_dir, args.dataset))
    l_data, u_data = get_one_shot_in_cam1(
        dataset_all,
        load_path="./examples/oneshot_{}_used_in_paper.pickle".format(
            dataset_all.name))
    mv_num = math.ceil(len(u_data) / args.total_step)  # the last step may not have a full mv_num worth of samples

    # computation of the total number of training steps
    # total_step = math.ceil(math.pow((100 / args.EF), (1 / args.q)))   # should round up or add 2 for one extra round of one-shot training  # EUG base sampling strategy
    # total_step = math.ceil((2 * NN * args.step_s + args.yita + len(u_data)) / (args.yita + NN + len(l_data))) + 2  # big-start strategy

    # experiment info
    print(
        "{}/{} is training on {}, max_frames is {}, and results will be saved to {}"
        .format(args.exp_name, args.exp_order, args.dataset, args.max_frames,
                args.logs_dir))
    # parameter info
    print("parameters are set as follows:")
    print("\ttotal_step:\t{}".format(args.total_step))
    # print("\tEF:\t{}".format(args.EF))
    # print("\tq:\t{}".format(args.q))
    # print("\ttrain_tagper_step:\t{}".format(args.train_tagper_step))
    print("\tepoch:\t{}".format(args.epoch))
    print("\tstep_size:\t{}".format(args.step_size))
    print("\tbatch_size:\t{}".format(args.batch_size))
    print("\tmv_num:\t{}".format(mv_num))
    # specify the output files
    # the third part should describe the key parameter settings
    sys.stdout = Logger(
        osp.join(args.logs_dir, args.dataset, args.exp_name, args.exp_order,
                 'log' + time.strftime(".%m_%d_%H-%M-%S") + '.txt'))
    data_file = codecs.open(osp.join(args.logs_dir, args.dataset,
                                     args.exp_name, args.exp_order,
                                     'data.txt'),
                            mode='a')
    time_file = codecs.open(osp.join(args.logs_dir, args.dataset,
                                     args.exp_name, args.exp_order,
                                     'time.txt'),
                            mode='a')
    save_path = osp.join(args.logs_dir, args.dataset, args.exp_name,
                         args.exp_order)

    resume_step, ckpt_file = -1, ''
    if args.resume:  # used when resuming training
        resume_step, ckpt_file = resume(args)
    # initial the EUG algorithm
    reid = REID(model_name=args.arch,
                batch_size=args.batch_size,
                mode=args.mode,
                num_classes=dataset_all.num_train_ids,
                data_dir=dataset_all.images_dir,
                l_data=l_data,
                u_data=u_data,
                save_path=save_path,
                max_frames=args.max_frames)

    select_data = []
    # record the experiment start time
    exp_start = time.time()
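    # iterative self-training loop: train the model, evaluate, estimate labels for the unlabeled data, and select new pseudo-labeled samples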
    for step in range(args.total_step + 1):
        # while(not isout):
        print('-' * 20 +
              'training step:{}/{}'.format(step + 1, args.total_step + 1) +
              '-' * 20)

        # start training
        train_start = time.time()
        train_data = l_data + select_data
        reid.train(train_data,
                   step,
                   epochs=args.epoch,
                   step_size=args.step_size,
                   init_lr=0.1)

        # start evaluation
        evaluate_start = time.time()
        # mAP, top1, top5, top10, top20 = 0,0,0,0,0
        mAP, top1, top5, top10, top20 = reid.evaluate(dataset_all.query,
                                                      dataset_all.gallery)

        # label estimation
        estimate_start = time.time()
        # pred_y, pred_score, label_pre, id_num = 0,0,0,0
        pred_y, pred_score, label_pre = reid.estimate_label_FSM(
            step)  # step \in [0,total_step]
        estimate_end = time.time()

        selected_idx = reid.select_top_data(
            pred_score, min(mv_num * (step + 1), len(u_data)))
        select_data, select_pre = reid.generate_new_train_data(
            selected_idx, pred_y)

        # output this step's information
        data_file.write(
            "step:{} mAP:{:.2%} top1:{:.2%} top5:{:.2%} top10:{:.2%} top20:{:.2%} len(l_data):{} label_pre:{:.2%} select_pre:{:.2%}\n"
            .format(int(step + 1), mAP, top1, top5, top10, top20,
                    len(select_data), label_pre, select_pre))
        print(
            "reid step:{} mAP:{:.2%} top1:{:.2%} top5:{:.2%} top10:{:.2%} top20:{:.2%} len(l_data):{} label_pre:{:.2%} select_pre:{:.2%} \n"
            .format(int(step + 1), mAP, top1, top5, top10, top20,
                    len(select_data), label_pre, select_pre))
        if args.clock:
            train_time = evaluate_start - train_start
            evaluate_time = estimate_start - evaluate_start
            estimate_time = estimate_end - estimate_start
            epoch_time = train_time - estimate_time
            time_file.write(
                "step:{}  train:{} evaluate:{} estimate:{} epoch:{}\n".format(
                    int(step + 1), train_time, evaluate_time, estimate_time,
                    epoch_time))

    data_file.close()
    if args.clock:
        exp_end = time.time()
        exp_time = exp_end - exp_start
        h, m, s = changetoHSM(exp_time)
        print("experiment is over, cost %02d:%02d:%02.6f" % (h, m, s))
        time_file.close()