Example #1
    def predict(self, img0):
        # img0 = cv2.imread(img_path)  # BGR
        img, _, _, _ = letterbox(img0, height=640, width=640)
        # BGR -> RGB, HWC -> CHW; scaled to [0, 1] below
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        img /= 255.0
        im_blob = torch.from_numpy(img).unsqueeze(0).to(self.device)

        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // down_ratio,  # down_ratio: network output stride, assumed defined in the enclosing scope (4 in FairMOT)
            'out_width': inp_width // down_ratio
        }
        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            output = self.model(im_blob)[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            id_feature = output['id']
            id_feature = F.normalize(id_feature, dim=1)

            reg = output['reg']
            dets, inds = mot_decode(hm, wh, reg=reg, ltrb=True, K=500)
            id_feature = _tranpose_and_gather_feat(id_feature, inds)
            id_feature = id_feature.squeeze(0)
            id_feature = id_feature.cpu().numpy()

        dets = post_process(dets, meta)
        dets = merge_outputs([dets])[1]
        remain_inds = dets[:, 4] > self.conf_thres
        dets = dets[remain_inds]
        id_feature = id_feature[remain_inds]
        res = []
        for i in range(0, dets.shape[0]):
            bbox = dets[i][0:4]
            bbox = [
                int(min(bbox[0], bbox[2])),
                int(min(bbox[1], bbox[3])),
                int(max(bbox[0], bbox[2])),
                int(max(bbox[1], bbox[3])),
            ]
            res.append({
                "reid": id_feature[i],
                "bbox": bbox,
            })
        return res
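
Note: Example #1 unpacks four values from letterbox. Below is a minimal sketch of this helper as it is commonly defined in FairMOT-style repositories (aspect-preserving resize plus constant-color padding); the exact implementation used by the example is not shown here, so treat the details as assumptions:

    import cv2

    def letterbox(img, height=608, width=1088, color=(127.5, 127.5, 127.5)):
        # Resize to fit (height, width) without distortion, then pad the borders.
        shape = img.shape[:2]  # (h, w)
        ratio = min(float(height) / shape[0], float(width) / shape[1])
        new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))  # (w, h)
        dw = (width - new_shape[0]) / 2   # width padding
        dh = (height - new_shape[1]) / 2  # height padding
        top, bottom = round(dh - 0.1), round(dh + 0.1)
        left, right = round(dw - 0.1), round(dw + 0.1)
        img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA)
        img = cv2.copyMakeBorder(img, top, bottom, left, right,
                                 cv2.BORDER_CONSTANT, value=color)
        return img, ratio, dw, dh
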
Example #2
 def save_result(self, output, batch, results):
     reg = output['reg'] if self.opt.reg_offset else None
     dets = mot_decode(
         output['hm'], output['wh'], reg=reg,
         cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
     dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
     dets_out = ctdet_post_process(
         dets.copy(), batch['meta']['c'].cpu().numpy(),
         batch['meta']['s'].cpu().numpy(),
         output['hm'].shape[2], output['hm'].shape[3], output['hm'].shape[1])
     results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0]
Example #3
 def save_result(self, output, batch, results):
     reg = output["reg"] if self.opt.reg_offset else None
     dets = mot_decode(
         output["hm"],
         output["wh"],
         reg=reg,
         cat_spec_wh=self.opt.cat_spec_wh,
         K=self.opt.K,
     )
     dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
     dets_out = ctdet_post_process(
         dets.copy(),
         batch["meta"]["c"].cpu().numpy(),
         batch["meta"]["s"].cpu().numpy(),
         output["hm"].shape[2],
         output["hm"].shape[3],
         output["hm"].shape[1],
     )
     results[batch["meta"]["img_id"].cpu().numpy()[0]] = dets_out[0]
Example #4
    def detect(self, im_blob, img0):
        self.frame_id += 1

        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio
        }
        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            output = self.model(im_blob)[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']

            reg = output['reg'] if self.opt.reg_offset else None
            dets, inds = mot_decode(hm,
                                    wh,
                                    reg=reg,
                                    cat_spec_wh=self.opt.cat_spec_wh,
                                    K=self.opt.K)
        dets = self.post_process(dets, meta)
        # print(dets[1].shape, dets[2].shape, dets[3].shape, dets[4].shape, )
        dets = self.merge_outputs([dets])  #[1]
        ## merge all classes into one big array
        # for i in range(1, self.opt.num_classes+1):
        dets = np.concatenate([
            np.hstack((dets[i], i * np.ones((dets[i].shape[0], 1))))
            for i in range(1, self.opt.num_classes + 1)
        ], axis=0)
        # dets = dets_merged

        remain_inds = dets[:, 4] > self.opt.conf_thres
        dets = dets[remain_inds]
        return dets
Example #5
    def detect(self, im_blob, img0):
        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {'c': c, 's': s,
                'out_height': inp_height // self.opt.down_ratio,
                'out_width': inp_width // self.opt.down_ratio}

        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            output = self.model(im_blob)[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            reg = output['reg'] if self.opt.reg_offset else None
            dets, inds = mot_decode(hm, wh, reg=reg, ltrb=self.opt.ltrb, K=self.opt.K)
        dets = self.post_process(dets, meta)
        dets = self.merge_outputs([dets])[1]
        remain_inds = dets[:, 4] > self.opt.conf_thres
        # format: tlbr
        dets = dets[remain_inds]
        return dets
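
Note: every example builds the same meta dict. c is the center of the original frame and s is the scale that the CenterNet-style post-processing uses to map network output back to image coordinates. A quick numeric check with hypothetical sizes (1920x1080 frame, 1088x608 network input):

    import numpy as np

    width, height = 1920, 1080         # original frame (hypothetical)
    inp_width, inp_height = 1088, 608  # network input size

    c = np.array([width / 2., height / 2.], dtype=np.float32)  # -> [960., 540.]
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    # 1088/608 * 1080 = 1932.6 > 1920, so s = 1932.6: the scale covers the
    # whole frame at the network input's aspect ratio.
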
Example #6
    def update(self, im_blob, img0):
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {'c': c, 's': s,
                'out_height': inp_height // self.opt.down_ratio,
                'out_width': inp_width // self.opt.down_ratio}

        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            output = self.model(im_blob)[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            id_feature = output['id']
            id_feature = F.normalize(id_feature, dim=1)

            reg = output['reg'] if self.opt.reg_offset else None
            dets, inds = mot_decode(hm, wh, reg=reg, ltrb=self.opt.ltrb, K=self.opt.K)
            id_feature = _tranpose_and_gather_feat(id_feature, inds)
            id_feature = id_feature.squeeze(0)
            id_feature = id_feature.cpu().numpy()

        dets = self.post_process(dets, meta)
        dets = self.merge_outputs([dets])[1]

        remain_inds = dets[:, 4] > self.opt.conf_thres
        dets = dets[remain_inds]
        id_feature = id_feature[remain_inds]

        # vis
        '''
        for i in range(0, dets.shape[0]):
            bbox = dets[i][0:4]
            cv2.rectangle(img0, (bbox[0], bbox[1]),
                          (bbox[2], bbox[3]),
                          (0, 255, 0), 2)
        cv2.imshow('dets', img0)
        cv2.waitKey(0)
        id0 = id0-1
        '''

        if len(dets) > 0:
            '''Detections'''
            detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for
                          (tlbrs, f) in zip(dets[:, :5], id_feature)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        ''' Step 2: First association, with embedding'''
        ## Join the tracked and lost tracks into one pool
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        # Predict the current location with KF
        #for strack in strack_pool:
            #strack.predict()

        ## Jointly predict the new mean and covariance of every track in the pool with the Kalman filter
        STrack.multi_predict(strack_pool)

        # Get the cost matrix between tracks and detections
        dists = matching.embedding_distance(strack_pool, detections)
        #dists = matching.iou_distance(strack_pool, detections)

        # If a track's assigned detection is too far from the Kalman filter prediction, assign infinite cost
        # (i.e. update the cost matrix with the Kalman filter)
        dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
        # Find the optimal assignment from the cost matrix;
        # u_track and u_detection are the unmatched tracks and detections respectively
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.4)
        #Update currently tracked tracks with matches found
        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        ''' Step 3: Second association, with IOU'''
        detections = [detections[i] for i in u_detection]
        # Get the previously tracked tracks that were not matched in the first association
        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
        #Get cost matrix
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.6) #Default 0.5
        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)
        #For all of the unmatched tracks, mark them as lost
        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        # For unconfirmed tracks (tracks with only one beginning frame), try to pair them with the remaining detections
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
        #Add the matched ones
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_starcks.append(unconfirmed[itracked])
        #For the ones that couldn't be matched, remove them
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        """ Step 4: Init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks.append(track)
        """ Step 5: Update state"""
        for track in self.lost_stracks:
            # If disappeared for more than max_time_lost frames, remove
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
        self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
        # get scores of lost tracks
        output_stracks = [track for track in self.tracked_stracks if track.is_activated]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

        return output_stracks
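
Note: the tracker bookkeeping above relies on joint_stracks and sub_stracks to merge and subtract track lists by track_id. A sketch matching their usual FairMOT definitions, assuming each track exposes a track_id attribute:

    def joint_stracks(tlista, tlistb):
        # Union of two track lists, de-duplicated by track_id (first list wins).
        exists, res = {}, []
        for t in tlista:
            exists[t.track_id] = 1
            res.append(t)
        for t in tlistb:
            if not exists.get(t.track_id, 0):
                exists[t.track_id] = 1
                res.append(t)
        return res

    def sub_stracks(tlista, tlistb):
        # Tracks of tlista whose track_id does not appear in tlistb.
        stracks = {t.track_id: t for t in tlista}
        for t in tlistb:
            stracks.pop(t.track_id, None)
        return list(stracks.values())
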
Example #7
def eval_seq(opt,
             dataloader,
             data_type,
             result_filename,
             save_dir=None,
             show_image=True,
             frame_rate=30):
    if save_dir:
        mkdir_if_missing(save_dir)
    if opt.gpus[0] >= 0:
        opt.device = 'gpu'
    else:
        opt.device = 'cpu'
    paddle.set_device(opt.device)
    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    model = load_model(model, opt.load_model)
    # model = torch.nn.DataParallel(model)
    # model = model.to(opt.device)
    model.eval()
    timer = Timer()
    results = []
    frame_id = 0
    for path, img, img0 in dataloader:
        if frame_id % 20 == 0:
            logger.info('Processing frame {} ({:.2f} fps)'.format(
                frame_id, 1. / max(1e-5, timer.average_time)))
        # run detection
        timer.tic()
        # blob = torch.from_numpy(img).cuda().unsqueeze(0)
        blob = paddle.to_tensor(img).unsqueeze(0)
        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = blob.shape[2]
        inp_width = blob.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // opt.down_ratio,
            'out_width': inp_width // opt.down_ratio
        }
        # with torch.no_grad():
        with paddle.no_grad():  # paddle.no_grad() is the Paddle equivalent of torch.no_grad(); clear_grad() is not a context manager
            output = model(blob)[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            reg = output['reg'] if opt.reg_offset else None
            dets, inds = mot_decode(hm, wh, reg=reg, ltrb=opt.ltrb, K=opt.K)

        dets = post_process(opt, dets, meta)
        dets = merge_outputs(opt, [dets])[1]

        dets = dets[dets[:, 4] > 0.1]
        dets[:, :4] = tlbr2tlwh(dets[:, :4])

        tlwhs = []
        scores = []
        for *tlwh, conf in dets:
            tlwhs.append(tlwh)
            scores.append(conf)
        timer.toc()
        # save results
        results.append((frame_id + 1, tlwhs, scores))
        frame_id += 1
    # save results
    write_results_score(result_filename, results)
    #write_results_score_hie(result_filename, results, data_type)
    return frame_id, timer.average_time, timer.calls
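
Note: Example #7 converts boxes with tlbr2tlwh before writing results. A plausible vectorized implementation, assuming an (N, 4) array of (x1, y1, x2, y2) boxes:

    import numpy as np

    def tlbr2tlwh(tlbr):
        # (x1, y1, x2, y2) -> (x, y, w, h)
        ret = np.asarray(tlbr).copy()
        ret[:, 2:] -= ret[:, :2]
        return ret
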
Example #8
    width = origin_shape[1]
    height = origin_shape[0]
    inp_height = 608
    inp_width = 1088
    c = np.array([width / 2., height / 2.], dtype=np.float32)
    s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
    meta = {'c': c, 's': s,
            'out_height': inp_height // opt.down_ratio,
            'out_width': inp_width // opt.down_ratio}
    hm = output['hm']
    hm_sig = output['hm'].sigmoid_()
    hm_sig_soft_nms = soft_nms(hm_sig.detach().cpu(), 3, 3, 21, 21, thresh=THRESH)
    wh = output['wh']
    reg = output['reg'] if opt.reg_offset else None
    opt.K = 200
    detections, inds = mot_decode(hm_sig, wh, reg=reg, cat_spec_wh=opt.cat_spec_wh, thresh=THRESH, K=opt.K)
    dets = post_process(opt, detections, meta)
    dets = merge_outputs(opt, [dets])[1]
    remain_inds = dets[:, 4] > THRESH
    dets = dets[remain_inds]
    remain_inds = (dets[:, 2] - dets[:, 0]) * (dets[:, 3] - dets[:, 1]) > opt.min_box_area
    dets = dets[remain_inds]
    rects = []
    cmap = get_cmap(dets.shape[0])
    for i, det in enumerate(dets):
        col = cmap(i)
        rect = plt.Rectangle((det[0], det[1]), det[2]-det[0], det[3]-det[1], fill=False, edgecolor=col, linewidth=1)
        ax12.add_patch(rect)

    # hm_nms = (hm_nms - hm_nms.mean())/hm_nms.std()
    im11 = ax11.imshow(hm.detach().cpu().squeeze())
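
Note: get_cmap in Example #8 is not shown; a common definition that satisfies the usage above (one distinct color per detection index) is the matplotlib wrapper below. Treat the name and signature as assumptions:

    import matplotlib.pyplot as plt

    def get_cmap(n, name='hsv'):
        # Returns a callable mapping 0..n-1 to n distinct RGBA colors.
        return plt.cm.get_cmap(name, n)
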
Example #9
def test_det(
        opt,
        batch_size=12,
        img_size=(1088, 608),
        iou_thres=0.5,
        print_interval=40,
):
    data_cfg = opt.data_cfg
    f = open(data_cfg)
    data_cfg_dict = json.load(f)
    f.close()
    nC = 1
    test_path = data_cfg_dict['test']
    dataset_root = data_cfg_dict['root']
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')
    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    model = load_model(model, opt.load_model)
    #model = torch.nn.DataParallel(model)
    model = model.to(opt.device)
    model.eval()

    # Get dataloader
    transforms = T.Compose([T.ToTensor()])
    dataset = DetDataset(dataset_root,
                         test_path,
                         img_size,
                         augment=False,
                         transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=8,
                                             drop_last=False,
                                             collate_fn=collate_fn)
    mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \
        [], [], [], [], [], [], [], [], []
    AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
    for batch_i, (imgs, targets, paths, shapes,
                  targets_len) in enumerate(dataloader):
        t = time.time()
        #seen += batch_size

        output = model(imgs.cuda())[-1]
        origin_shape = shapes[0]
        width = origin_shape[1]
        height = origin_shape[0]
        inp_height = img_size[1]
        inp_width = img_size[0]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // opt.down_ratio,
            'out_width': inp_width // opt.down_ratio
        }
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = output['reg'] if opt.reg_offset else None
        opt.K = 200
        detections, inds = mot_decode(hm,
                                      wh,
                                      reg=reg,
                                      cat_spec_wh=opt.cat_spec_wh,
                                      K=opt.K)
        # Compute average precision for each sample
        targets = [targets[i][:int(l)] for i, l in enumerate(targets_len)]
        for si, labels in enumerate(targets):
            seen += 1
            #path = paths[si]
            #img0 = cv2.imread(path)
            dets = detections[si]
            dets = dets.unsqueeze(0)
            dets = post_process(opt, dets, meta)
            dets = merge_outputs(opt, [dets])[1]

            #remain_inds = dets[:, 4] > opt.det_thres
            #dets = dets[remain_inds]
            if dets is None:
                # If there are labels but no detections mark as zero AP
                if labels.size(0) != 0:
                    mAPs.append(0), mR.append(0), mP.append(0)
                continue

            # If no labels, record zero AP/R/P for this image
            correct = []
            if labels.size(0) == 0:
                # correct.extend([0 for _ in range(len(detections))])
                mAPs.append(0), mR.append(0), mP.append(0)
                continue
            else:
                target_cls = labels[:, 0]

                # Extract target boxes as (x1, y1, x2, y2)
                target_boxes = xywh2xyxy(labels[:, 2:6])
                target_boxes[:, 0] *= width
                target_boxes[:, 2] *= width
                target_boxes[:, 1] *= height
                target_boxes[:, 3] *= height
                '''
                path = paths[si]
                img0 = cv2.imread(path)
                img1 = cv2.imread(path)
                for t in range(len(target_boxes)):
                    x1 = target_boxes[t, 0]
                    y1 = target_boxes[t, 1]
                    x2 = target_boxes[t, 2]
                    y2 = target_boxes[t, 3]
                    cv2.rectangle(img0, (x1, y1), (x2, y2), (0, 255, 0), 4)
                cv2.imwrite('gt.jpg', img0)
                for t in range(len(dets)):
                    x1 = dets[t, 0]
                    y1 = dets[t, 1]
                    x2 = dets[t, 2]
                    y2 = dets[t, 3]
                    cv2.rectangle(img1, (x1, y1), (x2, y2), (0, 255, 0), 4)
                cv2.imwrite('pred.jpg', img1)
                abc = ace
                '''

                detected = []
                for *pred_bbox, conf in dets:
                    obj_pred = 0
                    pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                    # Compute iou with target boxes
                    iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0]
                    # Extract index of largest overlap
                    best_i = np.argmax(iou)
                    # If overlap exceeds threshold and classification is correct mark as correct
                    if iou[best_i] > iou_thres and obj_pred == labels[
                            best_i, 0] and best_i not in detected:
                        correct.append(1)
                        detected.append(best_i)
                    else:
                        correct.append(0)

            # Compute Average Precision (AP) per class
            AP, AP_class, R, P = ap_per_class(
                tp=correct,
                conf=dets[:, 4],
                pred_cls=np.zeros_like(dets[:, 4]),  # detections[:, 6]
                target_cls=target_cls)

            # Accumulate AP per class
            AP_accum_count += np.bincount(AP_class, minlength=nC)
            AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)

            # Compute mean AP across all classes in this image, and append to image list
            mAPs.append(AP.mean())
            mR.append(R.mean())
            mP.append(P.mean())

            # Means of all images
            mean_mAP = np.sum(mAPs) / (AP_accum_count + 1E-16)
            mean_R = np.sum(mR) / (AP_accum_count + 1E-16)
            mean_P = np.sum(mP) / (AP_accum_count + 1E-16)

        if batch_i % print_interval == 0:
            # Print image mAP and running mean mAP
            print(('%11s%11s' + '%11.3g' * 4 + 's') %
                  (seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP,
                   time.time() - t))
    # Print mAP per class
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))

    print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16)))

    # Return mAP
    return mean_mAP, mean_R, mean_P
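
Note: Example #9 scales ground-truth boxes with xywh2xyxy. A sketch of this standard helper, converting (center x, center y, w, h) to corner form for an (N, 4) tensor or array:

    import numpy as np
    import torch

    def xywh2xyxy(x):
        # (cx, cy, w, h) -> (x1, y1, x2, y2)
        y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
        y[:, 0] = x[:, 0] - x[:, 2] / 2
        y[:, 1] = x[:, 1] - x[:, 3] / 2
        y[:, 2] = x[:, 0] + x[:, 2] / 2
        y[:, 3] = x[:, 1] + x[:, 3] / 2
        return y
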
Example #10
    def update(self, im_blob, img0):        # process the detection boxes in the current frame
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []                 # tracks re-found between the previous and the current frame
        lost_stracks = []                   # tracks lost between the previous and the current frame
        removed_stracks = []                # tracks to be removed between the previous and the current frame

        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]

        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {'c': c, 's': s,
                'out_height': inp_height // self.opt.down_ratio,
                'out_width': inp_width // self.opt.down_ratio}

        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            output = self.model(im_blob)[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            id_feature = output['id']
            id_feature = F.normalize(id_feature, dim=1)                 # torch.Size([1, 512, 152, 272])
            reg = output['reg'] if self.opt.reg_offset else None

            dets, inds = mot_decode(hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)    # boxes as top-left/bottom-right coordinates plus score and class; inds indexes the flattened image
            # inds holds the K (here 128) highest-confidence positions in the flattened heatmap, i.e. the maximum number of output targets
            id_feature = _tranpose_and_gather_feat(id_feature, inds)        # id_feature torch.Size([1, 512, 152, 272]), inds torch.Size([1, 128])
            id_feature = id_feature.squeeze(0)                              # torch.Size([128, 512]) after the squeeze
            id_feature = id_feature.cpu().numpy()

        dets = self.post_process(dets, meta)                                # map the feature-map predictions back to the original image: coordinates and confidences for the 128 boxes
        dets = self.merge_outputs([dets])[1]                    # (128, 5)

        remain_inds = dets[:, 4] > self.opt.conf_thres                      # keep only boxes whose confidence exceeds the threshold
        dets = dets[remain_inds]                    # e.g. (2, 5): two boxes survive as the final result
        id_feature = id_feature[remain_inds]        # e.g. (2, 512): the corresponding features

        # vis
        '''
        for i in range(0, dets.shape[0]):
            bbox = dets[i][0:4]
            cv2.rectangle(img0, (bbox[0], bbox[1]),
                          (bbox[2], bbox[3]),
                          (0, 255, 0), 2)
        cv2.imshow('dets', img0)
        cv2.waitKey(0)
        id0 = id0-1
        '''

        if len(dets) > 0:
            '''Detections'''
            detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for               # calls the class method directly; is there anything special about that?
                          (tlbrs, f) in zip(dets[:, :5], id_feature)]                               # create STracks, which serve as the tracklets here
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:                      # split tracks existing before the current frame into unconfirmed and tracked_stracks
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        ''' Step 2: First association, with embedding'''
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)     # take the union
        # Predict the current location with KF
        #for strack in strack_pool:
            #strack.predict()
        STrack.multi_predict(strack_pool)                               # use the Kalman filter to predict each target's state in the next frame (calls every track's predict)

        dists = matching.embedding_distance(strack_pool, detections)            # match via embeddings: distance between each detection and the smooth feature of each existing track
        #dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
        dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)    # for every track, compute the gated distance to every detection in the current frame
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)       # optimal (Hungarian-style) assignment on the gated distances, yielding the three result sets

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:               # was in the tracked state in the previous frame
                track.update(detections[idet], self.frame_id)   # update the track state, including the KF mean vector and covariance matrix
                activated_starcks.append(track)
            else:                                               # was in the new state in the previous frame
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        ''' Step 3: Second association, with IOU'''             # second pass: try to pair the unmatched detections with the unmatched tracks
        detections = [detections[i] for i in u_detection]
        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:      # mark the track lost if it is not already
                track.mark_lost()
                lost_stracks.append(track)

        '''Third association: deal with unconfirmed tracks, usually tracks with only one beginning frame (a track followed for only one frame counts as unconfirmed)'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_starcks.append(unconfirmed[itracked])

        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        """ Step 4: Init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:                           # compare against the tracking confidence threshold
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks.append(track)

        """ Step 5: Update state"""
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()                                    # remove tracks that meet the condition
                removed_stracks.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
        self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
        # get scores of lost tracks
        output_stracks = [track for track in self.tracked_stracks if track.is_activated]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

        return output_stracks
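
Note: the first association in Examples #6 and #10 uses matching.embedding_distance. A sketch of how this cost matrix is typically computed in FairMOT-style trackers, assuming tracks expose smooth_feat and detections expose curr_feat:

    import numpy as np
    from scipy.spatial.distance import cdist

    def embedding_distance(tracks, detections, metric='cosine'):
        # Appearance cost: cosine distance between each track's smoothed
        # Re-ID feature and each detection's current feature.
        cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32)
        if cost_matrix.size == 0:
            return cost_matrix
        det_features = np.asarray([d.curr_feat for d in detections], dtype=np.float32)
        track_features = np.asarray([t.smooth_feat for t in tracks], dtype=np.float32)
        return np.maximum(0.0, cdist(track_features, det_features, metric))
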
Example #11
    def update(self, im_blob, img0):
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]
        c = np.array([width / 2.0, height / 2.0], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            "c": c,
            "s": s,
            "out_height": inp_height // self.opt.down_ratio,
            "out_width": inp_width // self.opt.down_ratio,
        }
        """ Step 1: Network forward, get detections & embeddings"""
        with torch.no_grad():
            output = self.model(im_blob)[-1]
            hm = output["hm"].sigmoid_()
            wh = output["wh"]
            id_feature = output["id"]
            id_feature = F.normalize(id_feature, dim=1)

            reg = output["reg"] if self.opt.reg_offset else None
            dets, inds = mot_decode(hm,
                                    wh,
                                    reg=reg,
                                    cat_spec_wh=self.opt.cat_spec_wh,
                                    K=self.opt.K)
            id_feature = _tranpose_and_gather_feat(id_feature, inds)
            id_feature = id_feature.squeeze(0)
            id_feature = id_feature.cpu().numpy()

        dets = self.post_process(dets, meta)
        dets = self.merge_outputs([dets])[1]

        remain_inds = dets[:, 4] > self.opt.conf_thres
        dets = dets[remain_inds]
        id_feature = id_feature[remain_inds]

        # vis
        """
        for i in range(0, dets.shape[0]):
            bbox = dets[i][0:4]
            cv2.rectangle(img0, (bbox[0], bbox[1]),
                          (bbox[2], bbox[3]),
                          (0, 255, 0), 2)
        cv2.imshow('dets', img0)
        cv2.waitKey(0)
        id0 = id0-1
        """

        if len(dets) > 0:
            """Detections"""
            detections = [
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
                for (tlbrs, f) in zip(dets[:, :5], id_feature)
            ]
        else:
            detections = []
        """ Add newly detected tracklets to tracked_stracks"""
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)
        """ Step 2: First association, with embedding"""
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        # Predict the current location with KF
        # for strack in strack_pool:
        # strack.predict()
        STrack.multi_predict(strack_pool)
        dists = matching.embedding_distance(strack_pool, detections)
        # dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
        dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                     detections)
        matches, u_track, u_detection = matching.linear_assignment(dists,
                                                                   thresh=0.7)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)
        """ Step 3: Second association, with IOU"""
        detections = [detections[i] for i in u_detection]
        r_tracked_stracks = [
            strack_pool[i] for i in u_track
            if strack_pool[i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists,
                                                                   thresh=0.5)

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)
        """Deal with unconfirmed tracks, usually tracks with only one beginning frame"""
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_starcks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)
        """ Step 4: Init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks.append(track)
        """ Step 5: Update state"""
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_stracks = [
            t for t in self.tracked_stracks if t.state == TrackState.Tracked
        ]
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             activated_starcks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             refind_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
            self.tracked_stracks, self.lost_stracks)
        # get scores of lost tracks
        output_stracks = [
            track for track in self.tracked_stracks if track.is_activated
        ]

        logger.debug("===========Frame {}==========".format(self.frame_id))
        logger.debug("Activated: {}".format(
            [track.track_id for track in activated_starcks]))
        logger.debug("Refind: {}".format(
            [track.track_id for track in refind_stracks]))
        logger.debug("Lost: {}".format(
            [track.track_id for track in lost_stracks]))
        logger.debug("Removed: {}".format(
            [track.track_id for track in removed_stracks]))

        return output_stracks
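
Note: matching.linear_assignment appears in every association step, and the annotations in Example #13 state that it uses LAPJV rather than Munkres. A sketch built on the lap package under that assumption:

    import lap
    import numpy as np

    def linear_assignment(cost_matrix, thresh):
        # Solve the assignment with LAPJV; pairs costing more than thresh
        # stay unmatched. Returns (matches, unmatched_rows, unmatched_cols).
        if cost_matrix.size == 0:
            return (np.empty((0, 2), dtype=int),
                    tuple(range(cost_matrix.shape[0])),
                    tuple(range(cost_matrix.shape[1])))
        _, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
        matches = np.asarray([[ix, mx] for ix, mx in enumerate(x) if mx >= 0])
        unmatched_a = np.where(x < 0)[0]
        unmatched_b = np.where(y < 0)[0]
        return matches, unmatched_a, unmatched_b
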
Example #12
    def update(self, im_blob, img0):
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio
        }
        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            output = self.model(im_blob)[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            id_feature = output['id']
            id_feature = F.normalize(id_feature, dim=1)

            reg = output['reg'] if self.opt.reg_offset else None
            dets, inds = mot_decode(hm,
                                    wh,
                                    reg=reg,
                                    cat_spec_wh=self.opt.cat_spec_wh,
                                    K=self.opt.K)
            id_feature = _tranpose_and_gather_feat(id_feature, inds)
            id_feature = id_feature.squeeze(0)
            id_feature = id_feature.cpu().numpy()
        '''
        print("==> [multi-tracker.update] dets:", dets)
        print("==> [multi-tracker.update] dets.size 1:", dets.size()) # [1, 128, 6]
        '''

        dets = self.post_process(dets, meta)
        dets = self.merge_outputs([dets])[1]
        '''
        print("==> [multi-tracker.update] len(dets):", len(dets)) # 128
        print("==> [multi-tracker.update] len(dets[0]):", len(dets[0])) # 5
        
        dets: [[     761.85      169.75      779.43      210.57     0.76028]
                [     746.16      167.86      763.81      209.36     0.70138]
                [     520.55      170.32      533.13      198.51     0.44955]
                [     678.15      170.84       687.6      190.35     0.42314]
                [      706.3      172.26         723      207.56     0.41279]
                [     731.59       168.2      742.89      194.59     0.40816]
                [     345.91      188.76      369.22      234.64     0.38459]
                [     434.66      170.01       448.6      199.26     0.37619]
                [     212.57      177.95      231.56      228.84     0.26836]
                [      549.7      168.05      560.64      193.19     0.23459]
                ...
                ]
        print("self.opt.conf_thres:", self.opt.conf_thres) # 0.4
        '''

        remain_inds = dets[:, 4] > self.opt.conf_thres
        dets = dets[remain_inds]
        id_feature = id_feature[remain_inds]
        '''
        print("==> [multi-tracker.update] len(dets):", len(dets)) # 6
        print("==> [multi-tracker.update] len(id_feature):",  len(id_feature)) # 6
        print("==> [multi-tracker.update] id_feature[0]:",  id_feature.size) # 3072 
        3072 = 6 * 512
        embedding dimension: 512
        '''

        # vis
        '''
        for i in range(0, dets.shape[0]):
            bbox = dets[i][0:4]
            cv2.rectangle(img0, (bbox[0], bbox[1]),
                          (bbox[2], bbox[3]),
                          (0, 255, 0), 2)
        cv2.imshow('dets', img0)
        cv2.waitKey(0)
        id0 = id0-1
        '''
        '''
        print("==> [multi-tracker.update] dets[:, :5]:", dets[:, :5])
        print("==> [multi-tracker.update] id_feature:", id_feature)
        print("==> [multi-tracker.update] len(id_feature)", len(id_feature))
        
        ==> [multi-tracker.update] dets[:, :5]: [[     761.85      169.75      779.43      210.57     0.76028]
        [     746.16      167.86      763.81      209.36     0.70138]
        [     520.55      170.32      533.13      198.51     0.44955]
        [     678.15      170.84       687.6      190.35     0.42314]
        [      706.3      172.26         723      207.56     0.41279]
        [     731.59       168.2      742.89      194.59     0.40816]]
        ==> [multi-tracker.update] id_feature: [[   0.047802    0.033811   0.0041801 ...   -0.018475   -0.014819    0.010965]
        [   0.090996    0.015452    0.020774 ...   -0.017812   -0.013593    0.016779]
        [  -0.023971    0.084845     0.10603 ...   -0.063187    0.063411   -0.012202]
        [   0.050601    0.063119    0.070075 ...   -0.063469   0.0026391    0.051197]
        [   0.090193    0.036841    0.045577 ...   -0.024319   -0.075271    0.017419]
        [   0.014926    0.089218     0.07839 ...    -0.09095   0.0066383    0.076563]]
        ==> [multi-tracker.update] len(id_feature) 6
        '''

        if len(dets) > 0:
            '''Detections'''
            # put dets and id_feature to STrack
            # init new STrack
            detections = [
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
                for (tlbrs, f) in zip(dets[:, :5], id_feature)
            ]
        else:
            detections = []
        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)
        ''' Step 2: First association, with embedding'''
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        # Predict the current location with KF
        #for strack in strack_pool:
        #strack.predict()
        STrack.multi_predict(strack_pool)
        dists = matching.embedding_distance(strack_pool, detections)
        #dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
        dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                     detections)
        matches, u_track, u_detection = matching.linear_assignment(dists,
                                                                   thresh=0.7)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)
        ''' Step 3: Second association, with IOU'''
        detections = [detections[i] for i in u_detection]
        r_tracked_stracks = [
            strack_pool[i] for i in u_track
            if strack_pool[i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists,
                                                                   thresh=0.5)

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)
        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_starcks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)
        """ Step 4: Init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks.append(track)
        """ Step 5: Update state"""
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)

        # print('Remained match {} s'.format(t4-t3))

        self.tracked_stracks = [
            t for t in self.tracked_stracks if t.state == TrackState.Tracked
        ]
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             activated_starcks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             refind_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
            self.tracked_stracks, self.lost_stracks)
        # get scores of lost tracks
        output_stracks = [
            track for track in self.tracked_stracks if track.is_activated
        ]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format(
            [track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format(
            [track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format(
            [track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format(
            [track.track_id for track in removed_stracks]))

        # print("==> [multi-tracker.update] len(output_stracks):",  len(output_stracks))
        return output_stracks
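
Note: remove_duplicate_stracks reconciles the tracked and lost lists at the end of every update. A sketch of its usual FairMOT definition, reusing matching.iou_distance from the examples and keeping the longer-lived track of any heavily overlapping pair:

    import numpy as np

    def remove_duplicate_stracks(stracksa, stracksb):
        pdist = matching.iou_distance(stracksa, stracksb)
        pairs = np.where(pdist < 0.15)  # IoU distance < 0.15 means near-duplicates
        dupa, dupb = [], []
        for p, q in zip(*pairs):
            timep = stracksa[p].frame_id - stracksa[p].start_frame
            timeq = stracksb[q].frame_id - stracksb[q].start_frame
            if timep > timeq:
                dupb.append(q)  # a lived longer: drop the duplicate from b
            else:
                dupa.append(p)
        resa = [t for i, t in enumerate(stracksa) if i not in dupa]
        resb = [t for i, t in enumerate(stracksb) if i not in dupb]
        return resa, resb
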
Example #13
    def update(self, im_blob, img0):
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio
        }
        ''' Step 1: Network forward, get detections & embeddings
        First obtain each head's output from the backbone, then post-process and filter by confidence (NMS), and add the new targets to the trajectories.
        '''
        with torch.no_grad():
            output = self.model(im_blob)[-1]  # raw outputs of the detection network
            hm = output['hm'].sigmoid_()  # heatmap output of the detection network
            wh = output['wh']  # predicted target width and height
            id_feature = output['id']  # Re-ID features output by the detection network
            id_feature = F.normalize(id_feature, dim=1)

            reg = output['reg'] if self.opt.reg_offset else None  # predicted target-center offset
            # detection results (bbox, score, class, ID) plus the valid, score-sorted indices into the feature map
            dets, inds = mot_decode(hm,
                                    wh,
                                    reg=reg,
                                    cat_spec_wh=self.opt.cat_spec_wh,
                                    K=self.opt.K)
            # gather the valid Re-ID features by index
            id_feature = _tranpose_and_gather_feat(id_feature, inds)
            # remove the size-1 dimensions
            id_feature = id_feature.squeeze(0)
            id_feature = id_feature.cpu().numpy()

        # post-process the detection results
        dets = self.post_process(dets, meta)
        dets = self.merge_outputs([dets])[1]

        # filter by detection confidence threshold to keep the valid targets and their Re-ID features
        remain_inds = dets[:, 4] > self.opt.conf_thres
        dets = dets[remain_inds]
        id_feature = id_feature[remain_inds]

        # vis
        '''
        for i in range(0, dets.shape[0]):
            bbox = dets[i][0:4]
            cv2.rectangle(img0, (bbox[0], bbox[1]),
                          (bbox[2], bbox[3]),
                          (0, 255, 0), 2)
        cv2.imshow('dets', img0)
        cv2.waitKey(0)
        id0 = id0-1
        '''

        if len(dets) > 0:
            '''Detections: convert each detected target into a tracking object and bind the detection results and other attributes'''
            detections = [
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
                for (tlbrs, f) in zip(dets[:, :5], id_feature)
            ]
        else:
            detections = []
        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)
        ''' Step 2: First association, with embedding
                    1. Merge [activated_stracks, lost_stracks] into strack_pool
                    2. Compute the appearance cost matrix between detections and strack_pool from their feats, i.e. cosine distance on the features
                    3. Use the Kalman filter to predict the new mean and covariance of strack_pool
                    4. Compute the distance cost matrix between strack_pool and the detections, setting appearance costs above the distance threshold to inf
                    5. Match with the Hungarian method (implemented here not with Munkres but with LAPJV, another efficient optimal assignment method)
                        a. Successful matches:
                            strack_pool entries with track_state == tracked: update smooth_feat and the Kalman mean/covariance, add to activated_stracks
                            strack_pool entries with track_state != tracked: update smooth_feat and the Kalman mean/covariance, add to refind_stracks
                        b. Unsuccessful matches:
                            yield the leftover detections and r_tracked_stracks
        '''
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        STrack.multi_predict(strack_pool)  # Kalman prediction
        dists = matching.embedding_distance(strack_pool, detections)  # cosine distance between the newly detected targets and strack_pool
        #dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
        dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)  # Kalman-based distance cost between strack_pool and detections; appearance costs above the distance threshold are set to inf (motion gating)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)  # LAPJV matching of track boxes to detection boxes; matches holds matched index pairs, u_track the unmatched tracker indices, u_detection the unmatched detection indices

        for itracked, idet in matches:  # matches is e.g. 63x2: 63 matched pairs; column 0 holds tracked-tracker indices, column 1 detection indices
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)  # matched tracker and detection: update features and Kalman state
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)  # if it was among the lost tracks, reactivate it
                refind_stracks.append(track)
        ''' Step 3: Second association, with IOU
                    IoU-match the detections and r_tracked_stracks left over from the cosine-distance pass
                    1. Compute the IoU cost matrix between detections and r_tracked_stracks
                    2. Run Hungarian matching on the IoU cost (again via LAPJV rather than Munkres)
                        a. Successful matches:
                            r_tracked_stracks entries with track_state == tracked: update smooth_feat and the Kalman mean/covariance, add to activated_stracks
                            r_tracked_stracks entries with track_state != tracked: update smooth_feat and the Kalman mean/covariance, add to refind_stracks
                        b. Unsuccessful matches:
                            r_tracked_stracks entries whose track_state is not lost are marked lost
                            the detections are carried over to the next matching step
        '''
        detections = [detections[i] for i in u_detection]  # u_detection holds the indices of detections unmatched in the previous step
        r_tracked_stracks = [
            strack_pool[i] for i in u_track
            if strack_pool[i].state == TrackState.Tracked
        ]  # trackers unmatched in the previous step that are still in the tracked state
        dists = matching.iou_distance(r_tracked_stracks, detections)  # compute the IoU cost matrix
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)  # LAPJV matching on the IoU cost

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(
                    track)  # trackers that failed the IoU match against r_tracked_stracks are marked lost
        ''' Deal with unconfirmed tracks, usually tracks with only one beginning frame
            IoU-match the detections left over from the previous step against unconfirmed_stracks:
            1. Build the IoU cost matrix.
            2. Solve the assignment (again LAPJV instead of Munkres).
                a. Matched:
                    update the unconfirmed_stracks (smooth_feat plus Kalman mean/covariance)
                    and count them into activated_stracks.
                b. Unmatched:
                    unconfirmed_stracks go straight into removed_stracks;
                    the unmatched detections are carried over to the next step.
        '''
        detections = [detections[i] for i in u_detection
                      ]  # match the detections unmatched by cosine/IoU against the unconfirmed trackers
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(
                detections[idet], self.frame_id
            )  # update the unconfirmed track (smooth_feat plus Kalman mean/covariance) and count it into activated_stracks
            activated_starcks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(
                track)  # unconfirmed tracks go straight into removed_stracks
        """ Step 4: Init new stracks 
            上一步遗留的detections,初始化成新的tracker,计入activated_stracks
        """
        for inew in u_detection:  # 对cosine/iou/uncofirmed_tracker都未匹配的detection重新初始化成一个新的tracker
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter,
                           self.frame_id)  # activate the track; activated is True on the first frame only, False otherwise
            activated_starcks.append(track)
        """ Step 5: Update state"""
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:  # after max_time_lost missing frames, count into removed_stracks and delete
                track.mark_removed()
                removed_stracks.append(track)

        # print('Remaining match {} s'.format(t4-t3))

        self.tracked_stracks = [
            t for t in self.tracked_stracks if t.state == TrackState.Tracked
        ]  # keep only the trackers still in the tracked state
        self.tracked_stracks = joint_stracks(
            self.tracked_stracks,
            activated_starcks)  # add the newly activated detections to self.tracked_stracks
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             refind_stracks)  # plus the re-matched trackers
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
            self.tracked_stracks, self.lost_stracks)
        # get scores of lost tracks
        output_stracks = [
            track for track in self.tracked_stracks if track.is_activated
        ]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format(
            [track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format(
            [track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format(
            [track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format(
            [track.track_id for track in removed_stracks]))

        return output_stracks
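The comments above describe the first association stage: a cosine-distance appearance cost between the track embeddings (smooth_feat) and the detection embeddings (curr_feat). As a minimal sketch of what matching.embedding_distance computes — illustrative only, under the FairMOT/JDE naming conventions, not the library implementation:

import numpy as np
from scipy.spatial.distance import cdist

def embedding_distance_sketch(tracks, detections):
    """Cosine cost matrix: rows are tracks, columns are detections."""
    cost = np.zeros((len(tracks), len(detections)), dtype=np.float32)
    if cost.size == 0:
        return cost
    det_feats = np.asarray([d.curr_feat for d in detections], dtype=np.float32)
    trk_feats = np.asarray([t.smooth_feat for t in tracks], dtype=np.float32)
    # clamp at 0 so numerical noise cannot produce negative costs
    cost = np.maximum(0.0, cdist(trk_feats, det_feats, metric='cosine'))
    return cost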
Beispiel #14
0
    def update_sep(self, im_blob, img0, conf_thres=None):
        self.frame_id += 1
        activated_stracks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio
        }
        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            output = self.model(im_blob)[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            id_feature = output['id']
            id_feature = F.normalize(id_feature, dim=1)

            reg = output['reg'] if self.opt.reg_offset else None
            dets, inds = mot_decode(hm,
                                    wh,
                                    reg=reg,
                                    cat_spec_wh=self.opt.cat_spec_wh,
                                    K=self.opt.K)
            id_feature = _tranpose_and_gather_feat(id_feature, inds)
            id_feature = id_feature.squeeze(0)
            id_feature = id_feature.cpu().numpy()

        dets = self.post_process(dets, meta)
        dets_classes = self.merge_outputs([dets])  # per-class dict; unlike the single-class variants, no [1] indexing here
        # dets = np.concatenate(
        #     [dets[i] for i in range(1, self.opt.num_classes+1)], axis=0)
        output_stracks = []
        id_feature_orig = id_feature.copy()
        start_idx = 0
        for i_class in range(1, 9):  # hard-coded to 8 classes; originally range(1, self.opt.num_classes + 1)
            activated_stracks = []
            refind_stracks = []
            lost_stracks = []
            removed_stracks = []
            dets = dets_classes[i_class]
            if conf_thres is None:
                remain_inds = dets[:, 4] > self.opt.conf_thres
            else:
                remain_inds = dets[:, 4] > conf_thres[i_class - 1]
            dets = dets[remain_inds]
            # id_feature = id_feature_orig[start_idx:start_idx+len(remain_inds)][remain_inds]
            id_feature = id_feature_orig[start_idx:start_idx +
                                         len(dets_classes[i_class]
                                             )][remain_inds]
            # start_idx = start_idx+len(remain_inds)
            start_idx = start_idx + len(dets_classes[i_class])
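            # Note: merge_outputs lays the per-class detections out back-to-back
            # in the order mot_decode produced them, so the embeddings for
            # class i_class are recovered from id_feature_orig with a running
            # offset (start_idx) before the confidence mask is applied.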

            # vis
            '''
            for i in range(0, dets.shape[0]):
                bbox = dets[i][0:4]
                cv2.rectangle(img0, (bbox[0], bbox[1]),
                            (bbox[2], bbox[3]),
                            (0, 255, 0), 2)
            cv2.imshow('dets', img0)
            cv2.waitKey(0)
            id0 = id0-1
            '''

            if len(dets) > 0:
                '''Detections'''
                detections = [
                    STrack(STrack.tlbr_to_tlwh(tlbrs[:4]),
                           tlbrs[4],
                           f,
                           30,
                           class_id=i_class - 1)
                    for (tlbrs, f) in zip(dets[:, :5], id_feature)
                ]
            else:
                detections = []
            ''' Add newly detected tracklets to tracked_stracks'''
            unconfirmed = []
            tracked_stracks = []  # type: list[STrack]
            for track in self.tracked_stracks_sp[i_class]:
                if not track.is_activated:
                    unconfirmed.append(track)
                else:
                    tracked_stracks.append(track)
            ''' Step 2: First association, with embedding'''
            strack_pool = joint_stracks(tracked_stracks,
                                        self.lost_stracks_sp[i_class])
            # Predict the current location with KF
            #for strack in strack_pool:
            #strack.predict()
            STrack.multi_predict(strack_pool)
            dists = matching.embedding_distance(strack_pool, detections)
            #dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
            dists = matching.fuse_motion(self.kalman_filter, dists,
                                         strack_pool, detections)
            matches, u_track, u_detection = matching.linear_assignment(
                dists, thresh=0.7)

            for itracked, idet in matches:
                track = strack_pool[itracked]
                det = detections[idet]
                if track.state == TrackState.Tracked:
                    track.update(detections[idet], self.frame_id)
                    activated_stracks.append(track)
                else:
                    track.re_activate(det, self.frame_id, new_id=False)
                    refind_stracks.append(track)
            ''' Step 3: Second association, with IOU'''
            detections = [detections[i] for i in u_detection]
            r_tracked_stracks = [
                strack_pool[i] for i in u_track
                if strack_pool[i].state == TrackState.Tracked
            ]
            dists = matching.iou_distance(r_tracked_stracks, detections)
            matches, u_track, u_detection = matching.linear_assignment(
                dists, thresh=0.5)

            for itracked, idet in matches:
                track = r_tracked_stracks[itracked]
                det = detections[idet]
                if track.state == TrackState.Tracked:
                    track.update(det, self.frame_id)
                    activated_stracks.append(track)
                else:
                    track.re_activate(det, self.frame_id, new_id=False)
                    refind_stracks.append(track)

            for it in u_track:
                track = r_tracked_stracks[it]
                if not track.state == TrackState.Lost:
                    track.mark_lost()
                    lost_stracks.append(track)
            '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
            detections = [detections[i] for i in u_detection]
            dists = matching.iou_distance(unconfirmed, detections)
            matches, u_unconfirmed, u_detection = matching.linear_assignment(
                dists, thresh=0.7)
            for itracked, idet in matches:
                unconfirmed[itracked].update(detections[idet], self.frame_id)
                activated_stracks.append(unconfirmed[itracked])
            for it in u_unconfirmed:
                track = unconfirmed[it]
                track.mark_removed()
                removed_stracks.append(track)
            """ Step 4: Init new stracks"""
            for inew in u_detection:
                track = detections[inew]
                if track.score < self.det_thresh:
                    continue
                track.activate(self.kalman_filter, self.frame_id)
                activated_stracks.append(track)
            """ Step 5: Update state"""
            for track in self.lost_stracks_sp[i_class]:
                if self.frame_id - track.end_frame > self.max_time_lost:
                    track.mark_removed()
                    removed_stracks.append(track)

            # print('Remaining match {} s'.format(t4-t3))

            self.tracked_stracks_sp[i_class] = [
                t for t in self.tracked_stracks_sp[i_class]
                if t.state == TrackState.Tracked
            ]
            self.tracked_stracks_sp[i_class] = joint_stracks(
                self.tracked_stracks_sp[i_class], activated_stracks)
            self.tracked_stracks_sp[i_class] = joint_stracks(
                self.tracked_stracks_sp[i_class], refind_stracks)
            self.lost_stracks_sp[i_class] = sub_stracks(
                self.lost_stracks_sp[i_class],
                self.tracked_stracks_sp[i_class])
            self.lost_stracks_sp[i_class].extend(lost_stracks)
            self.lost_stracks_sp[i_class] = sub_stracks(
                self.lost_stracks_sp[i_class],
                self.removed_stracks_sp[i_class])
            self.removed_stracks_sp[i_class].extend(removed_stracks)
            self.tracked_stracks_sp[i_class], self.lost_stracks_sp[
                i_class] = remove_duplicate_stracks(
                    self.tracked_stracks_sp[i_class],
                    self.lost_stracks_sp[i_class])
            # get scores of lost tracks
            # output_stracks = [track for track in self.tracked_stracks if track.is_activated]
            output_stracks.extend([
                track for track in self.tracked_stracks_sp[i_class]
                if track.is_activated
            ])
            # print('iclass {}, nb detected {}, nb output_stracks {}, nb tracked_stracks {}, nb lost_stracks {}, nb removed_stracks {}'\
            #     .format(i_class, len(dets), len(output_stracks), len(self.tracked_stracks_sp[i_class]), len(self.lost_stracks_sp[i_class]), len(self.removed_stracks_sp[i_class])))

        # logger.debug('===========Frame {}=========='.format(self.frame_id))
        # logger.debug('Activated: {}'.format([track.track_id for track in activated_stracks]))
        # logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
        # logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
        # logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

        return output_stracks
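update_sep keeps a separate tracked/lost/removed pool per class (tracked_stracks_sp, lost_stracks_sp, removed_stracks_sp). The constructor is not shown in this example; a hypothetical sketch of how those per-class pools could be initialized, assuming the 8 hard-coded classes from the loop above:

NUM_CLASSES = 8  # matches the hard-coded range(1, 9) above

def make_per_class_pools(num_classes=NUM_CLASSES):
    """One tracked/lost/removed list per 1-based class index."""
    return ({c: [] for c in range(1, num_classes + 1)},
            {c: [] for c in range(1, num_classes + 1)},
            {c: [] for c in range(1, num_classes + 1)})

tracked_stracks_sp, lost_stracks_sp, removed_stracks_sp = make_per_class_pools()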
Beispiel #15
0
    meta = {
        'c': c,
        's': s,
        'out_height': inp_height // down_ratio,
        'out_width': inp_width // down_ratio
    }
    ''' Step 1: Network forward, get detections & embeddings'''
    with torch.no_grad():
        output = model(im_blob)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        id_feature = output['id']
        id_feature = F.normalize(id_feature, dim=1)

        reg = output['reg'] if reg_offset else None
        dets, inds = mot_decode(hm, wh, reg=reg, ltrb=ltrb, K=Kt)
        id_feature = _tranpose_and_gather_feat(id_feature, inds)
        id_feature = id_feature.squeeze(0)
        id_feature = id_feature.cpu().numpy()

    dets = post_process(dets, meta)
    dets = merge_outputs([dets])[1]
    remain_inds = dets[:, 4] > conf_thres
    dets = dets[remain_inds]
    id_feature = id_feature[remain_inds]

    # vis
    person_count += len(dets)
    for i in range(0, dets.shape[0]):
        bbox = dets[i][0:4]
        cv2.rectangle(img0, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                      (0, 255, 0), 2)  # completed from the parallel vis blocks in the other examples
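Every variant builds STrack objects from (x1, y1, x2, y2, score) rows via STrack.tlbr_to_tlwh. A minimal sketch of that conversion, shown for reference and mirroring the usual JDE helper rather than quoting the exact library code:

import numpy as np

def tlbr_to_tlwh_sketch(tlbr):
    """(x1, y1, x2, y2) -> (x1, y1, w, h)."""
    ret = np.asarray(tlbr, dtype=np.float32).copy()
    ret[2:] -= ret[:2]  # width = x2 - x1, height = y2 - y1
    return ret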
Beispiel #16
0
    def update(self, im_blob, img0):
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio
        }
        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            output = self.model(im_blob)[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            id_feature = output['id']
            id_feature = F.normalize(id_feature, dim=1)

            reg = output['reg'] if self.opt.reg_offset else None
            dets, inds = mot_decode(hm,
                                    wh,
                                    reg=reg,
                                    cat_spec_wh=self.opt.cat_spec_wh,
                                    K=self.opt.K)
            id_feature = _tranpose_and_gather_feat(id_feature, inds)
            id_feature = id_feature.squeeze(0)
            id_feature = id_feature.cpu().numpy()

        dets = self.post_process(dets, meta)
        dets = self.merge_outputs([dets])[1]

        remain_inds = dets[:, 4] > self.opt.conf_thres
        dets = dets[remain_inds]
        id_feature = id_feature[remain_inds]

        # vis
        '''
        for i in range(0, dets.shape[0]):
            bbox = dets[i][0:4]
            cv2.rectangle(img0, (bbox[0], bbox[1]),
                          (bbox[2], bbox[3]),
                          (0, 255, 0), 2)
        cv2.imshow('dets', img0)
        cv2.waitKey(0)
        id0 = id0-1
        '''

        if len(dets) > 0:
            '''Detections'''
            detections = [
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
                for (tlbrs, f) in zip(dets[:, :5], id_feature)
            ]
        else:
            detections = []
        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)
        ''' Step 2: First association, with embedding'''
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        dists = matching.embedding_distance(
            strack_pool, detections)  # cosine distance between the new detections and the tracked trackers
        STrack.multi_predict(strack_pool)  # Kalman-filter prediction
        dists = matching.fuse_motion(
            self.kalman_filter, dists, strack_pool,
            detections)  # Kalman-based distance cost between the detections and the track pool
        matches, u_track, u_detection = matching.linear_assignment(
            dists,
            thresh=0.7)  # LAPJV assignment of tracks to detections; u_track holds the unmatched tracker indices

        for itracked, idet in matches:  # matches: 63*2, e.g. 63 matched pairs; column 0 is the tracked-tracker index, column 1 the detection index
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(
                    det, self.frame_id)  # matched pool tracker and detection: update features and the Kalman state
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id,
                                  new_id=False)  # the track was in lost, so re-activate it
                refind_stracks.append(track)
        ''' Step 3: Second association, with IOU''' """ 在余弦距离未匹配的detection和tracker重新用iou进行匹配 """
        detections = [detections[i]
                      for i in u_detection]  # u_detection是未匹配的detection的索引
        r_tracked_stracks = [
            strack_pool[i] for i in u_track
            if strack_pool[i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists,
                                                                   thresh=0.5)

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(
                    det, self.frame_id,
                    new_id=False)  # effectively unreachable: r_tracked_stracks was filtered to TrackState.Tracked above
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(
                    track)  # trackers that failed the IoU match against tracked trackers are marked lost

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection
                      ]  # match the detections unmatched by cosine/IoU against the unconfirmed trackers
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_starcks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)
        """ Step 4: Init new stracks"""
        for inew in u_detection:  # detections unmatched by the cosine, IoU, and unconfirmed steps are re-initialized as new unconfirmed trackers
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter,
                           self.frame_id)  # activate the track; activated is True on the first frame only, False otherwise
            activated_starcks.append(track)
        """ Step 5: Update state"""
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:  # after 15 missing frames
                track.mark_removed()
                removed_stracks.append(track)

        # print('Remaining match {} s'.format(t4-t3))

        self.tracked_stracks = [
            t for t in self.tracked_stracks if t.state == TrackState.Tracked
        ]  # keep only the trackers still in the tracked state
        self.tracked_stracks = joint_stracks(
            self.tracked_stracks,
            activated_starcks)  # add the newly activated detections to self.tracked_stracks
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             refind_stracks)  # plus the re-matched trackers
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
            self.tracked_stracks, self.lost_stracks)
        # get scores of lost tracks
        output_stracks = [
            track for track in self.tracked_stracks if track.is_activated
        ]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format(
            [track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format(
            [track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format(
            [track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format(
            [track.track_id for track in removed_stracks]))

        return output_stracks
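The docstrings above stress that matching.linear_assignment uses LAPJV (typically via the lap package) rather than a Munkres implementation. A hedged stand-in with the same return shape (matched index pairs, unmatched track indices, unmatched detection indices); the cost_limit-based thresholding follows common JDE usage, but treat the details as assumptions:

import lap
import numpy as np

def linear_assignment_sketch(cost_matrix, thresh):
    if cost_matrix.size == 0:
        return (np.empty((0, 2), dtype=int),
                tuple(range(cost_matrix.shape[0])),
                tuple(range(cost_matrix.shape[1])))
    # LAPJV solves the assignment; cost_limit leaves pairs costing more
    # than `thresh` unmatched instead of forcing a bad match.
    _, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
    matches = np.asarray([[ix, mx] for ix, mx in enumerate(x) if mx >= 0])
    unmatched_a = np.where(x < 0)[0]  # rows (tracks) left unassigned
    unmatched_b = np.where(y < 0)[0]  # columns (detections) left unassigned
    return matches, unmatched_a, unmatched_b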
Beispiel #17
0
    def update(self, im_blob, img0):
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // self.opt.down_ratio,
            'out_width': inp_width // self.opt.down_ratio
        }
        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            if hasattr(self.model, 'relation'):
                outputs, stuff = self.model(im_blob)
                det_heads = set(['wh', 'hm', 'reg'])
                trk_heads = set(['id'])
                for head in (set(self.model.backend.heads) & det_heads):
                    outputs[head] = getattr(self.model.backend,
                                            head)(outputs['raw'])
                # for head in (set(self.model.heads) & trk_heads):
                #     outputs[head] = getattr(self.model, head)(outputs['raw_trk'])
                # del outputs['raw_trk']
                del outputs['raw']
                output = outputs
                if hasattr(self.model.relation, 'loss'):
                    cur_feats = stuff[-2]
                    self.model.relation.lock.acquire()
                    self.model.relation.feature_bank.append(
                        cur_feats.detach().cpu())
                    self.model.relation.lock.release()
            else:
                output = self.model(im_blob)[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            id_feature = output['id']
            id_feature = F.normalize(id_feature, dim=1)

            reg = output['reg'] if self.opt.reg_offset else None
            dets, inds = mot_decode(hm,
                                    wh,
                                    reg=reg,
                                    ltrb=self.opt.ltrb,
                                    K=self.opt.K)
            id_feature = _tranpose_and_gather_feat(id_feature, inds)
            id_feature = id_feature.squeeze(0)
            id_feature = id_feature.cpu().numpy()

        dets = self.post_process(dets, meta)
        dets = self.merge_outputs([dets])[1]

        remain_inds = dets[:, 4] > self.opt.conf_thres
        dets = dets[remain_inds]
        id_feature = id_feature[remain_inds]
        self.inputs_embs.append((dets, id_feature))

        # vis
        '''
        for i in range(0, dets.shape[0]):
            bbox = dets[i][0:4]
            cv2.rectangle(img0, (bbox[0], bbox[1]),
                          (bbox[2], bbox[3]),
                          (0, 255, 0), 2)
        cv2.imshow('dets', img0)
        cv2.waitKey(0)
        id0 = id0-1
        '''

        if len(dets) > 0:
            '''Detections'''
            detections = [
                STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30)
                for (tlbrs, f) in zip(dets[:, :5], id_feature)
            ]
        else:
            detections = []
        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)
        ''' Step 2: First association, with embedding'''
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        # Predict the current location with KF
        #for strack in strack_pool:
        #strack.predict()
        STrack.multi_predict(strack_pool)
        dists = matching.embedding_distance(strack_pool, detections)
        #dists = matching.iou_distance(strack_pool, detections)
        dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool,
                                     detections)
        matches, u_track, u_detection = matching.linear_assignment(dists,
                                                                   thresh=0.4)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)
        ''' Step 3: Second association, with IOU'''
        detections = [detections[i] for i in u_detection]
        r_tracked_stracks = [
            strack_pool[i] for i in u_track
            if strack_pool[i].state == TrackState.Tracked
        ]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists,
                                                                   thresh=0.5)

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)
        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(
            dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_starcks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)
        """ Step 4: Init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks.append(track)
        """ Step 5: Update state"""
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)

        # print('Remaining match {} s'.format(t4-t3))

        self.tracked_stracks = [
            t for t in self.tracked_stracks if t.state == TrackState.Tracked
        ]
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             activated_starcks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks,
                                             refind_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks,
                                        self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
            self.tracked_stracks, self.lost_stracks)
        # get scores of lost tracks
        output_stracks = [
            track for track in self.tracked_stracks if track.is_activated
        ]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format(
            [track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format(
            [track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format(
            [track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format(
            [track.track_id for track in removed_stracks]))

        return output_stracks
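matching.fuse_motion, called before every first association above, gates the appearance cost with the Kalman filter's Mahalanobis distance and blends the two costs. A sketch under the usual JDE/FairMOT defaults (lambda_ = 0.98, chi-square 0.95 gate for 4 degrees of freedom); the gating_distance call mirrors the Kalman filter API these trackers use, but treat the exact signature and constants as assumptions:

import numpy as np

CHI2INV95_4DOF = 9.4877  # 0.95 quantile of chi-square with 4 dof

def fuse_motion_sketch(kf, cost_matrix, tracks, detections, lambda_=0.98):
    if cost_matrix.size == 0:
        return cost_matrix
    measurements = np.asarray([det.to_xyah() for det in detections])
    for row, track in enumerate(tracks):
        gating_d = kf.gating_distance(track.mean, track.covariance,
                                      measurements, only_position=False,
                                      metric='maha')
        cost_matrix[row, gating_d > CHI2INV95_4DOF] = np.inf  # infeasible pairs
        cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_d
    return cost_matrix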
Beispiel #18
0
    def update(self, im_blob, img0, p_crops, p_crops_lengths, edge_index, gnn_output_layer=-1, p_imgs=None,
               conf_thres=0.3):
        self.frame_id += 1
        activated_starcks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = im_blob.shape[2]
        inp_width = im_blob.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {'c': c, 's': s,
                'out_height': inp_height // self.opt.down_ratio,
                'out_width': inp_width // self.opt.down_ratio}

        ''' Step 1: Network forward, get detections & embeddings'''
        with torch.no_grad():
            output = self.model(im_blob, p_crops, p_crops_lengths, edge_index, p_imgs=p_imgs)[gnn_output_layer]
            if type(output) is list:
                output = output[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            id_feature = output['id']
            id_feature = F.normalize(id_feature, dim=1)

            reg = output['reg'] if self.opt.reg_offset else None
            dets, inds = mot_decode(hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
            id_feature = _tranpose_and_gather_feat(id_feature, inds)
            id_feature = id_feature.squeeze(0)
            id_feature = id_feature.cpu().numpy()
        if self.viz_attention and self.frame_id == self.opt.vis_attn_frame:
            # vis attention
            attn = output['p']
            node0_neighbor_idx = output['node0_neighbor_idx']
            keep = torch.where(attn > self.opt.vis_attn_thres)[0]
            self.visualize_centers(im_blob, keep, node0_neighbor_idx, attn, output, p_imgs)

        dets = self.post_process(dets, meta)
        dets = self.merge_outputs([dets])[1]

        # remain_inds = dets[:, 4] > self.opt.conf_thres
        remain_inds = dets[:, 4] > conf_thres
        dets = dets[remain_inds]
        id_feature = id_feature[remain_inds]


        # vis
        '''
        for i in range(0, dets.shape[0]):
            bbox = dets[i][0:4]
            cv2.rectangle(img0, (bbox[0], bbox[1]),
                          (bbox[2], bbox[3]),
                          (0, 255, 0), 2)
        cv2.imshow('dets', img0)
        cv2.waitKey(0)
        id0 = id0-1
        '''

        if len(dets) > 0:
            '''Detections'''
            detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for
                          (tlbrs, f) in zip(dets[:, :5], id_feature)]
        else:
            detections = []

        ''' Add newly detected tracklets to tracked_stracks'''
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        ''' Step 2: First association, with embedding'''
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        # Predict the current location with KF
        #for strack in strack_pool:
            #strack.predict()
        STrack.multi_predict(strack_pool)
        dists = matching.embedding_distance(strack_pool, detections)
        #dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
        dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        ''' Step 3: Second association, with IOU'''
        detections = [detections[i] for i in u_detection]
        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
        dists = matching.iou_distance(r_tracked_stracks, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5)

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_starcks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_starcks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        """ Step 4: Init new stracks"""
        for inew in u_detection:
            track = detections[inew]
            # if track.score < self.det_thresh:
            if track.score < conf_thres:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_starcks.append(track)
        """ Step 5: Update state"""
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)

        # print('Remaining match {} s'.format(t4-t3))

        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
        self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
        # get scores of lost tracks
        output_stracks = [track for track in self.tracked_stracks if track.is_activated]

        logger.debug('===========Frame {}=========='.format(self.frame_id))
        logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks]))
        logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
        logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
        logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))

        return output_stracks
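Step 5 is identical in every variant: a lost track is promoted to removed once it has been missing for more than max_time_lost frames. Condensed into a helper for clarity (illustrative; end_frame and mark_removed follow the STrack usage above):

def expire_lost_tracks(lost_stracks, frame_id, max_time_lost, removed_stracks):
    """Move tracks lost for more than max_time_lost frames into removed."""
    for track in lost_stracks:
        if frame_id - track.end_frame > max_time_lost:
            track.mark_removed()
            removed_stracks.append(track)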
Beispiel #19
0
    def forward(self,
                x,
                img_path,
                p_crops,
                p_crops_lengths,
                edge_index,
                p_imgs=None):
        """
        forward function of the GNN detTrack module
        :param x: input image of (N, 3, im_h, im_w)
        :param p_crops: input image crops of previous frame corresponding to each input image, (∑_i n_crops_i, 64)
        :param p_crops_lengths: lengths of the number of previous crops for each batch image (N)
        :param edge_index: list of tensors with length (N), each element of which has a shape of (2, n_edges_i)
        :return:
        """
        # Get the current image features (N, C, H, W)
        img0 = cv2.imread(img_path)
        width = img0.shape[1]
        height = img0.shape[0]
        inp_height = x.shape[2]
        inp_width = x.shape[3]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {
            'c': c,
            's': s,
            'out_height': inp_height // 4,
            'out_width': inp_width // 4
        }

        y = self.backbone_forward(x)[-1]
        hm = y['hm'].sigmoid_()
        wh = y['wh']
        id_feature = y['id']
        reg = y['reg'] if self.opt.reg_offset else None
        dets, inds = mot_decode(hm,
                                wh,
                                reg=reg,
                                ltrb=self.opt.ltrb,
                                K=self.opt.K)
        dets = self.post_process(dets, meta)
        dets = self.merge_outputs([dets])[1]
        remain_inds = dets[:, 4] > 0.4
        dets = dets[remain_inds]
        boxes = dets[:, 0:4].copy()
        _, _, h, w = x.shape  # x is (N, 3, H, W) per the docstring, so unpack four dims
        boxes = xywh2xyxy(boxes)
        boxes = boxes * np.array([w, h, w, h])
        crops = torchvision.ops.roi_align(
            input=x,  # x is already batched (N, C, H, W); the extra unsqueeze would make it 5D
            boxes=[torch.from_numpy(boxes).float()],
            output_size=(96, 32))
        return crops
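        # NOTE: the early return above makes everything below unreachable;
        # the GNN message-passing path is kept as it appeared in the example.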
        x_edge_index = self.build_edge_index_full(
            len(crops), self.default_backbone_feature_resolution[0] *
            self.default_backbone_feature_resolution[1])

        y_x_crops_list = self.crop_features_forward(crops=crops,
                                                    crops_lengths=len(crops),
                                                    imgs=x)
        y_p_crops_list = self.crop_features_forward(
            crops=p_crops, crops_lengths=p_crops_lengths, imgs=p_imgs)

        edge_index = [edge_index, x_edge_index]
        edge_attr = torch.cat((edge_index, x_edge_index), dim=1)
        xy = y

        x_is_img = len(xy.shape) == 4
        if self.node_cnn is not None and x_is_img:
            xy = self.node_cnn(xy)

            emb_dists = nn.functional.pairwise_distance(
                xy[edge_index[0]], xy[edge_index[1]]).view(-1, 1)
            edge_attr = torch.cat((edge_attr, emb_dists), dim=1)

        # Encoding features step
        latent_edge_feats, latent_node_feats = self.encoder(edge_attr, xy)
        initial_edge_feats = latent_edge_feats
        initial_node_feats = latent_node_feats

        # During training, the feature vectors that the MPNetwork outputs for the  last self.num_class_steps message
        # passing steps are classified in order to compute the loss.
        first_class_step = self.num_enc_steps - self.num_class_steps + 1
        outputs_dict = {'classified_edges': []}
        for step in range(1, self.num_enc_steps + 1):

            # Reattach the initially encoded embeddings before the update
            if self.reattach_initial_edges:
                latent_edge_feats = torch.cat(
                    (initial_edge_feats, latent_edge_feats), dim=1)
            if self.reattach_initial_nodes:
                latent_node_feats = torch.cat(
                    (initial_node_feats, latent_node_feats), dim=1)

            # Message Passing Step
            latent_node_feats, latent_edge_feats = self.MPNet(
                latent_node_feats, edge_index, latent_edge_feats)

            if step >= first_class_step:
                # Classification Step
                dec_edge_feats, _ = self.classifier(latent_edge_feats)
                outputs_dict['classified_edges'].append(dec_edge_feats)

        if self.num_enc_steps == 0:
            dec_edge_feats, _ = self.classifier(latent_edge_feats)
            outputs_dict['classified_edges'].append(dec_edge_feats)

        return outputs_dict
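For completeness, the second association in every tracker variant above uses matching.iou_distance, i.e. one minus the pairwise IoU over (x1, y1, x2, y2) boxes. A self-contained sketch, illustrative rather than the library code:

import numpy as np

def iou_distance_sketch(atlbrs, btlbrs):
    """Cost matrix of 1 - IoU between two lists of (x1, y1, x2, y2) boxes."""
    a = np.asarray(atlbrs, dtype=np.float32).reshape(-1, 4)
    b = np.asarray(btlbrs, dtype=np.float32).reshape(-1, 4)
    ious = np.zeros((len(a), len(b)), dtype=np.float32)
    for i, box in enumerate(a):
        x1 = np.maximum(box[0], b[:, 0])
        y1 = np.maximum(box[1], b[:, 1])
        x2 = np.minimum(box[2], b[:, 2])
        y2 = np.minimum(box[3], b[:, 3])
        inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
        area_a = (box[2] - box[0]) * (box[3] - box[1])
        area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
        ious[i] = inter / np.maximum(area_a + area_b - inter, 1e-6)
    return 1.0 - ious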