det["image_id"], det["bbox"][0], det["bbox"][1],
                        det["bbox"][2], det["bbox"][3], det["score"]
                    ]).unsqueeze(dim=0)
                    if video_dets.shape[0] == 0:
                        video_dets = det_t
                    else:
                        video_dets = torch.cat([video_dets, det_t])

            # Select the highest-scoring box among this video's detections.
            used_mask = torch.zeros(video_dets.shape[0])
            max_score, max_score_index = select_max_det(video_dets, used_mask)
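            # Mark the box that has already been selected; it is excluded on
            # later iterations, which continue until the max score drops below
            # the threshold. NOTE(review): this comment originally said 0.2,
            # but the loop condition below is `max_score > 0.5` -- confirm
            # which value is intended.
            used_mask[
                max_score_index] = True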
            gallery_imgs = []
            for image_id in ids:
                crop_imgs = bridge.crop_img(image_id)
                if len(crop_imgs) > 0:
                    gallery_imgs.extend(crop_imgs)

            while (max_score > 0.5):
                query_img = bridge.crop_bbox(
                    int(video_dets[max_score_index][0]),
                    video_dets[max_score_index][1:5])
                # gallery_imgs = []
                query_feat = reid.extract_feats([query_img])
                gallery_feats = reid.extract_feats(gallery_imgs)
                min_dist, min_dist_index = get_similar_box(
                    query_feat, gallery_feats, used_mask)
                position = [
                    int(video_dets[min_dist_index][1]),
                    int(video_dets[min_dist_index][2])