Example 1
    def distance_test(self):
        data_loader = LoadImages(
            '/home/smile/cv/person_search_demo/data/test1',
            img_size=416,
            half=False)
        imgs = []
        paths = []
        for index, (path, img, im0, vid_cap) in enumerate(data_loader):
            img = Image.fromarray(cv2.cvtColor(
                im0, cv2.COLOR_BGR2RGB))  # PIL: (233, 602)
            img = build_transforms(reidCfg)(img).unsqueeze(0)  # torch.Size([1, 3, 256, 128])
            imgs.append(img)
            paths.append(path)
        feats = self.reid_person(imgs)
        query_feats = feats
        gallery_feats = feats
        m, n = query_feats.shape[0], gallery_feats.shape[0]
        visual_distmat = torch.pow(query_feats, 2).sum(dim=1, keepdim=True).expand(m, n) + \
                         torch.pow(gallery_feats, 2).sum(dim=1, keepdim=True).expand(n, m).t()
        visual_distmat.addmm_(query_feats, gallery_feats.t(), beta=1, alpha=-2)  # keyword form; the positional addmm_(1, -2, ...) signature is deprecated
        visual_distmat = visual_distmat.cpu()

        res = visual_distmat.numpy().tolist()
        paths.append(res)  # note: list.append returns None, so don't reassign its result
        print('\n')
        print(paths)
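
The distance block above relies on the expansion ||q - g||^2 = ||q||^2 + ||g||^2 - 2*q@g.T. A minimal, self-contained check of that identity against torch.cdist, with random tensors standing in for the real 2048-dim ReID features:

import torch

q = torch.randn(3, 2048)  # stand-in query features
g = torch.randn(5, 2048)  # stand-in gallery features

m, n = q.shape[0], g.shape[0]
distmat = torch.pow(q, 2).sum(dim=1, keepdim=True).expand(m, n) + \
          torch.pow(g, 2).sum(dim=1, keepdim=True).expand(n, m).t()
distmat.addmm_(q, g.t(), beta=1, alpha=-2)  # distmat = |q|^2 + |g|^2 - 2 * q @ g.T

# the result is the full matrix of squared Euclidean distances
assert torch.allclose(distmat, torch.cdist(q, g).pow(2), atol=1e-3)
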
Example 2
def load_img(cfg):
    # preprocessing for the validation set
    val_transforms = build_transforms(cfg)
    num_workers = cfg.DATALOADER.NUM_WORKERS  # number of image-loading worker processes, e.g. 8
    dataset = Market1501(root=cfg.DATASETS.ROOT_DIR)
    print("dataset:", type(dataset), dataset)
    print("dataset.query:", type(dataset.query), dataset.query)

    val_set = ImageDataset(dataset.query, val_transforms)
    val_loader = DataLoader(
        val_set, batch_size=cfg.TEST.IMS_PER_BATCH, shuffle=False, num_workers=num_workers,
        collate_fn=val_collate_fn
    )
    return val_loader, len(dataset.query)
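
val_collate_fn is a repo helper that this page does not show; a plausible sketch of what such a collate function does for (img, pid, camid) samples, assuming that tuple layout:

import torch

def val_collate_fn(batch):
    # batch is a list of (img_tensor, pid, camid) tuples from ImageDataset
    imgs, pids, camids = zip(*batch)
    # stack the images into one batch tensor; keep the ids as plain lists
    return torch.stack(imgs, dim=0), list(pids), list(camids)
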
Example 3
    def detect_person(self, path, img, im0, vid_cap):
        imgs = []
        locs = []

        # Get detections shape: (3, 416, 320)
        img = torch.from_numpy(img).unsqueeze(0).to(
            self.device)  # torch.Size([1, 3, 416, 320])
        pred, _ = self.detection_model(img)  # processed network predictions
        det = non_max_suppression(pred.float(), self.conf_thres,
                                  self.nms_thres)[0]  # torch.Size([5, 7])

        if det is not None and len(det) > 0:
            # Rescale boxes from 416 to the original image size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      im0.shape).round()
            print('%gx%g ' % img.shape[2:], end='')

            # iterate over all classes present in the image and count detections per class
            for c in det[:, -1].unique():
                n = (det[:, -1] == c).sum()
                if self.classes[int(c)] == 'person':
                    print('%g %ss' % (n, self.classes[int(c)]), end=', ')

            for *xyxy, conf, cls_conf, cls in det:  # iterate over the final predicted boxes
                # *xyxy: top-left and bottom-right coordinates in the original image, e.g. [tensor(349.), tensor(26.), tensor(468.), tensor(341.)]
                if self.classes[int(cls)] == 'person':
                    xmin = int(xyxy[0])
                    ymin = int(xyxy[1])
                    xmax = int(xyxy[2])
                    ymax = int(xyxy[3])
                    w = xmax - xmin  # 233
                    h = ymax - ymin  # 602
                    # very small person detections are rarely useful;
                    # tune this area threshold for your own setup
                    if w * h > self.bbox_scale_thres:
                        locs.append((xmin, ymin, xmax, ymax))
                        crop_img = im0[ymin:ymax,
                                       xmin:xmax]  # HWC (602, 233, 3)
                        crop_img = Image.fromarray(
                            cv2.cvtColor(crop_img,
                                         cv2.COLOR_BGR2RGB))  # PIL: (233, 602)
                        crop_img = build_transforms(reidCfg)(
                            crop_img).unsqueeze(
                                0)  # torch.Size([1, 3, 256, 128])
                        imgs.append(crop_img)

        return pred, imgs, locs
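
detect_person leaves batching to the caller: it returns a list of [1, 3, 256, 128] crop tensors that must be concatenated before the ReID model sees them. A tiny sketch of that step, with random tensors standing in for real crops:

import torch

imgs = [torch.randn(1, 3, 256, 128) for _ in range(4)]  # stand-ins for the returned crops

if imgs:  # detect_person may collect nothing if no box passes the filters
    batch = torch.cat(imgs, dim=0)
    print(batch.shape)  # torch.Size([4, 3, 256, 128]), ready for the ReID model as in the detect() examples below
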
Example 4
def detect(
        cfg,
        data,
        weights,
        images='data/samples',  # input folder
        output='output',  # output folder
        fourcc='mp4v',  # video codec
        img_size=416,
        conf_thres=0.25,
        nms_thres=0.4,
        dist_thres=1.3,
        save_txt=False,
        save_images=True):

    # Initialize
    device = torch_utils.select_device(force_cpu=False)
    torch.backends.cudnn.benchmark = False  # set False for reproducible results
    if os.path.exists(output):
        shutil.rmtree(output)  # delete output folder
    os.makedirs(output)  # make new output folder

    ############# person re-ID model initialization #############
    query_loader, num_query = make_data_loader(reidCfg)
    reidModel = build_model(reidCfg, num_classes=10126)
    reidModel.load_param(reidCfg.TEST.WEIGHT)
    reidModel.to(device).eval()
    print('num query %d' % num_query)
    query_feats = defaultdict(list)
    query_pids = []

    for i, batch in enumerate(query_loader):
        with torch.no_grad():
            img, pid, camid = batch
            img = img.to(device)
            feat = reidModel(
                img)  # e.g. 2 query images, each a 2048-dim feature vector: torch.Size([2, 2048])
            for j, f in enumerate(feat):
                if pid[j] not in query_pids:
                    query_pids.append(pid[j])
                print(f.cpu().numpy())
                query_feats[pid[j]].append(f.cpu().numpy())

    for pid in query_pids:
        temp = np.array(query_feats[pid])
        print(temp)
        query_feats[pid] = torch.from_numpy(temp).float().to(
            device)  # torch.Size([2, 2048])
        print(query_feats[pid])
        query_feats[pid] = torch.nn.functional.normalize(query_feats[pid],
                                                         dim=1,
                                                         p=2)  # L2-normalize the query feature vectors
        print(query_feats[pid])
    print("The query feature is normalized")

    ############# person detection model initialization #############
    model = Darknet(cfg, img_size)

    # Load weights
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        _ = load_darknet_weights(model, weights)

    # Eval mode
    model.to(device).eval()
    # Half precision
    opt.half = opt.half and device.type != 'cpu'  # half precision only supported on CUDA
    if opt.half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if opt.webcam:
        save_images = False
        dataloader = LoadWebcam(img_size=img_size, half=opt.half)
    else:
        dataloader = LoadImages(images, img_size=img_size, half=opt.half)

    # Get classes and colors
    # parse_data_cfg(data)['names'] gives the class-names file path, e.g. names=data/coco.names
    classes = load_classes(
        parse_data_cfg(data)['names'])  # list of class names: ['person', 'bicycle', ...]
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(classes))]  # one random box color per class

    # Run inference
    t0 = time.time()
    for i, (path, img, im0, vid_cap) in enumerate(dataloader):
        t = time.time()
        # if i < 500 or i % 5 == 0:
        #     continue
        save_path = str(Path(output) / Path(path).name)  # save path

        # Get detections shape: (3, 416, 320)
        img = torch.from_numpy(img).unsqueeze(0).to(
            device)  # torch.Size([1, 3, 416, 320])
        pred, _ = model(img)  # processed network predictions (plus the raw ones)
        det = non_max_suppression(pred.float(), conf_thres,
                                  nms_thres)[0]  # torch.Size([5, 7])

        if det is not None and len(det) > 0:
            # Rescale boxes from 416 to the original image size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      im0.shape).round()

            # Print results to screen image 1/3 data\samples\000493.jpg: 288x416 5 persons, Done. (0.869s)
            print('%gx%g ' % img.shape[2:],
                  end='')  # print image size '288x416'
            for c in det[:, -1].unique():  # iterate over the classes present in the image
                n = (det[:, -1] == c).sum()  # number of detections for this class
                if classes[int(c)] == 'person':
                    print('%g %ss' % (n, classes[int(c)]),
                          end=', ')  # print count and class, e.g. '5 persons'

            # Draw bounding boxes and labels of detections
            # (x1y1x2y2, obj_conf, class_conf, class_pred)
            count = 0
            gallery_img = []
            gallery_loc = []
            for *xyxy, conf, cls_conf, cls in det:  # iterate over the final predicted boxes
                # *xyxy: top-left and bottom-right coordinates in the original image, e.g. [tensor(349.), tensor(26.), tensor(468.), tensor(341.)]
                if save_txt:  # Write to file
                    with open(save_path + '.txt', 'a') as file:
                        file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))

                # Add bbox to the image
                label = '%s %.2f' % (classes[int(cls)], conf)  # 'person 1.00'
                if classes[int(cls)] == 'person':
                    # plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])
                    xmin = int(xyxy[0])
                    ymin = int(xyxy[1])
                    xmax = int(xyxy[2])
                    ymax = int(xyxy[3])
                    w = xmax - xmin  # 233
                    h = ymax - ymin  # 602
                    # very small person detections are rarely useful;
                    # tune this area threshold for your own setup
                    if w * h > 500:
                        gallery_loc.append((xmin, ymin, xmax, ymax))
                        crop_img = im0[ymin:ymax,
                                       xmin:xmax]  # HWC (602, 233, 3)
                        crop_img = Image.fromarray(
                            cv2.cvtColor(crop_img,
                                         cv2.COLOR_BGR2RGB))  # PIL: (233, 602)
                        crop_img = build_transforms(reidCfg)(
                            crop_img).unsqueeze(
                                0)  # torch.Size([1, 3, 256, 128])
                        gallery_img.append(crop_img)

            if gallery_img:
                gallery_img = torch.cat(gallery_img,
                                        dim=0)  # torch.Size([7, 3, 256, 128])
                gallery_img = gallery_img.to(device)
                gallery_feats = reidModel(gallery_img)  # torch.Size([7, 2048])
                print("The gallery feature is normalized")
                gallery_feats = torch.nn.functional.normalize(
                    gallery_feats, dim=1, p=2)  # L2-normalize the gallery feature vectors

                for pid in query_pids:
                    m, n = query_feats[pid].shape[0], gallery_feats.shape[0]
                    distmat = torch.pow(query_feats[pid], 2).sum(dim=1, keepdim=True).expand(m, n) + \
                              torch.pow(gallery_feats, 2).sum(dim=1, keepdim=True).expand(n, m).t()

                    distmat.addmm_(query_feats[pid], gallery_feats.t(), beta=1, alpha=-2)
                    # distmat = (qf - gf)^2
                    # distmat = np.array([[1.79536, 2.00926, 0.52790, 1.98851, 2.15138, 1.75929, 1.99410],
                    #                     [1.78843, 1.96036, 0.53674, 1.98929, 1.99490, 1.84878, 1.98575]])
                    distmat = distmat.cpu().numpy()  # move off the GPU; ndarray of shape (m, n)
                    distmat = distmat.sum(axis=0) / len(
                        query_feats[pid])  # average over the multiple query images of the same person
                    index = distmat.argmin()
                    if distmat[index] < dist_thres:
                        print('distance: %s' % distmat[index])
                        plot_one_box(gallery_loc[index],
                                     im0,
                                     label=str(pid),
                                     color=colors[int(cls)])
                        # cv2.imshow('person search', im0)
                        # cv2.waitKey()

        print('Done. (%.3fs)' % (time.time() - t))

        if opt.webcam:  # Show live webcam
            cv2.imshow(weights, im0)

        if save_images:  # Save image with detections
            if dataloader.mode == 'images':
                cv2.imwrite(save_path, im0)
            else:
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer

                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                    width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    vid_writer = cv2.VideoWriter(
                        save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                        (width, height))
                vid_writer.write(im0)

    if save_images:
        print('Results saved to %s' % os.getcwd() + os.sep + output)
        if platform == 'darwin':  # macos
            os.system('open ' + output + ' ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
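
The same squared-distance block recurs in every example on this page. A small helper that factors it out, sketched with the keyword form of addmm_ (the positional addmm_(1, -2, ...) signature is deprecated in recent PyTorch):

import torch

def euclidean_distmat(qf, gf):
    # squared Euclidean distance between every query/gallery feature pair
    m, n = qf.shape[0], gf.shape[0]
    distmat = qf.pow(2).sum(dim=1, keepdim=True).expand(m, n) + \
              gf.pow(2).sum(dim=1, keepdim=True).expand(n, m).t()
    return distmat.addmm_(qf, gf.t(), beta=1, alpha=-2)

q, g = torch.randn(2, 2048), torch.randn(7, 2048)
print(euclidean_distmat(q, g).shape)  # torch.Size([2, 7])
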
Example 5
def detect(
        cfg,
        data,
        weights,
        images='data/samples',  # input folder
        output='output',  # output folder
        fourcc='mp4v',  # video codec
        img_size=416,
        conf_thres=0.5,
        nms_thres=0.5,
        dist_thres=1.0,
        save_txt=False,
        save_images=True):

    # Initialize
    device = torch_utils.select_device(force_cpu=False)  # do not force CPU
    print("using device:", type(device), device)
    torch.backends.cudnn.benchmark = False  # set False for reproducible results
    if os.path.exists(output):
        shutil.rmtree(output)  # delete output folder
    os.makedirs(output)  # make new output folder

    ############# load the <pid, Chinese name, color> list #############
    pidNameInfo = make_pidNames_loader(reidCfg)
    print("pidNameInfo:", pidNameInfo)

    ############# person re-ID model initialization #############
    query_loader, num_query = make_data_loader(reidCfg)
    print("query_loader:", type(query_loader), query_loader)

    reidModel = build_model(reidCfg, num_classes=10126)
    reidModel.load_param(reidCfg.TEST.WEIGHT)
    reidModel.to(device).eval()

    query_feats = []
    query_pids = []

    for i, batch in enumerate(query_loader):
        with torch.no_grad():
            img, pid, camid = batch  # image, person ID, camera ID

            img = img.to(device)
            feat = reidModel(
                img)  # e.g. 2 query images, each a 2048-dim feature vector: torch.Size([2, 2048])
            print("feat:", type(feat), feat.shape, feat)
            query_feats.append(feat)
            query_pids.extend(np.asarray(
                pid))  # extend() appends every element of another sequence to the end of the list

    query_feats = torch.cat(query_feats, dim=0)  # torch.Size([2, 2048])
    print("The query feature is normalized")
    query_feats = torch.nn.functional.normalize(query_feats, dim=1,
                                                p=2)  # L2-normalize the query feature vectors

    ############# person detection model initialization #############
    model = Darknet(cfg, img_size)

    # Load weights
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
        # model.load_state_dict(torch.load(weights, map_location='cpu')['model'])    # CPU-only variant
    else:  # darknet format
        _ = load_darknet_weights(model, weights)

    # Eval mode
    model.to(device).eval()
    # Half precision
    opt.half = opt.half and device.type != 'cpu'  # half precision only supported on CUDA
    if opt.half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if opt.webcam:
        save_images = False
        dataloader = LoadWebcam(img_size=img_size, half=opt.half)
    else:
        dataloader = LoadImages(images, img_size=img_size, half=opt.half)

    # Get classes and colors
    # parse_data_cfg(data)['names'] gives the class-names file path, e.g. names=data/coco.names
    classes = load_classes(
        parse_data_cfg(data)['names'])  # list of class names: ['person', 'bicycle', ...]
    # colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]  # one random box color per class
    # colors = [[40, 92, 230] for _ in range(len(classes))]  # only persons are detected, so a single shared color works

    # Run inference
    t0 = time.time()
    for i, (path, img, im0, vid_cap) in enumerate(dataloader):
        t = time.time()
        # if i < 500 or i % 5 == 0:
        #     continue
        save_path = str(Path(output) / Path(path).name)  # save path

        # Get detections shape: (3, 416, 320)
        img = torch.from_numpy(img).unsqueeze(0).to(
            device)  # torch.Size([1, 3, 416, 320])
        pred, _ = model(img)  # processed network predictions (plus the raw ones)
        det = non_max_suppression(pred.float(), conf_thres,
                                  nms_thres)[0]  # torch.Size([5, 7])

        if det is not None and len(det) > 0:
            # Rescale boxes from 416 to the original image size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      im0.shape).round()

            # Print results to screen image 1/3 data\samples\000493.jpg: 288x416 5 persons, Done. (0.869s)
            print('%gx%g ' % img.shape[2:],
                  end='')  # print image size '288x416'
            for c in det[:, -1].unique():  # iterate over the classes present in the image
                n = (det[:, -1] == c).sum()  # number of detections for this class
                if classes[int(c)] == 'person':
                    print('%g %ss' % (n, classes[int(c)]),
                          end=', ')  # print count and class, e.g. '5 persons'

            # Draw bounding boxes and labels of detections
            # (x1y1x2y2, obj_conf, class_conf, class_pred)
            count = 0
            gallery_img = []
            gallery_loc = []
            for *xyxy, conf, cls_conf, cls in det:  # iterate over the final predicted boxes
                # *xyxy: top-left and bottom-right coordinates in the original image, e.g. [tensor(349.), tensor(26.), tensor(468.), tensor(341.)]
                if save_txt:  # Write to file
                    with open(save_path + '.txt', 'a') as file:
                        file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))

                # Add bbox to the image
                label = '%s %.2f' % (classes[int(cls)], conf)  # 'person 1.00'
                if classes[int(cls)] == 'person':
                    # plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])
                    xmin = int(xyxy[0])
                    ymin = int(xyxy[1])
                    xmax = int(xyxy[2])
                    ymax = int(xyxy[3])
                    w = xmax - xmin  # 233
                    h = ymax - ymin  # 602
                    # very small person detections are rarely useful;
                    # tune this area threshold for your own setup
                    if w * h > 500:
                        gallery_loc.append((xmin, ymin, xmax, ymax))
                        crop_img = im0[ymin:ymax,
                                       xmin:xmax]  # HWC (602, 233, 3)
                        crop_img = Image.fromarray(
                            cv2.cvtColor(crop_img,
                                         cv2.COLOR_BGR2RGB))  # PIL: (233, 602)
                        crop_img = build_transforms(reidCfg)(
                            crop_img).unsqueeze(
                                0)  # torch.Size([1, 3, 256, 128])
                        gallery_img.append(crop_img)

            if gallery_img:
                gallery_img = torch.cat(gallery_img,
                                        dim=0)  # torch.Size([7, 3, 256, 128])
                gallery_img = gallery_img.to(device)
                gallery_feats = reidModel(gallery_img)  # torch.Size([7, 2048])
                print("The gallery feature is normalized")
                gallery_feats = torch.nn.functional.normalize(
                    gallery_feats, dim=1, p=2)  # L2-normalize the gallery feature vectors

                # m: number of query images, e.g. 2
                # n: number of persons detected by YOLO, e.g. 7
                m, n = query_feats.shape[0], gallery_feats.shape[0]
                # print("query_feats.shape:", query_feats.shape, "gallery_feats.shape:", gallery_feats.shape)
                distmat = torch.pow(query_feats, 2).sum(dim=1, keepdim=True).expand(m, n) + \
                          torch.pow(gallery_feats, 2).sum(dim=1, keepdim=True).expand(n, m).t()    # .t() transposes the matrix
                # out=(beta∗M)+(alpha∗mat1@mat2)
                # qf^2 + gf^2 - 2 * [email protected]()
                # distmat - 2 * [email protected]()
                # distmat: qf^2 + gf^2
                # qf: torch.Size([2, 2048])
                # gf: torch.Size([7, 2048])
                distmat.addmm_(query_feats, gallery_feats.t(),
                               beta=1, alpha=-2)  # rows: query persons; columns: gallery persons
                # distmat = (qf - gf)^2
                # distmat = np.array([[1.79536, 2.00926, 0.52790, 1.98851, 2.15138, 1.75929, 1.99410],
                #                     [1.78843, 1.96036, 0.53674, 1.98929, 1.99490, 1.84878, 1.98575]])
                distmat = distmat.cpu().numpy(
                )  # move from GPU to host memory; ndarray of shape (m, n)
                print("distmat:", type(distmat), m, n, distmat.shape)

                pidMatDict = splitDistmat(query_pids,
                                          distmat)  # split distmat rows by pid: <pid, mat>
                pid_freq = Counter(query_pids)

                for k in pidMatDict.keys():  # k is a pid
                    tmp_mat = pidMatDict[k]  # the distance rows for this one person
                    tmp_mat = tmp_mat.sum(
                        axis=0) / pid_freq[k]  # average over the multiple query images of the same person

                    # tmp_mat = tmp_mat.sum(axis=0)
                    index = tmp_mat.argmin()  # index of the minimum distance (the gallery person most similar to this query)
                    # print("tmp_mat:", type(tmp_mat), m, n, tmp_mat.shape, tmp_mat)
                    print('distance: %s' % tmp_mat[index], index, dist_thres, n)

                    # print("gallery_loc:", gallery_loc)
                    if tmp_mat[index] < dist_thres:
                        print('distance: %s' % tmp_mat[index])
                        # # cv2.putText garbles Chinese labels, so the image is converted to PIL, the label is drawn, and it is converted back.
                        # # Drawing inside a helper left only the filled color patch, so the box and label are drawn inline here (English labels can use plot_one_box directly).
                        # plot_one_box(gallery_loc[index], im0, label='%s:%s' % (pidNameInfo[str(k)][1], tmp_mat[index]), color=getColorArr(pidNameInfo[str(k)][2]))

                        # prepare the arguments that plot_one_box() would take
                        x = gallery_loc[index]  # box extent (top-left and bottom-right coordinates)
                        label = '%s:%s' % (pidNameInfo[str(k)][1],
                                           tmp_mat[index])  # label text (Chinese)
                        # label = '%s' % pidNameInfo[str(k)][0]
                        color = getColorArr(pidNameInfo[str(k)][2])  # box color

                        tl = round(0.002 * (im0.shape[0] + im0.shape[1]) /
                                   2) + 1  # line thickness
                        color = color or [
                            random.randint(0, 255) for _ in range(3)
                        ]  # pick a random color if none was given
                        c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])
                                                          )  # top-left xy, bottom-right xy
                        cv2.rectangle(im0, c1, c2, color, thickness=tl)

                        if label:
                            # for the label background box, c1 is the bottom-left and c2 the top-right corner
                            tf = max(tl - 1, 1)  # font thickness
                            # t_size = cv2.getTextSize(label, fontFace=0, fontScale=tl / 3, thickness=tf)[0]    # with fontFace=0: ((310, 22), 10)
                            # t_size = cv2.getTextSize(label, fontFace=1, fontScale=tl / 3, thickness=tf)[0]   # with fontFace=1: ((156, 10), 6)

                            zh_cn_nums = get_zhcn_number(
                                label)  # number of Chinese characters (each ~20 px wide; other characters ~10 px)
                            t_size = (20 * zh_cn_nums + 10 *
                                      (len(label) - zh_cn_nums), 22)
                            c2 = c1[0] + t_size[0], c1[1] - t_size[
                                1] - 3  # y coordinate; the extra -3 leaves a little space above the text
                            cv2.rectangle(im0, c1, c2, color, -1)  # filled
                            print("t_size:", t_size, " c1:", c1, " c2:", c2)

                            # Draw a label with a name below the face
                            # cv2.rectangle(im0, c1, c2, (0, 0, 255), cv2.FILLED)
                            font = cv2.FONT_HERSHEY_DUPLEX

                            # convert the OpenCV image to PIL, draw the Chinese label, then convert back
                            pil_img = Image.fromarray(
                                cv2.cvtColor(im0, cv2.COLOR_BGR2RGB))
                            draw = ImageDraw.Draw(pil_img)
                            font = ImageFont.truetype('simhei.ttf',
                                                      20,
                                                      encoding='utf-8')
                            draw.text((c1[0], c1[1] - 20),
                                      label, (255, 255, 255),
                                      font=font)

                            im0 = cv2.cvtColor(np.array(pil_img),
                                               cv2.COLOR_RGB2BGR)  # PIL back to OpenCV
                        # cv2.imshow('person search', im0)
                        # cv2.waitKey()
        print('Done. (%.3fs)' % (time.time() - t))

        if opt.webcam:  # Show live webcam
            cv2.imshow(weights, im0)

        if save_images:  # Save image with detections
            if dataloader.mode == 'images':
                cv2.imwrite(save_path, im0)
            else:
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer

                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                    width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    vid_writer = cv2.VideoWriter(
                        save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                        (width, height))
                vid_writer.write(im0)

    if save_images:
        print('Results saved to %s' % os.getcwd() + os.sep + output)
        if platform == 'darwin':  # macos
            os.system('open ' + output + ' ' + save_path)
    print('Done. (%.3fs)' % (time.time() - t0))
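
splitDistmat and getColorArr are repo helpers that this page does not show. A plausible reimplementation of splitDistmat, assuming query_pids[i] labels row i of distmat:

import numpy as np

def splitDistmat(query_pids, distmat):
    # group the rows of distmat by person id: <pid, (rows_of_pid, n) array>
    pid_mats = {}
    for row, pid in enumerate(query_pids):
        pid_mats.setdefault(pid, []).append(distmat[row])
    return {pid: np.stack(rows) for pid, rows in pid_mats.items()}
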
Example 6
    def yolo_frames(unique_name):
        cam_id = unique_name[1]
        device = torch_utils.select_device(force_cpu=False)

        torch.backends.cudnn.benchmark = False  # set False for reproducible results

        cfg = 'cfg/yolov3.cfg'
        data = 'data/coco.data'
        weights = 'weights/yolov3.weights'
        half = False
        img_size = 416
        conf_thres = 0.25
        nms_thres = 0.4
        dist_thres = 1.4

        # create the re-ID model
        query_loader, num_query = make_data_loader(reidCfg)
        reidModel = build_model(reidCfg, num_classes=10126)
        reidModel.load_param(reidCfg.TEST.WEIGHT)
        reidModel.to(device).eval()

        # query data used for re-ID
        query_feats = defaultdict(list)
        query_pids = []

        # fetch the query features
        for i, batch in enumerate(query_loader):
            with torch.no_grad():
                img, pid, camid = batch
                img = img.to(device)
                feat = reidModel(img)
                for j, f in enumerate(feat):
                    if pid[j] not in query_pids:
                        query_pids.append(pid[j])
                    query_feats[pid[j]].append(f.cpu().numpy())

        # convert the query features to torch tensors
        for pid in query_pids:
            temp = np.array(query_feats[pid])
            query_feats[pid] = torch.from_numpy(temp).float().to(device)
            query_feats[pid] = torch.nn.functional.normalize(query_feats[pid],
                                                             dim=1,
                                                             p=2)
        print("The query feature is normalized")

        model = Darknet(cfg, img_size)  # build the detection model from the config

        # Load weights
        if weights.endswith('.pt'):  # pytorch format
            model.load_state_dict(
                torch.load(weights, map_location=device)['model'])
        else:  # darknet format
            _ = load_darknet_weights(model, weights)

        # Eval mode
        model.to(device).eval()
        # Half precision
        half = half and device.type != 'cpu'  # half precision only supported on CUDA
        if half:
            model.half()

        # Set Dataloader
        dataloader = LoadWebcam(cam_id, img_size=img_size, half=half)

        # Get classes and colors
        # parse_data_cfg(data)['names'] names=data/coco.names
        classes = load_classes(parse_data_cfg(
            data)['names'])  # load all class names from the COCO names file: ['person', 'bicycle', ...]
        colors = [[random.randint(0, 255) for _ in range(3)]
                  for _ in range(len(classes))]  # one random box color per class

        # count = 0  # counter used when saving cropped images
        # Run inference
        t0 = time.time()
        for i, (path, img, im0, vid_cap) in enumerate(dataloader):
            patient_map = map.Map
            now = time.localtime()

            # If the saved minute is not same with the current minute, it means this camera is the first one that access to the map information in this minute.
            # Since we have to clear the second array every minute, change the minute and clear the second array.
            if (now.tm_min != patient_map.minute):
                patient_map.minute = now.tm_min
                patient_map.sec_array.clear()

            # If there is no information about current second, it means this is the first access to the map in this second.
            # We should init the map information each second.
            if (now.tm_sec not in patient_map.sec_array):
                patient_map.sec_array.append(now.tm_sec)
                patient_map.exist_id = []
                patient_map.camera_map = {0: [], 1: [], 2: []}
                patient_map.total_count = {0: 0, 1: 0, 2: 0}

            if i % 5 != 0:  # skip frames to reduce processing load
                continue

            # Get detections shape: (3, 416, 320)
            img = torch.from_numpy(img).unsqueeze(0).to(
                device)  # torch.Size([1, 3, 416, 320]); convert the image to a torch tensor
            pred, _ = model(img)  # run detection
            det = non_max_suppression(pred.float(), conf_thres, nms_thres)[
                0]  # torch.Size([5, 7]); filter the predictions by threshold

            if det is not None and len(det) > 0:
                # Rescale boxes from 416 to true image size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results to screen image 1/3 data\samples\000493.jpg: 288x416 5 persons, Done. (0.869s)
                #print('%gx%g ' % img.shape[2:], end='')  # print image size '288x416'
                #for c in det[:, -1].unique():   # iterate over the classes in the image
                #   n = (det[:, -1] == c).sum() # number of detections for this class
                #if classes[int(c)] == 'person':
                #    print('%g %ss' % (n, classes[int(c)]), end=', ') # print count and class, '5 persons'
                #    print(" ")

                # Draw bounding boxes and labels of detections
                # (x1y1x2y2, obj_conf, class_conf, class_pred)

                gallery_img = []  # list holding only the cropped person images
                gallery_loc = []  # list holding the box coordinates
                for *xyxy, conf, cls_conf, cls in det:  # det holds box coordinates, objectness confidence, and class confidence
                    # *xyxy: the coordinates as tensors, e.g. [tensor(349.), tensor(26.), tensor(468.), tensor(341.)]

                    # Add bbox to the image
                    # label = '%s %.2f' % (classes[int(cls)], conf) # 'person 1.00'
                    if classes[int(
                            cls)] == 'person':  # if the detected class is 'person'
                        xmin = int(xyxy[0])
                        ymin = int(xyxy[1])
                        xmax = int(xyxy[2])
                        ymax = int(xyxy[3])
                        w = xmax - xmin  # 233
                        h = ymax - ymin  # 602
                        if w * h > 500:
                            gallery_loc.append((xmin, ymin, xmax, ymax))
                            crop_img = im0[ymin:ymax,
                                           xmin:xmax]  # HWC (602, 233, 3)
                            #cv2.imwrite('./temp/'+str(count)+'.jpg',crop_img) # save the crop to the temp folder for use as a query image
                            #count=count+1
                            crop_img = Image.fromarray(
                                cv2.cvtColor(
                                    crop_img,
                                    cv2.COLOR_BGR2RGB))  # PIL: (233, 602)
                            crop_img = build_transforms(reidCfg)(
                                crop_img).unsqueeze(
                                    0)  # torch.Size([1, 3, 256, 128])
                            gallery_img.append(crop_img)
                            plot_one_box(xyxy, im0,
                                         color=[128, 128,
                                                128])  # box each person in gray
                map.Map.total_count[int(cam_id)] = len(gallery_img)
                if gallery_img:  # if any person crops were collected
                    gallery_img = torch.cat(
                        gallery_img, dim=0)  # torch.Size([7, 3, 256, 128])
                    gallery_img = gallery_img.to(device)
                    gallery_feats = reidModel(
                        gallery_img)  # torch.Size([7, 2048])
                    #print("The gallery feature is normalized")
                    gallery_feats = torch.nn.functional.normalize(
                        gallery_feats, dim=1, p=2)  # 计算出查询图片的特征向量
                    #그 이미지의 특징을 뽑아옴

                    # m: 2
                    # n: 7
                    for pid in query_pids:
                        m, n = query_feats[pid].shape[0], gallery_feats.shape[
                            0]
                        distmat = torch.pow(query_feats[pid], 2).sum(dim=1, keepdim=True).expand(m, n) + \
                                  torch.pow(gallery_feats, 2).sum(dim=1, keepdim=True).expand(n, m).t()
                        # out=(beta∗M)+(alpha∗mat1@mat2)
                        # qf^2 + gf^2 - 2 * [email protected]()
                        # distmat - 2 * [email protected]()
                        # distmat: qf^2 + gf^2
                        # qf: torch.Size([2, 2048])
                        # gf: torch.Size([7, 2048])
                        distmat.addmm_(query_feats[pid],
                                       gallery_feats.t(), beta=1, alpha=-2)
                        # distmat = (qf - gf)^2
                        # distmat = np.array([[1.79536, 2.00926, 0.52790, 1.98851, 2.15138, 1.75929, 1.99410],
                        #                     [1.78843, 1.96036, 0.53674, 1.98929, 1.99490, 1.84878, 1.98575]])
                        distmat = distmat.cpu().detach().numpy(
                        )  # ndarray of shape (m, n)
                        distmat = distmat.sum(axis=0) / len(
                            query_feats[pid])  # average the distances over this person's query features
                        index = distmat.argmin()
                        if distmat[
                                index] < dist_thres:  # a distance below the threshold counts as a match
                            print('found target %i on camera %s: %s' %
                                  (pid, cam_id, distmat[index]))

                            plot_one_box(gallery_loc[index],
                                         im0,
                                         label=str(pid),
                                         color=[0, 0, 255])
                            if (pid not in patient_map.exist_id):
                                patient_map.exist_id.append(pid)
                            if (pid not in patient_map.camera_map[int(cam_id)]
                                ):
                                patient_map.camera_map[int(cam_id)].append(pid)
                                #print("exist id : ", patient_map.exist_id)
                                #print(patient_map.camera_map)
                                print("total : ", len(gallery_img))
                                filename = time.strftime(
                                    "%Y%m%d", time.localtime(time.time())
                                ) + '_person' + str(pid) + '.txt'
                                f = open(filename, 'a')
                                f.write('\n' + cam_id + ' - ' +
                                        time.strftime('%H : %M : %S'))
                                f.close()
                            #If the map of this camera ID is still false, it means there was no identified query in this second.
                            #if(patient_map.camera_map[int(cam_id)] == False):

                yield cam_id, im0
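
yolo_frames is a generator that yields (cam_id, annotated_frame) pairs. A minimal consumer sketch; the enclosing class name Camera and the unique_name tuple layout are assumptions based on the cam_id = unique_name[1] indexing above:

import cv2

# hypothetical consumer: read annotated frames from the generator and display them
for cam_id, frame in Camera.yolo_frames(('feed', '0')):
    cv2.imshow('camera %s' % cam_id, frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()
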
Example 7
def search_detect(dataloader_item, model, reidModel, device, classes, colors,
                  weights):
    global query_time_last
    global query_time_now
    global query_feats

    conf_thres = 0.1
    nms_thres = 0.4
    dist_thres = 3.0
    output = 'output'
    fourcc = 'mp4v'

    t = time.time()
    path, img, im0, vid_cap = dataloader_item
    # print(path, img.shape, im0.shape, vid_cap)
    # print(aaa)
    # data/samples/c1s1_001051.jpg (3, 320, 416) (480, 640, 3) None
    vid_path, vid_writer = None, None

    ############# query initialization #############
    if len(os.listdir('query')) < 1:
        print('not enough query')
        return
    else:
        if query_time_now == query_time_last or query_time_now - query_time_last >= 1:
            query_loader, num_query = make_data_loader(reidCfg)

            query_feats = []
            for i, batch in enumerate(query_loader):
                with torch.no_grad():
                    img_q, pid, camid = batch
                    img_q = img_q.to(device)
                    feat = reidModel(
                        img_q)  # e.g. 2 query images, each a 2048-dim feature vector: torch.Size([2, 2048])
                    query_feats.append(feat)

            query_feats = torch.cat(query_feats,
                                    dim=0)  # torch.Size([2, 2048])
            query_feats = torch.nn.functional.normalize(query_feats,
                                                        dim=1,
                                                        p=2)  # L2-normalize the query feature vectors
            print("The query feature is normalized")

            query_time_last = query_time_now
        elif len(query_feats) == 0:
            print('no query_feats')
            return
    query_time_now = time.time()
    ############# query initialization END #############

    if not os.path.exists(output):
        os.makedirs(output)
    save_path = str(Path(output) / Path(path).name)  # save path

    # Get detections shape: (3, 416, 320)
    img = torch.from_numpy(img).unsqueeze(0).to(
        device)  # torch.Size([1, 3, 416, 320])
    pred, _ = model(img)  # processed network predictions (plus the raw ones)
    det = non_max_suppression(pred.float(), conf_thres,
                              nms_thres)[0]  # torch.Size([5, 7])

    if det is not None and len(det) > 0:
        # Rescale boxes from 416 to the original image size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

        # Print results to screen image 1/3 data\samples\000493.jpg: 288x416 5 persons, Done. (0.869s)
        print('%gx%g ' % img.shape[2:], end='')  # print image size '288x416'
        for c in det[:, -1].unique():  # iterate over the classes present in the image
            n = (det[:, -1] == c).sum()  # number of detections for this class
            if classes[int(c)] == 'person':
                print('%g %ss' % (n, classes[int(c)]),
                      end=', ')  # print count and class, e.g. '5 persons'

        # Draw bounding boxes and labels of detections
        # (x1y1x2y2, obj_conf, class_conf, class_pred)
        count = 0
        gallery_img = []
        gallery_loc = []
        for *xyxy, conf, cls_conf, cls in det:  # iterate over the final predicted boxes
            # *xyxy: top-left and bottom-right coordinates in the original image, e.g. [tensor(349.), tensor(26.), tensor(468.), tensor(341.)]
            '''Write to file'''
            # with open(save_path + '.txt', 'a') as file:
            # file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))

            # Add bbox to the image
            label = '%s %.2f' % (classes[int(cls)], conf)  # 'person 1.00'
            if classes[int(cls)] == 'person':
                # plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])
                xmin = int(xyxy[0])
                ymin = int(xyxy[1])
                xmax = int(xyxy[2])
                ymax = int(xyxy[3])
                w = xmax - xmin  # 233
                h = ymax - ymin  # 602
                # very small person detections are rarely useful;
                # tune these size thresholds for your own setup
                # if h>2*w and h*w > 100*50:
                if h > 100 and w > 50:
                    gallery_loc.append((xmin, ymin, xmax, ymax))
                    crop_img = im0[ymin:ymax, xmin:xmax]  # HWC (602, 233, 3)
                    crop_img = Image.fromarray(
                        cv2.cvtColor(crop_img,
                                     cv2.COLOR_BGR2RGB))  # PIL: (233, 602)
                    crop_img = build_transforms(reidCfg)(crop_img).unsqueeze(
                        0)  # torch.Size([1, 3, 256, 128])
                    gallery_img.append(crop_img)
        '''flip image and box'''
        im0 = cv2.flip(im0, 1)

        if gallery_img:
            gallery_img = torch.cat(gallery_img,
                                    dim=0)  # torch.Size([7, 3, 256, 128])
            gallery_img = gallery_img.to(device)
            gallery_feats = reidModel(gallery_img)  # torch.Size([7, 2048])
            gallery_feats = torch.nn.functional.normalize(gallery_feats,
                                                          dim=1,
                                                          p=2)  # L2-normalize the gallery feature vectors
            print("The gallery feature is normalized")

            # m: 2
            # n: 7
            m, n = query_feats.shape[0], gallery_feats.shape[0]
            distmat = torch.pow(query_feats, 2).sum(dim=1, keepdim=True).expand(m, n) + \
                      torch.pow(gallery_feats, 2).sum(dim=1, keepdim=True).expand(n, m).t()
            # out=(beta∗M)+(alpha∗mat1@mat2)
            # qf^2 + gf^2 - 2 * [email protected]()
            # distmat - 2 * [email protected]()
            # distmat: qf^2 + gf^2
            # qf: torch.Size([2, 2048])
            # gf: torch.Size([7, 2048])
            distmat.addmm_(query_feats, gallery_feats.t(), beta=1, alpha=-2)
            # distmat = (qf - gf)^2
            # distmat = np.array([[1.79536, 2.00926, 0.52790, 1.98851, 2.15138, 1.75929, 1.99410],
            #                     [1.78843, 1.96036, 0.53674, 1.98929, 1.99490, 1.84878, 1.98575]])
            distmat = distmat.cpu().numpy()  # ndarray of shape (m, n)
            distmat = distmat.sum(axis=0) / len(
                query_feats)  # average over the multiple query images of the same person
            index = distmat.argmin()
            if distmat[index] < dist_thres:
                print('distance: %s' % distmat[index])

                # mirror the box horizontally to match cv2.flip(im0, 1); y is unchanged
                # print(gallery_loc[index])
                xmin = im0.shape[1] - gallery_loc[index][2]
                ymin = gallery_loc[index][1]
                xmax = im0.shape[1] - gallery_loc[index][0]
                ymax = gallery_loc[index][3]

                # plot_one_box(gallery_loc[index], im0, label='find!', color=colors[int(cls)])
                plot_one_box((xmin, ymin, xmax, ymax),
                             im0,
                             label='find!',
                             color=colors[int(cls)])
                # cv2.imshow('person search', im0)
                # cv2.waitKey()

    print('Done. (%.3fs)' % (time.time() - t))
    '''show image'''
    # cv2.imshow(weights, im0)
    '''save image'''
    # cv2.imwrite(save_path, im0)
    '''save webcam'''
    #     if vid_path != save_path:  # new video
    #         vid_path = save_path
    #         if isinstance(vid_writer, cv2.VideoWriter):
    #             vid_writer.release()  # release previous video writer

    #         fps = vid_cap.get(cv2.CAP_PROP_FPS)
    #         width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    #         height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    #         vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (width, height))
    #     vid_writer.write(im0)

    return im0
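
Mirroring a box to follow cv2.flip is easy to get wrong, since each flip code moves different coordinates. A small sketch of the mapping for (xmin, ymin, xmax, ymax) boxes:

def flip_box(box, w, h, flip_code):
    # map an (xmin, ymin, xmax, ymax) box through cv2.flip(img, flip_code)
    # flip codes: 1 = horizontal, 0 = vertical, -1 = both
    xmin, ymin, xmax, ymax = box
    if flip_code in (1, -1):  # horizontal component: mirror x
        xmin, xmax = w - xmax, w - xmin
    if flip_code in (0, -1):  # vertical component: mirror y
        ymin, ymax = h - ymax, h - ymin
    return xmin, ymin, xmax, ymax

print(flip_box((10, 20, 50, 80), 640, 480, 1))  # (590, 20, 630, 80)
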
Example 8
    def detect(self, images, stamp, dist_thres=1.0):

        camid = 0  # camid is defined here
        # start re-id
        crop_img = build_transforms(reidCfg)(images).unsqueeze(0)
        gallery_img = []
        gallery_img.append(crop_img)
        if gallery_img:
            gallery_img = torch.cat(gallery_img, dim=0)
            gallery_img = gallery_img.to(self.device)
            gallery_feats = self.reidModel(gallery_img)
            print("The gallery feature is normalized")
            gallery_feats = torch.nn.functional.normalize(gallery_feats,
                                                          dim=1,
                                                          p=2)

            for i, m_query_feat in enumerate(self.query_feats):
                m_query_feat = torch.reshape(m_query_feat, (1, 2048))
                m, n = m_query_feat.shape[0], gallery_feats.shape[0]
                distmat = torch.pow(m_query_feat, 2).sum(dim=1, keepdim=True).expand(m, n) + \
                         torch.pow(gallery_feats, 2).sum(dim=1, keepdim=True).expand(n, m).t()
                distmat.addmm_(m_query_feat, gallery_feats.t(), beta=1, alpha=-2)
                distmat = distmat.cpu().numpy()
                distmat = distmat.sum(axis=0) / len(m_query_feat)
                index = distmat.argmin()
                print('stamp:', stamp, '\tpid:', self.query_pids[i],
                      '\tdismat:', distmat[index])
                if distmat[index] < dist_thres:
                    return self.query_pids[i]

            # if the target was not found in the query set:
            # Step 1: find the largest existing pid and save the image under a new one
            if len(self.query_pids) != 0:
                pids_copy = self.query_pids[:]  # copy the pid list to find its maximum, so a new id can be assigned
                pids_copy.sort()
                x = pids_copy[-1] + 1
            else:
                x = 0
            x_str = str(x)
            path = './query/' + x_str.rjust(4, '0') + '_c1s1_0_0_' + str(
                time.time()) + '.jpg'
            images.save(path)

            # Step 2: self-learning
            with torch.no_grad():

                # extract the feature and normalize it
                new_id_img = build_transforms(reidCfg)(images).unsqueeze(0)
                new_id_imgs = []
                new_id_imgs.append(new_id_img)
                new_id_imgs = torch.cat(new_id_imgs, dim=0)
                new_id_imgs = new_id_imgs.to(self.device)
                new_id_feats = self.reidModel(new_id_imgs)
                new_id_feats = torch.nn.functional.normalize(new_id_feats,
                                                             dim=1,
                                                             p=2)
                print("The gallery feature is normalized")

                # append to the overall feature matrix
                self.query_feats = torch.cat((self.query_feats, new_id_feats),
                                             0)
                temp = []
                temp.append(x)
                self.query_pids.extend(np.asarray(temp))
            return x
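
Because every feature in these examples is L2-normalized, the squared Euclidean distance and the cosine similarity are tied by d^2 = 2 - 2*cos. A quick numeric check:

import torch

a = torch.nn.functional.normalize(torch.randn(1, 2048), dim=1, p=2)
b = torch.nn.functional.normalize(torch.randn(1, 2048), dim=1, p=2)

d2 = (a - b).pow(2).sum()  # squared Euclidean distance
cos = (a * b).sum()        # cosine similarity of unit vectors
assert torch.allclose(d2, 2 - 2 * cos, atol=1e-5)

So a dist_thres of 1.0 on these squared distances corresponds to requiring a cosine similarity above 0.5.
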
Example 9
from utils.utils import *
from reid.data import make_data_loader
from reid.data.transforms import build_transforms
from reid.modeling import build_model
from reid.config import cfg as reidCfg
import numpy as np

device = torch_utils.select_device(force_cpu=False)
torch.backends.cudnn.benchmark = False
reidCfg.DATASETS.ROOT_DIR = ('./query/')
query_loader, num_query = make_data_loader(reidCfg)
reidModel = build_model(reidCfg, num_classes=10126)
reidModel.load_param(reidCfg.TEST.WEIGHT)
reidModel.to(device).eval()

img = Image.open('./query/0001_c1s1_0_146.jpg')
crop_img = build_transforms(reidCfg)(img).unsqueeze(0)
gallery_img = []
gallery_img.append(crop_img)
gallery_img = torch.cat(gallery_img, dim=0)
gallery_img = gallery_img.to(device)
gallery_feats = reidModel(gallery_img)
print("The gallery feature is normalized")
gallery_feats = torch.nn.functional.normalize(gallery_feats, dim=1, p=2)
print(gallery_feats)
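
A natural next step, sketched here as a continuation of the script above, is to score this feature against a second image (the path below is a hypothetical example):

img2 = Image.open('./query/0002_c1s1_0_146.jpg')  # assumed second query image
query_img = build_transforms(reidCfg)(img2).unsqueeze(0).to(device)
query_feats = torch.nn.functional.normalize(reidModel(query_img), dim=1, p=2)

# squared Euclidean distance between the two normalized features
dist = (query_feats - gallery_feats).pow(2).sum(dim=1)
print(dist.item())  # small values suggest the same person
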
Example 10
                # plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])
                xmin = int(xyxy[0])
                ymin = int(xyxy[1])
                xmax = int(xyxy[2])
                ymax = int(xyxy[3])
                w = xmax - xmin  # 233
                h = ymax - ymin  # 602
                # very small person detections are rarely useful;
                # tune these size thresholds for your own setup
                if h > 2 * w and h * w > 300 * 150:
                    print(h, w)
                    crop_img = im0[ymin:ymax, xmin:xmax]  # HWC (602, 233, 3)
                    crop_img = Image.fromarray(
                        cv2.cvtColor(crop_img,
                                     cv2.COLOR_BGR2RGB))  # PIL: (233, 602)
                    crop_img = build_transforms(reidCfg)(crop_img).unsqueeze(
                        0)  # torch.Size([1, 3, 256, 128])
                    gallery_img.append(crop_img)

                if gallery_img:
                    gallery_img = torch.cat(
                        gallery_img, dim=0)  # torch.Size([7, 3, 256, 128])
                    gallery_img = gallery_img.to(device)
                    gallery_feats = reidModel(
                        gallery_img)  # torch.Size([7, 2048])
                    print("The gallery feature is normalized")
                    gallery_feats = torch.nn.functional.normalize(
                        gallery_feats, dim=1, p=2)  # L2-normalize the gallery feature vectors

                    # m: 2
                    # n: 7
                    m, n = query_feats.shape[0], gallery_feats.shape[0]
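
The snippet cuts off after computing m and n. A sketch of how the earlier detect() examples complete it, with query_feats, gallery_feats, gallery_loc, and dist_thres assumed to be in scope as above:

                    distmat = torch.pow(query_feats, 2).sum(dim=1, keepdim=True).expand(m, n) + \
                              torch.pow(gallery_feats, 2).sum(dim=1, keepdim=True).expand(n, m).t()
                    distmat.addmm_(query_feats, gallery_feats.t(), beta=1, alpha=-2)
                    distmat = distmat.cpu().numpy().sum(axis=0) / m  # average over the query images
                    index = distmat.argmin()
                    if distmat[index] < dist_thres:
                        print('distance: %s' % distmat[index])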