Ejemplo n.º 1
0
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(
            letterbox_image(image,
                            (self.model_image_size[1],
                             self.model_image_size[0])))  # letterbox_image???
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0  # 归一化操作
        photo = np.transpose(photo, (2, 0, 1))  # 通道维度调整(pytorch),有利于GPU处理
        photo = photo.astype(np.float32)
        images = []  # 扩充一个维度
        images.append(photo)  # 扩充一个维度

        images = np.asarray(images)
        images = torch.from_numpy(images)  # numpy转化为tensor
        if self.cuda:
            images = images.cuda()

        with torch.no_grad():
            outputs = self.net(images)  # 图片传入网络,得到网络的预测结果
            output_list = []  # 三个size的预测结果
            for i in range(3):  # 经过三次循环对特征层解码(先验框)
                output_list.append(self.yolo_decodes[i](
                    outputs[i]))  # yolo_decodes 先验框调整的过程
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],  # 非极大值抑制
                conf_thres=self.confidence,
                nms_thres=self.iou)
        try:
            batch_detections = batch_detections[0].cpu().numpy()  # 判断图片是否还有框
        except:
            return image
        top_index = batch_detections[:,
                                     4] * batch_detections[:,
                                                           5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index,
                                                                     5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
            top_bboxes[:, 0],
            -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # 去掉灰条(基于原图的坐标绘制框)
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))  # 定义字体

        thickness = (np.shape(image)[0] + np.shape(image)[1]
                     ) // self.model_image_size[0]  # 框的宽度怎么样子的

        # 画图的代码
        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]  # 取出类的名称
            score = top_conf[i]  # 取出类的得分

            top, left, bottom, right = boxes[i]  # # 取出类的位置
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle(  # 绘画矩形
                    [left + i, top + i, right - i, bottom - i],
                    outline=self.colors[self.class_names.index(
                        predicted_class)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)  # 写字
            del draw
        return image
Ejemplo n.º 2
0
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.FloatTensor

    imgs = []  # Stores image paths
    img_detections = []  # Stores detections for each image index

    print("\nPerforming object detection:")
    prev_time = time.time()
    for batch_i, (img_paths, input_imgs) in enumerate(dataloader):
        # Configure input
        input_imgs = Variable(input_imgs.type(Tensor))

        # Get detections
        with torch.no_grad():
            detections = model(input_imgs)
            detections = non_max_suppression(detections, opt.conf_thres,
                                             opt.nms_thres)

        # Log progress
        current_time = time.time()
        inference_time = datetime.timedelta(seconds=current_time - prev_time)
        prev_time = current_time
        print("\t+ Batch %d, Inference Time: %s" % (batch_i, inference_time))

        # Save image and detections
        imgs.extend(img_paths)
        img_detections.extend(detections)

    # Bounding-box colors
    cmap = plt.get_cmap("tab20b")
    colors = [cmap(i) for i in np.linspace(0, 1, 20)]
Ejemplo n.º 3
0
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        #---------------------------------------------------------#
        #   给图像增加灰条,实现不失真的resize
        #   也可以直接resize进行识别
        #---------------------------------------------------------#
        if self.letterbox_image:
            crop_img = np.array(
                letterbox_image(
                    image,
                    (self.model_image_size[1], self.model_image_size[0])))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize(
                (self.model_image_size[1], self.model_image_size[0]),
                Image.BICUBIC)
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        #---------------------------------------------------------#
        #   添加上batch_size维度
        #---------------------------------------------------------#
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            #---------------------------------------------------------#
            #   将图像输入网络当中进行预测!
            #---------------------------------------------------------#
            outputs = self.net(images)
            output_list = []
            for i in range(2):
                output_list.append(self.yolo_decodes[i](outputs[i]))

            #---------------------------------------------------------#
            #   将预测框进行堆叠,然后进行非极大抑制
            #---------------------------------------------------------#
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)

            #---------------------------------------------------------#
            #   如果没有检测出物体,返回原图
            #---------------------------------------------------------#
            try:
                batch_detections = batch_detections[0].cpu().numpy()
            except:
                return image

            #---------------------------------------------------------#
            #   对预测框进行得分筛选
            #---------------------------------------------------------#
            top_index = batch_detections[:,
                                         4] * batch_detections[:,
                                                               5] > self.confidence
            top_conf = batch_detections[top_index,
                                        4] * batch_detections[top_index, 5]
            top_label = np.array(batch_detections[top_index, -1], np.int32)
            top_bboxes = np.array(batch_detections[top_index, :4])
            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                top_bboxes[:, 0],
                -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                    top_bboxes[:, 2],
                    -1), np.expand_dims(top_bboxes[:, 3], -1)

            #-----------------------------------------------------------------#
            #   在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
            #   因此生成的top_bboxes是相对于有灰条的图像的
            #   我们需要对其进行修改,去除灰条的部分。
            #-----------------------------------------------------------------#
            if self.letterbox_image:
                boxes = yolo_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array(
                        [self.model_image_size[0], self.model_image_size[1]]),
                    image_shape)
            else:
                top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
                top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
                top_xmax = top_xmax / self.model_image_size[1] * image_shape[1]
                top_ymax = top_ymax / self.model_image_size[0] * image_shape[0]
                boxes = np.concatenate(
                    [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max((np.shape(image)[0] + np.shape(image)[1]) //
                        self.model_image_size[0], 1)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[self.class_names.index(
                                   predicted_class)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
Ejemplo n.º 4
0
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = []
        images.append(photo)
        images = np.asarray(images)

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except:
            return image

        top_index = batch_detections[:,
                                     4] * batch_detections[:,
                                                           5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index,
                                                                     5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
            top_bboxes[:, 0],
            -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # 去掉灰条
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(image)[0] +
                     np.shape(image)[1]) // self.model_image_size[0]
        print('画面中有{}个人'.format(len(boxes)))
        font_cn = ImageFont.truetype(font='model_data/simhei.ttf',
                                     size=np.floor(3e-2 * image.size[1] +
                                                   0.5).astype('int32'))

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[self.class_names.index(
                                   predicted_class)])

            show_str = '  画面中有' + str(len(boxes)) + '个人  '
            label_size1 = draw.textsize(show_str, font_cn)
            print(label_size1)
            draw.rectangle([10, 10, 10 + label_size1[0], 10 + label_size1[1]],
                           fill=(255, 255, 0))
            draw.text((10, 10), show_str, fill=(0, 0, 0), font=font_cn)
            '''
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            #draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
            '''
            del draw
        return image
    def detect_image(self, image_id, image):
        self.confidence = 0.05
        f = open("./input/detection-results/" + image_id + ".txt", "w")
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = []
        images.append(photo)

        images = np.asarray(images)
        images = torch.from_numpy(images)
        if self.cuda:
            images = images.cuda()

        with torch.no_grad():
            outputs = self.net(images)
            output_list = []
            for i in range(3):
                output_list.append(self.yolo_decodes[i](outputs[i]))
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],
                conf_thres=self.confidence,
                nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except:
            return image
        top_index = batch_detections[:,
                                     4] * batch_detections[:,
                                                           5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index,
                                                                     5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
            top_bboxes[:, 0],
            -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # 去掉灰条
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = str(top_conf[i])

            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
Ejemplo n.º 6
0
def detect():
    # 0、初始化一些参数
    cfg = opt.cfg
    weights = opt.weights
    src_txt_path = opt.src_txt_path
    img_size = opt.img_size
    batch_size = opt.batch_size
    dst_path = opt.dst_path
    if not os.path.exists(dst_path):
        os.mkdir(dst_path)
    device = select_device(opt.device)
    classes = load_classes(parse_data_cfg(opt.data)['names'])

    # 1、加载网络
    model = Darknet(cfg)
    if weights.endswith('.pt'):  # TODO: .weights权重格式
        model.load_state_dict(
            torch.load(weights)['model'])  # TODO:map_location=device ?
    model.to(device).eval()

    # 2、加载数据集
    test_dataset = VocDataset(src_txt_path, img_size, with_label=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn)  # TODO

    # 3、预测,前向传播
    start = time.time()
    pbar = tqdm(dataloader)
    for i, (img_tensor, img0, img_name) in enumerate(pbar):
        pbar.set_description("Already Processed %d image: " % (i + 1))
        # print('clw: Already Processed %d image' % (i+1))
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)
        output = model(img_tensor)[
            0]  # (x1, y1, x2, y2, obj_conf, class_conf, class_pred)

        # NMS
        nms_output = non_max_suppression(output, opt.conf_thres, opt.nms_thres)

        # 可视化
        for batch_idx, det in enumerate(nms_output):  # detections per image
            if det is not None:  # and len(det):  # clw note: important !
                #or box in det:
                for *box, conf, _, cls in det:  # det: tensor.Size (bs, 7)    box: list
                    orig_h, orig_w = img0[batch_idx].shape[:2]  # 坐标变换
                    new_h = new_w = img_tensor.size()[
                        2]  # 绘图,resize后的图的框 -> 原图的框,new -> orig
                    ratio_h = orig_h / new_h
                    ratio_w = orig_w / new_w
                    x1 = int(ratio_w * box[0])
                    y1 = int(ratio_h * box[1])
                    x2 = int(ratio_w * (box[2]))
                    y2 = int(ratio_h * (box[3]))
                    label = '%s %.2f' % (classes[int(cls)], conf)

                    # 预测结果可视化
                    plot_one_box([x1, y1, x2, y2],
                                 img0[batch_idx],
                                 label=label,
                                 color=(255, 0, 0))
                    #cv2.rectangle(img0[batch_idx], (x1, y1), (x2, y2), (0, 0, 255), 1)  # 如果报错 TypeError: an integer is required (got type tuple),检查是不是传入了img_tensor

            if SAVE:
                # 保存结果
                cv2.imwrite(os.path.join(dst_path, img_name[batch_idx]),
                            img0[batch_idx])
            if SHOW:
                cv2.imshow('aaa', img0[batch_idx])
                cv2.waitKey(0)

    print('time use: %.3fs' % (time.time() - start))
Ejemplo n.º 7
0
    def get_FPS(self, image, test_interval):
        # 调整图片使其符合输入要求
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
        photo = np.array(crop_img,dtype = np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = []
        images.append(photo)
        images = np.asarray(images)

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)
            output_list = []
            for i in range(2):
                output_list.append(self.yolo_decodes[i](outputs[i]))
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output, len(self.class_names),
                                                    conf_thres=self.confidence,
                                                    nms_thres=self.iou)
            try:
                batch_detections = batch_detections[0].cpu().numpy()
                top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
                top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
                top_label = np.array(batch_detections[top_index,-1],np.int32)
                top_bboxes = np.array(batch_detections[top_index,:4])
                top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
                # 去掉灰条
                boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
            except:
                pass

        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                outputs = self.net(images)
                output_list = []
                for i in range(2):
                    output_list.append(self.yolo_decodes[i](outputs[i]))
                output = torch.cat(output_list, 1)
                batch_detections = non_max_suppression(output, len(self.class_names),
                                                        conf_thres=self.confidence,
                                                        nms_thres=self.iou)
                try:
                    batch_detections = batch_detections[0].cpu().numpy()
                    top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
                    top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
                    top_label = np.array(batch_detections[top_index,-1],np.int32)
                    top_bboxes = np.array(batch_detections[top_index,:4])
                    top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
                    # 去掉灰条
                    boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
                except:
                    pass

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
Ejemplo n.º 8
0
 def forward(self, x):
     return non_max_suppression(x[0],
                                conf_thres=self.conf,
                                iou_thres=self.iou,
                                classes=self.classes)
Ejemplo n.º 9
0
    def validation_step(self, opt, outputs, batch, batch_idx, epoch):
        imgs, targets, paths, shapes, pad = batch
        _, _, height, width = imgs.shape
        
        inf_out, train_out = outputs
        whwh = torch.Tensor([width, height, width, height]).to(imgs.device)

        losses = compute_loss(train_out, targets, self.model)[1][:3]  # GIoU, obj, cls
        output = non_max_suppression(inf_out, conf_thres=opt.conf_thres, iou_thres=opt.iou_thres, multi_label=self.calc_ni(batch_idx, epoch) > self.n_burn)

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            self.seen += 1

            if pred is None:
                if nl:
                    self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            # with open('test.txt', 'a') as file:
            #    [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], self.niou, dtype=torch.bool, device=imgs.device)

            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        for j in (ious > self.iouv[0].to(ious.device)).nonzero():
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[pi[j]] = ious[j] > self.iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            self.stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
        return losses
Ejemplo n.º 10
0
def main():
    img_size = 512  # 必须是32的整数倍 [416, 512, 608]
    cfg = "cfg/my_yolov3.cfg"  # 改成生成的.cfg文件
    weights = "weights/yolov3spp-voc-512.pth".format(img_size)  # 改成自己训练好的权重文件
    json_path = "./data/pascal_voc_classes.json"  # json标签文件
    img_path = "test.jpg"
    assert os.path.exists(cfg), "cfg file {} dose not exist.".format(cfg)
    assert os.path.exists(weights), "weights file {} dose not exist.".format(
        weights)
    assert os.path.exists(json_path), "json file {} dose not exist.".format(
        json_path)
    assert os.path.exists(img_path), "image file {} dose not exist.".format(
        img_path)

    json_file = open(json_path, 'r')
    class_dict = json.load(json_file)
    category_index = {v: k for k, v in class_dict.items()}

    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = YOLOV3_SPP(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)

    model.eval()
    with torch.no_grad():
        # init
        img = torch.zeros((1, 3, img_size, img_size), device=device)
        model(img)

        img_o = cv2.imread(img_path)  # BGR
        assert img_o is not None, "Image Not Found " + img_path

        img = img_utils.letterbox(img_o,
                                  new_shape=input_size,
                                  auto=True,
                                  color=(0, 0, 0))[0]
        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(device).float()
        img /= 255.0  # scale (0, 255) to (0, 1)
        img = img.unsqueeze(0)  # add batch dimension

        t1 = torch_utils.time_synchronized()
        pred = model(img)[0]  # only get inference result
        t2 = torch_utils.time_synchronized()
        print(t2 - t1)

        pred = utils.non_max_suppression(pred,
                                         conf_thres=0.1,
                                         iou_thres=0.6,
                                         multi_label=True)[0]
        t3 = time.time()
        print(t3 - t2)

        # process detections
        pred[:, :4] = utils.scale_coordinates(pred[:, :4], img.shape[2:],
                                              img_o.shape).round()
        print(pred.shape)

        bboxes = pred[:, :4].detach().cpu().numpy()
        scores = pred[:, 4].detach().cpu().numpy()
        classes = pred[:, 5].detach().cpu().numpy().astype(np.int) + 1

        img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores,
                         category_index)
        plt.imshow(img_o)
        plt.show()

        img_o.save("test_result.jpg")
Ejemplo n.º 11
0
def predict(
    data_dir: Union[str, os.PathLike],
    weights: Union[str, os.PathLike],
    batch_size: Optional[int] = 8,
    num_workers: Optional[int] = 1,
    resize: Optional[Union[int, Tuple[int, int]]] = None,
    file_ext: Optional[str] = "jpg",
    confidence: Optional[float] = 0.5,
    nms_threshold: Optional[float] = 0.5,
    output_path: Union[str, os.PathLike] = "../",
):
    dataset = YoloDataset(data_dir, file_ext=file_ext, resize=resize)
    loader = DataLoader(dataset,
                        batch_size=batch_size,
                        pin_memory=True,
                        num_workers=num_workers)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"using device {device}")

    print("loading model...")
    model = attempt_load(weights, map_location=device)
    model.eval()
    # print(type(model))

    predictions = []
    for batch in tqdm(loader):
        batch = batch.to(device)
        # print(batch.size())
        with torch.no_grad():
            pred = model(batch, augment=False)[0]
        pred = non_max_suppression(pred,
                                   confidence,
                                   nms_threshold,
                                   classes=None,
                                   agnostic=False)
        predictions.extend([to_cpu(p) for p in pred])

    predictions = Parallel(n_jobs=os.cpu_count(), backend="multiprocessing")(
        delayed(postprocess)(p) for p in tqdm(predictions))

    if output_path.endswith(".json"):
        if os.path.exists(os.path.dirname(output_path)):
            output_file = output_path
        else:
            raise IOError(
                f"{Fore.RED} no such directory {os.path.dirname(output_path)} {Style.RESET_ALL}"
            )
    elif os.path.isdir(output_path):
        output_file = os.path.join(
            output_dir,
            "yolov5_predictions_" + str(time.time()).split(".")[0] + ".json")

    else:
        raise IOError(
            f"{Fore.RED} no such directory {os.path.dirname(output_path)} {Style.RESET_ALL}"
        )

    filenames = dataset.filenames
    output_dict = dict(zip(filenames, predictions))

    with open(output_file, "w") as f:
        json.dump(output_dict, f, indent=2)
Ejemplo n.º 12
0
def detect_onnx(official=True, image_path=None):
    num_classes = 80
    # anchors = [[116, 90, 156, 198, 373, 326], [30, 61, 62, 45, 59, 119], [10, 13, 16, 30, 33, 23]]  # 5s
    anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119],
               [116, 90, 156, 198, 373, 326]]

    session = onnxruntime.InferenceSession('./weights/best.onnx')
    # print("The model expects input shape: ", session.get_inputs()[0].shape)
    batch_size = session.get_inputs()[0].shape[0]
    img_size_h = session.get_inputs()[0].shape[2]
    img_size_w = session.get_inputs()[0].shape[3]

    # input
    image_src = Image.open(image_path)
    resized = letterbox_image(image_src, (img_size_w, img_size_h))

    img_in = np.transpose(resized, (2, 0, 1)).astype(np.float32)  # HWC -> CHW
    img_in = np.expand_dims(img_in, axis=0)
    img_in /= 255.0
    # print("Shape of the image input shape: ", img_in.shape)

    # inference
    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: img_in})

    batch_detections = []
    if official and len(
            outputs
    ) == 4:  # model.model[-1].export = boolean ---> True:3 False:4
        # model.model[-1].export = False ---> outputs[0] (1, xxxx, 85)
        # 直接使用官方代码
        batch_detections = torch.from_numpy(np.array(outputs[0]))
        batch_detections = non_max_suppression(batch_detections,
                                               conf_thres=0.4,
                                               iou_thres=0.5,
                                               agnostic=False)
    else:
        # model.model[-1].export = False ---> outputs[1]/outputs[2]/outputs[2]
        # model.model[-1].export = True  ---> outputs
        # (1, 3, 20, 20, 85)
        # (1, 3, 40, 40, 85)
        # (1, 3, 80, 80, 85)
        # 自己手写处理 (部分原理来自 yolo.py Detect)
        boxs = []
        a = torch.tensor(anchors).float().view(3, -1, 2)
        anchor_grid = a.clone().view(3, 1, -1, 1, 1, 2)
        if len(outputs) == 4:
            outputs = [outputs[1], outputs[2], outputs[3]]
        for index, out in enumerate(outputs):
            out = torch.from_numpy(out)
            batch = out.shape[1]
            feature_w = out.shape[2]
            feature_h = out.shape[3]

            # Feature map corresponds to the original image zoom factor
            stride_w = int(img_size_w / feature_w)
            stride_h = int(img_size_h / feature_h)

            grid_x, grid_y = np.meshgrid(np.arange(feature_w),
                                         np.arange(feature_h))

            # cx, cy, w, h
            pred_boxes = torch.FloatTensor(out[..., :4].shape)
            pred_boxes[..., 0] = (torch.sigmoid(out[..., 0]) * 2.0 - 0.5 +
                                  grid_x) * stride_w  # cx
            pred_boxes[..., 1] = (torch.sigmoid(out[..., 1]) * 2.0 - 0.5 +
                                  grid_y) * stride_h  # cy
            pred_boxes[..., 2:4] = (torch.sigmoid(out[..., 2:4]) *
                                    2)**2 * anchor_grid[index]  # wh

            conf = torch.sigmoid(out[..., 4])
            pred_cls = torch.sigmoid(out[..., 5:])

            output = torch.cat((pred_boxes.view(
                batch_size, -1, 4), conf.view(batch_size, -1, 1),
                                pred_cls.view(batch_size, -1, num_classes)),
                               -1)
            boxs.append(output)

        outputx = torch.cat(boxs, 1)
        # NMS
        batch_detections = w_non_max_suppression(outputx,
                                                 num_classes,
                                                 conf_thres=0.4,
                                                 nms_thres=0.3)

    return batch_detections
Ejemplo n.º 13
0
def test(cfg,
         data,
         batch_size,
         img_size,
         conf_thres,
         iou_thres,
         nms_thres,
         src_txt_path,
         weights,
         log_file_path=None,
         model=None):

    # 0、初始化一些参数
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    names = load_classes(data['names'])

    # 1、加载网络
    if model is None:
        device = select_device('0')
        model = Darknet(cfg)
        if weights.endswith('.pt'):  # TODO: .weights权重格式
            model.load_state_dict(
                torch.load(weights, map_location=device)['model']
            )  # 20200704_50epoch_modify_noobj   # TODO:map_location=device ?
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)  # clw note: 多卡
    else:
        device = next(model.parameters()).device  # get model device
    model.to(device).eval()

    # 2、加载数据集
    test_dataset = VocDataset(src_txt_path,
                              img_size,
                              with_label=True,
                              is_training=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn,  # TODO
        pin_memory=True)

    # 3、预测,前向传播
    image_nums = 0
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@{}'.format(iou_thres), 'F1')
    #s = ('%20s' + '%10s' * 6) % ('Class', 'ImgNum', 'Target', 'P', 'R', '[email protected]', 'F1')

    p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []

    pbar = tqdm(dataloader)
    for i, (img_tensor, target_tensor, _, _) in enumerate(pbar):

        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)
        target_tensor = target_tensor.to(device)
        height, width = img_tensor.shape[2:]

        start = time.time()
        # Disable gradients
        with torch.no_grad():
            # (1) Run model
            output = model(
                img_tensor
            )  # (x1, y1, x2, y2, obj_conf, class_conf, class_pred)

            # (2) NMS
            nms_output = non_max_suppression(output, conf_thres, nms_thres)
            s = 'time use per batch: %.3fs' % (time.time() - start)

        pbar.set_description(s)

        for batch_idx, pred in enumerate(nms_output):  # pred: (bs, 7)
            labels = target_tensor[target_tensor[:, 0] == batch_idx, 1:]
            nl = len(labels)  # len of label
            tcls = labels[:, 0].tolist() if nl else []  # target class
            image_nums += 1

            # 考虑一个预测 box 都没有的情况,比如 conf 太高
            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds   TODO:有必要,因为 label 都是经过clip的,所以如果去掉clip,mAP应该会有所降低
            clip_coords(pred, (height, width))  #  mAP is the same

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= img_tensor[batch_idx].size()[2]  # w
                tbox[:, [1, 3]] *= img_tensor[batch_idx].size()[1]  # h

                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and m[
                            bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # print('stats.append: ', (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
            '''
                        pred flag                (  [1,       0,       1,       0,       0,       1,       0,       0,       1], 
                        pred conf            tensor([0.17245, 0.14642, 0.07215, 0.07138, 0.07069, 0.06449, 0.06222, 0.05580, 0.05452]), 
                        pred cls             tensor([2.,      2.,      2.,      2.,      2.,      2.,      2.,      2.,      2.]), 
                        lb_cls                 [2.0,     2.0,  2.0, 2.0, 2.0])
            stats is a []
            '''
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(),
                 tcls))  # Append statistics (correct, conf, pcls, tcls)

    # after get stats for all images , ...
    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    # time.sleep(0.01)  # clw note: 防止前面 tqdm 还没输出,但是这里已经打印了
    #pf = '%20s' + '%10.3g' * 6  # print format
    pf = '%20s' + '%10s' + '%10.3g' * 5
    pf_value = pf % ('all', str(image_nums), nt.sum(), mp, mr, map, mf1)
    print(pf_value)
    if __name__ != '__main__':
        write_to_file(s, log_file_path)
        write_to_file(pf_value, log_file_path)

    results = []
    results.append({"all": (mp, mr, map, mf1)})

    # Print results per class
    #if verbose and nc > 1 and len(stats):
    if nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            #print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
            print(pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]))
            if __name__ != '__main__':
                write_to_file(
                    pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]),
                    log_file_path)
            results.append({names[c]: (p[i], r[i], ap[i], f1[i])})

    # Return results
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1), maps
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = './yolov3.onnx'
    engine_file_path = "yolov3.trt"
    data_path = "./data/unrel.data"

    data = parse_data_cfg(data_path)
    nc = int(data['classes'])  # number of classes
    path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names

    iouv = torch.linspace(0.5, 0.95, 1,
                          dtype=torch.float32)  # iou vector for [email protected]:0.95
    niou = 1

    conf_thres = 0.001
    iou_thres = 0.6
    verbose = True

    # Genearte custom dataloader
    img_size = 448  # copy form pytorch src
    batch_size = 16

    dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=True)
    batch_size = min(batch_size, len(dataset))
    dataloader = data_loader(dataset, batch_size, img_size)

    # Output shapes expected by the post-processor
    output_shapes = [(16, 126, 14, 14), (16, 126, 28, 28), (16, 126, 56, 56)]

    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                     '[email protected]', 'F1')
        p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
        pbar = tqdm.tqdm(dataloader, desc=s)
        stats, ap, ap_class = [], [], []
        seen = 0

        for batch_i, (imgs, targets, paths, shapes) in enumerate(pbar):

            imgs = imgs.astype(np.float32) / 255.0
            nb, _, height, width = imgs.shape  # batch size, channels, height, width
            whwh = np.array([width, height, width, height])

            inputs[0].host = imgs

            postprocessor_args = {
                "yolo_masks": [
                    (6, 7, 8), (3, 4, 5), (0, 1, 2)
                ],  # A list of 3 three-dimensional tuples for the YOLO masks
                "yolo_anchors": [
                    (10, 13),
                    (16, 30),
                    (33, 23),
                    (30, 61),
                    (
                        62, 45
                    ),  # A list of 9 two-dimensional tuples for the YOLO anchors
                    (59, 119),
                    (116, 90),
                    (156, 198),
                    (373, 326)
                ],
                "num_classes":
                37,
                "stride": [32, 16, 8]
            }

            postprocessor = PostprocessYOLO(**postprocessor_args)

            # Do layers before yolo
            t = time.time()
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)

            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]

            trt_outputs = [
                np.ascontiguousarray(
                    otpt[:, :, :int(imgs.shape[2] * (2**i) /
                                    32), :int(imgs.shape[3] * (2**i) / 32)],
                    dtype=np.float32) for i, otpt in enumerate(trt_outputs)
            ]

            output_list = postprocessor.process(trt_outputs)

            t0 += time.time() - t

            inf_out = torch.cat(output_list, 1)
            t = time.time()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres)  # nms
            t1 += time.time() - t

            # Statistics per image
            for si, pred in enumerate(output):
                labels = targets[targets[:, 0] == si, 1:]
                nl = len(labels)
                tcls = labels[:, 0].tolist() if nl else []  # target class
                seen += 1

                if pred is None:
                    if nl:
                        stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                      torch.Tensor(), torch.Tensor(), tcls))
                    continue

                # Assign all predictions as incorrect
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
                if nl:
                    detected = []  # target indices
                    tcls_tensor = labels[:, 0]

                    # target boxes
                    tbox = xywh2xyxy(labels[:, 1:5]) * whwh
                    tbox = tbox.type(torch.float32)

                    # Per target class
                    for cls in torch.unique(tcls_tensor):
                        ti = (cls == tcls_tensor).nonzero().view(
                            -1)  # prediction indices
                        pi = (cls == pred[:, 5]).nonzero().view(
                            -1)  # target indices

                        # Search for detections
                        if pi.shape[0]:
                            # Prediction to target ious
                            ious, i = box_iou(pred[pi, :4], tbox[ti]).max(
                                1)  # best ious, indices

                            # Append detections
                            for j in (ious > iouv[0]).nonzero():
                                d = ti[i[j]]  # detected target
                                if d not in detected:
                                    detected.append(d)
                                    correct[pi[j]] = ious[
                                        j] > iouv  # iou_thres is 1xn
                                    if len(
                                            detected
                                    ) == nl:  # all targets already located in image
                                        break

                # Append statistics (correct, conf, pcls, tcls)
                stats.append(
                    (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

            # Plot images
            if batch_i < 1:
                f = 'test_batch%g_gt.jpg' % batch_i  # filename
                plot_images(imgs, targets, paths=paths, names=names,
                            fname=f)  # ground truth
                f = 'test_batch%g_pred.jpg' % batch_i
                plot_images(imgs,
                            output_to_target(output, width, height),
                            paths=paths,
                            names=names,
                            fname=f)  # predictions

        # Compute statistics
        stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
        if len(stats):
            p, r, ap, f1, ap_class = ap_per_class(*stats)
            if niou > 1:
                p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(
                    1), ap[:, 0]  # [P, R, [email protected]:0.95, [email protected]]
            mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
            nt = np.bincount(stats[3].astype(np.int64),
                             minlength=nc)  # number of targets per class
        else:
            nt = torch.zeros(1)

        # Print results
        pf = '%20s' + '%10.3g' * 6  # print format
        print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))

        # Print results per class
        if verbose and nc > 1 and len(stats):
            for i, c in enumerate(ap_class):
                print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

        # Print speeds
        if verbose:
            t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (
                img_size, img_size, batch_size)  # tuple
            print(
                'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g'
                % t)
Ejemplo n.º 15
0
    image = image[...,::-1]
    image = image.astype(np.float64)
    # image = (image - hyp['mean']) / hyp['std']
    image /= 255.0
    image = np.transpose(image, [2, 0, 1])

    image = np.expand_dims(image, axis=0)

    image = torch.from_numpy(image)

    image = image.to(device).float()
    pred = net(image)[0]


    pred = non_max_suppression(pred,0.25, 0.35,
                                       multi_label=False, classes=0, agnostic= False,land=True ,point_num= point_num)
    try:
        det = pred[0].cpu().detach().numpy()
        orig_image = orig_image.astype(np.uint8)

        det[:,:4] = det[:,:4] / np.array([scale_w, scale_h] * 2)
        det[:,5:5+point_num*2] = det[:,5:5+point_num*2] / np.array([scale_w, scale_h] * point_num)
    except:
        det = []
    for b in det:

        text = "{:.4f}".format(b[4])
        b = list(map(int, b))

        cv2.rectangle(orig_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cx = b[0]
    def detect_image(self, image):
        # embed()
        image_shape = np.array(np.shape(image)[0:2])
        num_class = len(self.class_names)  # 有80类
        # embed()

        #---------------------------------------------------------#
        #   给图像增加灰条(什么是灰条),实现不失真的resize
        #---------------------------------------------------------#
        # 复制image return new_image
        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[1], self.model_image_size[0])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0  # 归一化?
        photo = np.transpose(
            photo, (2, 0, 1)
        )  # 转置:将Image.open(img)得到的[H,W,C]格式转换permute为pytorch可以处理的[C,H,W]格式
        #---------------------------------------------------------#
        #   添加上batch_size维度
        #---------------------------------------------------------#
        images = [photo]  # 将photo变为list类型

        with torch.no_grad(
        ):  # disabled gradient calculation,reduce memory consumption for computations
            images = torch.from_numpy(
                np.asarray(images)
            )  # Creates a Tensor from a numpy.ndarray,此时images的shape为[1, 3, 416, 416]
            if self.cuda:
                images = images.cuda()

            #---------------------------------------------------------#
            #   将图像输入网络当中进行预测!
            #---------------------------------------------------------#
            # embed()
            # 从这里开始处理
            # 特征提取
            # 输出outputs为tuple,len=3,每个tensor的shape分别为 第一个特征层[1, 255, 13, 13],第二个特征层[1, 255, 26, 26],第三个特征层[1, 255, 52, 52]
            outputs = self.net(images)
            # embed()
            output_list = []
            for i in range(3):  # 为什么是3
                # 有三个特征层,每个特征层对应自己的decode解码器
                output_list.append(self.yolo_decodes[i](
                    outputs[i]))  # 在这里打几个断点看看

            #---------------------------------------------------------#
            #   将预测框进行堆叠,然后进行非极大抑制
            #---------------------------------------------------------#
            # torch.cat()对矩阵按行进行拼接得到向量
            output = torch.cat(output_list, 1)  # 这里也打几个断点
            # output就是predictions,格式为[batch_size, num_anchors, 85]
            batch_detections = non_max_suppression(output,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)
            # embed()

            #---------------------------------------------------------#
            #   如果没有检测出物体,返回原图
            #---------------------------------------------------------#
            try:
                batch_detections = batch_detections[0].cpu().numpy()
            except:
                return image

            #---------------------------------------------------------#
            #   对预测框进行得分筛选
            #---------------------------------------------------------#
            # coordinates = []# bboxes的坐标

            top_index = batch_detections[:,
                                         4] * batch_detections[:,
                                                               5] > self.confidence
            top_conf = batch_detections[top_index,
                                        4] * batch_detections[top_index, 5]
            top_label = np.array(batch_detections[top_index, -1], np.int32)
            top_bboxes = np.array(batch_detections[top_index, :4])

            # 得到坐标点
            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                top_bboxes[:, 0],
                -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                    top_bboxes[:, 2],
                    -1), np.expand_dims(top_bboxes[:, 3], -1)

            # coordinates.append((top_xmin,top_xmax,top_ymin,top_ymax))# 把四个坐标点看做一个整体

            #-----------------------------------------------------------------#
            #   在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
            #   因此生成的top_bboxes是相对于有灰条的图像的
            #   我们需要对其进行修改,去除灰条的部分。
            #-----------------------------------------------------------------#

            # boxes存放各目标的坐标
            boxes = yolo_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.model_image_size[0], self.model_image_size[1]]),
                image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max((np.shape(image)[0] + np.shape(image)[1]) //
                        self.model_image_size[0], 1)

        for i, c in enumerate(top_label):
            # embed()
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            # 左上角点的坐标
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            # 右下角点的坐标
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[self.class_names.index(
                                   predicted_class)])
            draw.rectangle(  # 画框框
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image, boxes  # 将boxes返回
Ejemplo n.º 17
0
def test_net(output_dir,
             net,
             cuda,
             dataset,
             score_min,
             nms_max,
             use_07_eval=True,
             iou_thres=(0.1, 0.5, 0.75)):
    num_images = len(dataset)
    num_classes = len(dataset.classes)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    #  output_dir = get_output_dir('ssd300_120000', set_type)

    det_file = os.path.join(output_dir, 'detections.pkl')

    for i in range(num_images):
        _, raw_im, im = dataset.raw_transformed(i)
        h, w, _ = raw_im.shape

        x = Variable(im.unsqueeze(0))  # 1, channels, height, width
        if cuda:
            x = x.cuda()
        _t['im_detect'].tic()

        # When ssd.phase == 'test', net_output is
        # shaped as (batch_size, num_classes, top_k, 5)
        # and the last dim order is (score, xmin, ymin, xmax, ymax)
        detections = net(x)
        # Detection time, averaged
        detect_time = _t['im_detect'].toc(average=True)

        detections = non_max_suppression(detections,
                                         num_classes,
                                         score_thres=score_min,
                                         nms_thres=nms_max)

        # No background cls
        for j in range(num_classes):
            if detections[0] is None:
                continue
            # for class j, shape (xxx, 7)
            # (x1, y1, x2, y2, obj_conf, cls_conf, cls)
            dets = detections[0][detections[0][:, -1] == j]

            # select boxes with score > 0
            #  mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
            #  dets = torch.masked_select(dets, mask).view(-1, 5)

            if dets.size(0) == 0:
                continue

            boxes = dets[:, :4]  # (xxx, 4)

            #############################################
            # Transform boxes back according to raw image
            #############################################
            # Unresize
            max_hw = max(h, w)
            scale_hw = max_hw / dataset.img_shape[0]
            boxes *= scale_hw
            # Unpadding
            diff = abs(h - w)
            half_diff = diff // 2
            if h <= w:
                pad = (0, half_diff, 0, diff - half_diff)
            else:
                pad = (half_diff, 0, diff - half_diff, 0)
            boxes -= torch.Tensor(pad).type_as(boxes)

            scores = dets[:, -2].cpu().numpy()

            cls_dets = np.hstack((boxes.cpu().numpy(),
                                  scores[:, np.newaxis])).astype(np.float32,
                                                                 copy=False)
            all_boxes[j][i] = cls_dets  # (xxx, 5) with location and scores
    # store the predicted boxes and labels as .pkl
    with open(det_file, 'wb') as fdet:
        pickle.dump(all_boxes, fdet, pickle.HIGHEST_PROTOCOL)

    print('Write predictions to disk')  # cls by cls, .txt
    write_voc_results_file(all_boxes, dataset, output_dir)
    print('Evaluating detections')
    aps, mAP = do_python_eval(dataset, output_dir, use_07_eval, iou_thres)
    return aps, mAP
Ejemplo n.º 18
0
if img.ndimension() == 3:
    img = img.unsqueeze(0)

if is_CS:
    yolo = SoftDarknet(cfg='cfg/voc_yolov3_soft_orig-output.cfg').to(device)
    yolo.load_state_dict(ck_model['model'])
    yolo.ticket = True
    _ = yolo(img)
else:
    yolo = Darknet(cfg='cfg/voc_yolov3.cfg').to(device)
    yolo.load_state_dict(ck_model['model'])
    mask = create_mask_LTH(yolo)
    mask.load_state_dict(ck_mask)
    apply_mask_LTH(yolo, mask)

sparse = Ch_Wise_SparseYOLO(yolo).to(device)

yolo.eval()
sparse.eval()
# Inference
pred1 = yolo(img)[0]
pred2 = sparse(img)[0]

# Apply NMS
pred1 = non_max_suppression(pred1, 0.3, 0.6)
pred2 = non_max_suppression(pred2, 0.3, 0.6)

for i, (det1, det2) in enumerate(zip(pred1, pred2)):
    print(det1, det2)
    print(torch.abs(det1 - det2))
Ejemplo n.º 19
0
    def detect_image(self, image):

        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(
            letterbox_image(image,
                            (image_sizes[self.phi], image_sizes[self.phi])))
        photo = np.array(crop_img, dtype=np.float32)
        photo = np.transpose(preprocess_input(photo), (2, 0, 1))
        images = []
        images.append(photo)
        images = np.asarray(images)

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            _, regression, classification, anchors = self.net(images)

            regression = decodebox(regression, anchors, images)
            detection = torch.cat([regression, classification], axis=-1)
            batch_detections = non_max_suppression(detection,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=0.2)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except:
            print('置信度过高,没有找到符合条件的目标')
            return image, 0, 0

        top_index = batch_detections[:, 4] > self.confidence
        top_conf = batch_detections[top_index, 4]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
            top_bboxes[:, 0],
            -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # 去掉灰条
        boxes = efficientdet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([image_sizes[self.phi], image_sizes[self.phi]]),
            image_shape)
        font = ImageFont.truetype(font='utils/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))
        thickness = (np.shape(image)[0] +
                     np.shape(image)[1]) // image_sizes[self.phi]

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            # print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[self.class_names.index(
                                   predicted_class)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image, predicted_class, score
Ejemplo n.º 20
0
    def detect_image(self, image_id, image):
        self.confidence = 0.01
        self.iou = 0.5
        f = open("./input/detection-results/" + image_id + ".txt", "w")
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   给图像增加灰条,实现不失真的resize
        #---------------------------------------------------------#
        crop_img = np.array(
            letterbox_image(image,
                            (image_sizes[self.phi], image_sizes[self.phi])))
        photo = np.array(crop_img, dtype=np.float32)
        photo = np.transpose(preprocess_input(photo), (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            #---------------------------------------------------------#
            #   传入网络当中进行预测
            #---------------------------------------------------------#
            _, regression, classification, anchors = self.net(images)

            #-----------------------------------------------------------#
            #   将预测结果进行解码
            #-----------------------------------------------------------#
            regression = decodebox(regression, anchors, images)
            detection = torch.cat([regression, classification], axis=-1)
            batch_detections = non_max_suppression(detection,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)
            #--------------------------------------#
            #   如果没有检测到物体,则返回原图
            #--------------------------------------#
            try:
                batch_detections = batch_detections[0].cpu().numpy()
            except:
                return

            #-----------------------------------------------------------#
            #   筛选出其中得分高于confidence的框
            #-----------------------------------------------------------#
            top_index = batch_detections[:, 4] > self.confidence
            top_conf = batch_detections[top_index, 4]
            top_label = np.array(batch_detections[top_index, -1], np.int32)
            top_bboxes = np.array(batch_detections[top_index, :4])
            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                top_bboxes[:, 0],
                -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                    top_bboxes[:, 2],
                    -1), np.expand_dims(top_bboxes[:, 3], -1)

            #-----------------------------------------------------------#
            #   去掉灰条部分
            #-----------------------------------------------------------#
            boxes = efficientdet_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([image_sizes[self.phi], image_sizes[self.phi]]),
                image_shape)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = str(top_conf[i])

            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
Ejemplo n.º 21
0
    def get_FPS(self, image, test_interval):
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   给图像增加灰条,实现不失真的resize
        #---------------------------------------------------------#
        crop_img = np.array(
            letterbox_image(image,
                            (image_sizes[self.phi], image_sizes[self.phi])))
        photo = np.array(crop_img, dtype=np.float32)
        photo = np.transpose(preprocess_input(photo), (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            _, regression, classification, anchors = self.net(images)

            regression = decodebox(regression, anchors, images)
            detection = torch.cat([regression, classification], axis=-1)
            batch_detections = non_max_suppression(detection,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)
            try:
                batch_detections = batch_detections[0].cpu().numpy()
                top_index = batch_detections[:, 4] > self.confidence
                top_conf = batch_detections[top_index, 4]
                top_label = np.array(batch_detections[top_index, -1], np.int32)
                top_bboxes = np.array(batch_detections[top_index, :4])
                top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                    top_bboxes[:, 0],
                    -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                        top_bboxes[:, 2],
                        -1), np.expand_dims(top_bboxes[:, 3], -1)

                boxes = efficientdet_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array([image_sizes[self.phi], image_sizes[self.phi]]),
                    image_shape)
            except:
                pass

        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                _, regression, classification, anchors = self.net(images)

                regression = decodebox(regression, anchors, images)
                detection = torch.cat([regression, classification], axis=-1)
                batch_detections = non_max_suppression(
                    detection,
                    len(self.class_names),
                    conf_thres=self.confidence,
                    nms_thres=self.iou)
                try:
                    batch_detections = batch_detections[0].cpu().numpy()
                    top_index = batch_detections[:, 4] > self.confidence
                    top_conf = batch_detections[top_index, 4]
                    top_label = np.array(batch_detections[top_index, -1],
                                         np.int32)
                    top_bboxes = np.array(batch_detections[top_index, :4])
                    top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                        top_bboxes[:, 0], -1), np.expand_dims(
                            top_bboxes[:, 1], -1), np.expand_dims(
                                top_bboxes[:, 2],
                                -1), np.expand_dims(top_bboxes[:, 3], -1)

                    boxes = efficientdet_correct_boxes(
                        top_ymin, top_xmin, top_ymax, top_xmax,
                        np.array(
                            [image_sizes[self.phi], image_sizes[self.phi]]),
                        image_shape)
                except:
                    pass

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
Ejemplo n.º 22
0
def test(model, fetcher, conf_thres=1e-3, nms_thres=0.5):
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    seen = 0
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP',
                                 'F1')
    p, r, f1, mp, mr, mAP, mf1 = 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []
    pbar = tqdm(enumerate(fetcher), total=len(fetcher))
    for idx, (imgs, targets) in pbar:
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Run model
        inf_out, train_out = model(imgs)  # inference and training outputs

        # Compute loss
        val_loss += compute_loss(train_out, targets,
                                 model).item()  # GIoU, obj, cls

        # Run NMS
        output = non_max_suppression(inf_out,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)
        # Plot images with bounding boxes
        if idx == 0:
            show_batch(imgs, output)

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > 0.5 and m[
                            bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct, pred[:,
                               4].cpu().numpy(), pred[:,
                                                      6].cpu().numpy(), tcls))
        pbar.set_description('loss: %8g' % (val_loss / (idx + 1)))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]

    # sync stats
    if dist.is_initialized():
        for i in range(len(stats)):
            stat = torch.FloatTensor(stats[i]).to(device)
            ls = torch.IntTensor([len(stat)]).to(device)
            ls_list = [
                torch.IntTensor([0]).to(device)
                for _ in range(dist.get_world_size())
            ]
            dist.all_gather(ls_list, ls)
            ls_list = [ls_item.item() for ls_item in ls_list]
            max_ls = max(ls_list)
            if len(stat) < max_ls:
                stat = torch.cat(
                    [stat, torch.zeros(max_ls - len(stat)).to(device)])
            stat_list = [
                torch.zeros(max_ls).to(device)
                for _ in range(dist.get_world_size())
            ]
            dist.all_gather(stat_list, stat)
            stat_list = [
                stat_list[si][:ls_list[si]]
                for si in range(dist.get_world_size()) if ls_list[si] > 0
            ]
            stat = torch.cat(stat_list)
            stats[i] = stat.cpu().numpy()

    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, mAP, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=num_classes)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, mAP, mf1))

    # Print results per class
    for i, c in enumerate(ap_class):
        print(pf % (classes[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
    # Return results
    mAPs = np.zeros(num_classes) + mAP
    for i, c in enumerate(ap_class):
        mAPs[c] = ap[i]
    # return (mp, mr, mAP, mf1, *(loss / len(dataloader)).tolist()), mAPs
    return mAP
Ejemplo n.º 23
0
    def detect_image(self, image_id, image):
        self.confidence = 0.01
        self.iou = 0.5
        f = open("./input/detection-results/" + image_id + ".txt", "w")
        image_shape = np.array(np.shape(image)[0:2])

        #---------------------------------------------------------#
        #   给图像增加灰条,实现不失真的resize
        #   也可以直接resize进行识别
        #---------------------------------------------------------#
        if self.letterbox_image:
            crop_img = np.array(
                letterbox_image(
                    image,
                    (self.model_image_size[1], self.model_image_size[0])))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize(
                (self.model_image_size[1], self.model_image_size[0]),
                Image.BICUBIC)
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        #---------------------------------------------------------#
        #   添加上batch_size维度
        #---------------------------------------------------------#
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            #---------------------------------------------------------#
            #   将图像输入网络当中进行预测!
            #---------------------------------------------------------#
            outputs = self.net(images)
            output_list = []
            for i in range(3):
                output_list.append(self.yolo_decodes[i](outputs[i]))

            #---------------------------------------------------------#
            #   将预测框进行堆叠,然后进行非极大抑制
            #---------------------------------------------------------#
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)

            #---------------------------------------------------------#
            #   如果没有检测出物体,返回原图
            #---------------------------------------------------------#
            try:
                batch_detections = batch_detections[0].cpu().numpy()
            except:
                return

            #---------------------------------------------------------#
            #   对预测框进行得分筛选
            #---------------------------------------------------------#
            top_index = batch_detections[:,
                                         4] * batch_detections[:,
                                                               5] > self.confidence
            top_conf = batch_detections[top_index,
                                        4] * batch_detections[top_index, 5]
            top_label = np.array(batch_detections[top_index, -1], np.int32)
            top_bboxes = np.array(batch_detections[top_index, :4])
            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                top_bboxes[:, 0],
                -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                    top_bboxes[:, 2],
                    -1), np.expand_dims(top_bboxes[:, 3], -1)

            #-----------------------------------------------------------------#
            #   在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
            #   因此生成的top_bboxes是相对于有灰条的图像的
            #   我们需要对其进行修改,去除灰条的部分。
            #-----------------------------------------------------------------#
            if self.letterbox_image:
                boxes = yolo_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array(
                        [self.model_image_size[0], self.model_image_size[1]]),
                    image_shape)
            else:
                top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
                top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
                top_xmax = top_xmax / self.model_image_size[1] * image_shape[1]
                top_ymax = top_ymax / self.model_image_size[0] * image_shape[0]
                boxes = np.concatenate(
                    [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = str(top_conf[i])

            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
Ejemplo n.º 24
0
def stream(cfg,
           classes_file,
           weights,
           socket_ip,
           socket_port,
           image_size=128,
           confidence_threshold=0.6,
           nms_thres=0.5):
    print('+ Initializing model')
    model = Darknet(cfg, image_size)
    print('+ Loading model')
    load_darknet_weights(model, weights)
    print('+ Fusing model')
    model.fuse()
    print('+ Loading model to CPU')
    model.to('cpu').eval()
    print('+ Loading webcam')
    cap = LoadKinect(img_size=image_size)
    print('+ Loading classes')
    classes = load_classes(classes_file)
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(classes))]
    print('+ Connecting to remote socket')
    global sock
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((socket_ip, socket_port))
    print('+ Enumerating cam')
    for counter, (path, img, im0, vid_cap) in enumerate(cap):
        t = time.time()

        print('+ Loading image to CPU')
        img = torch.from_numpy(img).unsqueeze(0).to('cpu')
        pred, _ = model(img)
        print('+ Detecting objects')
        det = non_max_suppression(pred, confidence_threshold, nms_thres)[0]

        if det is not None and len(det) > 0:
            detected_classes = []
            print('+ Rescaling model')
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      im0.shape).round()

            print('+ Reading depth')

            depth = get_depth()
            depth_swap = np.swapaxes(depth, 0, 1)

            depth_strip1d = np.array([
                np.sort(stripe)[100] for stripe in depth_swap
            ]).astype(np.uint8)
            depth_strip2d_swap = np.array([
                np.ones(depth_swap.shape[1]) * depth for depth in depth_strip1d
            ]).astype(np.uint8)
            depth_strip2d = np.swapaxes(depth_strip2d_swap, 0, 1)

            depth_edge1d = np.zeros(depth_strip1d.shape)

            state = False
            for counter, _ in np.ndenumerate(depth_edge1d[:-1]):
                state = True if not state and depth_strip1d[
                    counter] < 230 else False
                depth_edge1d[counter[0]] = not state

            state = False
            state_cnt = 0
            for counter, _ in np.ndenumerate(depth_edge1d[:-1]):
                counter = counter[0]
                if depth_edge1d[counter] == state:
                    state_cnt += 1
                else:
                    if state_cnt < 10:
                        for r in range(max(0, counter - 10), counter):
                            depth_edge1d[counter] = state
                    state_cnt = 0
                    state = depth_edge1d[counter]

            depth_edge1d = depth_edge1d * 255

            depth_edge2d_swap = np.array([
                np.ones(100) * awddawd for awddawd in depth_edge1d
            ]).astype(np.uint8)
            depth_edge2d = np.swapaxes(depth_edge2d_swap, 0, 1)

            for *coordinates, conf, cls_conf, cls in det:
                if classes[int(cls)] in RISKY_CLASSES:
                    label = '%s %.2f' % (classes[int(cls)], conf)
                    plot_one_box(coordinates,
                                 im0,
                                 label=label,
                                 color=colors[int(cls)])
                    print(f"+ Detected {classes[int(cls)]}")
                    x_avg_depth = np.mean(depth[coordinates[0] -
                                                5:coordinates[0] + 5])
                    y_avg_depth = np.mean(depth[coordinates[1] -
                                                5:coordinates[1] + 5])
                    detected_classes.append({
                        classes[int(cls)]: {
                            'x': coordinates[0],
                            'y': coordinates[1],
                            'z':
                            np.average(np.array([x_avg_depth, y_avg_depth]))
                        }
                    })

            n = []
            for counter in detected_classes:
                width = im0.shape[1]
                x, y, z = counter[list(counter.keys())[0]].values()
                phi = (x / width * 2 - 1) * (CAMERA_FOV / 2)
                n.append(f"{list(counter.keys())[0]};{phi};{z}|")
            sock.send(''.join(str(x) for x in n)[:-1].encode('utf-8'))
        print('+ Cycle took %.3fs' % (time.time() - t))
        plt.imshow(bgr_to_rgb(im0))
        plt.show(block=False)
        plt.pause(.001)
Ejemplo n.º 25
0
def test(
        model,
        dataloader,
        iou_thres=0.5,
        conf_thres=0.3,
        nms_thres=0.45,
        print_interval=40,
):
    
    
    nC = 1
    mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \
        [], [], [], [], [], [], [], [], []
    AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
    for batch_i, (imgs, targets, paths, shapes, targets_len) in enumerate(dataloader):
        t = time.time()
        out = model(imgs.cuda())
        # out = model(imgs)
        output = []
        for i,o in enumerate(out):
            boxes = xyxy2xywh(o['boxes']).cpu()
            scores = o['scores'].cpu().view(-1,1)
            labels = o['labels'].cpu().view(-1,1).float()
            output.append(torch.Tensor(torch.cat((boxes,scores,scores,labels),dim=1)))
        output = non_max_suppression(output, conf_thres=conf_thres, nms_thres=nms_thres)
        for i, o in enumerate(output):
            if o is not None:
                output[i] = o[:, :6]

        # Compute average precision for each sample
        targets = [targets[i][:int(l)] for i,l in enumerate(targets_len)]
        for si, (labels, detections) in enumerate(zip(targets, output)):
            seen += 1

            if detections is None:
                # If there are labels but no detections mark as zero AP
                if labels.size(0) != 0:
                    mAPs.append(0), mR.append(0), mP.append(0)
                continue

            # Get detections sorted by decreasing confidence scores
            detections = detections.cpu().numpy()
            detections = detections[np.argsort(-detections[:, 4])]


            # If no labels add number of detections as incorrect
            correct = []
            if labels.size(0) == 0:
                # correct.extend([0 for _ in range(len(detections))])
                mAPs.append(0), mR.append(0), mP.append(0)
                continue
            else:
                target_cls = torch.zeros_like(labels[:, 0])
                target_boxes = labels[:, 2:6]

                detected = []
                for *pred_bbox, conf, obj_conf  in detections:
                    obj_pred = 0
                    pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                    # Compute iou with target boxes
                    iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0]
                    # Extract index of largest overlap
                    best_i = np.argmax(iou)
                    # If overlap exceeds threshold and classification is correct mark as correct
                    if iou[best_i] > iou_thres and best_i not in detected:
                        correct.append(1)
                        detected.append(best_i)
                    else:
                        correct.append(0)

            # Compute Average Precision (AP) per class
            AP, AP_class, R, P = ap_per_class(tp=correct,
                                              conf=detections[:, 4],
                                              pred_cls=np.zeros_like(detections[:, 5]), # detections[:, 6]
                                              target_cls=target_cls)

            # Accumulate AP per class
            AP_accum_count += np.bincount(AP_class, minlength=nC)
            AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)

            # Compute mean AP across all classes in this image, and append to image list
            mAPs.append(AP.mean())
            mR.append(R.mean())
            mP.append(P.mean())

            # Means of all images
            mean_mAP = np.sum(mAPs) / ( AP_accum_count + 1E-16)
            mean_R = np.sum(mR) / ( AP_accum_count + 1E-16)
            mean_P = np.sum(mP) / (AP_accum_count + 1E-16)

        if batch_i % print_interval==0:
            # Print image mAP and running mean mAP
            print(('%11s%11s' + '%11.3g' * 4 + 's') %
                  (seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP, time.time() - t))
    # Print mAP per class
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))

    print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16)))

    # Return mAP
    return mean_mAP, mean_R, mean_P
Ejemplo n.º 26
0
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0  # 归一化
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = []
        images.append(photo)
        images = np.asarray(images)

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except:
            return []

        top_index = batch_detections[:,
                                     4] * batch_detections[:,
                                                           5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index,
                                                                     5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
            top_bboxes[:, 0],
            -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        l = []
        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))
            l.append([left, top, right, bottom, score, predicted_class])

        return l
Ejemplo n.º 27
0
def test(cfg,
         data,
         weights=None,
         batch_size=16,
         img_size=608,
         iou_thres=0.5,
         conf_thres=0.001,
         nms_thres=0.5,
         save_json=True,
         hyp=None,
         model=None,
         single_cls=False):
    """test the metrics of the trained model

    :param str cfg: model cfg file
    :param str data: data dict
    :param str weights: weights path
    :param int batch_size: batch size
    :param int img_size: image size
    :param float iou_thres: iou threshold
    :param float conf_thres: confidence threshold
    :param float nms_thres: nms threshold
    :param bool save_json: Whether to save the model
    :param str hyp: hyperparameter
    :param str model: yolov4 model
    :param bool single_cls: only one class
    :return: results
    """

    if model is None:
        device = select_device(opt.device)
        verbose = False
        # Initialize model
        model = Model(cfg, img_size).to(device)
        # Load weights
        if weights.endswith('.pt'):
            checkpoint = torch.load(weights, map_location=device)
            state_dict = intersect_dicts(checkpoint['model'],
                                         model.state_dict())
            model.load_state_dict(state_dict, strict=False)
        elif len(weights) > 0:
            load_darknet_weights(model, weights)
        print(f'Loaded weights from {weights}!')

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device
        verbose = False

    test_path = data['valid']
    num_classes, names = (1, ['item']) if single_cls else (int(
        data['num_classes']), data['names'])

    # Dataloader
    dataset = LoadImagesAndLabels(test_path, img_size, batch_size, hyp=hyp)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=8,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    output_format = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets',
                                             'Pre', 'Rec', 'mAP', 'F1')
    precision, recall, f_1, mean_pre, mean_rec, mean_ap, mf1 = 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    json_dict, stats, aver_pre, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=output_format)):
        targets = targets.to(device)
        imgs = imgs.to(device) / 255.0
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Plot images with bounding boxes
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs,
                        targets=targets,
                        paths=paths,
                        fname='test_batch0.jpg')

        with torch.no_grad():
            inference_output, train_output = model(imgs)

            if hasattr(model, 'hyp'):  # if model has loss hyperparameters
                loss += compute_loss(train_output, targets,
                                     model)[1][:3].cpu()  # GIoU, obj, cls

            output = non_max_suppression(inference_output,
                                         conf_thres=conf_thres,
                                         nms_thres=nms_thres)

        # Statistics per image
        for i, pred in enumerate(output):
            labels = targets[targets[:, 0] == i, 1:]
            num_labels = len(labels)
            target_class = labels[:, 0].tolist() if num_labels else []
            seen += 1

            if pred is None:
                if num_labels:
                    stats.append(
                        ([], torch.Tensor(), torch.Tensor(), target_class))
                continue

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[i]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(imgs[i].shape[1:], box,
                             shapes[i][0])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for det_i, det in enumerate(pred):
                    json_dict.append({
                        'image_id':
                        image_id,
                        'category_id':
                        coco91class[int(det[6])],
                        'bbox':
                        [float(format(x, '.%gf' % 3)) for x in box[det_i]],
                        'score':
                        float(format(det[4], '.%gf' % 5))
                    })

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if num_labels:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for j, (*pbox, _, _, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == num_labels:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in target_class:
                        continue

                    # Best iou, index between pred and targets
                    mask = (pcls == tcls_tensor).nonzero(
                        as_tuple=False).view(-1)
                    iou, best_iou = bbox_iou(pbox, tbox[mask]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and mask[
                            best_iou] not in detected:  # and pcls == target_class[bi]:
                        correct[j] = 1
                        detected.append(mask[best_iou])

            # Append statistics (correct, conf, pcls, target_class)
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), target_class))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]
    if len(stats):
        precision, recall, aver_pre, f_1, ap_class = ap_per_class(*stats)
        mean_pre, mean_rec, mean_ap, mf1 = precision.mean(), recall.mean(
        ), aver_pre.mean(), f_1.mean()
        num_targets = np.bincount(
            stats[3].astype(np.int64),
            minlength=num_classes)  # number of targets per class
    else:
        num_targets = torch.zeros(1)

    # Print results
    print_format = '%20s' + '%10.3g' * 6
    print(print_format %
          ('all', seen, num_targets.sum(), mean_pre, mean_rec, mean_ap, mf1))

    # Print results per class
    if verbose and num_classes > 1 and stats:
        for i, class_ in enumerate(ap_class):
            print(print_format %
                  (names[class_], seen, num_targets[class_], precision[i],
                   recall[i], aver_pre[i], f_1[i]))

    # Save JSON
    if save_json and mean_ap and json_dict:
        try:
            img_ids = [
                int(Path(x).stem.split('_')[-1]) for x in dataset.img_files
            ]
            with open('results.json', 'w') as file:
                json.dump(json_dict, file)

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocogt = COCO('data/coco/annotations/instances_val2017.json'
                          )  # initialize COCO ground truth api
            cocodt = cocogt.loadRes('results.json')  # initialize COCO pred api

            cocoeval = COCOeval(cocogt, cocodt, 'bbox')
            cocoeval.params.imgIds = img_ids  # [:32]  # only evaluate these images
            cocoeval.evaluate()
            cocoeval.accumulate()
            cocoeval.summarize()
            mean_ap = cocoeval.stats[1]  # update mAP to pycocotools mAP
        except ImportError:
            print(
                'WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.'
            )

    # Return results
    maps = np.zeros(num_classes) + mean_ap
    for i, class_ in enumerate(ap_class):
        maps[class_] = aver_pre[i]
    return (mean_pre, mean_rec, mean_ap, mf1,
            *(loss / len(dataloader)).tolist()), maps
Ejemplo n.º 28
0
    def generate_box(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(letterbox_image(image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = []
        images.append(photo)

        images = np.asarray(images)
        images = torch.from_numpy(images)
        if self.cuda:
            images = images.cuda()

        with torch.no_grad():
            outputs = self.net(images)
            output_list = []
            for i in range(3):
                output_list.append(self.yolo_decodes[i](outputs[i]))
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output, self.config["yolo"]["classes"],
                                                   conf_thres=self.confidence,
                                                   nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except:
            boxlist = []
            return boxlist
        top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:, 0], -1), np.expand_dims(top_bboxes[:, 1],
                                                                                                      -1), np.expand_dims(
            top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # 去掉灰条
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([self.model_image_size[0], self.model_image_size[1]]), image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))

        thickness = (np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0]

        boxlist = []
        for i, c in enumerate(top_label):
            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

            box_str = str(left) + ',' + str(top) + ',' + str(right) + ',' + str(bottom)
            boxlist.append(box_str)


        return boxlist
Ejemplo n.º 29
0
    def get_FPS(self, image, test_interval):
        image_shape = np.array(np.shape(image)[0:2])
        if self.letterbox_image:
            crop_img = np.array(
                letterbox_image(
                    image,
                    (self.model_image_size[1], self.model_image_size[0])))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize(
                (self.model_image_size[1], self.model_image_size[0]),
                Image.BICUBIC)
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            outputs = self.net(images)
            output_list = []
            for i in range(2):
                output_list.append(self.yolo_decodes[i](outputs[i]))

            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)
            try:
                batch_detections = batch_detections[0].cpu().numpy()
                top_index = batch_detections[:,
                                             4] * batch_detections[:,
                                                                   5] > self.confidence
                top_conf = batch_detections[top_index,
                                            4] * batch_detections[top_index, 5]
                top_label = np.array(batch_detections[top_index, -1], np.int32)
                top_bboxes = np.array(batch_detections[top_index, :4])
                top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                    top_bboxes[:, 0],
                    -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                        top_bboxes[:, 2],
                        -1), np.expand_dims(top_bboxes[:, 3], -1)

                if self.letterbox_image:
                    boxes = yolo_correct_boxes(
                        top_ymin, top_xmin, top_ymax, top_xmax,
                        np.array([
                            self.model_image_size[0], self.model_image_size[1]
                        ]), image_shape)
                else:
                    top_xmin = top_xmin / self.model_image_size[
                        1] * image_shape[1]
                    top_ymin = top_ymin / self.model_image_size[
                        0] * image_shape[0]
                    top_xmax = top_xmax / self.model_image_size[
                        1] * image_shape[1]
                    top_ymax = top_ymax / self.model_image_size[
                        0] * image_shape[0]
                    boxes = np.concatenate(
                        [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)
            except:
                pass

        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                outputs = self.net(images)
                output_list = []
                for i in range(2):
                    output_list.append(self.yolo_decodes[i](outputs[i]))

                output = torch.cat(output_list, 1)
                batch_detections = non_max_suppression(
                    output,
                    len(self.class_names),
                    conf_thres=self.confidence,
                    nms_thres=self.iou)
                try:
                    batch_detections = batch_detections[0].cpu().numpy()
                    top_index = batch_detections[:,
                                                 4] * batch_detections[:,
                                                                       5] > self.confidence
                    top_conf = batch_detections[
                        top_index, 4] * batch_detections[top_index, 5]
                    top_label = np.array(batch_detections[top_index, -1],
                                         np.int32)
                    top_bboxes = np.array(batch_detections[top_index, :4])
                    top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                        top_bboxes[:, 0], -1), np.expand_dims(
                            top_bboxes[:, 1], -1), np.expand_dims(
                                top_bboxes[:, 2],
                                -1), np.expand_dims(top_bboxes[:, 3], -1)

                    if self.letterbox_image:
                        boxes = yolo_correct_boxes(
                            top_ymin, top_xmin, top_ymax, top_xmax,
                            np.array([
                                self.model_image_size[0],
                                self.model_image_size[1]
                            ]), image_shape)
                    else:
                        top_xmin = top_xmin / self.model_image_size[
                            1] * image_shape[1]
                        top_ymin = top_ymin / self.model_image_size[
                            0] * image_shape[0]
                        top_xmax = top_xmax / self.model_image_size[
                            1] * image_shape[1]
                        top_ymax = top_ymax / self.model_image_size[
                            0] * image_shape[0]
                        boxes = np.concatenate(
                            [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)
                except:
                    pass
        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
Ejemplo n.º 30
0
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   给图像增加灰条,实现不失真的resize
        #---------------------------------------------------------#
        crop_img = np.array(
            letterbox_image(image, [self.input_shape[1], self.input_shape[0]]))
        photo = np.array(crop_img, dtype=np.float32)
        photo = np.transpose(preprocess_input(photo), (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            #---------------------------------------------------------#
            #   传入网络当中进行预测
            #---------------------------------------------------------#
            _, regression, classification, anchors = self.net(images)

            #-----------------------------------------------------------#
            #   将预测结果进行解码
            #-----------------------------------------------------------#
            regression = decodebox(regression, anchors, images)
            detection = torch.cat([regression, classification], axis=-1)
            batch_detections = non_max_suppression(detection,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)
            #--------------------------------------#
            #   如果没有检测到物体,则返回原图
            #--------------------------------------#
            try:
                batch_detections = batch_detections[0].cpu().numpy()
            except:
                return image

            #-----------------------------------------------------------#
            #   筛选出其中得分高于confidence的框
            #-----------------------------------------------------------#
            top_index = batch_detections[:, 4] > self.confidence
            top_conf = batch_detections[top_index, 4]
            top_label = np.array(batch_detections[top_index, -1], np.int32)
            top_bboxes = np.array(batch_detections[top_index, :4])
            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                top_bboxes[:, 0],
                -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                    top_bboxes[:, 2],
                    -1), np.expand_dims(top_bboxes[:, 3], -1)

            #-----------------------------------------------------------#
            #   去掉灰条部分
            #-----------------------------------------------------------#
            boxes = retinanet_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.input_shape[0], self.input_shape[1]]),
                image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0],
            1)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[self.class_names.index(
                                   predicted_class)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image