Beispiel #1
0
    def get_FPS(self, image, test_interval):
        """Return the mean wall-clock time of one detection pass on ``image``.

        The image is preprocessed once. One untimed pass is run first, then
        ``test_interval`` passes are timed. Each pass covers the network
        call, the optional extra NMS, score filtering and letterbox
        correction; the detection results themselves are discarded.

        Args:
            image: Input image, handed to ``letterbox_image``; the first two
                entries of ``np.shape(image)`` are used as the original size.
            test_interval: Number of timed passes. Zero raises
                ``ZeroDivisionError`` in the final division.

        Returns:
            ``(t2 - t1) / test_interval`` in seconds.
        """
        image_shape = np.array(np.shape(image)[0:2])
        # Pad the image with gray bars so it can be resized without distortion.
        crop_img = letterbox_image(image,
                                   [self.input_shape[0], self.input_shape[1]])
        # Reverse the channel axis (RGB -> BGR): per the original note, the
        # centernet_hourglass weights were trained on BGR images.
        photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
        # Normalize and reshape to
        # [1, input_shape[0], input_shape[1], input_shape[2]].
        photo = np.reshape(
            preprocess_image(photo),
            [1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
        input_hw = np.array([self.input_shape[0], self.input_shape[1]])

        def run_once():
            # One complete inference + post-processing pass. Nothing is
            # returned: only the elapsed time of the pass matters here.
            preds = self.get_pred(photo).numpy()

            if self.nms:
                preds = np.array(nms(preds, self.nms_threhold))

            if len(preds[0]) <= 0:
                return

            # Divide the box coordinates by input_shape[0] / 4.
            preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)

            det_label = preds[0][:, -1]
            det_conf = preds[0][:, -2]
            det_xmin = preds[0][:, 0]
            det_ymin = preds[0][:, 1]
            det_xmax = preds[0][:, 2]
            det_ymax = preds[0][:, 3]

            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= self.confidence
            ]
            # Not used further; still evaluated so a timed pass performs the
            # same work as it did before the two copies were merged.
            _top_conf = det_conf[top_indices]
            _top_label_indices = det_label[top_indices].tolist()

            top_xmin = np.expand_dims(det_xmin[top_indices], -1)
            top_ymin = np.expand_dims(det_ymin[top_indices], -1)
            top_xmax = np.expand_dims(det_xmax[top_indices], -1)
            top_ymax = np.expand_dims(det_ymax[top_indices], -1)

            centernet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                    input_hw, image_shape)

        # Untimed first pass.
        run_once()

        t1 = time.time()
        for _ in range(test_interval):
            run_once()
        t2 = time.time()

        tact_time = (t2 - t1) / test_interval
        return tact_time
Beispiel #2
0
    def detect_image(self, image):
        """Detect faces in ``image`` and draw boxes, scores and landmarks.

        A copy of the input is annotated with one rectangle, one score text
        and five landmark dots per detection. The first five integer values
        of every detection are also printed.

        Args:
            image: Input image; must support ``.copy()`` and conversion via
                ``np.array`` to a 3-D array.

        Returns:
            The annotated copy, or the unmodified copy when nothing was
            detected.
        """
        # Keep a copy of the input; all drawing happens on it.
        annotated = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        # Multipliers that map predictions back to the original width/height:
        # two (x, y) pairs for the box and five (x, y) pairs for landmarks.
        scale = [im_width, im_height] * 2
        scale_for_landmarks = [im_width, im_height] * 5

        # With letterboxing the image is padded with gray bars for a
        # distortion-free resize; otherwise the anchors are rebuilt for the
        # actual image size.
        if self.letterbox_image:
            image = letterbox_image(image,
                                    [self.input_shape[1], self.input_shape[0]])
        else:
            anchor_builder = Anchors(self.cfg,
                                     image_size=(im_height, im_width))
            self.anchors = anchor_builder.get_anchors()

        # Preprocess / normalize and add the batch dimension.
        photo = np.expand_dims(preprocess_input(image), 0)
        preds = self.retinaface.predict(photo)

        # Decode the raw predictions.
        results = self.bbox_util.detection_out(
            preds, self.anchors, confidence_threshold=self.confidence)

        # Nothing detected: hand back the untouched copy.
        if len(results) <= 0:
            return annotated

        results = np.array(results)
        # When letterboxing was used, remove the gray-bar offset.
        if self.letterbox_image:
            model_shape = np.array([self.input_shape[0], self.input_shape[1]])
            original_shape = np.array([im_height, im_width])
            results = retinaface_correct_boxes(results, model_shape,
                                               original_shape)

        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        # One colour per landmark, in the order the landmarks are stored.
        landmark_colors = (
            (0, 0, 255),
            (0, 255, 255),
            (255, 0, 255),
            (0, 255, 0),
            (255, 0, 0),
        )

        for detection in results:
            # Index 4 is the score; format it before truncating to int.
            text = "{:.4f}".format(detection[4])
            vals = [int(v) for v in detection]

            # vals[0:4] are the face box corners.
            cv2.rectangle(annotated, (vals[0], vals[1]), (vals[2], vals[3]),
                          (0, 0, 255), 2)
            text_pos = (vals[0], vals[1] + 12)
            cv2.putText(annotated, text, text_pos, cv2.FONT_HERSHEY_DUPLEX,
                        0.5, (255, 255, 255))

            print(vals[0], vals[1], vals[2], vals[3], vals[4])

            # vals[5:15] hold the landmark coordinates as (x, y) pairs.
            for k, color in enumerate(landmark_colors):
                point = (vals[5 + 2 * k], vals[6 + 2 * k])
                cv2.circle(annotated, point, 1, color, 4)

        return annotated
Beispiel #3
0
    def detect_image(self, image):
        """Detect objects in a PIL image and draw labelled boxes onto it.

        Args:
            image: PIL image. It is drawn on in place via ``ImageDraw``.

        Returns:
            ``image`` alone when the NMS output cannot be converted or no box
            passes the confidence filter. Otherwise the tuple
            ``(image, oo, class_score)`` where ``oo`` is
            ``[bottom - top, right - left]`` and ``class_score`` is
            ``{class_name: '%.2f' % score}``, both for the LAST drawn box.
        """
        image_shape = np.array(np.shape(image)[0:2])
        # Drawing never changes the image size, so read it once.
        img_h, img_w = int(image_shape[0]), int(image_shape[1])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        # Add the batch dimension.
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)
        # The conversion fails when NMS produced nothing usable for this
        # image. Only those failure modes are caught, so unrelated errors
        # (and KeyboardInterrupt) are no longer swallowed.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return image

        top_index = (batch_detections[:, 4] *
                     batch_detections[:, 5]) > self.confidence
        top_conf = (batch_detections[top_index, 4] *
                    batch_detections[top_index, 5])
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Remove the gray letterbox bars from the box coordinates.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * img_w +
                                                0.5).astype('int32'))

        thickness = (img_h + img_w) // self.model_image_size[0]

        # Stay None unless at least one box is drawn; this decides the shape
        # of the return value below.
        oo = None
        class_score = None

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]
            color = self.colors[self.class_names.index(predicted_class)]

            top, left, bottom, right = boxes[i]
            # Grow the box by 5 px on every side, then clamp to the image.
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))
            # Height and width of the drawn box.
            oo = [bottom - top, right - left]

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            class_score = {predicted_class: '%.2f' % score}

            # Put the label above the box when it fits, else just inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-px rectangles give an outline `thickness` px wide.
            for offset in range(thickness):
                draw.rectangle([left + offset, top + offset,
                                right - offset, bottom - offset],
                               outline=color)
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

        if oo is None:
            # No box survived the confidence filter.
            return image
        return image, oo, class_score
Beispiel #4
0
    def get_FPS(self, image, test_interval):
        """Return the mean wall-clock time of one detection pass on ``image``.

        The image is preprocessed and moved to the device once. One untimed
        pass is run first, then ``test_interval`` passes are timed. Each pass
        covers the network call, decoding, NMS and box post-processing; the
        detections themselves are discarded.

        Args:
            image: PIL image (``convert``/``resize`` are called on it when
                ``self.letterbox_image`` is falsy).
            test_interval: Number of timed passes. Zero raises
                ``ZeroDivisionError`` in the final division.

        Returns:
            ``(t2 - t1) / test_interval`` in seconds.
        """
        image_shape = np.array(np.shape(image)[0:2])
        target_size = (self.model_image_size[1], self.model_image_size[0])
        # Either pad with gray bars for a distortion-free resize, or resize
        # the image directly.
        if self.letterbox_image:
            crop_img = np.array(letterbox_image(image, target_size))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize(target_size, Image.BICUBIC)
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))

        # Add the batch dimension and move the tensor to the device once.
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        model_shape = np.array(
            [self.model_image_size[0], self.model_image_size[1]])

        def run_once():
            # One complete inference + post-processing pass. Nothing is
            # returned: only the elapsed time of the pass matters here.
            with torch.no_grad():
                outputs = self.net(images)
                output_list = [
                    self.yolo_decodes[i](outputs[i]) for i in range(3)
                ]
                output = torch.cat(output_list, 1)
                batch_detections = non_max_suppression(
                    output, len(self.class_names),
                    conf_thres=self.confidence,
                    nms_thres=self.iou)

                # The conversion fails when NMS produced nothing usable.
                # Only that step is guarded; errors in the post-processing
                # below now surface instead of being silently ignored.
                try:
                    detections = batch_detections[0].cpu().numpy()
                except (AttributeError, IndexError, TypeError):
                    return

                top_index = detections[:, 4] * detections[:, 5] > self.confidence
                # Not used further; still evaluated so a timed pass performs
                # the same work as it did before the two copies were merged.
                _top_conf = detections[top_index, 4] * detections[top_index, 5]
                _top_label = np.array(detections[top_index, -1], np.int32)

                top_bboxes = np.array(detections[top_index, :4])
                top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
                top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
                top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
                top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

                if self.letterbox_image:
                    yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                       model_shape, image_shape)
                else:
                    # Plain resize: rescale from model size to image size.
                    top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
                    top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
                    top_xmax = top_xmax / self.model_image_size[1] * image_shape[1]
                    top_ymax = top_ymax / self.model_image_size[0] * image_shape[0]
                    np.concatenate([top_ymin, top_xmin, top_ymax, top_xmax],
                                   axis=-1)

        # Untimed first pass.
        run_once()

        t1 = time.time()
        for _ in range(test_interval):
            run_once()
        t2 = time.time()

        tact_time = (t2 - t1) / test_interval
        return tact_time
    def detect_image(self, image):
        """Run the detection session on a PIL image and draw the results.

        Prints the number of boxes found, each label (as UTF-8 bytes) and the
        total elapsed time.

        Args:
            image: PIL image; it is drawn on in place.

        Returns:
            The same ``image`` object with boxes and labels drawn.
        """
        start = timer()

        # Letterbox the image to the model input size and scale to [0, 1].
        target_size = (self.model_image_size[0], self.model_image_size[1])
        image_data = np.array(letterbox_image(image, target_size),
                              dtype='float32')
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)

        # Evaluate boxes, scores and classes in a single session call.
        feed = {
            self.yolo_model.input: image_data,
            self.input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        }
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes], feed_dict=feed)
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        # Font size and outline thickness both depend on the image size.
        font_size = np.floor(2e-2 * image.size[1] + 0.5).astype('int32')
        font = ImageFont.truetype(font='font/simhei.ttf', size=font_size)
        thickness = (image.size[0] + image.size[1]) // 500

        for idx, cls in enumerate(out_classes):
            predicted_class = self.class_names[cls]
            score = out_scores[idx]
            top, left, bottom, right = out_boxes[idx]

            # Grow the box by 5 px on every side.
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            # Round to the nearest pixel and clamp to the image.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            caption = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(caption, font)
            encoded = caption.encode('utf-8')
            print(encoded)

            # Put the label above the box when it fits, else just inside it.
            fits_above = top - label_size[1] >= 0
            text_origin = np.array(
                [left, top - label_size[1]] if fits_above else [left, top + 1])

            # Nested 1-px rectangles give an outline `thickness` px wide.
            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=self.colors[cls])
            draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)],
                           fill=self.colors[cls])
            draw.text(text_origin, str(encoded, 'UTF-8'), fill=(0, 0, 0),
                      font=font)
            del draw

        end = timer()
        print('detect time:', end - start)
        return image
Beispiel #6
0
    def detect_image(self, image_id, image):
        """Run detection on ``image`` and write the boxes to a results file.

        Side effects: sets ``self.confidence`` to 0.01 and ``self.iou`` to
        0.5, and creates/truncates
        ``./input/detection-results/<image_id>.txt``. One line is written per
        kept detection: class name, score (first 6 characters), then left,
        top, right, bottom as integers. The file is closed on every exit
        path, including the early return and exceptions.

        Args:
            image_id: Used to build the output file name.
            image: Input image, passed to ``letterbox_image``.

        Returns:
            ``image`` when the NMS output cannot be converted (the file is
            then left empty); ``None`` otherwise.
        """
        self.confidence = 0.01
        self.iou = 0.5
        with open("./input/detection-results/" + image_id + ".txt", "w") as f:
            image_shape = np.array(np.shape(image)[0:2])

            # Pad the image with gray bars for a distortion-free resize.
            crop_img = np.array(
                letterbox_image(
                    image,
                    (self.model_image_size[1], self.model_image_size[0])))
            photo = np.array(crop_img, dtype=np.float32) / 255.0
            photo = np.transpose(photo, (2, 0, 1))
            # Add the batch dimension.
            images = [photo]

            with torch.no_grad():
                images = torch.from_numpy(np.asarray(images))
                if self.cuda:
                    images = images.cuda()

                # Feed the image through the network.
                outputs = self.net(images)
                output_list = [
                    self.yolo_decodes[i](outputs[i]) for i in range(3)
                ]

                # Stack the decoded predictions, then apply NMS.
                output = torch.cat(output_list, 1)
                batch_detections = non_max_suppression(
                    output,
                    len(self.class_names),
                    conf_thres=self.confidence,
                    nms_thres=self.iou)

                # Nothing detected: return the original image. Only the
                # failure modes of this conversion are caught.
                try:
                    batch_detections = batch_detections[0].cpu().numpy()
                except (AttributeError, IndexError, TypeError):
                    return image

                # Filter the predictions by score.
                top_index = (batch_detections[:, 4] *
                             batch_detections[:, 5]) > self.confidence
                top_conf = (batch_detections[top_index, 4] *
                            batch_detections[top_index, 5])
                top_label = np.array(batch_detections[top_index, -1], np.int32)
                top_bboxes = np.array(batch_detections[top_index, :4])
                top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
                top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
                top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
                top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

                # The boxes are relative to the letterboxed image; map them
                # back by removing the gray-bar part.
                boxes = yolo_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array(
                        [self.model_image_size[0], self.model_image_size[1]]),
                    image_shape)

            for i, c in enumerate(top_label):
                predicted_class = self.class_names[c]
                score = str(top_conf[i])

                top, left, bottom, right = boxes[i]
                f.write("%s %s %s %s %s %s\n" %
                        (predicted_class, score[:6], str(int(left)), str(
                            int(top)), str(int(right)), str(int(bottom))))

        return
    def detect_image(self, image):
        """Detect objects in a PIL image and draw labelled boxes onto it.

        Args:
            image: PIL image. It is drawn on in place via ``ImageDraw``.

        Returns:
            The same ``image`` object; unchanged when the NMS output cannot
            be converted or no box passes the confidence filter.
        """
        image_shape = np.array(np.shape(image)[0:2])
        # Drawing never changes the image size, so read it once.
        img_h, img_w = int(image_shape[0]), int(image_shape[1])

        crop_img = np.array(letterbox_image(image, self.image_size))
        photo = np.array(crop_img, dtype=np.float32)
        photo = np.transpose(preprocess_input(photo), (2, 0, 1))
        # Add the batch dimension.
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            _, regression, classification, anchors = self.net(images)

            regression = decodebox(regression, anchors, images)
            detection = torch.cat([regression, classification], axis=-1)
            batch_detections = non_max_suppression(detection,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)
        # The conversion fails when NMS produced nothing usable for this
        # image. Only those failure modes are caught, so unrelated errors
        # (and KeyboardInterrupt) are no longer swallowed.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            return image

        top_index = batch_detections[:, 4] > self.confidence
        top_conf = batch_detections[top_index, 4]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Remove the gray letterbox bars from the box coordinates.
        boxes = retinanet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                        np.array(self.image_size), image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * img_w +
                                                0.5).astype('int32'))

        thickness = (img_h + img_w) // self.image_size[0]

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]
            color = self.colors[self.class_names.index(predicted_class)]

            top, left, bottom, right = boxes[i]
            # Grow the box by 5 px on every side, then clamp to the image.
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            # Put the label above the box when it fits, else just inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-px rectangles give an outline `thickness` px wide.
            # A separate name keeps the outer detection index `i` intact.
            for offset in range(thickness):
                draw.rectangle([left + offset, top + offset,
                                right - offset, bottom - offset],
                               outline=color)
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        return image
Beispiel #8
0
    def detect_image(self, image_id, image):
        """Run detection on ``image`` and write the boxes to a results file.

        Side effects: creates/truncates
        ``./input/detection-results/<image_id>.txt`` and sets
        ``self.confidence`` to 0.01 and ``self.nms_threhold`` to 0.5. One
        line is written per kept detection: class name, score (first 6
        characters), then left, top, right, bottom as integers. The file is
        closed on every exit path, including the early return taken when
        there are no predictions.

        Args:
            image_id: Used to build the output file name.
            image: Input image, passed to ``letterbox_image``.

        Returns:
            ``None`` in all cases.
        """
        with open("./input/detection-results/" + image_id + ".txt", "w") as f:
            self.confidence = 0.01
            self.nms_threhold = 0.5
            image_shape = np.array(np.shape(image)[0:2])
            # Pad the image with gray bars for a distortion-free resize.
            crop_img = letterbox_image(
                image, [self.input_shape[0], self.input_shape[1]])
            # Reverse the channel axis (RGB -> BGR): per the original note,
            # the centernet_hourglass weights were trained on BGR images.
            photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
            # Normalize and reshape to
            # [1, input_shape[0], input_shape[1], input_shape[2]].
            photo = np.reshape(preprocess_image(photo), [
                1, self.input_shape[0], self.input_shape[1],
                self.input_shape[2]
            ])

            preds = self.get_pred(photo).numpy()
            # Original author's note: for CenterNet, locating the centre is
            # crucial. Large objects carry a lot of local information, which
            # makes the centre hard to pin down, and max-pooling based
            # suppression cannot remove such partial boxes. Hence this extra
            # box-level NMS. Reportedly it matters little with an hourglass
            # backbone and noticeably with resnet.
            if self.nms:
                preds = np.array(nms(preds, self.nms_threhold))

            if len(preds[0]) <= 0:
                # No predictions: leave the (empty) file behind and stop.
                return

            # Divide the box coordinates by input_shape[0] / 4.
            preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)

            det_label = preds[0][:, -1]
            det_conf = preds[0][:, -2]
            det_xmin = preds[0][:, 0]
            det_ymin = preds[0][:, 1]
            det_xmax = preds[0][:, 2]
            det_ymax = preds[0][:, 3]

            # Keep only boxes whose score reaches the confidence threshold.
            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= self.confidence
            ]
            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = np.expand_dims(det_xmin[top_indices], -1)
            top_ymin = np.expand_dims(det_ymin[top_indices], -1)
            top_xmax = np.expand_dims(det_xmax[top_indices], -1)
            top_ymax = np.expand_dims(det_ymax[top_indices], -1)

            # Remove the gray letterbox bars from the box coordinates.
            boxes = centernet_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.input_shape[0], self.input_shape[1]]),
                image_shape)

            for i, c in enumerate(top_label_indices):
                predicted_class = self.class_names[int(c)]
                score = str(top_conf[i])

                top, left, bottom, right = boxes[i]
                f.write("%s %s %s %s %s %s\n" %
                        (predicted_class, score[:6], str(int(left)), str(
                            int(top)), str(int(right)), str(int(bottom))))

        return
    def detect_image(self, image):
        """Run CenterNet on ``image`` and draw the detected boxes onto it.

        The image is letterboxed to ``self.image_size``, flipped from RGB to
        BGR, preprocessed and fed to ``self.centernet``.  The decoded boxes
        are optionally filtered by an extra NMS pass, thresholded on
        ``self.confidence``, mapped back to the original image and drawn.

        Args:
            image: picture to annotate in place.  Presumably a PIL image,
                since it is handed to ``ImageDraw.Draw`` -- confirm against
                the callers.

        Returns:
            The same ``image`` object; it is returned untouched when the
            network produced no detections.
        """
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Pad the image with gray bars so that it can be resized
        #   without distortion.
        #---------------------------------------------------------#
        crop_img = letterbox_image(image,
                                   [self.image_size[0], self.image_size[1]])
        #----------------------------------------------------------------------------------#
        #   Convert RGB to BGR, because the original centernet_hourglass
        #   weights were trained on BGR images.
        #----------------------------------------------------------------------------------#
        photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
        #-----------------------------------------------------------#
        #   Preprocess / normalise, move the channel axis to the front
        #   and add the batch axis: the result is reshaped to
        #   [1, image_size[2], image_size[0], image_size[1]].
        #-----------------------------------------------------------#
        photo = np.reshape(
            np.transpose(preprocess_image(photo), (2, 0, 1)),
            [1, self.image_size[2], self.image_size[0], self.image_size[1]])

        with torch.no_grad():
            images = Variable(
                torch.from_numpy(np.asarray(photo)).type(torch.FloatTensor))
            if self.cuda:
                images = images.cuda()

            outputs = self.centernet(images)
            if self.backbone == 'hourglass':
                # Only the last output is used for the hourglass backbone.
                outputs = [
                    outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"],
                    outputs[-1]["reg"]
                ]
            #-----------------------------------------------------------#
            #   Decode the raw predictions into boxes.
            #-----------------------------------------------------------#
            outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                                  self.image_size, self.confidence, self.cuda)

            #-------------------------------------------------------#
            #   For CenterNet, locating the centre is crucial.
            #   A large object carries a lot of local information, so
            #   its centre is hard to pin down and max-pooling based
            #   suppression cannot remove the resulting local boxes.
            #   Hence this extra box-level NMS pass.
            #   In practice it makes little difference with the
            #   hourglass backbone and a large one with resnet.
            #-------------------------------------------------------#
            try:
                if self.nms:
                    outputs = np.array(nms(outputs, self.nms_threhold))
            except Exception:
                # Best effort: if the extra NMS pass fails, carry on with the
                # decoded boxes.  ``Exception`` (rather than a bare except)
                # lets KeyboardInterrupt / SystemExit propagate.
                pass

            output = outputs[0]
            if len(output) <= 0:
                return image

            batch_boxes, det_conf, det_label = (output[:, :4], output[:, 4],
                                                output[:, 5])
            det_xmin, det_ymin, det_xmax, det_ymax = (batch_boxes[:, 0],
                                                      batch_boxes[:, 1],
                                                      batch_boxes[:, 2],
                                                      batch_boxes[:, 3])
            #-----------------------------------------------------------#
            #   Keep only the boxes scoring at least ``confidence``.
            #-----------------------------------------------------------#
            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= self.confidence
            ]
            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = np.expand_dims(det_xmin[top_indices], -1)
            top_ymin = np.expand_dims(det_ymin[top_indices], -1)
            top_xmax = np.expand_dims(det_xmax[top_indices], -1)
            top_ymax = np.expand_dims(det_ymax[top_indices], -1)

            #-----------------------------------------------------------#
            #   Undo the gray-bar padding to get boxes on the original
            #   image.
            #-----------------------------------------------------------#
            boxes = centernet_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.image_size[0], self.image_size[1]]),
                image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(image)[0] + np.shape(image)[1]) // self.image_size[0], 1)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            # Grow the box by 5 px per side, then round and clamp to the image.
            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box.
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            # Put the label above the box when it fits, otherwise inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # A thick outline is drawn as nested 1-px rectangles.  A separate
            # loop name avoids clobbering the detection index ``i``.
            for offset in range(thickness):
                draw.rectangle([
                    left + offset, top + offset, right - offset,
                    bottom - offset
                ],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
Beispiel #10
0
    def detect_image(self, image):
        """Run the YOLO model on ``image`` and draw the detections onto it.

        The image is letterboxed to ``self.model_image_size``, scaled to
        [0, 1] and fed either to ``self.yolo_model.predict`` (when
        ``self.eager`` is true) or to the session in ``self.sess``.  The
        elapsed time is printed before returning.

        Args:
            image: picture to annotate in place; it must expose ``size`` as
                (width, height) and is passed to ``ImageDraw.Draw``
                (presumably a PIL image).

        Returns:
            The same ``image`` object with boxes and labels drawn on it.
        """
        start = timer()

        # Resize the image to meet the network's input requirements.
        new_image_size = (self.model_image_size[1], self.model_image_size[0])
        boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        if self.eager:
            # Run prediction; the original image size is passed as
            # [[height, width]].
            input_image_shape = np.expand_dims(
                np.array([image.size[1], image.size[0]], dtype='float32'), 0)
            out_boxes, out_scores, out_classes = self.yolo_model.predict(
                [image_data, input_image_shape])
        else:
            # Run prediction through the session.
            out_boxes, out_scores, out_classes = self.sess.run(
                [self.boxes, self.scores, self.classes],
                feed_dict={
                    self.yolo_model.input: image_data,
                    self.input_image_shape: [image.size[1], image.size[0]],
                    K.learning_phase(): 0
                })

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        # Set up the font, scaled with the image height.
        font = ImageFont.truetype(font='font/simhei.ttf',
                                  size=np.floor(3e-2 * image.size[1] +
                                                0.5).astype('int32'))
        # Clamp to at least 1: for images with width + height < 300 the
        # integer division gives 0 and no outline would be drawn at all.
        thickness = max((image.size[0] + image.size[1]) // 300, 1)

        for i, c in enumerate(out_classes):
            predicted_class = self.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            # Grow the box by 5 px per side, then round and clamp to the image.
            top, left, bottom, right = box
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            # Draw the box.
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            # Put the label above the box when it fits, otherwise inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # A thick outline is drawn as nested 1-px rectangles.  A separate
            # loop name avoids clobbering the detection index ``i``.
            for offset in range(thickness):
                draw.rectangle([
                    left + offset, top + offset, right - offset,
                    bottom - offset
                ],
                               outline=self.colors[c])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw

        end = timer()
        print(end - start)
        return image
Beispiel #11
0
    def detect_image(self, image_id, image):
        """Detect objects in ``image`` and write them to a results file.

        One line per kept detection is written to
        ``./input/detection-results/<image_id>.txt`` in the form
        ``class score left top right bottom`` (score truncated to six
        characters, coordinates truncated to integers).

        Args:
            image_id: identifier used as the stem of the results file name.
            image: picture to run detection on.  Presumably a PIL image --
                the non-letterbox path calls ``image.convert`` on it.

        Returns:
            ``image`` when nothing was detected (the results file is then
            left empty), otherwise ``None``.
        """
        # NOTE(review): this overwrites the instance-wide threshold for every
        # later call as well; presumably intentional so that low-scoring
        # boxes are kept for evaluation -- confirm.
        self.confidence = 0.01

        # ``with`` closes the file on every exit path, including the early
        # return below, which previously leaked the open handle.
        with open("./input/detection-results/" + image_id + ".txt",
                  "w") as f:
            image_shape = np.array(np.shape(image)[0:2])
            #---------------------------------------------------------#
            #   Pad the image with gray bars so that it can be resized
            #   without distortion; alternatively resize it directly.
            #---------------------------------------------------------#
            if self.letterbox_image:
                crop_img = np.array(
                    letterbox_image(
                        image,
                        (self.model_image_size[1], self.model_image_size[0])))
            else:
                crop_img = image.convert('RGB')
                crop_img = crop_img.resize(
                    (self.model_image_size[1], self.model_image_size[0]),
                    Image.BICUBIC)
            photo = np.array(crop_img, dtype=np.float64)

            #-----------------------------------------------------------#
            #   Preprocess / normalise and add the batch axis.
            #-----------------------------------------------------------#
            photo = preprocess_input(
                np.reshape(photo, [
                    1, self.model_image_size[0], self.model_image_size[1],
                    self.model_image_size[2]
                ]))

            preds = self.m2det.predict(photo)
            #-----------------------------------------------------------#
            #   Decode the predictions.
            #-----------------------------------------------------------#
            results = self.bbox_util.detection_out(
                preds, self.prior, confidence_threshold=self.confidence)

            #--------------------------------------#
            #   Nothing detected: return the input image.
            #--------------------------------------#
            if len(results[0]) <= 0:
                return image

            #-----------------------------------------------------------#
            #   Keep only the boxes scoring at least ``confidence``.
            #   Columns: label, score, xmin, ymin, xmax, ymax.
            #-----------------------------------------------------------#
            det_label = results[0][:, 0]
            det_conf = results[0][:, 1]
            det_xmin, det_ymin, det_xmax, det_ymax = (results[0][:, 2],
                                                      results[0][:, 3],
                                                      results[0][:, 4],
                                                      results[0][:, 5])
            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= self.confidence
            ]
            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = np.expand_dims(det_xmin[top_indices], -1)
            top_ymin = np.expand_dims(det_ymin[top_indices], -1)
            top_xmax = np.expand_dims(det_xmax[top_indices], -1)
            top_ymax = np.expand_dims(det_ymax[top_indices], -1)

            #-----------------------------------------------------------#
            #   Undo the gray-bar padding, or scale the boxes by the
            #   original image size when no letterboxing was used.
            #-----------------------------------------------------------#
            if self.letterbox_image:
                boxes = m2det_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array(
                        [self.model_image_size[0], self.model_image_size[1]]),
                    image_shape)
            else:
                top_xmin = top_xmin * image_shape[1]
                top_ymin = top_ymin * image_shape[0]
                top_xmax = top_xmax * image_shape[1]
                top_ymax = top_ymax * image_shape[0]
                boxes = np.concatenate(
                    [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

            for i, c in enumerate(top_label_indices):
                # Labels are 1-based here, hence the ``- 1``.
                predicted_class = self.class_names[int(c) - 1]
                score = str(top_conf[i])

                top, left, bottom, right = boxes[i]
                f.write("%s %s %s %s %s %s\n" %
                        (predicted_class, score[:6], str(int(left)),
                         str(int(top)), str(int(right)), str(int(bottom))))

        return
Beispiel #12
0
    def detect_image(self, image, trfn, carn, lrx, rrx, carnums, carlist, sy,
                     slx, srx, lx, ly):
        """Detect objects in a traffic frame, annotate it and log violations.

        The frame is run through the YOLO session and every detection except
        class index 9 is drawn.  When the loop reaches the second detection,
        the traffic-light region around ``(lx, ly)`` is cropped, saved and
        classified with ``check1``; if it is ``'red'``, class-2 detections
        inside the watched area are cropped, read with ``cartag`` and
        logged, and those above the ``sy`` line are marked as running the
        red light.  Class-2 detections near the bottom of the frame are also
        read with ``cartag`` and collected in ``carlist``.  Two summary
        lines are drawn at fixed positions.

        Args:
            image: frame to annotate in place.  Presumably a PIL image: it is
                used through ``image.size``, ``image.crop``, ``image.save``
                and ``ImageDraw.Draw``.
            trfn: running counter used to name the saved traffic-light crop.
            carn: running counter used to name car crops saved while the
                light is red.
            lrx: not used by this method.
            rrx: not used by this method.
            carnums: running counter used to name car crops saved near the
                bottom of the frame.
            carlist: list of plate strings seen so far; new 7-character
                plates other than ``'00000'`` are appended in place.
            sy: a car whose ``(top + bottom) / 2 - 50`` is below this value
                while the light is red is flagged.
            slx: lower bound (exclusive) for the left edge of a watched car.
            srx: upper bound (exclusive) for the left edge of a watched car.
            lx: x centre of the traffic-light crop.
            ly: y centre of the traffic-light crop.

        Returns:
            A tuple ``(image, trfn, carn, carnums, len(carlist),
            run_a_red_light, totalcarn)`` holding the annotated frame, the
            updated counters, the number of plates collected so far, the
            number of red-light violations logged in this frame and the
            number of detections with class index 2, 3, 5 or 7.
        """

        def padded_box(box):
            # Grow the box by 5 px per side, round to the nearest integer
            # and clamp it to the image bounds.
            top, left, bottom, right = box
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
            return top, left, bottom, right

        def label_origin(left, top, label_size):
            # Put the label above the box when it fits, otherwise inside it.
            if top - label_size[1] >= 0:
                return np.array([left, top - label_size[1]])
            return np.array([left, top + 1])

        run_a_red_light = 0
        totalcarn = 0

        # Resize the image to meet the network's input requirements.
        new_image_size = (self._defaults["model_image_size"][0],
                          self._defaults["model_image_size"][1])
        boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        # Run prediction.
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
            })

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        font = ImageFont.truetype('font/simhei.ttf', 28)

        for i, c in enumerate(out_classes):
            if i == 1:
                # NOTE(review): the traffic-light check is tied to the second
                # detection, so frames with fewer than two detections skip
                # it entirely -- confirm this is intended.
                trfn = trfn + 1
                light_crop_path = trafficoutputpath + str(trfn) + ".jpg"
                img = image.crop((lx - 15, ly - 30, lx + 15, ly + 30))
                img.save(light_crop_path)
                temp = check1(light_crop_path)
                draw = ImageDraw.Draw(image)

                if temp == 'red':
                    draw.rectangle((lx - 15, ly - 30, lx + 15, ly + 30),
                                   outline="red",
                                   width=2)
                    draw.rectangle((lx - 15, ly - 55, lx + 70, ly - 30),
                                   fill="white")
                    draw.text((lx - 15, ly - 55),
                              "red",
                              fill=(0, 0, 0),
                              font=font)

                    # ``with`` closes the three log files; they were
                    # previously opened on every red frame and never closed.
                    with open(resultpath, "a", encoding='utf-8') as f1, \
                            open(run_a_red_lightpath(), "a",
                                 encoding='utf-8') as f2, \
                            open(Road_ROOTpath() + 'all_illegal_car_info.txt',
                                 "a+", encoding='utf-8') as f3:
                        for cn, cf in enumerate(out_classes):
                            if cf != 2:
                                continue

                            top, left, bottom, right = padded_box(
                                out_boxes[cn])
                            # Only cars whose left edge lies strictly between
                            # slx and srx and whose box lies strictly between
                            # y = 500 and y = 1080 are handled.
                            if not (slx < left < srx and top > 500
                                    and bottom < 1080):
                                continue

                            # Draw the box.
                            label = '{} {:.2f}'.format(self.class_names[cf],
                                                       out_scores[cn])
                            label_size = draw.textsize(label, font)
                            text_origin = label_origin(left, top, label_size)

                            carn = carn + 1
                            car_crop_path = caroutputpath + str(carn) + ".jpg"
                            img = image.crop((left, top, right, bottom))
                            img.save(car_crop_path)
                            ch = cartag(car_crop_path)

                            f1.write(ch + "\n")
                            draw.rectangle((left, top, right, bottom),
                                           outline=self.colors[cf],
                                           width=2)
                            draw.rectangle([
                                tuple(text_origin),
                                tuple(text_origin + label_size)
                            ],
                                           fill=self.colors[cf])
                            draw.text(text_origin,
                                      label,
                                      fill=(0, 0, 0),
                                      font=font)

                            draw.rectangle((right, top, right + 150, top + 40),
                                           fill=self.colors[cf])
                            draw.text((right, top, right + 30, top + 60),
                                      ch,
                                      fill=(0, 0, 0),
                                      font=font)
                            if ((top + bottom) / 2 - 50) < sy:
                                # Compare by value: ``is not`` on string
                                # literals tests object identity and only
                                # works by accident of interning.
                                loggable = ch != '00000' and ch[1] != '1'
                                if loggable:
                                    f3.write(ch + " 闯红灯" + "\n")
                                    f2.write(ch + " 闯红灯" + "\n")
                                    run_a_red_light += 1
                                draw.rectangle(
                                    (right, top + 40, right + 100, top + 80),
                                    fill='red')
                                draw.text(
                                    (right, top + 40, right + 60, top + 140),
                                    "闯红灯",
                                    fill=(0, 0, 0),
                                    font=font)
                                if loggable:
                                    # Saved after drawing so the snapshot
                                    # carries the annotation.
                                    image.save(run_a_red_light_img_path() +
                                               ch + '_闯红灯' + ".jpg")
                else:
                    draw.rectangle((lx - 15, ly - 30, lx + 15, ly + 30),
                                   outline="green",
                                   width=2)
                    draw.rectangle((lx - 15, ly - 55, lx + 70, ly - 30),
                                   fill="white")
                    draw.text((lx - 15, ly - 55),
                              "green",
                              fill=(0, 0, 0),
                              font=font)
                del draw

            # Class index 9 is never drawn (presumably the traffic-light
            # class, which is handled separately above -- confirm).
            if c == 9:
                continue

            top, left, bottom, right = padded_box(out_boxes[i])

            # Draw the box.
            label = '{} {:.2f}'.format(self.class_names[c], out_scores[i])
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            text_origin = label_origin(left, top, label_size)

            draw.rectangle((left, top, right, bottom),
                           outline=self.colors[c],
                           width=2)
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)

            if c in (2, 3, 5, 7):
                totalcarn += 1
            del draw

            if c == 2 and bottom > 950 and left > slx:
                carnums += 1
                plate_crop_path = caridpath + str(carnums) + '.jpg'
                img = image.crop((left, top, right, bottom))
                img.save(plate_crop_path)
                carid = cartag(plate_crop_path)
                draw = ImageDraw.Draw(image)
                draw.rectangle((right, top, right + 150, top + 40),
                               fill="green")
                draw.text((right, top, right + 30, top + 60),
                          carid,
                          fill=(0, 0, 0),
                          font=font)

                if (carid not in carlist and carid != '00000'
                        and len(carid) == 7):
                    carlist.append(carid)

        # Summary lines at fixed positions.
        draw = ImageDraw.Draw(image)
        draw.rectangle((1300, 150, 1900, 200), fill="white")
        draw.text((1300, 160, 1900, 180),
                  "路口通过car的数量为:" + str(len(carlist)),
                  fill="black",
                  font=font)

        draw.rectangle((1300, 50, 1900, 140), fill="white")
        if totalcarn >= 10:
            draw.text((1300, 60, 1900, 100),
                      "当前路口机动车数量为" + str(totalcarn) + "大于9" + "  拥堵",
                      fill="black",
                      font=font)
        else:
            draw.text((1300, 60, 1800, 100),
                      "当前路口机动车数量:" + str(totalcarn),
                      fill="black",
                      font=font)
        draw.text((1300, 100, 1800, 140),
                  "当前路口闯红灯数量:" + str(run_a_red_light),
                  fill="black",
                  font=font)

        del draw
        return image, trfn, carn, carnums, len(
            carlist), run_a_red_light, totalcarn
Beispiel #13
0
    def detect_image(self, image):
        """Run the SSD predictor on ``image`` and draw the detections onto it.

        The image is letterboxed (or plainly resized when
        ``self.letterbox_image`` is false) to ``self.input_shape``,
        preprocessed and passed to ``self.get_pred``.  Decoded boxes scoring
        at least ``self.confidence`` are mapped back to the original image
        and drawn together with their class label and score.

        Args:
            image: picture to annotate in place (presumably a PIL image --
                the non-letterbox path calls ``image.convert`` on it).

        Returns:
            The same ``image`` object; untouched when nothing was detected.
        """
        original_hw = np.array(np.shape(image)[0:2])
        net_h, net_w = self.input_shape[0], self.input_shape[1]

        # Either pad with gray bars (distortion-free resize) or resize
        # directly to the network input size.
        if self.letterbox_image:
            resized = np.array(letterbox_image(image, (net_w, net_h)))
        else:
            resized = image.convert('RGB').resize((net_w, net_h),
                                                  Image.BICUBIC)

        # Preprocess / normalise a batch of one.
        batch = np.reshape(np.array(resized, dtype=np.float64),
                           [1, net_h, net_w, 3])
        preds = self.get_pred(preprocess_input(batch)).numpy()

        # Decode the predictions.
        results = self.bbox_util.detection_out(
            preds, confidence_threshold=self.confidence)
        detections = results[0]

        # Nothing detected: hand the input back unchanged.
        if len(detections) <= 0:
            return image

        # Columns: label, score, xmin, ymin, xmax, ymax.  Keep the rows
        # scoring at least ``confidence``.
        scores = detections[:, 1]
        keep = [
            row for row, value in enumerate(scores)
            if value >= self.confidence
        ]
        kept_scores = scores[keep]
        kept_labels = detections[:, 0][keep].tolist()
        xmin, ymin, xmax, ymax = (np.expand_dims(detections[:, col][keep], -1)
                                  for col in (2, 3, 4, 5))

        # Undo the gray-bar padding, or scale by the original image size
        # when the image was resized directly.
        if self.letterbox_image:
            boxes = ssd_correct_boxes(ymin, xmin, ymax, xmax,
                                      np.array([net_h, net_w]), original_hw)
        else:
            orig_h, orig_w = original_hw[0], original_hw[1]
            boxes = np.concatenate(
                [ymin * orig_h, xmin * orig_w, ymax * orig_h, xmax * orig_w],
                axis=-1)

        img_h = np.shape(image)[0]
        img_w = np.shape(image)[1]
        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * img_w + 0.5).astype('int32'))
        thickness = max((img_h + img_w) // self.input_shape[0], 1)

        for det, raw_label in enumerate(kept_labels):
            # Labels are 1-based here, hence the ``- 1``.
            class_index = int(raw_label) - 1
            class_name = self.class_names[class_index]
            colour = self.colors[class_index]

            # Grow the box by 5 px per side, then round and clamp.
            top, left, bottom, right = boxes[det]
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

            # Draw the box.
            caption = f'{class_name} {kept_scores[det]:.2f}'
            draw = ImageDraw.Draw(image)
            caption_size = draw.textsize(caption, font)
            print(caption.encode('utf-8'), top, left, bottom, right)

            # Label goes above the box when it fits, otherwise inside it.
            fits_above = top - caption_size[1] >= 0
            text_origin = np.array(
                [left, top - caption_size[1] if fits_above else top + 1])

            # A thick outline is a stack of nested 1-px rectangles.
            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=colour)
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + caption_size)],
                fill=colour)
            draw.text(text_origin, caption, fill=(0, 0, 0), font=font)
            del draw
        return image
Beispiel #14
0
    def detect_image(self, image):
        """Run the YOLO session on ``image`` and draw the detections onto it.

        The image is letterboxed (or plainly resized when
        ``self.letterbox_image`` is false) to ``self.model_image_size``,
        scaled to [0, 1] and evaluated through ``self.sess``.  Every returned
        box is padded, clamped to the image and drawn with its class label
        and score.

        Args:
            image: picture to annotate in place; it must expose ``size`` as
                (width, height) and is passed to ``ImageDraw.Draw``
                (presumably a PIL image).

        Returns:
            The same ``image`` object with boxes and labels drawn on it.
        """
        # Either pad with gray bars (distortion-free resize) or resize
        # directly to the network input size.
        target_size = (self.model_image_size[1], self.model_image_size[0])
        if self.letterbox_image:
            net_input = letterbox_image(image, target_size)
        else:
            net_input = image.convert('RGB').resize(target_size,
                                                    Image.BICUBIC)

        # Scale to [0, 1] and add the batch axis.
        image_data = np.array(net_input, dtype='float32')
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)

        # Feed the batch to the network.
        feeds = {
            self.yolo_model.input: image_data,
            self.input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        }
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes], feed_dict=feeds)

        print(f'Found {len(out_boxes)} boxes for img')

        # Font size scales with the image height, outline width with its
        # perimeter.
        img_w, img_h = image.size[0], image.size[1]
        font = ImageFont.truetype(
            font='font/simhei.ttf',
            size=np.floor(3e-2 * img_h + 0.5).astype('int32'))
        thickness = max((img_w + img_h) // 300, 1)

        for det, class_index in enumerate(out_classes):
            class_name = self.class_names[class_index]
            colour = self.colors[class_index]

            # Grow the box by 5 px per side, then round and clamp.
            top, left, bottom, right = out_boxes[det]
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

            # Draw the box.
            caption = f'{class_name} {out_scores[det]:.2f}'
            draw = ImageDraw.Draw(image)
            caption_size = draw.textsize(caption, font)
            print(caption.encode('utf-8'), top, left, bottom, right)

            # Label goes above the box when it fits, otherwise inside it.
            fits_above = top - caption_size[1] >= 0
            text_origin = np.array(
                [left, top - caption_size[1] if fits_above else top + 1])

            # A thick outline is a stack of nested 1-px rectangles.
            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=colour)
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + caption_size)],
                fill=colour)
            draw.text(text_origin, caption, fill=(0, 0, 0), font=font)
            del draw

        return image
Beispiel #15
0
    def get_FPS(self, image, test_interval):
        """Return the mean wall-clock time of one prediction on ``image``.

        The image is brought to the model input size (via ``letterbox_image``
        when ``self.letterbox_image`` is truthy, otherwise by a plain bicubic
        resize), divided by 255 and given a leading batch axis. One
        prediction is run before the clock starts, then ``test_interval``
        predictions are timed.

        Args:
            image: PIL image fed to the network.
            test_interval: Number of timed predictions; must not be zero.

        Returns:
            Elapsed seconds divided by ``test_interval``.
        """
        #---------------------------------------------------------#
        #   Resize to (width, height) of the network input, either
        #   letterboxed or stretched directly
        #---------------------------------------------------------#
        target_size = (self.model_image_size[1], self.model_image_size[0])
        if not self.letterbox_image:
            boxed_image = image.convert('RGB').resize(target_size,
                                                      Image.BICUBIC)
        else:
            boxed_image = letterbox_image(image, target_size)

        #---------------------------------------------------------#
        #   Scale the pixels and add the batch dimension
        #---------------------------------------------------------#
        image_data = np.array(boxed_image, dtype='float32')
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)

        def _predict():
            # One full forward pass, through the eager function or the session.
            if self.eager:
                input_image_shape = np.expand_dims(
                    np.array([image.size[1], image.size[0]], dtype='float32'),
                    0)
                out_boxes, out_scores, out_classes = self.get_pred(
                    image_data, input_image_shape)
            else:
                out_boxes, out_scores, out_classes = self.sess.run(
                    [self.boxes, self.scores, self.classes],
                    feed_dict={
                        self.yolo_model.input: image_data,
                        self.input_image_shape: [image.size[1], image.size[0]],
                        K.learning_phase(): 0
                    })
            return out_boxes, out_scores, out_classes

        # One prediction outside the timed region.
        _predict()

        t1 = time.time()
        for _ in range(test_interval):
            _predict()
        t2 = time.time()
        return (t2 - t1) / test_interval
Beispiel #16
0
    def detect_image(self, image_id, image):
        """Run CenterNet on ``image`` and write its detections to a text file.

        Results are written to ``./input/detection-results/<image_id>.txt``,
        one line per detection in the form
        ``class score left top right bottom``. The file is created (possibly
        empty) as soon as the method starts and is always closed on exit.

        Side effects: sets ``self.confidence`` to 0.01 and
        ``self.nms_threhold`` to 0.5.

        Args:
            image_id: File stem used for the result file.
            image: Input image, passed to ``letterbox_image``.

        Returns:
            ``image`` when decoding yields no detections, otherwise ``None``
            (both kept for backward compatibility).
        """
        # The context manager closes the result file on every exit path,
        # including the early return below and any exception.
        with open("./input/detection-results/" + image_id + ".txt", "w") as f:
            self.confidence = 0.01
            self.nms_threhold = 0.5

            image_shape = np.array(np.shape(image)[0:2])
            #---------------------------------------------------------#
            #   Pad the image with gray bars so it can be resized
            #   without distortion
            #---------------------------------------------------------#
            crop_img = letterbox_image(
                image, [self.image_size[0], self.image_size[1]])
            #---------------------------------------------------------#
            #   Reverse the channel axis (RGB -> BGR): the original
            #   centernet_hourglass weights were trained on BGR images
            #---------------------------------------------------------#
            photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
            #---------------------------------------------------------#
            #   Normalize, move channels first and add the batch axis
            #---------------------------------------------------------#
            photo = np.reshape(
                np.transpose(preprocess_image(photo), (2, 0, 1)),
                [1, self.image_size[2], self.image_size[0],
                 self.image_size[1]])

            with torch.no_grad():
                images = Variable(
                    torch.from_numpy(np.asarray(photo)).type(
                        torch.FloatTensor))
                if self.cuda:
                    images = images.cuda()

                outputs = self.centernet(images)
                if self.backbone == 'hourglass':
                    outputs = [
                        outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"],
                        outputs[-1]["reg"]
                    ]
                #-----------------------------------------------------#
                #   Decode the raw network outputs into boxes
                #-----------------------------------------------------#
                outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                                      self.image_size, self.confidence,
                                      self.cuda)

                #-----------------------------------------------------#
                #   Max-pooling NMS alone cannot remove the partial
                #   boxes found on large objects, so an extra box-level
                #   NMS is applied. In practice it matters little for
                #   the hourglass backbone and a lot for resnet.
                #-----------------------------------------------------#
                try:
                    if self.nms:
                        outputs = np.array(nms(outputs, self.nms_threhold))
                except Exception:
                    # Best effort, as before: if the extra NMS fails, keep
                    # the decoded outputs. Unlike a bare ``except`` this
                    # lets KeyboardInterrupt/SystemExit propagate.
                    pass

                output = outputs[0]
                if len(output) <= 0:
                    return image

                batch_boxes = output[:, :4]
                det_conf = output[:, 4]
                det_label = output[:, 5]
                det_xmin, det_ymin = batch_boxes[:, 0], batch_boxes[:, 1]
                det_xmax, det_ymax = batch_boxes[:, 2], batch_boxes[:, 3]
                #-----------------------------------------------------#
                #   Keep only boxes scoring at least self.confidence
                #-----------------------------------------------------#
                top_indices = [
                    i for i, conf in enumerate(det_conf)
                    if conf >= self.confidence
                ]
                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = np.expand_dims(det_xmin[top_indices], -1)
                top_ymin = np.expand_dims(det_ymin[top_indices], -1)
                top_xmax = np.expand_dims(det_xmax[top_indices], -1)
                top_ymax = np.expand_dims(det_ymax[top_indices], -1)

                #-----------------------------------------------------#
                #   Remove the gray-bar padding from the coordinates
                #-----------------------------------------------------#
                boxes = centernet_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array([self.image_size[0], self.image_size[1]]),
                    image_shape)

            for i, c in enumerate(top_label_indices):
                predicted_class = self.class_names[int(c)]
                score = str(top_conf[i])

                top, left, bottom, right = boxes[i]
                f.write("%s %s %s %s %s %s\n" %
                        (predicted_class, score[:6], str(int(left)),
                         str(int(top)), str(int(right)), str(int(bottom))))

        return
Beispiel #17
0
import numpy as np
import tensorflow as tf
from PIL import Image, ImageFont, ImageDraw
from utils.utils import letterbox_image
from utils.setup_tool import get_classes, get_anchors
import cv2

# Load the test image and prepare it as a TFLite model input.
img = "test_data/london.jpg"
image = Image.open(img)
model_image_size = (416, 416)

# (height, width, channels) of the original image.
image_shape = (image.size[1], image.size[0], 3)

# Both sides of the model input must be multiples of 32. The original lines
# were bare ``cond, 'msg'`` tuple expressions, so this was never enforced.
if model_image_size[0] % 32 != 0:
    raise ValueError('Multiples of 32 required')
if model_image_size[1] % 32 != 0:
    raise ValueError('Multiples of 32 required')
boxed_image = letterbox_image(image, tuple(reversed(model_image_size)))

# Divide the pixels by 255 and add the batch dimension.
image_data = np.array(boxed_image, dtype='float32')
image_data /= 255.
image_data = np.expand_dims(image_data, 0)

#print(image.size)
print(image_shape)
print(image_data.shape)

# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(
    model_path="model_data/small_mobilenet_yolo.tflite")
interpreter.allocate_tensors()

# Get input and output tensors.
Beispiel #18
0
    def detect_image(self, image, classroom_id):
        """Detect objects in ``image`` and draw a labelled box on each person.

        The image is letterboxed to its own size rounded down to multiples
        of 32, divided by 255 and run through the TF session. Only
        detections whose class name is ``'person'`` are drawn; the number of
        boxes found, each drawn label and the elapsed time are printed.

        Args:
            image: PIL image; it is drawn on in place.
            classroom_id: Not used by this implementation.

        Returns:
            The same ``image`` object, with boxes and labels drawn on it.
        """
        started = timer()

        # Round each side down to a multiple of 32 for the network input.
        net_size = (image.width - (image.width % 32),
                    image.height - (image.height % 32))
        image_data = np.array(letterbox_image(image, net_size),
                              dtype='float32')
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # batch axis

        # Run the network.
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        # Font size and outline thickness both depend on the image size.
        font_size = np.floor(3e-2 * image.size[1] + 0.5).astype('int32')
        font = ImageFont.truetype(font='font/simhei.ttf', size=font_size)
        thickness = (image.size[0] + image.size[1]) // 300

        for idx, cls in enumerate(out_classes):
            predicted_class = self.class_names[cls]
            box = out_boxes[idx]
            score = out_scores[idx]
            top, left, bottom, right = box

            # Only people are drawn; every other class is skipped.
            if predicted_class != 'person':
                continue

            # Grow the box by 5 px per side, round, and clamp to the image.
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(image.size[1],
                         np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(image.size[0],
                        np.floor(right + 5 + 0.5).astype('int32'))

            caption = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(caption, font)
            print(caption.encode('utf-8'))

            # Put the label above the box when it fits, else just inside it.
            if top - label_size[1] < 0:
                text_origin = np.array([left, top + 1])
            else:
                text_origin = np.array([left, top - label_size[1]])

            color = self.colors[cls]
            # Nested 1-px rectangles give an outline of the wanted thickness.
            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, caption, fill=(0, 0, 0), font=font)
            del draw

        finished = timer()
        print(finished - started)
        return image
Beispiel #19
0
    def detect_image(self, image):
        """Detect objects in ``image`` with the YOLO network and draw them.

        The image is resized to the model input size (letterboxed when
        ``self.letterbox_image`` is truthy, otherwise stretched), divided by
        255, run through ``self.net``, decoded by the three
        ``self.yolo_decodes`` heads and filtered by non-max suppression.
        Surviving boxes are mapped back to the original image and drawn with
        a class/score label.

        Args:
            image: PIL image; it is drawn on in place.

        Returns:
            The same ``image`` object; it is returned undrawn when the
            detections for the image cannot be read.
        """
        image_shape = np.array(np.shape(image)[0:2])

        #---------------------------------------------------------#
        #   Pad with gray bars for a distortion-free resize, or
        #   resize directly
        #---------------------------------------------------------#
        if self.letterbox_image:
            crop_img = np.array(
                letterbox_image(
                    image,
                    (self.model_image_size[1], self.model_image_size[0])))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize(
                (self.model_image_size[1], self.model_image_size[0]),
                Image.BICUBIC)
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        #---------------------------------------------------------#
        #   Add the batch dimension
        #---------------------------------------------------------#
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            #---------------------------------------------------------#
            #   Run the network and decode each of the three heads
            #---------------------------------------------------------#
            outputs = self.net(images)
            output_list = [
                self.yolo_decodes[i](outputs[i]) for i in range(3)
            ]

            #---------------------------------------------------------#
            #   Stack the predictions and apply non-max suppression
            #---------------------------------------------------------#
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],
                conf_thres=self.confidence,
                nms_thres=self.iou)

            #---------------------------------------------------------#
            #   Nothing detected: return the original image
            #---------------------------------------------------------#
            try:
                batch_detections = batch_detections[0].cpu().numpy()
            except Exception:
                # Presumably the entry is None when nothing was detected --
                # TODO confirm against non_max_suppression. Unlike the old
                # bare ``except``, KeyboardInterrupt/SystemExit propagate.
                return image

            #---------------------------------------------------------#
            #   Keep boxes whose objectness * class score exceeds the
            #   confidence threshold
            #---------------------------------------------------------#
            top_index = (batch_detections[:, 4] * batch_detections[:, 5] >
                         self.confidence)
            top_conf = (batch_detections[top_index, 4] *
                        batch_detections[top_index, 5])
            top_label = np.array(batch_detections[top_index, -1], np.int32)
            top_bboxes = np.array(batch_detections[top_index, :4])
            top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
            top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
            top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
            top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

            #---------------------------------------------------------#
            #   With letterboxing the boxes are relative to the padded
            #   image, so the gray bars must be removed; otherwise the
            #   boxes are simply rescaled to the original image size.
            #---------------------------------------------------------#
            if self.letterbox_image:
                boxes = yolo_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array([self.model_image_size[0],
                              self.model_image_size[1]]), image_shape)
            else:
                top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
                top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
                top_xmax = top_xmax / self.model_image_size[1] * image_shape[1]
                top_ymax = top_ymax / self.model_image_size[0] * image_shape[0]
                boxes = np.concatenate(
                    [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))

        thickness = max(
            (np.shape(image)[0] + np.shape(image)[1]) //
            self.model_image_size[0], 1)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            # Grow the box by 5 px per side, round, and clamp to the image.
            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box and its label.
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            # Put the label above the box when it fits, else just inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Look the colour up once per detection instead of once per
            # outline pass.
            color = self.colors[self.class_names.index(predicted_class)]
            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
Beispiel #20
0
    def detect_image(self, image):
        """Detect objects in ``image`` and draw labelled boxes on it.

        Parameters
        ----------
            image: PIL image; it is drawn on in place.
        Returns
        -------
            image: the same object with every detection drawn.

        Each label and the total execution time are printed.
        """
        started = timer()

        if self.model_image_size == (None, None):
            # No fixed input size: round the image size down to multiples
            # of 32 and letterbox to that.
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        else:
            assert self.model_image_size[
                0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[
                1] % 32 == 0, 'Multiples of 32 required'
            # Letterbox (pad) the image to the fixed model input size.
            boxed_image = letterbox_image(
                image, tuple(reversed(self.model_image_size)))

        # Divide the pixels by 255 and add the batch dimension.
        image_data = np.array(boxed_image, dtype='float32')
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)

        # Run the network: the feeds are the scaled 4-D image and the
        # original image size as [height, width].
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        # Report how many boxes were found.
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        # Font size and outline thickness both depend on the image size.
        font_size = np.floor(3e-2 * image.size[1] + 0.5).astype('int32')
        font = ImageFont.truetype(font='font/simhei.ttf', size=font_size)
        thickness = (image.size[0] + image.size[1]) // 300

        # ``cls`` is the class index, ``idx`` the position of the detection.
        for idx, cls in enumerate(out_classes):
            predicted_class = self.class_names[cls]
            box = out_boxes[idx]
            score = out_scores[idx]

            # Grow the box by 5 px per side, round, and clamp to the image.
            top, left, bottom, right = box
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(image.size[1],
                         np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(image.size[0],
                        np.floor(right + 5 + 0.5).astype('int32'))

            # Draw the box and its label.
            caption = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(caption, font)
            print(caption.encode('utf-8'))

            # Put the label above the box when it fits, else just inside it.
            if top - label_size[1] < 0:
                text_origin = np.array([left, top + 1])
            else:
                text_origin = np.array([left, top - label_size[1]])

            color = self.colors[cls]
            # Nested 1-px rectangles give an outline of the wanted thickness.
            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, caption, fill=(0, 0, 0), font=font)
            del draw

        finished = timer()
        # Report the execution time.
        print("检测执行时间:" + str(finished - started))
        return image
    def detect_image(self, image_id, image):
        """Run the YOLO network on ``image`` and write detections to a file.

        Results are written to ``./input/detection-results/<image_id>.txt``,
        one line per detection in the form
        ``class score left top right bottom``. The file is created (possibly
        empty) before inference and is always closed on exit.

        Side effect: sets ``self.confidence`` to 0.001.

        Args:
            image_id: File stem used for the result file.
            image: Input image, passed to ``letterbox_image``.

        Returns:
            ``image`` when the detections for the image cannot be read,
            otherwise ``None`` (both kept for backward compatibility).
        """
        self.confidence = 0.001
        # The context manager closes the result file on every exit path,
        # including the early return below and any exception.
        with open("./input/detection-results/" + image_id + ".txt", "w") as f:
            image_shape = np.array(np.shape(image)[0:2])

            # Letterbox, divide by 255, move channels first, add batch axis.
            crop_img = np.array(
                letterbox_image(
                    image,
                    (self.model_image_size[0], self.model_image_size[1])))
            photo = np.array(crop_img, dtype=np.float32)
            photo /= 255.0
            photo = np.transpose(photo, (2, 0, 1))
            photo = photo.astype(np.float32)
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            with torch.no_grad():
                outputs = self.net(images)
                # Decode each of the three heads, then stack them for NMS.
                output_list = [
                    self.yolo_decodes[i](outputs[i]) for i in range(3)
                ]
                output = torch.cat(output_list, 1)
                batch_detections = non_max_suppression(
                    output,
                    self.config["yolo"]["classes"],
                    conf_thres=self.confidence,
                    nms_thres=0.3)
            try:
                batch_detections = batch_detections[0].cpu().numpy()
            except Exception:
                # Presumably the entry is None when nothing was detected --
                # TODO confirm against non_max_suppression. Unlike the old
                # bare ``except``, KeyboardInterrupt/SystemExit propagate.
                return image

            # Keep boxes whose objectness * class score exceeds the threshold.
            top_index = (batch_detections[:, 4] * batch_detections[:, 5] >
                         self.confidence)
            top_conf = (batch_detections[top_index, 4] *
                        batch_detections[top_index, 5])
            top_label = np.array(batch_detections[top_index, -1], np.int32)
            top_bboxes = np.array(batch_detections[top_index, :4])
            top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
            top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
            top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
            top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

            # Remove the gray-bar padding from the coordinates.
            boxes = yolo_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.model_image_size[0],
                          self.model_image_size[1]]), image_shape)

            for i, c in enumerate(top_label):
                predicted_class = self.class_names[c]
                score = str(top_conf[i])

                top, left, bottom, right = boxes[i]
                f.write("%s %s %s %s %s %s\n" %
                        (predicted_class, score[:6], str(int(left)),
                         str(int(top)), str(int(right)), str(int(bottom))))

        return
Beispiel #22
0
    def detect_image(self, image):
        """Detect objects in ``image`` with CenterNet and draw them.

        The image is letterboxed to the network input size, channel-reversed,
        preprocessed and passed to ``self.centernet.predict``. Detections
        scoring at least ``self.confidence`` are mapped back to the original
        image and drawn with a class/score label; each label and box is also
        printed.

        Args:
            image: PIL image; it is drawn on in place.

        Returns:
            The same ``image`` object; it is returned undrawn when the
            network yields no detections.
        """
        image_shape = np.array(np.shape(image)[0:2])
        net_h, net_w = self.input_shape[0], self.input_shape[1]
        #---------------------------------------------------------#
        #   Pad with gray bars for a distortion-free resize
        #---------------------------------------------------------#
        crop_img = letterbox_image(image, [net_h, net_w])
        #---------------------------------------------------------#
        #   Reverse the channel axis (RGB -> BGR): the original
        #   centernet_hourglass weights were trained on BGR images
        #---------------------------------------------------------#
        photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
        photo = np.reshape(preprocess_image(photo),
                           [1, net_h, net_w, self.input_shape[2]])

        preds = self.centernet.predict(photo)
        #---------------------------------------------------------#
        #   Max-pooling NMS alone cannot remove the partial boxes
        #   found on large objects, so an extra box-level NMS is
        #   applied. In practice it matters little for the
        #   hourglass backbone and a lot for resnet.
        #---------------------------------------------------------#
        if self.nms:
            preds = np.array(nms(preds, self.nms_threhold))

        detections = preds[0]
        if len(detections) <= 0:
            return image

        #---------------------------------------------------------#
        #   Divide the box coordinates by a quarter of the input
        #   height to turn them into fractions
        #---------------------------------------------------------#
        detections[:, 0:4] = detections[:, 0:4] / (net_h / 4)

        det_label = detections[:, -1]
        det_conf = detections[:, -2]
        #---------------------------------------------------------#
        #   Keep only boxes scoring at least self.confidence
        #---------------------------------------------------------#
        keep = [
            k for k, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[keep]
        top_label_indices = det_label[keep].tolist()
        top_xmin = np.expand_dims(detections[:, 0][keep], -1)
        top_ymin = np.expand_dims(detections[:, 1][keep], -1)
        top_xmax = np.expand_dims(detections[:, 2][keep], -1)
        top_ymax = np.expand_dims(detections[:, 3][keep], -1)

        #---------------------------------------------------------#
        #   Remove the gray-bar padding from the coordinates
        #---------------------------------------------------------#
        boxes = centernet_correct_boxes(top_ymin, top_xmin, top_ymax,
                                        top_xmax, np.array([net_h, net_w]),
                                        image_shape)

        # Font size and outline thickness both depend on the image size.
        img_h, img_w = np.shape(image)[0], np.shape(image)[1]
        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * img_w + 0.5).astype('int32'))
        thickness = max((img_h + img_w) // net_h, 1)

        for idx, cls in enumerate(top_label_indices):
            predicted_class = self.class_names[int(cls)]
            score = top_conf[idx]

            # Grow the box by 5 px per side, round, and clamp to the image.
            top, left, bottom, right = boxes[idx]
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 5 + 0.5).astype('int32'))

            # Draw the box and its label.
            caption = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(caption, font)
            print(caption.encode('utf-8'), top, left, bottom, right)

            # Put the label above the box when it fits, else just inside it.
            if top - label_size[1] < 0:
                text_origin = np.array([left, top + 1])
            else:
                text_origin = np.array([left, top - label_size[1]])

            color = self.colors[int(cls)]
            # Nested 1-px rectangles give an outline of the wanted thickness.
            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, caption, fill=(0, 0, 0), font=font)
            del draw
        return image
Beispiel #23
0
    def get_FPS(self, image, test_interval):
        """Return the mean wall-clock time of one detection pass on ``image``.

        A pass is prediction via ``self.get_pred``, decoding via
        ``self.bbox_util.detection_out``, confidence filtering and box
        correction. One pass is run before the clock starts, then
        ``test_interval`` passes are timed. The detections are discarded.

        Args:
            image: Input image, passed to ``letterbox_image``.
            test_interval: Number of timed passes; must not be zero.

        Returns:
            Elapsed seconds divided by ``test_interval``.
        """
        # Letterbox the image to the model input size.
        image_shape = np.array(np.shape(image)[0:2])
        net_h, net_w = self.model_image_size[0], self.model_image_size[1]
        crop_img = letterbox_image(image, [net_h, net_w])
        # Preprocess and add the batch dimension.
        photo = np.reshape(
            preprocess_input(np.array(crop_img, dtype=np.float32)),
            [1, net_h, net_w, self.model_image_size[2]])

        def _detect_once():
            # One complete pass; everything computed here is thrown away.
            preds = [pred.numpy() for pred in self.get_pred(photo)]
            # Decode the raw predictions.
            results = self.bbox_util.detection_out(
                preds, self.prior, confidence_threshold=self.confidence)
            if len(results[0]) <= 0:
                return
            found = np.array(results)[0]
            # Keep only boxes scoring at least self.confidence.
            det_label, det_conf = found[:, 5], found[:, 4]
            keep = [
                k for k, conf in enumerate(det_conf)
                if conf >= self.confidence
            ]
            top_conf = det_conf[keep]
            top_label_indices = det_label[keep].tolist()
            top_xmin, top_ymin, top_xmax, top_ymax = (
                np.expand_dims(found[:, col][keep], -1) for col in range(4))
            # Remove the gray-bar padding from the coordinates.
            boxes = efficientdet_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([net_h, net_w]), image_shape)

        # One pass outside the timed region.
        _detect_once()

        t1 = time.time()
        for _ in range(test_interval):
            _detect_once()
        t2 = time.time()
        return (t2 - t1) / test_interval
    def get_FPS(self, image, test_interval):
        """Return the mean wall-clock time of one SSD predict-and-decode pass.

        The image is preprocessed once; a single untimed pass is executed
        first, followed by ``test_interval`` timed passes over the same input.

        Args:
            image: Source picture. It is handed to ``letterbox_image`` when
                ``self.letterbox_image`` is set; otherwise its ``convert`` and
                ``resize`` methods are called directly.
            test_interval: Number of timed passes; it is the divisor of the
                elapsed time, so it must be non-zero.

        Returns:
            Elapsed seconds divided by ``test_interval``.
        """
        # Height and width of the source picture, needed to map boxes back.
        image_shape = np.array(np.shape(image)[0:2])

        # Either pad with gray bars (aspect-preserving resize) or resize
        # directly to the network input size.
        target_size = (self.input_shape[1], self.input_shape[0])
        if self.letterbox_image:
            resized = np.array(letterbox_image(image, target_size))
        else:
            resized = image.convert('RGB').resize(target_size, Image.BICUBIC)

        batch = np.reshape(
            np.array(resized, dtype=np.float64),
            [1, self.input_shape[0], self.input_shape[1], 3])
        photo = preprocess_input(batch)

        def run_once():
            # One full pass: forward, decode, threshold, map boxes back.
            # The intermediate values are discarded; only the work matters.
            preds = self.ssd_model.predict(photo)
            results = self.bbox_util.detection_out(
                preds, confidence_threshold=self.confidence)
            if len(results[0]) <= 0:
                return

            detections = results[0]
            det_label = detections[:, 0]
            det_conf = detections[:, 1]
            det_xmin = detections[:, 2]
            det_ymin = detections[:, 3]
            det_xmax = detections[:, 4]
            det_ymax = detections[:, 5]

            keep = [
                idx for idx, conf in enumerate(det_conf)
                if conf >= self.confidence
            ]
            top_conf = det_conf[keep]
            top_label_indices = det_label[keep].tolist()
            top_xmin = np.expand_dims(det_xmin[keep], -1)
            top_ymin = np.expand_dims(det_ymin[keep], -1)
            top_xmax = np.expand_dims(det_xmax[keep], -1)
            top_ymax = np.expand_dims(det_ymax[keep], -1)

            if self.letterbox_image:
                # Remove the gray-bar offset introduced by letterboxing.
                boxes = ssd_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array([self.input_shape[0], self.input_shape[1]]),
                    image_shape)
            else:
                # Scale the coordinates by the source width/height.
                top_xmin = top_xmin * image_shape[1]
                top_ymin = top_ymin * image_shape[0]
                top_xmax = top_xmax * image_shape[1]
                top_ymax = top_ymax * image_shape[0]
                boxes = np.concatenate(
                    [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

        # Untimed pass before the clock starts.
        run_once()

        t1 = time.time()
        for _ in range(test_interval):
            run_once()
        t2 = time.time()

        return (t2 - t1) / test_interval
Beispiel #25
0
    def get_FPS(self, image, test_interval):
        """Return the mean wall-clock time of one RetinaFace predict-and-decode pass.

        The image is preprocessed once; one untimed pass runs first and is
        followed by ``test_interval`` timed passes over the same input.

        Args:
            image: Source picture, converted to a float32 array here.
            test_interval: Number of timed passes; it is the divisor of the
                elapsed time, so it must be non-zero.

        Returns:
            Elapsed seconds divided by ``test_interval``.

        Side effects:
            When ``self.letterbox_image`` is false, ``self.anchors`` is
            rebuilt for the source image size.
        """
        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        # Multipliers applied to the four box columns and to the ten
        # landmark columns (width/height pairs).
        scale = [im_width, im_height] * 2
        scale_for_landmarks = [im_width, im_height] * 5

        # letterbox_image pads with gray bars for an aspect-preserving
        # resize; without it the anchors are regenerated for the raw size.
        if self.letterbox_image:
            image = letterbox_image(image,
                                    [self.input_shape[1], self.input_shape[0]])
        else:
            self.anchors = Anchors(self.cfg,
                                   image_size=(im_height,
                                               im_width)).get_anchors()

        photo = np.expand_dims(preprocess_input(image), 0)

        def run_once():
            # One full pass: forward, decode, undo letterbox, rescale.
            preds = self.retinaface.predict(photo)
            results = self.bbox_util.detection_out(
                preds, self.anchors, confidence_threshold=self.confidence)
            if len(results) <= 0:
                return

            results = np.array(results)
            # With letterboxing the gray-bar region has to be removed.
            if self.letterbox_image:
                results = retinaface_correct_boxes(
                    results,
                    np.array([self.input_shape[0], self.input_shape[1]]),
                    np.array([im_height, im_width]))

            results[:, :4] = results[:, :4] * scale
            results[:, 5:] = results[:, 5:] * scale_for_landmarks

        # Untimed pass before the clock starts.
        run_once()

        t1 = time.time()
        for _ in range(test_interval):
            run_once()
        t2 = time.time()

        return (t2 - t1) / test_interval
Beispiel #26
0
    def get_bbox(self, image):
        """Detect objects in ``image`` and return padded, clipped boxes.

        Args:
            image: Source picture; it is passed to ``letterbox_image`` and its
                height/width are read through ``np.shape``.

        Returns:
            A list with one ``[left, top, right, bottom, score, class_index]``
            entry per kept detection. Each box is grown by 5 pixels on every
            side and then clipped to the image. ``[None]`` is returned when
            the first element of the NMS output cannot be converted to a
            NumPy array.
        """
        full_shape = np.shape(image)
        image_shape = np.array(full_shape[0:2])
        # Plain ints, looked up once instead of once per box.
        img_h, img_w = full_shape[0], full_shape[1]

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        # HWC -> CHW, then add the batch axis.
        photo = np.transpose(photo, (2, 0, 1)).astype(np.float32)
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        with torch.no_grad():
            outputs = self.net(images)
            output_list = [
                self.yolo_decodes[i](outputs[i]) for i in range(3)
            ]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],
                conf_thres=self.confidence,
                nms_thres=0.3)

        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            # Presumably the NMS helper yields None when nothing was
            # detected -- TODO confirm. Unlike a bare ``except``, this lets
            # KeyboardInterrupt / SystemExit propagate.
            return [None]

        # Score is the product of columns 4 and 5 of each detection row.
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Map boxes from the letterboxed input back to the source image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        bboxes = []
        for i, c in enumerate(top_label):
            top, left, bottom, right = boxes[i]

            # Grow the box by 5 pixels on each side.
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            # Round to the nearest pixel and clip to the image bounds.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 0.5).astype('int32'))

            bboxes.append([left, top, right, bottom, top_conf[i], c])

        return bboxes
    def detect_image(self, image_id, image):
        """Write the detections for one image to a per-image text file.

        The file ``./input/detection-results/<image_id>.txt`` is created (or
        truncated) and receives one line per kept detection in the form
        ``<class> <score> <left> <top> <right> <bottom>``. When the NMS output
        cannot be converted to a NumPy array the file is left empty.

        Args:
            image_id: String used to build the output file name.
            image: Source picture to run detection on.

        Returns:
            None.

        Side effects:
            Overwrites ``self.confidence`` (0.01) and ``self.iou`` (0.5).
        """
        self.confidence = 0.01
        self.iou = 0.5

        # The context manager closes the file on every exit path, including
        # the early return below and any exception raised during inference.
        with open("./input/detection-results/" + image_id + ".txt", "w") as f:
            image_shape = np.array(np.shape(image)[0:2])
            side = image_sizes[self.phi]

            # Pad with gray bars so the resize keeps the aspect ratio.
            crop_img = np.array(letterbox_image(image, (side, side)))
            photo = np.array(crop_img, dtype=np.float32)
            photo = np.transpose(preprocess_input(photo), (2, 0, 1))

            with torch.no_grad():
                images = torch.from_numpy(np.asarray([photo]))
                if self.cuda:
                    images = images.cuda()

                # Forward pass through the network.
                _, regression, classification, anchors = self.net(images)

                # Decode the raw predictions and run NMS.
                regression = decodebox(regression, anchors, images)
                detection = torch.cat([regression, classification], axis=-1)
                batch_detections = non_max_suppression(
                    detection,
                    len(self.class_names),
                    conf_thres=self.confidence,
                    nms_thres=self.iou)

                # Nothing detected: leave the (empty) file and stop.
                # ``except Exception`` keeps the best-effort behaviour but
                # no longer swallows KeyboardInterrupt / SystemExit.
                try:
                    batch_detections = batch_detections[0].cpu().numpy()
                except Exception:
                    return

                # Keep only the boxes scoring above the threshold.
                top_index = batch_detections[:, 4] > self.confidence
                top_conf = batch_detections[top_index, 4]
                top_label = np.array(batch_detections[top_index, -1],
                                     np.int32)
                top_bboxes = np.array(batch_detections[top_index, :4])
                top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
                top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
                top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
                top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

                # Remove the gray-bar offset introduced by letterboxing.
                boxes = efficientdet_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array([side, side]), image_shape)

            for i, c in enumerate(top_label):
                predicted_class = self.class_names[c]
                score = str(top_conf[i])

                top, left, bottom, right = boxes[i]
                f.write("%s %s %s %s %s %s\n" %
                        (predicted_class, score[:6], str(int(left)),
                         str(int(top)), str(int(right)), str(int(bottom))))

        return
Beispiel #28
0
    def detect_image(self, image_id, image, results):
        """Append one detection record per kept box of ``image`` to ``results``.

        Each record is a dict with the keys ``image_id``, ``category_id``,
        ``bbox`` (``[left, top, width, height]`` as floats) and ``score``.

        Args:
            image_id: Identifier of the image; stored as ``int(image_id)``.
            image: Source picture; its ``size`` attribute is used for clipping.
            results: List that the records are appended to.

        Returns:
            The same ``results`` list.

        Side effects:
            Overwrites ``self.confidence`` (0.01) and ``self.nms_threhold``
            (0.5).
        """
        self.confidence = 0.01
        self.nms_threhold = 0.5

        image_shape = np.array(np.shape(image)[0:2])
        net_h, net_w = self.input_shape[0], self.input_shape[1]

        crop_img = letterbox_image(image, [net_h, net_w])
        # Convert RGB to BGR, because the original centernet_hourglass
        # weights were trained on BGR images.
        photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]

        # Preprocess / normalise and add the batch axis.
        photo = np.reshape(preprocess_image(photo),
                           [1, net_h, net_w, self.input_shape[2]])
        preds = self.centernet.predict(photo)

        if self.nms:
            preds = np.array(nms(preds, self.nms_threhold))

        if len(preds[0]) <= 0:
            return results

        # Box coordinates are divided by a quarter of the input height.
        preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)
        dets = preds[0]

        # Keep only the boxes whose score reaches the confidence threshold.
        det_label = dets[:, -1]
        det_conf = dets[:, -2]
        keep = [
            idx for idx, conf in enumerate(det_conf)
            if conf >= self.confidence
        ]
        top_conf = det_conf[keep]
        top_label_indices = det_label[keep].tolist()
        top_xmin = np.expand_dims(dets[:, 0][keep], -1)
        top_ymin = np.expand_dims(dets[:, 1][keep], -1)
        top_xmax = np.expand_dims(dets[:, 2][keep], -1)
        top_ymax = np.expand_dims(dets[:, 3][keep], -1)

        # Remove the gray-bar offset introduced by letterboxing.
        boxes = centernet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.input_shape[0], self.input_shape[1]]), image_shape)

        img_w, img_h = image.size[0], image.size[1]
        for idx, cls_id in enumerate(top_label_indices):
            # Looked up as in the original flow; not stored in the record.
            predicted_class = self.class_names[int(cls_id)]
            top, left, bottom, right = boxes[idx]

            # Round to the nearest pixel and clip to the image bounds.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 0.5).astype('int32'))

            results.append({
                "image_id": int(image_id),
                "category_id": clsid2catid[cls_id],
                "bbox": [
                    float(left),
                    float(top),
                    float(right - left),
                    float(bottom - top),
                ],
                "score": float(top_conf[idx]),
            })

        return results
Beispiel #29
0
    def detect_image(self, image):
        """Run M2Det on ``image`` and draw the labelled boxes onto it.

        Args:
            image: Source picture; it is drawn on in place through
                ``ImageDraw.Draw``.

        Returns:
            The same ``image`` object, unchanged when the decoder reports no
            detections.
        """
        full_shape = np.shape(image)
        image_shape = np.array(full_shape[0:2])
        img_h, img_w = full_shape[0], full_shape[1]

        crop_img, x_offset, y_offset = letterbox_image(
            image, [self.model_image_size[0], self.model_image_size[1]])

        # Preprocess / normalise and add the batch axis.
        batch = np.reshape(
            np.array(crop_img, dtype=np.float64),
            [
                1, self.model_image_size[0], self.model_image_size[1],
                self.model_image_size[2]
            ])
        photo = preprocess_input(batch)
        preds = self.m2det.predict(photo)

        # Decode the raw predictions.
        results = self.bbox_util.detection_out(
            preds, self.prior, confidence_threshold=self.confidence)
        if len(results[0]) <= 0:
            return image

        # Keep only the boxes whose score reaches the confidence threshold.
        detections = results[0]
        det_label = detections[:, 0]
        det_conf = detections[:, 1]
        keep = [
            idx for idx, conf in enumerate(det_conf)
            if conf >= self.confidence
        ]
        top_conf = det_conf[keep]
        top_label_indices = det_label[keep].tolist()
        top_xmin = np.expand_dims(detections[:, 2][keep], -1)
        top_ymin = np.expand_dims(detections[:, 3][keep], -1)
        top_xmax = np.expand_dims(detections[:, 4][keep], -1)
        top_ymax = np.expand_dims(detections[:, 5][keep], -1)

        # Remove the gray-bar offset introduced by letterboxing.
        boxes = m2det_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * img_w +
                                                0.5).astype('int32'))
        thickness = (img_h + img_w) // self.model_image_size[0]

        for idx, cls_id in enumerate(top_label_indices):
            # Labels from the decoder are shifted by one relative to
            # ``class_names`` / ``colors``.
            predicted_class = self.class_names[int(cls_id - 1)]
            score = top_conf[idx]

            # Grow the box by 5 pixels on each side.
            top, left, bottom, right = boxes[idx]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            # Round to the nearest pixel and clip to the image bounds.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 0.5).astype('int32'))

            # Draw the box and its caption.
            label_text = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label_text, font)
            label_bytes = label_text.encode('utf-8')
            print(label_bytes)

            # Caption goes above the box when there is room, else inside.
            caption_top = (top - label_size[1]
                           if top - label_size[1] >= 0 else top + 1)
            text_origin = np.array([left, caption_top])

            # The outline is built from nested one-pixel rectangles.
            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=self.colors[int(cls_id - 1)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(cls_id - 1)])
            draw.text(text_origin,
                      str(label_bytes, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw

        return image
def detect_image(sess, image):
    """Run YOLO detection on ``image`` and draw the labelled boxes onto it.

    Args:
        sess: Session object forwarded to ``tf_out``.
        image: Source picture; its ``size`` attribute is read and it is drawn
            on in place through ``ImageDraw.Draw``.

    Returns:
        The same ``image`` object, with boxes drawn when any were found.

    Raises:
        ValueError: If either entry of the module-level ``model_image_size``
            is not a multiple of 32.
    """
    start = timer()

    image_shape = (image.size[1], image.size[0], 3)

    # These two checks used to be bare ``expr, 'message'`` tuples (the
    # ``assert`` keyword was missing), so they never had any effect.
    if model_image_size[0] % 32 != 0:
        raise ValueError('Multiples of 32 required')
    if model_image_size[1] % 32 != 0:
        raise ValueError('Multiples of 32 required')
    boxed_image = letterbox_image(image, tuple(reversed(model_image_size)))

    # Scale pixel values to [0, 1] and add the batch axis.
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)

    outs = tf_out(sess, image_data)

    out_boxes, out_classes, out_scores = yolo_out(outs, image_shape)

    print(model_image_size)
    if out_boxes is not None:
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                                  size=np.floor(3e-2 * image.size[1] +
                                                0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            # Each box is unpacked as (x, y, width, height); round to the
            # nearest pixel and clip to the image bounds.
            x, y, w, h = box
            top = max(0, np.floor(y + 0.5).astype('int32'))
            left = max(0, np.floor(x + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(y + h + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(x + w + 0.5).astype('int32'))
            print(label, (left, top), (right, bottom),
                  ((right - left), (bottom - top)))

            # Caption goes above the box when there is room, else inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # The outline is built from nested one-pixel rectangles; a
            # separate loop name avoids shadowing the detection index.
            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset],
                    outline=colors[c])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

    else:
        print('No Boxes')

    end = timer()
    print(end - start)
    return image