Example No. 1
    def detect_image(self, image_id, image):
        self.confidence = 0.01
        self.iou = 0.45
        f = open("./input/detection-results/" + image_id + ".txt", "w")

        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)

        #-----------------------------------------------------------#
        #   Image preprocessing and normalization.
        #-----------------------------------------------------------#
        photo = np.transpose(
            np.array(image, dtype=np.float32) / 255, (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            roi_cls_locs, roi_scores, rois, _ = self.model(images)

            #-------------------------------------------------------------#
            #   Decode the proposals with the classifier predictions to obtain the final boxes
            #-------------------------------------------------------------#
            outputs = self.decodebox.forward(roi_cls_locs[0],
                                             roi_scores[0],
                                             rois,
                                             height=height,
                                             width=width,
                                             nms_iou=self.iou,
                                             score_thresh=self.confidence)
            #---------------------------------------------------------#
            #   If no objects are detected, close the output file and return the original image
            #---------------------------------------------------------#
            if len(outputs) == 0:
                f.close()
                return old_image
            outputs = np.array(outputs)
            bbox = outputs[:, :4]
            label = outputs[:, 4]
            conf = outputs[:, 5]

            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height

        for i, c in enumerate(label):
            predicted_class = self.class_names[int(c)]
            score = str(conf[i])

            left, top, right, bottom = bbox[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
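
The examples above rely on a get_new_img_size helper that is not shown; in this style of Faster R-CNN code it typically rescales the image so the shorter side becomes 600 pixels while preserving the aspect ratio. A minimal sketch under that assumption (the real helper may differ):

def get_new_img_size(width, height, img_min_side=600):
    # Scale so the shorter side becomes img_min_side, keeping the aspect ratio.
    if width <= height:
        f = float(img_min_side) / width
        resized_height = int(f * height)
        resized_width = img_min_side
    else:
        f = float(img_min_side) / height
        resized_width = int(f * width)
        resized_height = img_min_side
    return resized_width, resized_height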
Example No. 2
    def get_FPS(self, image, test_interval):
        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        #---------------------------------------------------------#
        #   Resize the original image so that its shorter side is 600
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)

        #-----------------------------------------------------------#
        #   Image preprocessing and normalization.
        #-----------------------------------------------------------#
        photo = np.transpose(
            np.array(image, dtype=np.float32) / 255, (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            roi_cls_locs, roi_scores, rois, _ = self.model(images)
            #-------------------------------------------------------------#
            #   Decode the proposals with the classifier predictions to obtain the final boxes
            #-------------------------------------------------------------#
            outputs = self.decodebox.forward(roi_cls_locs[0],
                                             roi_scores[0],
                                             rois,
                                             height=height,
                                             width=width,
                                             nms_iou=self.iou,
                                             score_thresh=self.confidence)
            #---------------------------------------------------------#
            #   If objects were detected, extract the boxes and rescale them to the original image size
            #---------------------------------------------------------#
            if len(outputs) > 0:
                outputs = np.array(outputs)
                bbox = outputs[:, :4]
                label = outputs[:, 4]
                conf = outputs[:, 5]

                bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
                bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height

        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                roi_cls_locs, roi_scores, rois, _ = self.model(images)
                #-------------------------------------------------------------#
                #   Decode the proposals with the classifier predictions to obtain the final boxes
                #-------------------------------------------------------------#
                outputs = self.decodebox.forward(roi_cls_locs[0],
                                                 roi_scores[0],
                                                 rois,
                                                 height=height,
                                                 width=width,
                                                 nms_iou=self.iou,
                                                 score_thresh=self.confidence)
                #---------------------------------------------------------#
                #   If objects were detected, extract the boxes and rescale them to the original image size
                #---------------------------------------------------------#
                if len(outputs) > 0:
                    outputs = np.array(outputs)
                    bbox = outputs[:, :4]
                    label = outputs[:, 4]
                    conf = outputs[:, 5]

                    bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
                    bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
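
get_FPS above follows a common benchmarking pattern: run the full forward pass once outside the timed loop as a warm-up, then average the wall-clock time over test_interval repetitions. A framework-agnostic sketch of that pattern (run_once stands in for the full inference call):

import time

def time_inference(run_once, test_interval=100):
    # Warm-up call so lazy initialization / CUDA kernel compilation
    # does not skew the measurement.
    run_once()
    t1 = time.time()
    for _ in range(test_interval):
        run_once()
    t2 = time.time()
    # Average seconds per forward pass; FPS is 1 / tact_time.
    return (t2 - t1) / test_interval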
Example No. 3
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        #---------------------------------------------------------#
        #   Resize the original image so that its shorter side is 600
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)

        #-----------------------------------------------------------#
        #   Image preprocessing and normalization.
        #-----------------------------------------------------------#
        photo = np.transpose(
            np.array(image, dtype=np.float32) / 255, (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            roi_cls_locs, roi_scores, rois, _ = self.model(images)
            #-------------------------------------------------------------#
            #   Decode the proposals with the classifier predictions to obtain the final boxes
            #-------------------------------------------------------------#
            outputs = self.decodebox.forward(roi_cls_locs[0],
                                             roi_scores[0],
                                             rois,
                                             height=height,
                                             width=width,
                                             nms_iou=self.iou,
                                             score_thresh=self.confidence)
            #---------------------------------------------------------#
            #   If no objects are detected, return the original image
            #---------------------------------------------------------#
            if len(outputs) == 0:
                return old_image
            outputs = np.array(outputs)
            bbox = outputs[:, :4]
            label = outputs[:, 4]
            conf = outputs[:, 5]

            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2,
            1)

        image = old_image
        for i, c in enumerate(label):
            predicted_class = self.class_names[int(c)]
            score = conf[i]

            left, top, right, bottom = bbox[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the bounding box and its label
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw

        return image
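
Note that draw.textsize was removed in Pillow 10; if this example is run with a recent Pillow, a small compatibility helper based on textbbox (available since Pillow 8) can stand in for it. A minimal sketch:

def text_size(draw, text, font):
    # textbbox returns (left, top, right, bottom) of the rendered text;
    # convert it to the (width, height) pair that textsize used to return.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    return right - left, bottom - top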
Example No. 4
    def get_map_txt(self, image_id, image, class_names, map_out_path):
        f = open(
            os.path.join(map_out_path,
                         "detection-results/" + image_id + ".txt"), "w")
        #---------------------------------------------------#
        #   Compute the height and width of the input image
        #---------------------------------------------------#
        image_shape = np.array(np.shape(image)[0:2])
        input_shape = get_new_img_size(image_shape[0], image_shape[1])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors when predicting on grayscale images.
        #   The code only supports prediction on RGB images; all other image types are converted to RGB
        #---------------------------------------------------------#
        image = cvtColor(image)
        #---------------------------------------------------------#
        #   Resize the original image so that its shorter side is 600
        #---------------------------------------------------------#
        image_data = resize_image(image, [input_shape[1], input_shape[0]])
        #---------------------------------------------------------#
        #   Add the batch_size dimension
        #---------------------------------------------------------#
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, dtype='float32')), 0)

        #---------------------------------------------------------#
        #   Obtain the RPN predictions and the base_layer
        #---------------------------------------------------------#
        rpn_pred = self.model_rpn(image_data)
        rpn_pred = [x.numpy() for x in rpn_pred]
        #---------------------------------------------------------#
        #   Generate the anchors (prior boxes) and decode them
        #---------------------------------------------------------#
        anchors = get_anchors(input_shape, self.backbone, self.anchors_size)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   Use the proposals to obtain the classifier network predictions
        #-------------------------------------------------------------#
        classifier_pred = self.model_classifier(
            [rpn_pred[2], rpn_results[:, :, [1, 0, 3, 2]]])
        classifier_pred = [x.numpy() for x in classifier_pred]
        #-------------------------------------------------------------#
        #   Decode the proposals with the classifier predictions to obtain the final boxes
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, rpn_results, image_shape, input_shape,
            self.confidence)

        #--------------------------------------#
        #   If no objects are detected, close the output file and return
        #--------------------------------------#
        if len(results[0]) <= 0:
            f.close()
            return

        top_label = np.array(results[0][:, 5], dtype='int32')
        top_conf = results[0][:, 4]
        top_boxes = results[0][:, :4]

        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box = top_boxes[i]
            score = str(top_conf[i])

            top, left, bottom, right = box

            if predicted_class not in class_names:
                continue

            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
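
The file written here uses one "class score left top right bottom" line per detection, the format expected by common mAP evaluation scripts. A minimal sketch of reading such a file back (the helper name is illustrative, and class names are assumed not to contain spaces):

def read_detection_results(txt_path):
    detections = []
    with open(txt_path) as f:
        for line in f:
            cls, score, left, top, right, bottom = line.split()
            detections.append((cls, float(score),
                               int(left), int(top), int(right), int(bottom)))
    return detections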
Example No. 5
    def get_FPS(self, image, test_interval):
        #---------------------------------------------------#
        #   Compute the height and width of the input image
        #---------------------------------------------------#
        image_shape = np.array(np.shape(image)[0:2])
        input_shape = get_new_img_size(image_shape[0], image_shape[1])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors when predicting on grayscale images.
        #   The code only supports prediction on RGB images; all other image types are converted to RGB
        #---------------------------------------------------------#
        image = cvtColor(image)
        #---------------------------------------------------------#
        #   Resize the original image so that its shorter side is 600
        #---------------------------------------------------------#
        image_data = resize_image(image, [input_shape[1], input_shape[0]])
        #---------------------------------------------------------#
        #   Add the batch_size dimension
        #---------------------------------------------------------#
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, dtype='float32')), 0)

        #---------------------------------------------------------#
        #   Obtain the RPN predictions and the base_layer
        #---------------------------------------------------------#
        rpn_pred = self.model_rpn(image_data)
        rpn_pred = [x.numpy() for x in rpn_pred]
        #---------------------------------------------------------#
        #   Generate the anchors (prior boxes) and decode them
        #---------------------------------------------------------#
        anchors = get_anchors(input_shape, self.backbone, self.anchors_size)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   Use the proposals to obtain the classifier network predictions
        #-------------------------------------------------------------#
        classifier_pred = self.model_classifier(
            [rpn_pred[2], rpn_results[:, :, [1, 0, 3, 2]]])
        classifier_pred = [x.numpy() for x in classifier_pred]
        #-------------------------------------------------------------#
        #   Decode the proposals with the classifier predictions to obtain the final boxes
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, rpn_results, image_shape, input_shape,
            self.confidence)

        t1 = time.time()
        for _ in range(test_interval):
            #---------------------------------------------------------#
            #   Obtain the RPN predictions and the base_layer
            #---------------------------------------------------------#
            rpn_pred = self.model_rpn(image_data)
            rpn_pred = [x.numpy() for x in rpn_pred]
            #---------------------------------------------------------#
            #   Generate the anchors (prior boxes) and decode them
            #---------------------------------------------------------#
            anchors = get_anchors(input_shape, self.backbone,
                                  self.anchors_size)
            rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)
            temp_ROIs = rpn_results[:, :, [1, 0, 3, 2]]

            #-------------------------------------------------------------#
            #   Use the proposals to obtain the classifier network predictions
            #-------------------------------------------------------------#
            classifier_pred = self.model_classifier([rpn_pred[2], temp_ROIs])
            classifier_pred = [x.numpy() for x in classifier_pred]
            #-------------------------------------------------------------#
            #   Decode the proposals with the classifier predictions to obtain the final boxes
            #-------------------------------------------------------------#
            results = self.bbox_util.detection_out_classifier(
                classifier_pred, rpn_results, image_shape, input_shape,
                self.confidence)

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
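
The rpn_results[:, :, [1, 0, 3, 2]] indexing used above swaps the x/y order of each box pair before the proposals are fed to the classifier head (TensorFlow-style RoI cropping typically expects boxes as (y1, x1, y2, x2), but the exact convention of this classifier head is an assumption). A small sketch of just the reindexing:

import numpy as np

boxes_xyxy = np.array([[[10., 20., 110., 220.]]])   # (x1, y1, x2, y2)
boxes_yxyx = boxes_xyxy[:, :, [1, 0, 3, 2]]          # (y1, x1, y2, x2)
print(boxes_yxyx)                                    # [[[ 20.  10. 220. 110.]]]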
Example No. 6
    def detect_image(self, image, crop=False):
        #---------------------------------------------------#
        #   Compute the height and width of the input image
        #---------------------------------------------------#
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------#
        #   Compute the height and width of the image fed into the network,
        #   making sure the shorter side is 600
        #---------------------------------------------------#
        input_shape = get_new_img_size(image_shape[0], image_shape[1])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors when predicting on grayscale images.
        #   The code only supports prediction on RGB images; all other image types are converted to RGB
        #---------------------------------------------------------#
        image = cvtColor(image)
        #---------------------------------------------------------#
        #   Resize the original image so that its shorter side is 600
        #---------------------------------------------------------#
        image_data = resize_image(image, [input_shape[1], input_shape[0]])
        #---------------------------------------------------------#
        #   Add the batch_size dimension
        #---------------------------------------------------------#
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, dtype='float32')), 0)

        #---------------------------------------------------------#
        #   Obtain the RPN predictions and the base_layer
        #---------------------------------------------------------#
        rpn_pred = self.model_rpn(image_data)
        rpn_pred = [x.numpy() for x in rpn_pred]
        #---------------------------------------------------------#
        #   Generate the anchors (prior boxes) and decode them
        #---------------------------------------------------------#
        anchors = get_anchors(input_shape, self.backbone, self.anchors_size)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   Use the proposals to obtain the classifier network predictions
        #-------------------------------------------------------------#
        classifier_pred = self.model_classifier(
            [rpn_pred[2], rpn_results[:, :, [1, 0, 3, 2]]])
        classifier_pred = [x.numpy() for x in classifier_pred]
        #-------------------------------------------------------------#
        #   Decode the proposals with the classifier predictions to obtain the final boxes
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, rpn_results, image_shape, input_shape,
            self.confidence)

        if len(results[0]) == 0:
            return image

        top_label = np.array(results[0][:, 5], dtype='int32')
        top_conf = results[0][:, 4]
        top_boxes = results[0][:, :4]
        #---------------------------------------------------------#
        #   Set the font and the box outline thickness
        #---------------------------------------------------------#
        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))
        thickness = max(
            (np.shape(image)[0] + np.shape(image)[1]) // input_shape[0], 1)

        #---------------------------------------------------------#
        #   Optionally crop the detected objects
        #---------------------------------------------------------#
        if crop:
            for i, c in list(enumerate(top_label)):
                top, left, bottom, right = top_boxes[i]
                top = max(0, np.floor(top).astype('int32'))
                left = max(0, np.floor(left).astype('int32'))
                bottom = min(image.size[1], np.floor(bottom).astype('int32'))
                right = min(image.size[0], np.floor(right).astype('int32'))

                dir_save_path = "img_crop"
                if not os.path.exists(dir_save_path):
                    os.makedirs(dir_save_path)
                crop_image = image.crop([left, top, right, bottom])
                crop_image.save(os.path.join(dir_save_path,
                                             "crop_" + str(i) + ".png"),
                                quality=95,
                                subsampling=0)
                print("save crop_" + str(i) + ".png to " + dir_save_path)
        #---------------------------------------------------------#
        #   Draw the detections on the image
        #---------------------------------------------------------#
        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box = top_boxes[i]
            score = top_conf[i]

            top, left, bottom, right = box

            top = max(0, np.floor(top).astype('int32'))
            left = max(0, np.floor(left).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom).astype('int32'))
            right = min(image.size[0], np.floor(right).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[c])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw

        return image
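
The per-coordinate max/min clamping above keeps each box inside the image; the same thing can be done in one vectorized step. A minimal sketch, assuming boxes stored as (top, left, bottom, right) rows as in this example (the helper name is illustrative):

import numpy as np

def clip_boxes(boxes, img_w, img_h):
    # boxes: float array of shape (N, 4) with rows (top, left, bottom, right)
    boxes = boxes.copy()
    boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, img_h)  # top, bottom
    boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, img_w)  # left, right
    return boxes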
Example No. 7
    def detect_image(self, image):
        with torch.no_grad():
            start_time = time.time()
            image_shape = np.array(np.shape(image)[0:2])
            old_width = image_shape[1]
            old_height = image_shape[0]
            old_image = copy.deepcopy(image)
            width, height = get_new_img_size(old_width, old_height)
            image = image.resize([width, height], Image.BICUBIC)
            photo = np.array(image, dtype=np.float32) / 255
            photo = np.transpose(photo, (2, 0, 1))

            images = []
            images.append(photo)
            images = np.asarray(images)
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            roi_cls_locs, roi_scores, rois, roi_indices = self.model(images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs, roi_scores, rois, height=height, width=width, nms_iou=self.iou,
                                        score_thresh=self.confidence)
            if len(outputs) == 0:
                return old_image
            bbox = outputs[:, :4]
            conf = outputs[:, 4]
            label = outputs[:, 5]

            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
            bbox = np.array(bbox, np.int32)
        image = old_image
        thickness = (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2
        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
        for i, c in enumerate(label):
            predicted_class = self.class_names[int(c)]
            score = conf[i]

            left, top, right, bottom = bbox[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

            # Draw the bounding box and its label
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle(
                    [left + i, top + i, right - i, bottom - i],
                    outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
            del draw

        print("time:", time.time() - start_time)
        return image
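
The two slicing lines bbox[:, 0::2] ... / width * old_width map boxes predicted on the resized input back to the original image size. As a standalone sketch of that rescaling (box layout (x1, y1, x2, y2) as in this example; the function name is illustrative):

import numpy as np

def rescale_boxes(bbox, width, height, old_width, old_height):
    # bbox: float array (N, 4) in (x1, y1, x2, y2) order, in resized-image coordinates
    bbox = bbox.astype(np.float32).copy()
    bbox[:, 0::2] = bbox[:, 0::2] / width * old_width     # x coordinates
    bbox[:, 1::2] = bbox[:, 1::2] / height * old_height   # y coordinates
    return bbox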