Ejemplo n.º 1
    def get_map_txt(self, image_id, image, class_names, map_out_path):
        """Write one line per detection to ``detection-results/<image_id>.txt`` under ``map_out_path``.

        NOTE(review): this excerpt appears truncated; see the inline notes.
        """
        f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w") 
        #   Convert the image to RGB here so grayscale inputs do not raise errors during prediction.
        image       = cvtColor(image)
        #   Pad the image with gray bars for a distortion-free resize.
        #   A plain resize can also be used for recognition.
        image_data  = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image)
        #   Add the batch dimension and normalize.
        image_data  = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)

        #   NOTE(review): the call below looks incomplete in this excerpt (the mapping
        #   entries are not wrapped and the call is never closed) - restore before running.
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0

        for i, c in enumerate(out_classes):
            predicted_class             = self.class_names[int(c)]
            score                       = str(out_scores[i])
            top, left, bottom, right    = out_boxes[i]
            #   NOTE(review): the body of this class filter is missing in this excerpt.
            if predicted_class not in class_names:

            #   Output columns: class, score (first 6 characters), left, top, right, bottom.
            f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))

Ejemplo n.º 2
    def get_FPS(self, image, test_interval):
        """Return the mean wall-clock seconds of one predict-and-decode pass.

        One untimed pass is executed first, followed by ``test_interval``
        timed passes; the elapsed time is divided by ``test_interval``.
        """
        #   First two dimensions of the image as passed in, read before conversion.
        source_hw = np.array(np.shape(image)[0:2])
        #   Only RGB prediction is supported, so every input is converted to RGB
        #   (this also keeps grayscale inputs from failing during prediction).
        rgb_image = cvtColor(image)
        #   Resize to the network input size; gray-bar padding is controlled by
        #   self.letterbox_image.
        target_size = (self.input_shape[1], self.input_shape[0])
        resized = resize_image(rgb_image, target_size, self.letterbox_image)
        #   Add the batch dimension, then preprocess / normalize.
        batch = np.expand_dims(np.array(resized, dtype='float32'), 0)
        batch = preprocess_input(batch)

        def run_once():
            #   Forward pass followed by decoding of the raw predictions.
            raw = self.get_pred(batch).numpy()
            return self.bbox_util.decode_box(raw, self.anchors, source_hw,
                                             self.input_shape, self.letterbox_image,
                                             confidence=self.confidence)

        #   Untimed first pass.
        run_once()

        started = time.time()
        for _ in range(test_interval):
            run_once()
        elapsed = time.time() - started
        return elapsed / test_interval
Ejemplo n.º 3
    def get_FPS(self, image, test_interval):
        """Return the mean wall-clock seconds of one ``self.get_pred`` call.

        One untimed call is made first, followed by ``test_interval`` timed
        calls; the elapsed time is divided by ``test_interval``.
        """
        #   Only RGB prediction is supported, so every input is converted to RGB
        #   (this also keeps grayscale inputs from failing during prediction).
        rgb_image = cvtColor(image)
        #   Resize to the network input size; gray-bar padding is controlled by
        #   self.letterbox_image.
        target_size = (self.input_shape[1], self.input_shape[0])
        resized = resize_image(rgb_image, target_size, self.letterbox_image)
        #   Normalize, then add the batch dimension.
        batch = preprocess_input(np.array(resized, dtype='float32'))
        batch = np.expand_dims(batch, 0)
        #   The second network input is built from image.size[1] and image.size[0], with a batch axis.
        size_pair = np.array([rgb_image.size[1], rgb_image.size[0]], dtype='float32')
        size_batch = np.expand_dims(size_pair, 0)

        #   Untimed first pass.
        self.get_pred(batch, size_batch)

        started = time.time()
        for _ in range(test_interval):
            self.get_pred(batch, size_batch)
        elapsed = time.time() - started
        return elapsed / test_interval
Ejemplo n.º 4
    def get_FPS(self, image, test_interval):
        """Time ``test_interval`` forward passes of ``self.net`` and return the mean seconds per pass.

        NOTE(review): both ``F.softmax(`` calls are unclosed in this excerpt; see the inline notes.
        """
        #   Convert the image to RGB here so grayscale inputs do not raise errors during prediction.
        #   Only RGB prediction is supported; every other image type is converted to RGB.
        image = cvtColor(image)
        #   Pad the image with gray bars for a distortion-free resize.
        #   A plain resize can also be used for recognition.
        image_data, nw, nh = resize_image(
            image, (self.input_shape[1], self.input_shape[0]))
        #   Add the batch dimension (after moving channels first).
        image_data = np.expand_dims(
            np.transpose(preprocess_input(np.array(image_data, np.float32)),
                         (2, 0, 1)), 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()

            #   Run the image through the network.
            pr = self.net(images)[0]
            #   Take the class of each pixel.
            #   NOTE(review): the remaining arguments / closing bracket of this call are missing here.
            pr = F.softmax(pr.permute(1, 2, 0),
            #   Crop away the gray-bar padding.
            pr = pr[int((self.input_shape[0] - nh) // 2) : int((self.input_shape[0] - nh) // 2 + nh), \
                    int((self.input_shape[1] - nw) // 2) : int((self.input_shape[1] - nw) // 2 + nw)]

        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                #   Run the image through the network.
                pr = self.net(images)[0]
                #   Take the class of each pixel.
                #   NOTE(review): the remaining arguments / closing bracket of this call are missing here.
                pr = F.softmax(pr.permute(1, 2, 0),
                #   Crop away the gray-bar padding.
                pr = pr[int((self.input_shape[0] - nh) // 2) : int((self.input_shape[0] - nh) // 2 + nh), \
                        int((self.input_shape[1] - nw) // 2) : int((self.input_shape[1] - nw) // 2 + nw)]
        t2 = time.time()
        #   Mean seconds per timed iteration.
        tact_time = (t2 - t1) / test_interval
        return tact_time
Ejemplo n.º 5
    def get_map_txt(self, image_id, image, class_names, map_out_path):
        """Write one line per detection to a ``detection-results/<image_id>.txt`` file.

        NOTE(review): several statements are truncated in this excerpt; see the inline notes.
        """
        #   NOTE(review): the first argument line of this open() call is missing here
        #   (map_out_path is never used in the visible code).
        f = open(
                         "detection-results/" + image_id + ".txt"), "w")
        image_shape = np.array(np.shape(image)[0:2])
        #   Convert the image to RGB here so grayscale inputs do not raise errors during prediction.
        #   Only RGB prediction is supported; every other image type is converted to RGB.
        image = cvtColor(image)
        #   Pad the image with gray bars for a distortion-free resize.
        #   A plain resize can also be used for recognition.
        #   NOTE(review): the call below is never closed in this excerpt.
        image_data = resize_image(image,
                                  (self.input_shape[1], self.input_shape[0]),
        #   Add the batch dimension, preprocess the image, and normalize.
        image_data = preprocess_input(
            np.expand_dims(np.array(image_data, dtype='float32'), 0))

        preds = self.m2det.predict(image_data)
        #   Decode the predictions.
        #   NOTE(review): the remaining arguments of this call are missing in this excerpt.
        results = self.bbox_util.decode_box(preds,
        #   No detections case (original comment: "return the original image");
        #   NOTE(review): the branch body is missing in this excerpt.
        if len(results[0]) <= 0:

        #   Column 4 is read as the label, column 5 as the confidence, columns 0-3 as the box.
        top_label = results[0][:, 4]
        top_conf = results[0][:, 5]
        top_boxes = results[0][:, :4]

        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box = top_boxes[i]
            score = str(top_conf[i])

            top, left, bottom, right = box

            #   NOTE(review): the body of this class filter is missing in this excerpt.
            if predicted_class not in class_names:

            #   Output columns: class, score (first 6 characters), left, top, right, bottom.
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

Ejemplo n.º 6
    def get_FPS(self, image, test_interval):
        """Time ``test_interval`` predict-and-postprocess passes and return the mean seconds per pass.

        NOTE(review): the ``resize_image(`` and ``postprocess(`` calls are unclosed in this excerpt.
        """
        #   Get the height and width of the input image.
        image_shape = np.array(np.shape(image)[0:2])
        #   Convert the image to RGB here so grayscale inputs do not raise errors during prediction.
        #   Only RGB prediction is supported; every other image type is converted to RGB.
        image = cvtColor(image)
        #   Pad the image with gray bars for a distortion-free resize.
        #   A plain resize can also be used for recognition.
        #   NOTE(review): the call below is never closed in this excerpt.
        image_data = resize_image(image,
                                  (self.input_shape[1], self.input_shape[0]),
        #   Add the batch dimension, preprocess the image, and normalize.
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, dtype='float32')), 0)

        outputs = self.centernet.predict(image_data)
        #   CenterNet post-processing, including thresholding and conventional non-maximum suppression.
        #   For CenterNet, locating the center is crucial. Large objects contain a lot of local information,
        #   so their center point is harder to determine, and max-pooling-based NMS cannot remove the local boxes.
        #   A conventional NMS step is available here and can be switched on or off.
        #   In practice the extra NMS makes little difference with an hourglass backbone but a large one with resnet.
        #   NOTE(review): the remaining arguments of this call are missing in this excerpt.
        results = self.bbox_util.postprocess(outputs,

        t1 = time.time()
        for _ in range(test_interval):
            outputs = self.centernet.predict(image_data)
            #   CenterNet post-processing, including thresholding and conventional non-maximum suppression.
            #   For CenterNet, locating the center is crucial. Large objects contain a lot of local information,
            #   so their center point is harder to determine, and max-pooling-based NMS cannot remove the local boxes.
            #   A conventional NMS step is available here and can be switched on or off.
            #   In practice the extra NMS makes little difference with an hourglass backbone but a large one with resnet.
            #   NOTE(review): the remaining arguments of this call are missing in this excerpt.
            results = self.bbox_util.postprocess(outputs,

        t2 = time.time()
        #   Mean seconds per timed iteration.
        tact_time = (t2 - t1) / test_interval
        return tact_time
Ejemplo n.º 7
    def get_map_txt(self, image_id, image, class_names, map_out_path):
        """Write one line per detection to ``detection-results/<image_id>.txt`` under ``map_out_path``.

        NOTE(review): several statements are truncated in this excerpt; see the inline notes.
        """
        f = open(os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w")
        # ---------------------------------------------------#
        #   Get the height and width of the input image.
        # ---------------------------------------------------#
        image_shape = np.array(np.shape(image)[0:2])
        # ---------------------------------------------------------#
        #   Convert the image to RGB here so grayscale inputs do not raise errors during prediction.
        #   Only RGB prediction is supported; every other image type is converted to RGB.
        # ---------------------------------------------------------#
        image = cvtColor(image)
        # ---------------------------------------------------------#
        #   Pad the image with gray bars for a distortion-free resize.
        #   A plain resize can also be used for recognition.
        # ---------------------------------------------------------#
        image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        # ---------------------------------------------------------#
        #   Add the batch dimension, preprocess the image, and normalize.
        # ---------------------------------------------------------#
        image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)

        outputs = self.get_pred(image_data).numpy()
        # --------------------------------------------------------------------------------------------#
        #   CenterNet post-processing, including thresholding and conventional non-maximum suppression.
        #   For CenterNet, locating the center is crucial. Large objects contain a lot of local information,
        #   so their center point is harder to determine, and max-pooling-based NMS cannot remove the local boxes.
        #   A conventional NMS step is available here and can be switched on or off.
        #   In practice the extra NMS makes little difference with an hourglass backbone but a large one with resnet.
        #   NOTE(review): the trailing arguments / closing bracket of the call below are missing in this excerpt.
        # --------------------------------------------------------------------------------------------#
        results = self.bbox_util.postprocess(outputs, self.nms, image_shape, self.input_shape, self.letterbox_image,

        # --------------------------------------#
        #   No detections case (original comment: "return the original image");
        #   NOTE(review): the branch body is missing in this excerpt.
        # --------------------------------------#
        if results[0] is None:

        #   Column 5 is read as the label, column 4 as the confidence, columns 0-3 as the box.
        top_label = np.array(results[0][:, 5], dtype='int32')
        top_conf = results[0][:, 4]
        top_boxes = results[0][:, :4]

        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box = top_boxes[i]
            score = str(top_conf[i])

            top, left, bottom, right = box

            #   NOTE(review): the body of this class filter is missing in this excerpt.
            if predicted_class not in class_names:

            #   Output columns: class, score (first 6 characters), left, top, right, bottom.
            f.write("%s %s %s %s %s %s\n" % (
            predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)), str(int(bottom))))

Ejemplo n.º 8
    def detect_heatmap(self, image, heatmap_save_path):
        """Overlay a score heatmap built from ``self.yolo_model`` outputs on ``image`` with matplotlib.

        NOTE(review): the ``resize_image(`` call is unclosed and no statement that
        writes to ``heatmap_save_path`` is visible in this excerpt.
        """
        import cv2
        import matplotlib.pyplot as plt

        def sigmoid(x):
            # Logistic function, applied element-wise.
            y = 1.0 / (1.0 + np.exp(-x))
            return y

        #   Convert the image to RGB here so grayscale inputs do not raise errors during prediction.
        #   Only RGB prediction is supported; every other image type is converted to RGB.
        image = cvtColor(image)
        #   Pad the image with gray bars for a distortion-free resize.
        #   A plain resize can also be used for recognition.
        #   NOTE(review): the call below is never closed in this excerpt.
        image_data = resize_image(image,
                                  (self.input_shape[1], self.input_shape[0]),
        #   Add the batch dimension and normalize.
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, dtype='float32')), 0)

        output = self.yolo_model.predict(image_data)

        plt.imshow(image, alpha=1)
        #   Accumulator with image.size[1] rows and image.size[0] columns.
        mask = np.zeros((image.size[1], image.size[0]))
        for sub_output in output:
            b, h, w, c = np.shape(sub_output)
            #   Split the last axis into 3 groups and keep the first batch element.
            sub_output = np.reshape(sub_output, [b, h, w, 3, -1])[0]
            #   Sigmoid of entry 4 of each group, maximised over the 3 groups.
            score = np.max(sigmoid(sub_output[..., 4]), -1)
            score = cv2.resize(score, (image.size[0], image.size[1]))
            normed_score = (score * 255).astype('uint8')
            #   Keep the element-wise maximum across all outputs.
            mask = np.maximum(mask, normed_score)

        plt.imshow(mask, alpha=0.5, interpolation='nearest', cmap="jet")

        plt.margins(0, 0)
        print("Save to the " + heatmap_save_path)
    def get_map_txt(self, image_id, image, class_names, map_out_path):
        """Write one line per detection to ``detection-results/<image_id>.txt`` under ``map_out_path``.

        NOTE(review): two ``if`` statements have no body in this excerpt; see the inline notes.
        """
        f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w") 
        image_shape = np.array(np.shape(image)[0:2])
        #   Convert the image to RGB here so grayscale inputs do not raise errors during prediction.
        #   Only RGB prediction is supported; every other image type is converted to RGB.
        image       = cvtColor(image)
        #   Pad the image with gray bars for a distortion-free resize.
        #   A plain resize can also be used for recognition.
        image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        #   Add the batch dimension, preprocess the image, and normalize (channels moved first).
        image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()
            #   Run the network.
            _, regression, classification, anchors = self.net(images)
            #   Decode the predictions.
            outputs     = decodebox(regression, anchors, self.input_shape)
            results     = non_max_suppression(torch.cat([outputs, classification], axis=-1), self.input_shape, 
                                    image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou)
            #   NOTE(review): the body of this no-detections check is missing in this excerpt.
            if results[0] is None: 

            #   Column 5 is read as the label, column 4 as the confidence, columns 0-3 as the box.
            top_label   = np.array(results[0][:, 5], dtype = 'int32')
            top_conf    = results[0][:, 4]
            top_boxes   = results[0][:, :4]

        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box             = top_boxes[i]
            score           = str(top_conf[i])

            top, left, bottom, right = box
            #   NOTE(review): the body of this class filter is missing in this excerpt.
            if predicted_class not in class_names:

            #   Output columns: class, score (first 6 characters), left, top, right, bottom.
            f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))

Ejemplo n.º 10
    def get_FPS(self, image, test_interval):
        """Return the mean wall-clock seconds of one predict / crop / argmax pass.

        One untimed pass is executed first, followed by ``test_interval``
        timed passes; the elapsed time is divided by ``test_interval``.

        The per-pixel class map is taken with ``argmax`` directly on the
        cropped prediction. The previous code additionally reshaped that
        (nh, nw) map to the full network input size, which raises
        ``ValueError`` whenever ``resize_image`` added padding (nh or nw
        smaller than the input size). Without padding the reshape was a no-op,
        so dropping it keeps existing behavior and removes the crash.
        """
        #   Only RGB prediction is supported, so every input is converted to RGB
        #   (this also keeps grayscale inputs from failing during prediction).
        image = cvtColor(image)
        #   Resize to the network input size; nw / nh are the resized content
        #   width and height returned by resize_image.
        image_data, nw, nh = resize_image(
            image, (self.input_shape[1], self.input_shape[0]))
        #   Normalize and add the batch dimension.
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, np.float32)), 0)

        #   Bounds of the centered content region inside the network output;
        #   they do not change between passes, so compute them once.
        top = int((self.input_shape[0] - nh) // 2)
        bottom = int((self.input_shape[0] - nh) // 2 + nh)
        left = int((self.input_shape[1] - nw) // 2)
        right = int((self.input_shape[1] - nw) // 2 + nw)

        def run_once():
            #   Run the image through the network.
            pr = self.model.predict(image_data)[0]
            #   Crop away the padded border.
            pr = pr[top:bottom, left:right]
            #   Class index of every remaining pixel.
            return pr.argmax(axis=-1)

        #   Untimed first pass.
        run_once()

        t1 = time.time()
        for _ in range(test_interval):
            run_once()
        t2 = time.time()

        tact_time = (t2 - t1) / test_interval
        return tact_time
    def get_FPS(self, image, test_interval):
        """Return the mean wall-clock seconds of one forward / decode / NMS pass.

        One untimed pass is executed first, followed by ``test_interval``
        timed passes; the elapsed time is divided by ``test_interval``.
        """
        #   First two dimensions of the image as passed in, read before conversion.
        source_hw = np.array(np.shape(image)[0:2])
        #   Only RGB prediction is supported, so every input is converted to RGB
        #   (this also keeps grayscale inputs from failing during prediction).
        rgb_image = cvtColor(image)
        #   Resize to the network input size; gray-bar padding is controlled by
        #   self.letterbox_image.
        target_size = (self.input_shape[1], self.input_shape[0])
        resized = resize_image(rgb_image, target_size, self.letterbox_image)
        #   Normalize, move channels first, then add the batch dimension.
        normalized = preprocess_input(np.array(resized, dtype='float32'))
        net_input = np.expand_dims(np.transpose(normalized, (2, 0, 1)), 0)

        def infer_once(batch):
            #   Forward pass, box decoding, then non-maximum suppression.
            _, regression, classification, anchors = self.net(batch)
            decoded = decodebox(regression, anchors, self.input_shape)
            stacked = torch.cat([decoded, classification], axis=-1)
            return non_max_suppression(stacked, self.input_shape,
                                       source_hw, self.letterbox_image,
                                       conf_thres = self.confidence, nms_thres = self.nms_iou)

        with torch.no_grad():
            batch = torch.from_numpy(net_input)
            if self.cuda:
                batch = batch.cuda()
            #   Untimed first pass.
            infer_once(batch)

        started = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                infer_once(batch)
        elapsed = time.time() - started
        return elapsed / test_interval
Ejemplo n.º 12
    def get_miou_png(self, image):
        """Predict a per-pixel class map for ``image`` and return it as a PIL image.

        NOTE(review): the ``cv2.resize(`` call is unclosed in this excerpt.
        """
        #   Convert the image to RGB here so grayscale inputs do not raise errors during prediction.
        #   Only RGB prediction is supported; every other image type is converted to RGB.
        image = cvtColor(image)
        #   Remember the original height and width for the final resize.
        orininal_h = np.array(image).shape[0]
        orininal_w = np.array(image).shape[1]
        #   Pad the image with gray bars for a distortion-free resize.
        #   A plain resize can also be used for recognition.
        image_data, nw, nh = resize_image(
            image, (self.input_shape[1], self.input_shape[0]))
        #   Add the batch dimension (after moving channels first).
        image_data = np.expand_dims(
            np.transpose(preprocess_input(np.array(image_data, np.float32)),
                         (2, 0, 1)), 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()

            #   Run the image through the network.
            pr = self.net(images)[0]
            #   Softmax over the class axis, moved to the last position.
            pr = F.softmax(pr.permute(1, 2, 0), dim=-1).cpu().numpy()
            #   Crop away the gray-bar padding.
            pr = pr[int((self.input_shape[0] - nh) // 2) : int((self.input_shape[0] - nh) // 2 + nh), \
                    int((self.input_shape[1] - nw) // 2) : int((self.input_shape[1] - nw) // 2 + nw)]
            #   Resize back to the original image size.
            #   NOTE(review): the remaining arguments / closing bracket of this call are missing here.
            pr = cv2.resize(pr, (orininal_w, orininal_h),
            #   Take the class of each pixel.
            pr = pr.argmax(axis=-1)

        image = Image.fromarray(np.uint8(pr))
        return image
Ejemplo n.º 13
    def detect_heatmap(self, image, heatmap_save_path):
        """Overlay a score heatmap on ``image`` and save the figure to ``heatmap_save_path``.

        For every network output, the sigmoid of channel 4 is maximised over
        the 3 channel groups, resized to the image size and merged into one
        mask by element-wise maximum. The mask is drawn semi-transparently
        over the image on the current matplotlib figure.
        """
        import cv2
        import matplotlib.pyplot as plt

        def sigmoid(x):
            # Logistic function, applied element-wise.
            return 1.0 / (1.0 + np.exp(-x))

        #   Only RGB prediction is supported, so every input is converted to RGB
        #   (this also keeps grayscale inputs from failing during prediction).
        image = cvtColor(image)
        #   Resize to the network input size; gray-bar padding is controlled by
        #   self.letterbox_image.
        target_size = (self.input_shape[1], self.input_shape[0])
        resized = resize_image(image, target_size, self.letterbox_image)
        #   Normalize, move channels first, then add the batch dimension.
        normalized = preprocess_input(np.array(resized, dtype='float32'))
        net_input = np.expand_dims(np.transpose(normalized, (2, 0, 1)), 0)

        with torch.no_grad():
            batch = torch.from_numpy(net_input)
            if self.cuda:
                batch = batch.cuda()
            #   Feed the image into the network.
            outputs = self.net(batch)

        plt.imshow(image, alpha=1)
        img_w, img_h = image.size[0], image.size[1]
        heat = np.zeros((img_h, img_w))
        for head in outputs:
            head = head.cpu().numpy()
            b, c, h, w = np.shape(head)
            #   Split the channels into 3 groups and move them last; keep batch element 0.
            grouped = np.reshape(head, [b, 3, -1, h, w])
            grouped = np.transpose(grouped, [0, 3, 4, 1, 2])[0]
            #   Sigmoid of entry 4 of each group, maximised over the 3 groups.
            score_map = np.max(sigmoid(grouped[..., 4]), -1)
            score_map = cv2.resize(score_map, (img_w, img_h))
            scaled = (score_map * 255).astype('uint8')
            heat = np.maximum(heat, scaled)
        plt.imshow(heat, alpha=0.5, interpolation='nearest', cmap="jet")

        plt.subplots_adjust(top=1, bottom=0, right=1,  left=0, hspace=0, wspace=0)
        plt.margins(0, 0)
        plt.savefig(heatmap_save_path, dpi=200, bbox_inches='tight', pad_inches = -0.1)
        print("Save to the " + heatmap_save_path)
Ejemplo n.º 14
    def get_FPS(self, image, test_interval):
        """Time ``test_interval`` predict-and-postprocess passes and return the mean seconds per pass.

        NOTE(review): several calls are unclosed in this excerpt; see the inline notes.
        """
        image_shape = np.array(np.shape(image)[0:2])
        #   Convert the image to RGB here so grayscale inputs do not raise errors during prediction.
        #   Only RGB prediction is supported; every other image type is converted to RGB.
        image = cvtColor(image)
        #   Pad the image with gray bars for a distortion-free resize.
        #   A plain resize can also be used for recognition.
        #   NOTE(review): the call below is never closed in this excerpt.
        image_data, image_metas, windows = resize_image([np.array(image)],
        #   Generate the anchors (prior boxes) for the current input image size.
        #   NOTE(review): the call below is never closed in this excerpt.
        anchors = np.expand_dims(get_anchors(self.config, image_data[0].shape),
        #   Feed the image into the network for prediction.
        detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
            [image_data, image_metas, anchors], verbose=0)

        #   The predictions above are relative to the padded image;
        #   they have to be mapped back onto the original image.
        #   NOTE(review): the trailing arguments of this call are missing in this excerpt.
        box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
            detections[0], mrcnn_mask[0], image_shape, image_data[0].shape,

        t1 = time.time()
        for _ in range(test_interval):
            #   Feed the image into the network for prediction.
            detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
                [image_data, image_metas, anchors], verbose=0)

            #   The predictions above are relative to the padded image;
            #   they have to be mapped back onto the original image.
            #   NOTE(review): the trailing arguments of this call are missing in this excerpt.
            box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
                detections[0], mrcnn_mask[0], image_shape, image_data[0].shape,
        t2 = time.time()
        #   Mean seconds per timed iteration.
        tact_time = (t2 - t1) / test_interval
        return tact_time
Ejemplo n.º 15
    def get_FPS(self, image, test_interval):
        """Return the mean wall-clock seconds of one forward / decode / NMS pass.

        One untimed pass is executed first, followed by ``test_interval``
        timed passes; the elapsed time is divided by ``test_interval``.
        """
        #   First two dimensions of the image as passed in, read before conversion.
        source_hw = np.array(np.shape(image)[0:2])
        #   Only RGB prediction is supported, so every input is converted to RGB
        #   (this also keeps grayscale inputs from failing during prediction).
        rgb_image = cvtColor(image)
        #   Resize to the network input size; gray-bar padding is controlled by
        #   self.letterbox_image.
        target_size = (self.input_shape[1], self.input_shape[0])
        resized = resize_image(rgb_image, target_size, self.letterbox_image)
        #   Normalize, move channels first, then add the batch dimension.
        normalized = preprocess_input(np.array(resized, dtype='float32'))
        net_input = np.expand_dims(np.transpose(normalized, (2, 0, 1)), 0)

        def infer_once(batch):
            #   Forward pass and box decoding.
            decoded = self.bbox_util.decode_box(self.net(batch))
            #   Stack the predicted boxes, then apply non-maximum suppression.
            return self.bbox_util.non_max_suppression(
                torch.cat(decoded, 1), self.num_classes, self.input_shape,
                source_hw, self.letterbox_image,
                conf_thres=self.confidence, nms_thres=self.nms_iou)

        with torch.no_grad():
            batch = torch.from_numpy(net_input)
            if self.cuda:
                batch = batch.cuda()
            #   Untimed first pass.
            infer_once(batch)

        started = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                infer_once(batch)
        elapsed = time.time() - started
        return elapsed / test_interval
Ejemplo n.º 16
    def detect_image(self, image_id, image, results, clsid2catid):
        """Run detection on ``image``, build one result dict per detection, and return ``results``.

        NOTE(review): this excerpt is truncated - the ``resize_image(`` call and the
        ``bbox`` list are unclosed, and no visible line adds ``result`` to ``results``.
        """
        #   Convert the image to RGB here so grayscale inputs do not raise errors during prediction.
        #   Only RGB prediction is supported; every other image type is converted to RGB.
        image = cvtColor(image)
        #   Pad the image with gray bars for a distortion-free resize.
        #   A plain resize can also be used for recognition.
        #   NOTE(review): the call below is never closed in this excerpt.
        image_data = resize_image(image,
                                  (self.input_shape[1], self.input_shape[0]),
        #   Add the batch dimension and normalize.
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, dtype='float32')), 0)

        #   Feed the image into the network for prediction.
        input_image_shape = np.expand_dims(
            np.array([image.size[1], image.size[0]], dtype='float32'), 0)
        out_boxes, out_scores, out_classes = self.yolo_model.predict(
            [image_data, input_image_shape])

        for i, c in enumerate(out_classes):
            result = {}
            top, left, bottom, right = out_boxes[i]

            result["image_id"] = int(image_id)
            #   Map the predicted class index through clsid2catid.
            result["category_id"] = clsid2catid[c]
            #   NOTE(review): only the width and height entries are visible; the leading
            #   entries and the closing bracket of this list are missing in this excerpt.
            result["bbox"] = [
                float(right - left),
                float(bottom - top)
            result["score"] = float(out_scores[i])

        return results
Ejemplo n.º 17
    def detect_image(self, image):
        """Classify ``image`` and return the name of the highest-scoring class.

        The class name and its probability are also written as the title of a
        1x1 matplotlib subplot.
        """
        #   Only RGB prediction is supported, so every input is converted to RGB
        #   (this also keeps grayscale inputs from failing during prediction).
        rgb_image = cvtColor(image)
        #   Resize the picture without distortion.
        boxed = letterbox_image(
            rgb_image, [self.input_shape[1], self.input_shape[0]])
        #   Normalize, add the batch dimension, then move channels to axis 1.
        normalized = preprocess_input(np.array(boxed, np.float32))
        batched = np.expand_dims(normalized, 0)
        net_input = np.transpose(batched, (0, 3, 1, 2))

        with torch.no_grad():
            photo = torch.from_numpy(net_input)
            if self.cuda:
                photo = photo.cuda()
            #   Run the network and turn the first output into probabilities.
            logits = self.model(photo)[0]
            preds = torch.softmax(logits, dim=-1).cpu().numpy()

        #   Pick the predicted class and its probability.
        best_index = np.argmax(preds)
        class_name = self.class_names[best_index]
        probability = np.max(preds)

        #   Draw the label on the plot.
        plt.subplot(1, 1, 1)
        plt.title('Class:%s Probability:%.3f' % (class_name, probability))
        return class_name
Ejemplo n.º 18
    def get_miou_png(self, image):
        """Predict a per-pixel class map for ``image`` and return it as a PIL image.

        NOTE(review): the ``cv2.resize(`` call is unclosed in this excerpt.
        """
        #   Convert the image to RGB here so grayscale inputs do not raise errors during prediction.
        #   Only RGB prediction is supported; every other image type is converted to RGB.
        image = cvtColor(image)
        #   Remember the original height and width for the final resize.
        orininal_h = np.array(image).shape[0]
        orininal_w = np.array(image).shape[1]
        #   Pad the image with gray bars for a distortion-free resize.
        image_data, nw, nh = resize_image(
            image, (self.input_shape[1], self.input_shape[0]))
        #   Normalize and add the batch dimension.
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, np.float32)), 0)

        #   Run the image through the network.
        pr = self.model.predict(image_data)[0]
        #   Crop away the gray-bar padding.
        pr = pr[int((self.input_shape[0] - nh) // 2) : int((self.input_shape[0] - nh) // 2 + nh), \
                int((self.input_shape[1] - nw) // 2) : int((self.input_shape[1] - nw) // 2 + nw)]
        #   Resize back to the original image size.
        #   NOTE(review): the remaining arguments / closing bracket of this call are missing here.
        pr = cv2.resize(pr, (orininal_w, orininal_h),
        #   Take the class of each pixel.
        pr = pr.argmax(axis=-1)

        image = Image.fromarray(np.uint8(pr))
        return image
Ejemplo n.º 19
    def get_map_out(self, image):
        """Run the model on ``image`` and return boxes, scores, class ids and masks.

        The returned boxes have their coordinate columns swapped pairwise
        (0 <-> 1 and 2 <-> 3) relative to ``box_thre``; ``outboxes`` is ``None``
        when ``box_thre`` is ``None``.

        NOTE(review): several calls are unclosed in this excerpt; see the inline notes.
        """
        image_shape = np.array(np.shape(image)[0:2])
        #   Convert the image to RGB here so grayscale inputs do not raise errors during prediction.
        #   Only RGB prediction is supported; every other image type is converted to RGB.
        image = cvtColor(image)
        #   Pad the image with gray bars for a distortion-free resize.
        #   A plain resize can also be used for recognition.
        #   NOTE(review): the call below is never closed in this excerpt.
        image_data, image_metas, windows = resize_image([np.array(image)],
        #   Generate the anchors (prior boxes) for the current input image size.
        #   NOTE(review): the call below is never closed in this excerpt.
        anchors = np.expand_dims(get_anchors(self.config, image_data[0].shape),
        #   Feed the image into the network for prediction.
        detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
            [image_data, image_metas, anchors], verbose=0)

        #   The predictions above are relative to the padded image;
        #   they have to be mapped back onto the original image.
        #   NOTE(review): the trailing arguments of this call are missing in this excerpt.
        box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
            detections[0], mrcnn_mask[0], image_shape, image_data[0].shape,

        outboxes = None
        if box_thre is not None:
            #   Swap the coordinate order: columns (1, 3) become (0, 2) and vice versa.
            outboxes = np.zeros_like(box_thre)
            outboxes[:, [0, 2]] = box_thre[:, [1, 3]]
            outboxes[:, [1, 3]] = box_thre[:, [0, 2]]
        return outboxes, class_thre, class_ids, masks_arg, masks_sigmoid
Ejemplo n.º 20
    # Mosaic augmentation: places four images around a random cut point, applies an
    # HSV jitter and merges the boxes via self.merge_bboxes.
    # NOTE(review): this excerpt is heavily truncated - the signature, several list
    # literals and a cv2.cvtColor call are unclosed, and no visible line appends to
    # image_datas / box_datas. Restore from the upstream source before use.
    def get_random_data_with_Mosaic(self,
        h, w = input_shape
        #   Relative position of the cut point, drawn from [0.25, 0.75] on each axis.
        min_offset_x = self.rand(0.25, 0.75)
        min_offset_y = self.rand(0.25, 0.75)

        #   Random widths (40%-100% of w) for the four tiles.
        nws = [
            int(w * self.rand(0.4, 1)),
            int(w * self.rand(0.4, 1)),
            int(w * self.rand(0.4, 1)),
            int(w * self.rand(0.4, 1))
        #   Random heights (40%-100% of h) for the four tiles.
        nhs = [
            int(h * self.rand(0.4, 1)),
            int(h * self.rand(0.4, 1)),
            int(h * self.rand(0.4, 1)),
            int(h * self.rand(0.4, 1))

        #   Paste offsets of the four tiles relative to the cut point.
        place_x = [
            int(w * min_offset_x) - nws[0],
            int(w * min_offset_x) - nws[1],
            int(w * min_offset_x),
            int(w * min_offset_x)
        place_y = [
            int(h * min_offset_y) - nhs[0],
            int(h * min_offset_y),
            int(h * min_offset_y),
            int(h * min_offset_y) - nhs[3]

        image_datas = []
        box_datas = []
        index = 0
        for line in annotation_line:
            # Split the annotation line.
            line_content = line.split()
            # Open the image.
            image = Image.open(line_content[0])
            image = cvtColor(image)

            # Image size.
            iw, ih = image.size
            # Parse the box entries (comma-separated integers).
            box = np.array([
                np.array(list(map(int, box.split(','))))
                for box in line_content[1:]

            # Randomly decide whether to flip the image horizontally.
            flip = self.rand() < .5
            if flip and len(box) > 0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
                # Mirror the x coordinates of the boxes as well.
                box[:, [0, 2]] = iw - box[:, [2, 0]]

            nw = nws[index]
            nh = nhs[index]
            image = image.resize((nw, nh), Image.BICUBIC)

            # Paste the image at the position of its mosaic tile.
            dx = place_x[index]
            dy = place_y[index]
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)

            index = index + 1
            box_data = []
            # Re-process the boxes: rescale, shift, clip to the canvas, drop tiny boxes.
            if len(box) > 0:
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                # Keep only boxes wider and taller than 1 pixel.
                box = box[np.logical_and(box_w > 1, box_h > 1)]
                box_data = np.zeros((len(box), 5))
                box_data[:len(box)] = box


        # Cut the four images at the cut point and assemble them into one.
        cutx = int(w * min_offset_x)
        cuty = int(h * min_offset_y)

        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

        # Apply the color-space jitter (hue / saturation / value).
        hue = self.rand(-hue, hue)
        sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
        val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
        # NOTE(review): the conversion-code argument of this call is missing in this excerpt.
        x = cv2.cvtColor(np.array(new_image / 255, np.float32),
        x[..., 0] += hue * 360
        x[..., 0][x[..., 0] > 1] -= 1
        x[..., 0][x[..., 0] < 0] += 1
        x[..., 1] *= sat
        x[..., 2] *= val
        x[x[:, :, 0] > 360, 0] = 360
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        new_image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

        # Post-process the boxes further (merge across the cut lines).
        new_boxes = self.merge_bboxes(box_datas, cutx, cuty)

        return new_image, new_boxes
Ejemplo n.º 21
    def get_map_txt(self, image_id, image, class_names, map_out_path):
        """Write one line per detection to a ``detection-results/<image_id>.txt`` file.

        NOTE(review): several statements are truncated in this excerpt; see the inline notes.
        """
        #   NOTE(review): the first argument line of this open() call is missing here
        #   (map_out_path is never used in the visible code).
        f = open(
                         "detection-results/" + image_id + ".txt"), "w")
        #   Compute the height and width of the input image.
        image_shape = np.array(np.shape(image)[0:2])
        input_shape = get_new_img_size(image_shape[0], image_shape[1])
        #   Convert the image to RGB here so grayscale inputs do not raise errors during prediction.
        #   Only RGB prediction is supported; every other image type is converted to RGB.
        image = cvtColor(image)
        #   Resize the original image (per the original comment: so that its short side is 600).
        image_data = resize_image(image, [input_shape[1], input_shape[0]])
        #   Add the batch dimension.
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, dtype='float32')), 0)

        #   Get the RPN predictions and the base layer.
        rpn_pred = self.model_rpn(image_data)
        rpn_pred = [x.numpy() for x in rpn_pred]
        #   Generate the anchors and decode.
        anchors = get_anchors(input_shape, self.backbone, self.anchors_size)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #   Use the proposals to get the classifier predictions
        #   (the last-axis columns of the proposals are reordered to [1, 0, 3, 2]).
        classifier_pred = self.model_classifier(
            [rpn_pred[2], rpn_results[:, :, [1, 0, 3, 2]]])
        classifier_pred = [x.numpy() for x in classifier_pred]
        #   Decode the proposals with the classifier predictions to obtain the predicted boxes.
        #   NOTE(review): the trailing arguments of this call are missing in this excerpt.
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, rpn_results, image_shape, input_shape,

        #   No detections case (original comment: "return the original image");
        #   NOTE(review): the branch body is missing in this excerpt.
        if len(results[0]) <= 0:

        #   Column 5 is read as the label, column 4 as the confidence, columns 0-3 as the box.
        top_label = np.array(results[0][:, 5], dtype='int32')
        top_conf = results[0][:, 4]
        top_boxes = results[0][:, :4]

        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box = top_boxes[i]
            score = str(top_conf[i])

            top, left, bottom, right = box

            #   NOTE(review): the body of this class filter is missing in this excerpt.
            if predicted_class not in class_names:

            #   Output columns: class, score (first 6 characters), left, top, right, bottom.
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

Ejemplo n.º 22
    def detect_image(self, image):
        """Run instance segmentation on an image and draw the result.

        NOTE(review): several call continuations were cut off in the source
        this block came from. Every restored argument is marked below and
        must be confirmed against the project's helpers.

        Args:
            image: Input image; converted with ``cvtColor`` and read through
                ``image.size`` afterwards.

        Returns:
            The RGB-converted input when ``postprocess`` yields no boxes,
            otherwise a new ``Image`` with colour masks blended in and a box
            plus caption drawn for every instance.
        """
        image_shape = np.array(np.shape(image)[0:2])
        # Convert to RGB so that grayscale inputs do not break prediction.
        image = cvtColor(image)
        image_origin = np.array(image, np.uint8)
        # Resize / pad the image for the network.
        # NOTE(review): second argument reconstructed (source truncated).
        image_data, image_metas, windows = resize_image([np.array(image)],
                                                        self.config)
        # Anchors for the current network input size, with a batch axis.
        # NOTE(review): axis argument reconstructed (source truncated).
        anchors = np.expand_dims(
            get_anchors(self.config, image_data[0].shape), 0)
        # Forward pass.
        detections, _, _, mrcnn_mask, _, _, _ = self.model.predict(
            [image_data, image_metas, anchors], verbose=0)

        # Predictions are relative to the padded image; map them back onto
        # the original image.
        # NOTE(review): last argument reconstructed (source truncated).
        box_thre, class_thre, class_ids, masks_arg, masks_sigmoid = postprocess(
            detections[0], mrcnn_mask[0], image_shape, image_data[0].shape,
            windows[0])

        if box_thre is None:
            return image

        # Per-pixel class map of shape [image_shape[0], image_shape[1]]:
        # each instance mask is scaled by (class id + 1), then for every
        # pixel the entry of the instance chosen by masks_arg is selected.
        masks_class = masks_sigmoid * (class_ids[None, None, :] + 1)
        masks_class = np.reshape(masks_class,
                                 [-1, np.shape(masks_sigmoid)[-1]])
        # NOTE(review): the row-index half of this gather was cut off in the
        # source; np.arange over the rows is a reconstruction.
        pixel_rows = np.arange(np.shape(masks_class)[0])
        masks_class = np.reshape(
            masks_class[pixel_rows, np.reshape(masks_arg, [-1])],
            [image_shape[0], image_shape[1]])

        # Font and line thickness.
        scale = 0.6
        thickness = int(
            max((image.size[0] + image.size[1]) // self.IMAGE_MAX_DIM, 1))
        font = cv2.FONT_HERSHEY_DUPLEX
        color_masks = self.colors[masks_class].astype('uint8')
        # NOTE(review): blend weights reconstructed (source truncated).
        image_fused = cv2.addWeighted(color_masks, 0.4, image_origin, 0.6,
                                      gamma=0)
        for i in range(np.shape(class_ids)[0]):
            top, left, bottom, right = np.array(box_thre[i, :], np.int32)

            # Pick the colour and draw the box.
            # NOTE(review): thickness argument reconstructed (source
            # truncated).
            color = self.colors[class_ids[i] + 1].tolist()
            cv2.rectangle(image_fused, (left, top), (right, bottom), color,
                          thickness)

            # Caption: class name and score on a filled background.
            class_name = self.class_names[class_ids[i]]
            print(class_name, top, left, bottom, right)
            text_str = f'{class_name}: {class_thre[i]:.2f}'
            text_w, text_h = cv2.getTextSize(text_str, font, scale, 1)[0]
            cv2.rectangle(image_fused, (left, top),
                          (left + text_w, top + text_h + 5), color, -1)
            cv2.putText(image_fused, text_str, (left, top + 15), font, scale,
                        (255, 255, 255), 1, cv2.LINE_AA)

        return Image.fromarray(np.uint8(image_fused))
Ejemplo n.º 23
    def detect_image(self, image):
        """Detect objects with the torch model and draw them on the image.

        NOTE(review): several call continuations were cut off in the source
        this block came from; restored arguments are marked below.

        Args:
            image: Input image; converted with ``cvtColor`` and drawn on in
                place through ``ImageDraw``.

        Returns:
            The RGB-converted image, annotated with one box and caption per
            detection (unchanged when nothing is detected).
        """
        # Height and width of the input image.
        image_shape = np.array(np.shape(image)[0:2])
        # Convert to RGB so that grayscale inputs do not break prediction.
        image = cvtColor(image)
        # Resize to the network input size.
        # NOTE(review): third argument reconstructed (source truncated); it
        # matches the resize_image calls elsewhere in this file.
        image_data = resize_image(image,
                                  (self.input_shape[1], self.input_shape[0]),
                                  self.letterbox_image)
        # Preprocess, move channels first, add the batch dimension.
        image_data = np.expand_dims(
            np.transpose(
                preprocess_input(np.array(image_data, dtype='float32')),
                (2, 0, 1)), 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()
            # Forward pass and box decoding.
            outputs = self.net(images)
            outputs = self.bbox_util.decode_box(outputs)
            # Concatenate the predictions and run non-maximum suppression.
            # NOTE(review): every argument after torch.cat(...) is a
            # reconstruction - confirm against non_max_suppression.
            results = self.bbox_util.non_max_suppression(
                torch.cat(outputs, 1),
                self.num_classes,
                self.input_shape,
                image_shape,
                self.letterbox_image,
                conf_thres=self.confidence,
                nms_thres=self.nms_iou)

            if results[0] is None:
                return image

            top_label = np.array(results[0][:, 6], dtype='int32')
            top_conf = results[0][:, 4] * results[0][:, 5]
            top_boxes = results[0][:, :4]

        # Font and line thickness, both scaled with the image size.
        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = int(
            max((image.size[0] + image.size[1]) // np.mean(self.input_shape),
                1))

        # Draw every detection.
        for i, c in enumerate(top_label):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            top, left, bottom, right = top_boxes[i]
            # Clamp the box to the image.
            top = max(0, np.floor(top).astype('int32'))
            left = max(0, np.floor(left).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom).astype('int32'))
            right = min(image.size[0], np.floor(right).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            # Put the caption above the box when it fits, else just inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-pixel rectangles emulate a thick outline.
            for offset in range(thickness):
                draw.rectangle([left + offset, top + offset,
                                right - offset, bottom - offset],
                               outline=self.colors[c])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw

        return image
Ejemplo n.º 24
    def detect_image(self, image):
        """Segment an image and render the result according to ``mix_type``.

        ``self.mix_type`` selects the output:
            0 - colour mask blended with the original image (alpha 0.7),
            1 - colour mask only,
            2 - original image with pixels of class 0 blacked out.
        Any other value returns the RGB-converted input unchanged.

        Args:
            image: Input image; converted with ``cvtColor`` before use.

        Returns:
            The rendered ``Image``.
        """
        # Convert to RGB so that grayscale inputs do not break prediction.
        image = cvtColor(image)
        # Keep a copy of the input for blending / masking later on.
        old_img = copy.deepcopy(image)
        original_h, original_w = np.array(image).shape[:2]
        # Resize with padding; nw/nh is the size of the resized content.
        image_data, nw, nh = resize_image(
            image, (self.input_shape[1], self.input_shape[0]))
        # Preprocess and add the batch dimension.
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, np.float32)), 0)

        # Forward pass.
        pr = self.model.predict(image_data)[0]
        # Cut away the padded border.
        pad_top = int((self.input_shape[0] - nh) // 2)
        pad_left = int((self.input_shape[1] - nw) // 2)
        pr = pr[pad_top:int((self.input_shape[0] - nh) // 2 + nh),
                pad_left:int((self.input_shape[1] - nw) // 2 + nw)]
        # Scale the prediction back to the original resolution.
        # NOTE(review): the call was cut off after the size argument in the
        # source; INTER_LINEAR is cv2.resize's documented default, so this
        # restoration cannot change behaviour relative to an argument-less
        # call - confirm against the project if another mode was intended.
        pr = cv2.resize(pr, (original_w, original_h),
                        interpolation=cv2.INTER_LINEAR)
        # Class index of every pixel.
        pr = pr.argmax(axis=-1)

        if self.mix_type in (0, 1):
            # Look up the colour of every pixel's class in one gather.
            seg_img = np.reshape(
                np.array(self.colors, np.uint8)[np.reshape(pr, [-1])],
                [original_h, original_w, -1])
            image = Image.fromarray(np.uint8(seg_img))
            if self.mix_type == 0:
                # Blend the colour mask with the original image.
                image = Image.blend(old_img, image, 0.7)

        elif self.mix_type == 2:
            # Keep only the pixels whose class is not 0.
            seg_img = (np.expand_dims(pr != 0, -1) *
                       np.array(old_img, np.float32)).astype('uint8')
            image = Image.fromarray(np.uint8(seg_img))

        return image
Ejemplo n.º 25
    def detect_image(self, image, crop=False, count=False):
        """Detect objects, optionally count / crop them, and draw the boxes.

        NOTE(review): several call continuations were cut off in the source
        this block came from; restored arguments are marked below.

        Args:
            image: Input image; converted with ``cvtColor`` and drawn on in
                place through ``ImageDraw``.
            crop: When true, every detection is saved as
                ``img_crop/crop_<i>.png``.
            count: When true, per-class detection counts are printed.

        Returns:
            The RGB-converted image, annotated with one box and caption per
            detection (unchanged when nothing is detected).
        """
        # Height and width of the input image.
        image_shape = np.array(np.shape(image)[0:2])
        # Convert to RGB so that grayscale inputs do not break prediction.
        image = cvtColor(image)
        # Resize to the network input size.
        # NOTE(review): third argument reconstructed (source truncated); it
        # matches the resize_image calls elsewhere in this file.
        image_data = resize_image(image,
                                  (self.input_shape[1], self.input_shape[0]),
                                  self.letterbox_image)
        # Preprocess and add the batch dimension.
        image_data = np.expand_dims(
            preprocess_input(np.array(image_data, dtype='float32')), 0)

        outputs = self.centernet.predict(image_data)
        # Post-processing (thresholding and optional classic NMS).
        # NOTE(review): every argument after `outputs` is a reconstruction -
        # confirm against bbox_util.postprocess.
        results = self.bbox_util.postprocess(outputs, self.nms, image_shape,
                                             self.input_shape,
                                             self.letterbox_image,
                                             self.nms_iou)

        # Nothing detected: return the image as is.
        if results[0] is None:
            return image

        top_label = np.array(results[0][:, 5], dtype='int32')
        top_conf = results[0][:, 4]
        top_boxes = results[0][:, :4]

        # Font and line thickness, both scaled with the image size.
        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
        thickness = max(
            (np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0],
            1)

        # Per-class counting.
        if count:
            print("top_label:", top_label)
            classes_nums = np.zeros([self.num_classes])
            for i in range(self.num_classes):
                num = np.sum(top_label == i)
                if num > 0:
                    print(self.class_names[i], " : ", num)
                classes_nums[i] = num
            print("classes_nums:", classes_nums)

        # Save a crop of every detection.
        if crop:
            dir_save_path = "img_crop"
            for i, c in enumerate(top_label):
                top, left, bottom, right = top_boxes[i]
                top = max(0, np.floor(top).astype('int32'))
                left = max(0, np.floor(left).astype('int32'))
                bottom = min(image.size[1], np.floor(bottom).astype('int32'))
                right = min(image.size[0], np.floor(right).astype('int32'))

                os.makedirs(dir_save_path, exist_ok=True)
                crop_image = image.crop([left, top, right, bottom])
                crop_image.save(os.path.join(dir_save_path,
                                             "crop_" + str(i) + ".png"),
                                quality=95,
                                subsampling=0)
                print("save crop_" + str(i) + ".png to " + dir_save_path)

        # Draw every detection.
        for i, c in enumerate(top_label):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            top, left, bottom, right = top_boxes[i]
            # Clamp the box to the image.
            top = max(0, np.floor(top).astype('int32'))
            left = max(0, np.floor(left).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom).astype('int32'))
            right = min(image.size[0], np.floor(right).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            # Put the caption above the box when it fits, else just inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-pixel rectangles emulate a thick outline.
            for offset in range(thickness):
                draw.rectangle([left + offset, top + offset,
                                right - offset, bottom - offset],
                               outline=self.colors[c])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw

        return image
Ejemplo n.º 26
    def detect_image(self, image, crop = False, count = False):
        """Detect objects, optionally count / crop them, and draw the boxes.

        Args:
            image: Input image; converted with ``cvtColor`` and drawn on in
                place through ``ImageDraw``.
            crop: When true, every detection is saved as
                ``img_crop/crop_<i>.png``.
            count: When true, per-class detection counts are printed.

        Returns:
            The RGB-converted image, annotated with one box and caption per
            detection (unchanged when nothing is detected).
        """
        image_shape = np.array(np.shape(image)[0:2])
        # Convert to RGB so that grayscale inputs do not break prediction.
        image       = cvtColor(image)
        # Resize to the network input size.
        image_data  = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        # Add the batch dimension, then preprocess.
        image_data  = preprocess_input(np.expand_dims(np.array(image_data, dtype='float32'), 0))

        preds       = self.get_pred(image_data).numpy()
        # Decode the raw predictions into boxes.
        results     = self.bbox_util.decode_box(preds, self.anchors, image_shape,
                                                self.input_shape, self.letterbox_image, confidence=self.confidence)
        # Nothing detected: return the image as is.
        if len(results[0]) <= 0:
            return image

        top_label   = np.array(results[0][:, 4], dtype = 'int32')
        top_conf    = results[0][:, 5]
        top_boxes   = results[0][:, :4]
        # Font and line thickness, both scaled with the image size.
        font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
        thickness = max((np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0], 1)

        # Per-class counting.
        if count:
            print("top_label:", top_label)
            classes_nums    = np.zeros([self.num_classes])
            for i in range(self.num_classes):
                num = np.sum(top_label == i)
                if num > 0:
                    print(self.class_names[i], " : ", num)
                classes_nums[i] = num
            print("classes_nums:", classes_nums)

        # Save a crop of every detection.
        if crop:
            dir_save_path = "img_crop"
            for i, crop_box in enumerate(top_boxes):
                top, left, bottom, right = crop_box
                top     = max(0, np.floor(top).astype('int32'))
                left    = max(0, np.floor(left).astype('int32'))
                bottom  = min(image.size[1], np.floor(bottom).astype('int32'))
                right   = min(image.size[0], np.floor(right).astype('int32'))
                # The body of the directory check was lost in the source;
                # creating the directory is what the following save needs.
                os.makedirs(dir_save_path, exist_ok=True)
                crop_image = image.crop([left, top, right, bottom])
                crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0)
                print("save crop_" + str(i) + ".png to " + dir_save_path)

        # Draw every detection.
        for i, c in enumerate(top_label):
            predicted_class = self.class_names[int(c)]
            score           = top_conf[i]

            top, left, bottom, right = top_boxes[i]
            # Clamp the box to the image.
            top     = max(0, np.floor(top).astype('int32'))
            left    = max(0, np.floor(left).astype('int32'))
            bottom  = min(image.size[1], np.floor(bottom).astype('int32'))
            right   = min(image.size[0], np.floor(right).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            # Put the caption above the box when it fits, else just inside.
            # (Without the `else` the first assignment was always
            # overwritten.)
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-pixel rectangles emulate a thick outline.
            for offset in range(thickness):
                draw.rectangle([left + offset, top + offset, right - offset, bottom - offset], outline=self.colors[c])
            draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c])
            draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
            del draw

        return image
Ejemplo n.º 27
    def detect_image(self, image, crop = False, count = False):
        """Detect objects, optionally count / crop them, and draw the boxes.

        Args:
            image: Input image; converted with ``cvtColor`` and drawn on in
                place through ``ImageDraw``.
            crop: When true, every detection is saved as
                ``img_crop/crop_<i>.png``.
            count: When true, per-class detection counts are printed.

        Returns:
            The RGB-converted image, annotated with one box and caption per
            detection.
        """
        # Convert to RGB so that grayscale inputs do not break prediction.
        image       = cvtColor(image)
        # Resize to the network input size.
        image_data  = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        # Preprocess and add the batch dimension.
        image_data  = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)

        # Forward pass. The `feed_dict={` opener was lost in the source; the
        # key/value lines and the closing `})` fix its shape.
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0})

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        # Font and line thickness, both scaled with the image size.
        font        = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness   = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))

        # Per-class counting.
        if count:
            print("top_label:", out_classes)
            classes_nums    = np.zeros([self.num_classes])
            for i in range(self.num_classes):
                num = np.sum(out_classes == i)
                if num > 0:
                    print(self.class_names[i], " : ", num)
                classes_nums[i] = num
            print("classes_nums:", classes_nums)

        # Save a crop of every detection.
        if crop:
            dir_save_path = "img_crop"
            for i, crop_box in enumerate(out_boxes):
                top, left, bottom, right = crop_box
                top     = max(0, np.floor(top).astype('int32'))
                left    = max(0, np.floor(left).astype('int32'))
                bottom  = min(image.size[1], np.floor(bottom).astype('int32'))
                right   = min(image.size[0], np.floor(right).astype('int32'))
                # The body of the directory check was lost in the source;
                # creating the directory is what the following save needs.
                os.makedirs(dir_save_path, exist_ok=True)
                crop_image = image.crop([left, top, right, bottom])
                crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0)
                print("save crop_" + str(i) + ".png to " + dir_save_path)

        # Draw every detection.
        for i, c in enumerate(out_classes):
            predicted_class = self.class_names[int(c)]
            score           = out_scores[i]

            top, left, bottom, right = out_boxes[i]
            # Clamp the box to the image.
            top     = max(0, np.floor(top).astype('int32'))
            left    = max(0, np.floor(left).astype('int32'))
            bottom  = min(image.size[1], np.floor(bottom).astype('int32'))
            right   = min(image.size[0], np.floor(right).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            # Put the caption above the box when it fits, else just inside.
            # (Without the `else` the first assignment was always
            # overwritten.)
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested 1-pixel rectangles emulate a thick outline.
            for offset in range(thickness):
                draw.rectangle([left + offset, top + offset, right - offset, bottom - offset], outline=self.colors[c])
            draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c])
            draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
            del draw

        return image
Ejemplo n.º 28
    def get_random_data(self,
                        annotation_line,
                        input_shape,
                        jitter=.3,
                        hue=.1,
                        sat=1.5,
                        val=1.5,
                        random=True):
        """Load one annotated image and optionally apply random augmentation.

        NOTE(review): the parameter list was cut off after ``self`` in the
        source. The names are the ones the body reads; the default values
        are a reconstruction and must be confirmed against the callers.

        Args:
            annotation_line: Whitespace-separated string: the image path
                followed by one comma-separated integer list per box.
            input_shape: ``(h, w)`` of the output image.
            jitter: Half-width of the random aspect-ratio distortion.
            hue: Maximum hue shift; a value in ``[-hue, hue]`` is drawn.
            sat: Upper bound of the random saturation gain (or its inverse).
            val: Upper bound of the random value gain (or its inverse).
            random: When false, only a centred resize with gray padding is
                applied.

        Returns:
            ``(image_data, box)``: the image as an array and the boxes
            mapped into the output image. Columns 0/2 are treated as x
            coordinates and 1/3 as y coordinates; boxes not larger than one
            pixel in either direction are dropped.
        """
        line = annotation_line.split()
        # Read the image and convert it to RGB.
        image = Image.open(line[0])
        image = cvtColor(image)
        # Source size and target size.
        iw, ih = image.size
        h, w = input_shape
        # Parse the boxes.
        box = np.array(
            [np.array(list(map(int, box.split(',')))) for box in line[1:]])

        if not random:
            scale = min(w / iw, h / ih)
            nw = int(iw * scale)
            nh = int(ih * scale)
            dx = (w - nw) // 2
            dy = (h - nh) // 2

            # Pad the resized image with gray bars.
            image = image.resize((nw, nh), Image.BICUBIC)
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image, np.float32)

            # Map the boxes into the padded image.
            if len(box) > 0:
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                # Discard degenerate boxes.
                box = box[np.logical_and(box_w > 1, box_h > 1)]

            return image_data, box

        # Random scale and aspect-ratio distortion.
        new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(
            1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)
        # The `else:` was lost in the source; without it nw/nh were unbound
        # whenever new_ar >= 1 and overwritten otherwise.
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # Paste at a random offset on a gray canvas.
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image

        # Random horizontal flip.
        flip = self.rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # Colour jitter in HSV space.
        hue = self.rand(-hue, hue)
        sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
        val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
        x[..., 0] += hue * 360
        # NOTE(review): the shift above and the clamp below use a 0..360 hue
        # scale, but this wrap-around uses 1 - looks inconsistent; left
        # unchanged, confirm the intended range.
        x[..., 0][x[..., 0] > 1] -= 1
        x[..., 0][x[..., 0] < 0] += 1
        x[..., 1] *= sat
        x[..., 2] *= val
        x[x[:, :, 0] > 360, 0] = 360
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

        # Map the boxes into the augmented image.
        if len(box) > 0:
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip:
                box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]

        return image_data, box
Ejemplo n.º 29
    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
        """Load one annotated image and optionally apply random augmentation.

        Args:
            annotation_line: Whitespace-separated string: the image path
                followed by one comma-separated integer list per box.
            input_shape: ``(h, w)`` of the output image.
            jitter: Half-width of the random aspect-ratio distortion.
            hue: Relative hue gain range; the gain is ``1 + U(-1, 1) * hue``.
            sat: Relative saturation gain range, used like ``hue``.
            val: Relative value gain range, used like ``hue``.
            random: When false, only a centred resize with gray padding is
                applied.

        Returns:
            ``(image_data, box)``: the image as an array (float32 when
            ``random`` is false, uint8 otherwise) and the boxes mapped into
            the output image. Columns 0/2 are treated as x coordinates and
            1/3 as y coordinates; boxes not larger than one pixel in either
            direction are dropped.
        """
        line    = annotation_line.split()
        # Read the image and convert it to RGB.
        image   = Image.open(line[0])
        image   = cvtColor(image)
        # Source size and target size.
        iw, ih  = image.size
        h, w    = input_shape
        # Parse the boxes.
        box     = np.array([np.array(list(map(int, item.split(',')))) for item in line[1:]])

        if not random:
            scale = min(w/iw, h/ih)
            nw = int(iw*scale)
            nh = int(ih*scale)
            dx = (w-nw)//2
            dy = (h-nh)//2

            # Pad the resized image with gray bars.
            image       = image.resize((nw,nh), Image.BICUBIC)
            new_image   = Image.new('RGB', (w,h), (128,128,128))
            new_image.paste(image, (dx, dy))
            image_data  = np.array(new_image, np.float32)

            # Map the boxes into the padded image.
            if len(box)>0:
                box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
                box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
                box[:, 0:2][box[:, 0:2]<0] = 0
                box[:, 2][box[:, 2]>w] = w
                box[:, 3][box[:, 3]>h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                # Discard degenerate boxes.
                box = box[np.logical_and(box_w>1, box_h>1)]

            return image_data, box

        # Random scale and aspect-ratio distortion.
        new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
        scale = self.rand(.25, 2)
        # The `else:` was lost in the source; without it nw/nh were unbound
        # whenever new_ar >= 1 and overwritten otherwise.
        if new_ar < 1:
            nh = int(scale*h)
            nw = int(nh*new_ar)
        else:
            nw = int(scale*w)
            nh = int(nw/new_ar)
        image = image.resize((nw,nh), Image.BICUBIC)

        # Paste at a random offset on a gray canvas.
        dx = int(self.rand(0, w-nw))
        dy = int(self.rand(0, h-nh))
        new_image = Image.new('RGB', (w,h), (128,128,128))
        new_image.paste(image, (dx, dy))
        image = new_image

        # Random horizontal flip.
        flip = self.rand()<.5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        image_data      = np.array(image, np.uint8)
        # Colour jitter: one random gain per HSV channel, applied through
        # 256-entry lookup tables.
        gains           = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
        hue_ch, sat_ch, val_ch = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
        dtype           = image_data.dtype
        levels  = np.arange(0, 256, dtype=gains.dtype)
        # Hue wraps modulo 180; saturation and value are clipped to 0..255.
        lut_hue = ((levels * gains[0]) % 180).astype(dtype)
        lut_sat = np.clip(levels * gains[1], 0, 255).astype(dtype)
        lut_val = np.clip(levels * gains[2], 0, 255).astype(dtype)

        image_data = cv2.merge((cv2.LUT(hue_ch, lut_hue), cv2.LUT(sat_ch, lut_sat), cv2.LUT(val_ch, lut_val)))
        image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)

        # Map the boxes into the augmented image.
        if len(box)>0:
            box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
            box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
            if flip:
                box[:, [0,2]] = w - box[:, [2,0]]
            box[:, 0:2][box[:, 0:2]<0] = 0
            box[:, 2][box[:, 2]>w] = w
            box[:, 3][box[:, 3]>h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w>1, box_h>1)]
        return image_data, box
    def get_random_data_with_Mosaic(self,
                                    annotation_line,
                                    input_shape,
                                    jitter=0.3,
                                    hue=.1,
                                    sat=0.7,
                                    val=0.4):
        """Build one mosaic training sample out of four annotated images.

        NOTE(review): the parameter list was cut off after ``self`` in the
        source. The names are the ones the body reads; the default values
        mirror ``get_random_data`` directly above and must be confirmed.

        Args:
            annotation_line: Iterable of annotation strings (image path
                followed by comma-separated integer boxes). The first four
                entries fill the top-left, bottom-left, bottom-right and
                top-right quadrants.
            input_shape: ``(h, w)`` of the output image.
            jitter: Half-width of the random aspect-ratio distortion.
            hue: Relative hue gain range; the gain is ``1 + U(-1, 1) * hue``.
            sat: Relative saturation gain range, used like ``hue``.
            val: Relative value gain range, used like ``hue``.

        Returns:
            ``(new_image, new_boxes)``: the uint8 mosaic image and the boxes
            returned by ``self.merge_bboxes``.
        """
        h, w = input_shape
        min_offset_x = self.rand(0.3, 0.7)
        min_offset_y = self.rand(0.3, 0.7)

        image_datas = []
        box_datas = []
        for index, line in enumerate(annotation_line):
            # Split the annotation line and open the image.
            line_content = line.split()
            image = Image.open(line_content[0])
            image = cvtColor(image)

            # Image size and boxes.
            iw, ih = image.size
            box = np.array([
                np.array(list(map(int, item.split(','))))
                for item in line_content[1:]
            ])

            # Random horizontal flip (only applied when there are boxes).
            flip = self.rand() < .5
            if flip and len(box) > 0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0, 2]] = iw - box[:, [2, 0]]

            # Random scale and aspect-ratio distortion.
            new_ar = iw / ih * self.rand(1 - jitter, 1 + jitter) / self.rand(
                1 - jitter, 1 + jitter)
            scale = self.rand(.4, 1)
            # The `else:` was lost in the source; without it nw/nh were
            # unbound whenever new_ar >= 1 and overwritten otherwise.
            if new_ar < 1:
                nh = int(scale * h)
                nw = int(nh * new_ar)
            else:
                nw = int(scale * w)
                nh = int(nw / new_ar)
            image = image.resize((nw, nh), Image.BICUBIC)

            # Place the image so that it touches the cut point from the
            # quadrant that belongs to this index.
            if index == 0:
                dx = int(w * min_offset_x) - nw
                dy = int(h * min_offset_y) - nh
            elif index == 1:
                dx = int(w * min_offset_x) - nw
                dy = int(h * min_offset_y)
            elif index == 2:
                dx = int(w * min_offset_x)
                dy = int(h * min_offset_y)
            elif index == 3:
                dx = int(w * min_offset_x)
                dy = int(h * min_offset_y) - nh

            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)

            box_data = []
            # Map the boxes into the canvas and drop degenerate ones.
            if len(box) > 0:
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]
                box_data = np.zeros((len(box), 5))
                box_data[:len(box)] = box

            # These appends were lost in the source; both lists are indexed
            # / consumed below, so every iteration must contribute to them.
            image_datas.append(image_data)
            box_datas.append(box_data)

        # Stitch the four quadrants together at the cut point.
        cutx = int(w * min_offset_x)
        cuty = int(h * min_offset_y)

        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

        new_image = np.array(new_image, np.uint8)
        # Colour jitter: one random gain per HSV channel, applied through
        # 256-entry lookup tables.
        gains = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
        hue_ch, sat_ch, val_ch = cv2.split(
            cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
        dtype = new_image.dtype
        levels = np.arange(0, 256, dtype=gains.dtype)
        # Hue wraps modulo 180; saturation and value are clipped to 0..255.
        lut_hue = ((levels * gains[0]) % 180).astype(dtype)
        lut_sat = np.clip(levels * gains[1], 0, 255).astype(dtype)
        lut_val = np.clip(levels * gains[2], 0, 255).astype(dtype)

        new_image = cv2.merge(
            (cv2.LUT(hue_ch, lut_hue), cv2.LUT(sat_ch, lut_sat),
             cv2.LUT(val_ch, lut_val)))
        new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)

        # Merge / clip the boxes of the four images at the cut point.
        new_boxes = self.merge_bboxes(box_datas, cutx, cuty)

        return new_image, new_boxes