Code Example #1
    def image_test(self, path, inputs=None, oimg=None):
        bbox_util = BBoxUtility(2)
        if path is not None:
            img = cv2.imread(path)
            images = img.copy()

            img = cv2.resize(img, (self.input_shape[0], self.input_shape[1]))

            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            inputs = image.img_to_array(img)

            inputs = preprocess_input(np.array([inputs]))
        else:
            images = oimg.copy()
        preds = self.model.predict(inputs, batch_size=1, verbose=1)
        results = bbox_util.detection_out(preds)
        print(results)
        if len(results) > 0:
            final = []
            for each in results[0]:

                # skip detections below the hard-coded 0.4 confidence threshold
                if each[1] < 0.4: continue
                xmin = int(each[2] * np.shape(images)[1])
                ymin = int(each[3] * np.shape(images)[0])
                xmax = int(each[4] * np.shape(images)[1])
                ymax = int(each[5] * np.shape(images)[0])
                final.append([xmin, ymin, xmax, ymax, each[1]])
            return final

        return None
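This method belongs to the ssdT class shown in full in Code Example #18. A minimal usage sketch, with hypothetical model and path names, might look like the following:

# Minimal usage sketch (hypothetical names; the enclosing ssdT class appears in Code Example #18)
tester = ssdT(model=trained_ssd_model,            # an already-built and loaded Keras SSD model
              classes=["hand"],                   # assumed single foreground class, matching BBoxUtility(2)
              input_shape=(300, 300, 3))
detections = tester.image_test("frame_0001.jpg")  # hypothetical image path
if detections is not None:
    for xmin, ymin, xmax, ymax, score in detections:
        print(xmin, ymin, xmax, ymax, score)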
Code Example #2
File: frcnn.py Project: yanjingke/faster-rcnn-keras
 def __init__(self, **kwargs):
     self.__dict__.update(self._defaults)
     self.class_names = self._get_class()
     self.sess = K.get_session()
     self.config = Config()
     self.generate()
     self.bbox_util = BBoxUtility()
Code Example #3
 def __init__(self, **kwargs):
     self.__dict__.update(self._defaults)
     self.class_names = self._get_class()
     self.sess = K.get_session()
     self.generate()
     self.prior = self._get_prior()
     self.bbox_util = BBoxUtility(self.num_classes)
Code Example #4
 def __init__(self, **kwargs):
     self.__dict__.update(self._defaults)
     self.class_names = self._get_class()
     self.sess = K.get_session()
     self.config = Config()
     self.generate()
     self.bbox_util = BBoxUtility(classifier_nms=self.iou, top_k=self.config.num_RPN_predict_pre)
Code Example #5
File: retinaface.py Project: ssunguotu/LeNet_face
 def __init__(self, **kwargs):
     self.__dict__.update(self._defaults)
     if self.backbone == "mobilenet":
         self.cfg = cfg_mnet
     else:
         self.cfg = cfg_re50
     self.bbox_util = BBoxUtility()
     self.generate()
Code Example #6
 def __init__(self, **kwargs):
     self.__dict__.update(self._defaults)
     self.class_names = self._get_class()
     self.model_image_size = [
         image_sizes[self.phi], image_sizes[self.phi], 3
     ]
     self.generate()
     self.bbox_util = BBoxUtility(self.num_classes, nms_thresh=self.iou)
     self.prior = self._get_prior()
Code Example #7
 def __init__(self, **kwargs):
     self.__dict__.update(self._defaults)
     if self.backbone == "mobilenet":
         self.cfg = cfg_mnet
     else:
         self.cfg = cfg_re50
     self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
     self.generate()
     self.anchors = Anchors(self.cfg, image_size=(self.input_shape[0], self.input_shape[1])).get_anchors()
Code Example #8
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        for name, value in kwargs.items():
            setattr(self, name, value)

        #---------------------------------------------------#
        #   Config information for the different backbone networks
        #---------------------------------------------------#
        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50

        #---------------------------------------------------#
        #   Create the bbox utility and the prior (anchor) boxes
        #---------------------------------------------------#
        self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
        self.anchors = Anchors(self.cfg,
                               image_size=(self.input_shape[0],
                                           self.input_shape[1])).get_anchors()
        self.generate()
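Unlike the constructors in Examples #2 through #7, this one also applies keyword arguments on top of _defaults, so individual settings can be overridden per instance. A hedged sketch, assuming the class is the Retinaface of Code Example #16 and suitable weights exist on disk:

# Hedged sketch: keyword arguments override the _defaults entries
rf = Retinaface(backbone="resnet50", nms_iou=0.3)  # assumes resnet50 weights at model_path
print(rf.nms_iou)  # -> 0.3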
Code Example #9
        callback.writer.add_summary(summary, batch_no)
        callback.writer.flush()


#----------------------------------------------------#
#   Reference video on computing detection mAP and PR curves:
#   https://www.bilibili.com/video/BV1zE411u7Vw
#----------------------------------------------------#
if __name__ == "__main__":
    config = Config()
    NUM_CLASSES = 21
    # Train for 100 epochs
    EPOCH = 100
    # Start training with a learning rate of 1e-4 and halve it every 10 epochs
    Learning_rate = 1e-4
    bbox_util = BBoxUtility(overlap_threshold=config.rpn_max_overlap,
                            ignore_threshold=config.rpn_min_overlap)
    annotation_path = '2007_train.txt'

    #------------------------------------------------------#
    #   See the README for the weight file (Baidu Netdisk download).
    #   Dimension-mismatch warnings when training on your own dataset
    #   are normal: what is being predicted has changed, so the
    #   dimensions naturally differ.
    #------------------------------------------------------#
    model_rpn, model_classifier, model_all = get_model(config, NUM_CLASSES)
    base_net_weights = "model_data/voc_weights.h5"

    model_all.summary()
    model_rpn.load_weights(base_net_weights, by_name=True)
    model_classifier.load_weights(base_net_weights, by_name=True)

    with open(annotation_path) as f:
Code Example #10
class SSD(object):
    _defaults = {
        "model_path": 'model_data/ssd_weights.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "model_image_size": (300, 300, 3),
        "confidence": 0.5,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize the SSD
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.sess = K.get_session()
        self.generate()
        self.bbox_util = BBoxUtility(self.num_classes)

    #---------------------------------------------------#
    #   Get all the classes
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # Total number of classes (all categories plus background)
        self.num_classes = len(self.class_names) + 1

        # Load the model
        self.ssd_model = ssd.SSD300(self.model_image_size, self.num_classes)
        self.ssd_model.load_weights(self.model_path, by_name=True)

        self.ssd_model.summary()
        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign a different color to each class for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        crop_img, x_offset, y_offset = letterbox_image(
            image, (self.model_image_size[0], self.model_image_size[1]))
        photo = np.array(crop_img, dtype=np.float64)

        # Preprocess the image (normalization)
        photo = preprocess_input(
            np.reshape(
                photo,
                [1, self.model_image_size[0], self.model_image_size[1], 3]))
        preds = self.ssd_model.predict(photo)

        # Decode the raw predictions
        results = self.bbox_util.detection_out(
            preds, confidence_threshold=self.confidence)

        if len(results[0]) <= 0:
            return image

        # Keep only the boxes whose score is above the confidence threshold
        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin = results[0][:, 2]
        det_ymin = results[0][:, 3]
        det_xmax = results[0][:, 4]
        det_ymax = results[0][:, 5]
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        # Remove the gray letterbox bars
        boxes = ssd_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(image)[0] +
                     np.shape(image)[1]) // self.model_image_size[0]

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c) - 1]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the bounding box and label
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c) - 1])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c) - 1])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image

    def close_session(self):
        self.sess.close()
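A minimal driver for this class, assuming the default weight and class files are in place and using a hypothetical test image:

# Minimal usage sketch (hypothetical image path; weights assumed at the default model_path)
from PIL import Image

ssd = SSD()
img = Image.open("street.jpg")  # detect_image expects a PIL image here
result = ssd.detect_image(img)  # returns the image with boxes and labels drawn
result.show()
ssd.close_session()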
Code Example #11
class FRCNN(object):
    _defaults = {
        "model_path":
        '/home/gxt/study/faster-rcnn-keras-master/logs/Epoch100-Total_Loss0.8886-Val_Loss1.0822.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
        "iou": 0.3
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize Faster RCNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.sess = K.get_session()
        self.config = Config()
        self.generate()
        self.bbox_util = BBoxUtility(classifier_nms=self.iou,
                                     top_k=self.config.num_RPN_predict_pre)

    #---------------------------------------------------#
    #   Get all categories
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Load model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        #-------------------------------#
        #   Calculate the total number of classes
        #-------------------------------#
        self.num_classes = len(self.class_names) + 1

        #-------------------------------#
        #   Loading model and weight
        #-------------------------------#
        self.model_rpn, self.model_classifier = frcnn.get_predict_model(
            self.config, self.num_classes)
        self.model_rpn.load_weights(self.model_path, by_name=True)
        self.model_classifier.load_weights(self.model_path, by_name=True)

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Set different colors for the frame
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Used to calculate the size of shared feature layers
    #---------------------------------------------------#
    def get_img_output_length(self, width, height):
        def get_output_length(input_length):
            filter_sizes = [7, 3, 1, 1]
            padding = [3, 1, 0, 0]
            stride = 2
            for i in range(4):
                # input_length = (input_length - filter_size + stride) // stride
                input_length = (input_length + 2 * padding[i] -
                                filter_sizes[i]) // stride + 1
            return input_length

        return get_output_length(width), get_output_length(height)

    #---------------------------------------------------#
    #   Detect images
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        #---------------------------------------------------------#
        #   Resize the original image so its short edge is 600 pixels
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)
        photo = np.array(image, dtype=np.float64)

        #-----------------------------------------------------------#
        #   Image preprocessing and normalization.
        #-----------------------------------------------------------#
        photo = preprocess_input(np.expand_dims(photo, 0))
        rpn_pred = self.model_rpn.predict(photo)

        #-----------------------------------------------------------#
        #   Decode the predictions of the region proposal network
        #-----------------------------------------------------------#
        base_feature_width, base_feature_height = self.get_img_output_length(
            width, height)
        anchors = get_anchors([base_feature_width, base_feature_height], width,
                              height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   Pass the proposals and the shared feature layer into the
        #   classifier for prediction
        #-------------------------------------------------------------#
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier.predict(
            [base_layer, temp_ROIs])

        #-------------------------------------------------------------#
        #   Decode the proposals with the classifier outputs to obtain
        #   the final prediction boxes
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0]) == 0:
            return old_image

        results = np.array(results[0])
        boxes = results[:, :4]
        top_conf = results[:, 4]
        top_label_indices = results[:, 5]
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2,
            1)

        image = old_image
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the bounding box and label
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image

    def close_session(self):
        self.sess.close()
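Usage mirrors the SSD class above; a minimal sketch with a hypothetical image path:

# Minimal usage sketch (hypothetical image path; weights assumed at the configured model_path)
from PIL import Image

frcnn = FRCNN()
result = frcnn.detect_image(Image.open("street.jpg"))  # PIL image in, annotated PIL image out
result.show()
frcnn.close_session()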
Code Example #12
class Retinaface(object):
    #-------------------------------#
    #   Note: the backbone must
    #   match the pretrained weights,
    #   i.e. change model_path and
    #   backbone together
    #-------------------------------#
    _defaults = {
        "model_path": 'model_data/retinaface_mobilenet025.h5',
        "backbone": "mobilenet",
        "confidence": 0.5,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.bbox_util = BBoxUtility()
        self.generate()

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # Load the model
        self.retinaface = RetinaFace(self.cfg, self.backbone)
        self.retinaface.load_weights(self.model_path, by_name=True)

    #---------------------------------------------------#
    #   Detect faces in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        old_image = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        scale = [
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0]
        ]
        scale_for_landmarks = [
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0]
        ]

        # Preprocess the image (normalization)
        photo = np.expand_dims(preprocess_input(image), 0)
        anchors = Anchors(self.cfg,
                          image_size=(im_height, im_width)).get_anchors()

        preds = self.retinaface.predict(photo)
        # Decode the predictions and apply non-maximum suppression
        results = self.bbox_util.detection_out(
            preds, anchors, confidence_threshold=self.confidence)

        if len(results) <= 0:
            return old_image
        results = np.array(results)
        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        for b in results:
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                          2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                        0.5, (255, 255, 255))

            # facial landmarks (five points)
            cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)
        return old_image
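Unlike the PIL-based detectors above, this detect_image draws with OpenCV onto a NumPy array. A minimal sketch with a hypothetical path:

# Minimal usage sketch (hypothetical image path); the input is a NumPy/OpenCV array
import cv2

rf = Retinaface()
img = cv2.imread("face.jpg")  # uint8 BGR array
out = rf.detect_image(img)    # box, score, and five landmarks drawn onto a copy
cv2.imwrite("face_out.jpg", out)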
Code Example #13
File: ssd_predict.py Project: omar16100/SSD_tf_keras
    def __init__(self, weight_path=None):

        self.classes = config.CLASSES
        self.input_shape = config.IMAGE_SIZE
        self._load_weigth(weight_path=weight_path)
        self.bbox_util = BBoxUtility(len(self.classes))
Code Example #14
class SSD(object):
    _defaults = {
        "model_path":
        'logs/ep106-loss0.207-val_loss0.855.h5',  # change this path to your own model directory
        "classes_path": 'model_data/voc_classes.txt',
        "model_image_size": (300, 300, 3),
        "confidence": 0.4,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize the SSD
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.sess = get_session()
        self.generate()
        self.bbox_util = BBoxUtility(self.num_classes)

    #---------------------------------------------------#
    #   Get all the classes
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # Total number of classes (all categories plus background)
        self.num_classes = len(self.class_names) + 1

        # Load the model. If the saved file already contains the model
        # structure, load it directly; otherwise build the model first
        # and then load the weights.
        try:
            self.ssd_model = tf.keras.models.load_model(model_path,
                                                        compile=False)
        except Exception:
            self.ssd_model = ssd.SSD300(self.model_image_size,
                                        self.num_classes)
            self.ssd_model.load_weights(self.model_path, by_name=True)
        else:
            num_anchors = 8753
            assert self.ssd_model.layers[-1].output_shape[-1] == \
                num_anchors/len(self.ssd_model.output) * (self.num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'

        #self.ssd_model.summary()
        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign a different color to each class for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        crop_img, x_offset, y_offset = letterbox_image(image, (300, 300))
        photo = np.array(crop_img, dtype=np.float64)
        self.predict_all = []
        # Preprocess the image (normalization)
        photo = tf.keras.applications.imagenet_utils.preprocess_input(
            np.reshape(photo, [1, 300, 300, 3]))
        #self.ssd_model.summary()
        preds = self.ssd_model.predict(photo)

        # Decode the raw predictions
        results = self.bbox_util.detection_out(preds)
        if len(results[0]) <= 0:
            return image

        # Keep only the boxes whose score is above the confidence threshold
        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin = results[0][:, 2]
        det_ymin = results[0][:, 3]
        det_xmax = results[0][:, 4]
        det_ymax = results[0][:, 5]
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        boxes = ssd_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                  np.array([300, 300]), image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(image)[0] + np.shape(image)[1]) // 300

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c) - 1]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))
            self.result_ = '{} {} {} {} {} {}'.format(
                "".join(predicted_class.split(" ")), score, left, top, right,
                bottom)
            self.predict_all.append(self.result_)
            # Draw the bounding box and label
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=255)
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=255)
            draw.text(text_origin, str(label, 'UTF-8'), fill=0, font=font)
            del draw
        return image
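Besides drawing, this variant records one text line per detection in self.predict_all, which is convenient for dumping results for mAP evaluation. A hedged sketch with hypothetical paths:

# Minimal usage sketch; predict_all holds lines of "class score left top right bottom"
from PIL import Image

ssd = SSD()
result = ssd.detect_image(Image.open("test.jpg"))
for line in ssd.predict_all:
    print(line)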
Code Example #15
File: frcnn.py Project: yanjingke/faster-rcnn-keras
class FRCNN(object):
    _defaults = {
        "model_path": 'model_data/voc_weights.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize Faster RCNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.sess = K.get_session()
        self.config = Config()
        self.generate()
        self.bbox_util = BBoxUtility()

    #---------------------------------------------------#
    #   Get all the classes
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        # Total number of classes (all categories plus background)
        self.num_classes = len(self.class_names) + 1

        # Load the model. If the saved file already contains the model
        # structure, load it directly; otherwise build the model first
        # and then load the weights.
        self.model_rpn, self.model_classifier = frcnn.get_predict_model(
            self.config, self.num_classes)
        self.model_rpn.load_weights(self.model_path, by_name=True)
        self.model_classifier.load_weights(self.model_path,
                                           by_name=True,
                                           skip_mismatch=True)

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign a different color to each class for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    def get_img_output_length(self, width, height):
        def get_output_length(input_length):
            # input_length += 6
            filter_sizes = [7, 3, 1, 1]
            padding = [3, 1, 0, 0]
            stride = 2
            for i in range(4):
                # input_length = (input_length - filter_size + stride) // stride
                input_length = (input_length + 2 * padding[i] -
                                filter_sizes[i]) // stride + 1
            return input_length

        return get_output_length(width), get_output_length(height)

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)
        width, height = get_new_img_size(old_width, old_height)

        image = image.resize([width, height])
        photo = np.array(image, dtype=np.float64)

        # Preprocess the image (normalization)
        photo = preprocess_input(np.expand_dims(photo, 0))
        preds = self.model_rpn.predict(photo)
        # Decode the raw predictions
        anchors = get_anchors(self.get_img_output_length(width, height), width,
                              height)

        rpn_results = self.bbox_util.detection_out(preds,
                                                   anchors,
                                                   1,
                                                   confidence_threshold=0)
        R = rpn_results[0][:, 2:]

        R[:, 0] = np.array(np.round(R[:, 0] * width / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 1] = np.array(np.round(R[:, 1] * height / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 2] = np.array(np.round(R[:, 2] * width / self.config.rpn_stride),
                           dtype=np.int32)
        R[:, 3] = np.array(np.round(R[:, 3] * height / self.config.rpn_stride),
                           dtype=np.int32)

        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]
        base_layer = preds[2]

        delete_line = []
        for i, r in enumerate(R):
            if r[2] < 1 or r[3] < 1:
                delete_line.append(i)
        R = np.delete(R, delete_line, axis=0)

        bboxes = []
        probs = []
        labels = []
        for jk in range(R.shape[0] // self.config.num_rois + 1):
            ROIs = np.expand_dims(R[self.config.num_rois *
                                    jk:self.config.num_rois * (jk + 1), :],
                                  axis=0)

            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // self.config.num_rois:
                # pad R so the final batch has exactly num_rois ROIs
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], self.config.num_rois,
                                curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = self.model_classifier.predict([base_layer, ROIs])

            for ii in range(P_cls.shape[1]):
                if np.max(P_cls[0, ii, :-1]) < self.confidence:
                    continue

                label = np.argmax(P_cls[0, ii, :-1])

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :-1])

                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= self.config.classifier_regr_std[0]
                ty /= self.config.classifier_regr_std[1]
                tw /= self.config.classifier_regr_std[2]
                th /= self.config.classifier_regr_std[3]

                cx = x + w / 2.
                cy = y + h / 2.
                cx1 = tx * w + cx
                cy1 = ty * h + cy
                w1 = math.exp(tw) * w
                h1 = math.exp(th) * h

                x1 = cx1 - w1 / 2.
                y1 = cy1 - h1 / 2.

                x2 = cx1 + w1 / 2
                y2 = cy1 + h1 / 2

                x1 = int(round(x1))
                y1 = int(round(y1))
                x2 = int(round(x2))
                y2 = int(round(y2))

                bboxes.append([x1, y1, x2, y2])
                probs.append(np.max(P_cls[0, ii, :-1]))
                labels.append(label)

        if len(bboxes) == 0:
            return old_image

        # Keep the boxes whose score is above the confidence threshold
        labels = np.array(labels)
        probs = np.array(probs)
        boxes = np.array(bboxes, dtype=np.float32)
        boxes[:, 0] = boxes[:, 0] * self.config.rpn_stride / width
        boxes[:, 1] = boxes[:, 1] * self.config.rpn_stride / height
        boxes[:, 2] = boxes[:, 2] * self.config.rpn_stride / width
        boxes[:, 3] = boxes[:, 3] * self.config.rpn_stride / height
        results = np.array(
            self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                       np.array(boxes), self.num_classes - 1,
                                       0.4))

        top_label_indices = results[:, 0]
        top_conf = results[:, 1]
        boxes = results[:, 2:]
        boxes[:, 0] = boxes[:, 0] * old_width
        boxes[:, 1] = boxes[:, 1] * old_height
        boxes[:, 2] = boxes[:, 2] * old_width
        boxes[:, 3] = boxes[:, 3] * old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(old_image)[0] +
                     np.shape(old_image)[1]) // old_width * 2
        image = old_image
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the bounding box and label
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image

    def close_session(self):
        self.sess.close()
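The inner loop of detect_image decodes the classifier's box regression in center/size space: (tx, ty) shift the ROI center and (tw, th) scale its width and height exponentially. A self-contained sketch of the same arithmetic, detached from the model:

# Standalone sketch of the box-regression decode used above (same math, no model required)
import math

def decode_box(x, y, w, h, tx, ty, tw, th):
    cx, cy = x + w / 2.0, y + h / 2.0            # ROI center
    cx1, cy1 = tx * w + cx, ty * h + cy          # regressed center
    w1, h1 = math.exp(tw) * w, math.exp(th) * h  # regressed width and height
    return cx1 - w1 / 2.0, cy1 - h1 / 2.0, cx1 + w1 / 2.0, cy1 + h1 / 2.0

print(decode_box(10, 10, 20, 20, 0.1, 0.0, 0.0, 0.0))  # -> (12.0, 10.0, 32.0, 30.0)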
Code Example #16
class Retinaface(object):
    _defaults = {
        "model_path": 'model_data/retinaface_mobilenet025.h5',
        "backbone": 'mobilenet',
        "confidence": 0.6,
        "nms_iou": 0.4,
        #----------------------------------------------------------------------#
        #   Whether to limit the input image size.
        #   When enabled, the input image is resized to input_shape;
        #   otherwise the original image is used for prediction.
        #   With the mobilenet backbone the Keras code has a small bug:
        #   input widths/heights that are not multiples of 32 skew the
        #   detections; the resnet50 backbone does not have this problem.
        #   Adjust input_shape to your input images, keeping it a multiple
        #   of 32, e.g. [640, 640, 3].
        #----------------------------------------------------------------------#
        "input_shape": [1280, 1280, 3],
        "letterbox_image": False
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
        self.generate()
        self.anchors = Anchors(self.cfg,
                               image_size=(self.input_shape[0],
                                           self.input_shape[1])).get_anchors()

    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        self.retinaface = RetinaFace(self.cfg, self.backbone)
        self.retinaface.load_weights(self.model_path, by_name=True)
        print('{} model, anchors, and classes loaded.'.format(model_path))

    def detect_image(self, image):
        old_image = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        scale = [
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0]
        ]
        scale_for_landmarks = [
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0]
        ]

        if self.letterbox_image:
            image = letterbox_image(image,
                                    [self.input_shape[1], self.input_shape[0]])
        else:
            self.anchors = Anchors(self.cfg,
                                   image_size=(im_height,
                                               im_width)).get_anchors()

        photo = np.expand_dims(preprocess_input(image), 0)

        preds = self.retinaface.predict(photo)
        results = self.bbox_util.detection_out(
            preds, self.anchors, confidence_threshold=self.confidence)

        if len(results) <= 0:
            return old_image, []

        results = np.array(results)
        if self.letterbox_image:
            results = retinaface_correct_boxes(
                results, np.array([self.input_shape[0], self.input_shape[1]]),
                np.array([im_height, im_width]))

        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        ans = []
        for b in results:
            confidence = b[4].astype(float)
            each_ans = {'box': [0, 0, 0, 0], 'confidence': 0, 'landmarks': []}
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))

            each_ans['box'][0] = b[0]
            each_ans['box'][1] = b[1]
            each_ans['box'][2] = b[2]
            each_ans['box'][3] = b[3]
            each_ans['confidence'] = confidence

            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                          2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                        0.5, (255, 255, 255))

            print(b[0], b[1], b[2], b[3], b[4])
            cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)
            landmarks = [
                (b[5], b[6]),
                (b[7], b[8]),
                (b[9], b[10]),
                (b[11], b[12]),
                (b[13], b[14]),
            ]
            each_ans['landmarks'] = landmarks
            ans.append(each_ans)
        return old_image, ans
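The _defaults comment warns that, with the mobilenet backbone, input sizes that are not multiples of 32 skew the detections. A small helper for choosing a safe input_shape, my own sketch rather than part of the project:

# Hedged helper sketch: round a dimension up to the nearest multiple of 32
def round_up_to_32(x: int) -> int:
    return ((x + 31) // 32) * 32

print(round_up_to_32(1000))  # -> 1024
print(round_up_to_32(640))   # -> 640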
Code Example #17
File: ssd_predict.py Project: omar16100/SSD_tf_keras
class detector(object):

    def __init__(self, weight_path=None):

        self.classes = config.CLASSES
        self.input_shape = config.IMAGE_SIZE
        self._load_weigth(weight_path=weight_path)
        self.bbox_util = BBoxUtility(len(self.classes))
    
    def _load_weigth(self, weight_path=None):
        
        weight_path = os.path.expanduser(weight_path)
        assert weight_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        # load the weight file
        self.model = SSD300(config.IMAGE_SIZE, len(self.classes), anchors=config.ANCHORS_SIZE)
        self.model.load_weights(weight_path)

        # Set every class' color
        hsv_tuples = [(x / len(self.classes), 1., 1.) for x in range(len(self.classes))]
        self.colors = list(map(lambda x:colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(map(lambda x:(int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
    
    @tf.function
    def get_pred(self, photo):
        preds = self.model(photo, training=False)
        return preds
    
    # Detect an image
    def detect_image(self, image):

        image_shape = np.array(np.shape(image)[0:2])
        crop_image,x_offset,y_offset = letterbox_image(image, (self.input_shape[0], self.input_shape[1]))
        photo = np.array(crop_image, dtype=np.float64)

        # Normalization
        photo = preprocess_input(np.reshape(photo, [1, self.input_shape[0], self.input_shape[1], 3]))
        preds = self.get_pred(photo).numpy()

        # Decode
        results = self.bbox_util.detection_out(preds, confidence_threshold=config.CONFIDENCE)
        
        if len(results[0]) <= 0:
            return image
        
        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin, det_ymin, det_xmax, det_ymax = results[0][:,2], results[0][:, 3], results[0][:, 4], results[0][:, 5]
        top_indices = [i for i, conf in enumerate(det_conf) if conf >= config.CONFIDENCE]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], axis=-1)
        top_ymin = np.expand_dims(det_ymin[top_indices], axis=-1)
        top_xmax = np.expand_dims(det_xmax[top_indices], axis=-1)
        top_ymax = np.expand_dims(det_ymax[top_indices], axis=-1)

        boxes = ssd_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax, np.array((self.input_shape[0], self.input_shape[1])), image_shape)
        
        font = ImageFont.truetype(font='simhei.ttf',size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
        thickness = (np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0]
        
        for i, c in enumerate(top_label_indices):

            predicted_class = self.classes[int(c) - 1]
            score = top_conf[i]

            ymin, xmin, ymax, xmax = boxes[i]
            ymin = ymin - 5
            xmin = xmin - 5
            ymax = ymax + 5
            xmax = xmax + 5

            ymin = max(0, np.floor(ymin + 0.5).astype('int32'))
            xmin = max(0, np.floor(xmin + 0.5).astype('int32'))
            ymax = min(np.shape(image)[0], np.floor(ymax + 0.5).astype('int32'))
            xmax = min(np.shape(image)[1], np.floor(xmax + 0.5).astype('int32'))

            # draw Bounding box
            label = "{}:{:.2f}".format(predicted_class, score)
            print(label)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')

            if ymin - label_size[1] >= 0:
                text_origin = np.array((xmin, ymin - label_size[1]))
            else:
                text_origin = np.array((xmin, ymin + 1))
            for i in range(thickness):
                draw.rectangle(
                    [xmin + i, ymin + i, xmax - i, ymax - i],
                    outline=self.colors[int(c)-1])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[int(c)-1])
            draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
            del draw
        return image
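Inference here goes through the @tf.function-wrapped get_pred rather than model.predict, so the forward pass runs as a compiled TensorFlow graph. A minimal sketch with hypothetical weight and image paths:

# Minimal usage sketch (hypothetical paths; the config module must be importable)
from PIL import Image

det = detector(weight_path="logs/ssd_weights.h5")
result = det.detect_image(Image.open("test.jpg"))  # annotated PIL image
result.show()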
Code Example #18
class ssdT(object):
    def __init__(self, model, classes, input_shape):
        self.classes = classes
        self.num_class = len(classes) + 1
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_class)

    def image_test(self, path, inputs=None, oimg=None):
        bbox_util = BBoxUtility(2)
        if path is not None:
            img = cv2.imread(path)
            images = img.copy()

            img = cv2.resize(img, (self.input_shape[0], self.input_shape[1]))

            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            inputs = image.img_to_array(img)

            inputs = preprocess_input(np.array([inputs]))
        else:
            images = oimg.copy()
        preds = self.model.predict(inputs, batch_size=1, verbose=1)
        results = bbox_util.detection_out(preds)
        print(results)
        if len(results) > 0:
            final = []
            for each in results[0]:

                # skip detections below the hard-coded 0.4 confidence threshold
                if each[1] < 0.4: continue
                xmin = int(each[2] * np.shape(images)[1])
                ymin = int(each[3] * np.shape(images)[0])
                xmax = int(each[4] * np.shape(images)[1])
                ymax = int(each[5] * np.shape(images)[0])
                final.append([xmin, ymin, xmax, ymax, each[1]])
            return final

        return None

    def precision(self, test_path):
        data = testdata_load(test_path)
        gnum = 0
        rnum = 0
        for eachline in data:
            res = self.image_test(eachline[0])
            gtlist = []
            temp = []
            for i in range(len(eachline)):
                if i % 5 == 0: continue
                if i % 5 == 1 and i // 5 > 0:
                    gtlist.append(temp)
                    temp = []
                temp.append(int(eachline[i]))
            gtlist.append(temp)
            print(res)
            tnum, pgnum = self.cal_iou(res, gtlist)
            gnum += pgnum
            rnum += tnum
            print("precision:", float(rnum / gnum))

    def cal_iou(self, res, gt):
        if res is None:
            return 0, len(gt)

        tnum = 0
        for each in gt:
            gxmin = each[0]
            gymin = each[1]
            gxmax = each[2]
            gymax = each[3]
            for one in res:
                # clamp the intersection to zero so disjoint boxes do not
                # contribute a spurious positive overlap
                iw = max(0.0, np.min([gxmax, one[2]]) - np.max([gxmin, one[0]]))
                ih = max(0.0, np.min([gymax, one[3]]) - np.max([gymin, one[1]]))
                overlap = iw * ih
                ares = (one[2] - one[0]) * (one[3] - one[1])
                agt = (gxmax - gxmin) * (gymax - gymin)
                wholea = agt + ares - overlap
                ratio = overlap / wholea
                if ratio > 0.7: tnum += 1

        return tnum, len(gt)

    def run(self,
            model_path,
            video_path=None,
            openposeJson=None,
            out_path=None,
            start_frame=0,
            conf_threshold=0.5,
            model2=None,
            model3=None):

        openpose_part = [
            "Nose", "Neck", "RShoulder", "RElbow", "RWrist", "LShoulder",
            "LElbow", "LWrist", "MidHip", "RHip", "RKnee", "RAnkle", "LHip",
            "LKnee", "LAnkle", "REye", "LEye", "REar", "LEar", "LBigToe",
            "LSmallToe", "LHeel", "RBigToe", "RSmallToe", "RHeel", "Background"
        ]

        fingertips = Fingertips(weights='model_data/finmodel.h5')
        if video_path is None: return None
        video = cv2.VideoCapture(video_path)

        timeline = []
        labelline = []
        handStatus = []
        if out_path:
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(out_path,
                                  fourcc,
                                  10.0,
                                  (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
                                   int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))),
                                  isColor=True)

        vggmodel = load_model(model_path)
        if start_frame > 0:
            video.set(cv2.CAP_PROP_POS_MSEC, start_frame)

        accum_time = 0
        curr_fps = 0
        prev_time = timer()

        feature_params = dict(maxCorners=100,
                              qualityLevel=0.3,
                              minDistance=7,
                              blockSize=7)

        lk_params = dict(winSize=(15, 15),
                         maxLevel=2,
                         criteria=(cv2.TERM_CRITERIA_EPS
                                   | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

        color = np.random.randint(0, 255, (100, 3))
        num_frame = 0
        video_info = {}
        frame_info = []
        lastTime = 0
        while True:
            info, vimage = video.read()
            milliseconds = video.get(cv2.CAP_PROP_POS_MSEC)
            seconds = milliseconds / 1000

            video_info[str(seconds)] = []
            if not info:
                plt.figure(figsize=(100, 20))
                for i in range(len(labelline)):
                    if i == 0 or i == (len(labelline) - 1): continue
                    if labelline[i] != labelline[
                            i - 1] and labelline[i] != labelline[i + 1]:
                        labelline[i] = labelline[i - 1]

                for i in range(len(handStatus)):
                    if i == 0 or i == (len(handStatus) - 1): continue
                    if handStatus[i] != handStatus[
                            i - 1] and handStatus[i] != handStatus[i + 1]:
                        handStatus[i] = handStatus[i - 1]

                #newlabelline = []

                for i in range(len(labelline)):
                    temp = []
                    #if i - 3 >=0: temp.append(handStatus[i-3])
                    if i - 2 >= 0: temp.append(labelline[i - 2])
                    if i - 1 >= 0: temp.append(labelline[i - 1])
                    temp.append(labelline[i])
                    if i + 1 < len(labelline): temp.append(labelline[i + 1])
                    if i + 2 < len(labelline): temp.append(labelline[i + 2])
                    #if i + 3 < len(handStatus): temp.append(handStatus[i+3])
                    labelline[i] = Counter(temp).most_common(1)[0][0]

                for i in range(len(handStatus)):
                    temp = []
                    #if i - 3 >=0: temp.append(handStatus[i-3])
                    if i - 2 >= 0: temp.append(handStatus[i - 2])
                    if i - 1 >= 0: temp.append(handStatus[i - 1])
                    temp.append(handStatus[i])
                    if i + 1 < len(handStatus): temp.append(handStatus[i + 1])
                    if i + 2 < len(handStatus): temp.append(handStatus[i + 2])
                    #if i + 3 < len(handStatus): temp.append(handStatus[i+3])
                    handStatus[i] = Counter(temp).most_common(1)[0][0]

                #np.save("labelline.npy",labelline)
                plt.plot(timeline, labelline, label='hand exist', color='r')
                plt.plot(timeline, handStatus, label="hand status", color='b')
                finaltime = int(float(timeline[-1])) + 2
                plt.hlines("hand exist",
                           0,
                           finaltime,
                           color="green",
                           linestyles="dashed")
                plt.hlines("hand not exist",
                           0,
                           finaltime,
                           color="blue",
                           linestyles="dashed")
                plt.hlines("touch exist",
                           0,
                           finaltime,
                           color="red",
                           linestyles="dashed")
                plt.hlines("no touch exist",
                           0,
                           finaltime,
                           color="green",
                           linestyles="dashed")
                plt.text(finaltime,
                         "hand exist",
                         "hand detected at each time",
                         fontsize=10)
                plt.text(finaltime,
                         "hand not exist",
                         "hand not detected at each time",
                         fontsize=10)
                plt.text(finaltime,
                         "touch exist",
                         "hand detected and touch valid at each time",
                         fontsize=10)
                plt.text(
                    finaltime,
                    "no touch exist",
                    "no hand or no touch valid though hand detected at each time",
                    fontsize=10)
                plt.xlabel("time (ms) per frame", fontsize=20)
                plt.ylabel(
                    "hand-related labels (blue: touch validation, red: hand detection)",
                    fontsize=20)
                plt.legend()
                plt.savefig(video_path[:-4] + ".jpg")
                video.release()
                if out_path: out.release()
                cv2.destroyAllWindows()
                with open(video_path[:-4] + ".json", "a") as outfile:
                    json.dump(video_info, outfile, ensure_ascii=False)
                    outfile.write('\n')
                print("Over")
                return
            timeline.append(round(milliseconds, 2))
            input_size = (self.input_shape[0], self.input_shape[1])
            resized = cv2.resize(vimage, input_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

            inputs = image.img_to_array(rgb)
            input_image = preprocess_input(np.array([inputs]))

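            # res mirrors the BBoxUtility.detection_out output format: one list
            # per image, each detection being
            # [label, confidence, xmin, ymin, xmax, ymax].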
            res = [[]]
            #if type(res[0]) != list: res[0] = res[0].tolist()
            if openposeJson:
                #res = [[]]
                video_file_name = os.listdir(openposeJson)
                body_info = json.load(
                    open(openposeJson + video_file_name[num_frame],
                         "r"))["people"]
                for h in range(len(body_info)):
                    for x in range(len(body_info[h]["pose_keypoints_2d"])):
                        if int(body_info[h]["pose_keypoints_2d"][4]) != 0:
                            if int(body_info[h]["pose_keypoints_2d"][25]) != 0:
                                distance = int(
                                    (body_info[h]["pose_keypoints_2d"][25] -
                                     body_info[h]["pose_keypoints_2d"][4]) / 2)
                            else:
                                distance = int(
                                    (np.shape(vimage)[0] -
                                     body_info[h]["pose_keypoints_2d"][4]) / 2)
                        else:
                            distance = 100

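                        # x / 3 == 4 or x / 3 == 7 only holds at x == 12 or
                        # x == 21, i.e. the x-coordinates of keypoints 4 and 7
                        # (the right and left wrists in BODY_25).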
                        if x / 3 == 4 or x / 3 == 7:
                            tres = []
                            weightsum = 0
                            xpos = int(body_info[h]["pose_keypoints_2d"][x])
                            ypos = int(body_info[h]["pose_keypoints_2d"][x + 1])
                            elxpos = int(body_info[h]["pose_keypoints_2d"][x - 3])
                            elypos = int(body_info[h]["pose_keypoints_2d"][x - 2])
                            if xpos == 0 and ypos == 0: continue

                            if elxpos >= xpos:
                                xmin = (
                                    xpos -
                                    distance) if (xpos - distance) > 0 else 0
                                xmax = (xpos + int(distance / 2)) if (
                                    xpos + int(distance / 2)) < np.shape(
                                        vimage)[1] else np.shape(vimage)[1]
                            else:
                                xmin = (xpos - int(distance / 2)) if (
                                    xpos - int(distance / 2)) > 0 else 0
                                xmax = (
                                    xpos +
                                    distance) if (xpos + distance) < np.shape(
                                        vimage)[1] else np.shape(vimage)[1]

                            if elypos >= ypos:
                                ymin = (
                                    ypos -
                                    distance) if (ypos - distance) > 0 else 0
                                ymax = (ypos + int(distance / 2)) if (
                                    ypos + int(distance / 2)) < np.shape(
                                        vimage)[0] else np.shape(vimage)[0]

                            else:
                                ymin = (ypos - int(distance / 2)) if (
                                    ypos - int(distance / 2)) > 0 else 0
                                ymax = (
                                    ypos +
                                    distance) if (ypos + distance) < np.shape(
                                        vimage)[0] else np.shape(vimage)[0]
                            print("distance is", distance, "box is",
                                  [xmin, ymin, xmax, ymax])
                            #cv2.rectangle(vimage,(xmin,ymin),(xmax,ymax),(255,0,0),1)
                            crop_image = vimage[ymin:ymax, xmin:xmax]
                            rgb_crop = cv2.cvtColor(
                                cv2.resize(crop_image, input_size),
                                cv2.COLOR_BGR2RGB)
                            input_crop = preprocess_input(
                                np.array([image.img_to_array(rgb_crop)]))
                            if model2 == None or model3 == None:
                                if len(res) > 0:
                                    res[0].append(
                                        self.bbox_util.detection_out(
                                            self.model.predict(input_crop))[0]
                                        [0])
                            else:
                                if len(
                                        combine(self, model2, model3, None,
                                                input_crop, crop_image)) > 0:
                                    #indexpro = np.array(combine(self,model2,model3, None, input_crop,crop_image))[:,1]
                                    #maxindex = np.where(indexpro == np.max(indexpro))[0][0]
                                    #each = combine(self,model2,model3, None, input_crop,crop_image)[maxindex]
                                    for each in combine(
                                            self, model2, model3, None,
                                            input_crop, crop_image):
                                        #print(each)
                                        if each[1] < conf_threshold: continue
                                        #weightsum += each[1]
                                        if each[2] <= 1 and each[3] <= 1 and each[4] <= 1 and each[5] <= 1:
                                            each[2] = int(each[2] * np.shape(crop_image)[1]) + xmin
                                            each[3] = int(each[3] * np.shape(crop_image)[0]) + ymin
                                            each[4] = int(each[4] * np.shape(crop_image)[1]) + xmin
                                            each[5] = int(each[5] * np.shape(crop_image)[0]) + ymin
                                        else:
                                            each[2] = int(each[2]) + xmin
                                            each[3] = int(each[3]) + ymin
                                            each[4] = int(each[4]) + xmin
                                            each[5] = int(each[5]) + ymin

                                        res[0].append(each)
                                        print("res is", res)

                                        #tres.append(each)
                                    """    
                                    finalbox = [1,1,0,0,0,0]
                                    for each in tres:
                                        finalbox[2] = int(finalbox[2] + each[2] * each[1]/weightsum)
                                        finalbox[3] = int(finalbox[3] + each[3] * each[1]/weightsum)
                                        finalbox[4] = int(finalbox[4] + each[4] * each[1]/weightsum)
                                        finalbox[5] = int(finalbox[5] + each[5] * each[1]/weightsum)
                                    """

                            #print(xpos, ypos)
            if len(res[0]) == 0:
                if model2 == None or model3 == None:

                    pred = self.model.predict(input_image)

                    res = self.bbox_util.detection_out(pred)
                else:
                    #ssd ensemble learning
                    res = [
                        combine(self, model2, model3, None, input_image,
                                vimage)
                    ]

            if len(res) > 0 and len(res[0]) > 0:
                #labelline.append("hand exist")

                #deal with each frame
                temp = {}
                temp["hand"] = "exist"
                temp["hand status"] = []
                temp["body part"] = []
                temp["hand position"] = []
                for each in res[0]:

                    if each[1] < conf_threshold: continue
                    if each[2] <= 1 and each[3] <= 1 and each[4] <= 1 and each[5] <= 1:
                        xmin = int(each[2] * np.shape(vimage)[1])
                        ymin = int(each[3] * np.shape(vimage)[0])
                        xmax = int(each[4] * np.shape(vimage)[1])
                        ymax = int(each[5] * np.shape(vimage)[0])
                    else:
                        xmin = int(each[2])
                        ymin = int(each[3])
                        xmax = int(each[4])
                        ymax = int(each[5])

                    test_img = vimage[ymin:ymax, xmin:xmax]

                    height, width, _ = test_img.shape

                    if height < 5 or width < 5:
                        finum = 0
                        continue

                    else:

                        temp["hand position"].append([xmin, ymin, xmax, ymax])
                        # gesture classification and fingertips regression
                        prob, pos = fingertips.classify(image=test_img)
                        pos = np.mean(pos, 0)

                        # post-processing
                        prob = np.asarray([(p >= 0.5) * 1.0 for p in prob])
                        for i in range(0, len(pos), 2):
                            pos[i] = pos[i] * width + xmin
                            pos[i + 1] = pos[i + 1] * height + ymin

                        # drawing
                        index = 0
                        color = [(15, 15, 240), (15, 240, 155), (240, 155, 15),
                                 (240, 15, 155), (240, 15, 240)]
                        #image = cv2.rectangle(image, (tl[0], tl[1]), (br[0], br[1]), (235, 26, 158), 2)
                        finum = 0
                        for c, p in enumerate(prob):
                            if p > 0.5:
                                finum += 1
                                vimage = cv2.circle(
                                    vimage,
                                    (int(pos[index]), int(pos[index + 1])),
                                    radius=12,
                                    color=color[c],
                                    thickness=-2)
                            index = index + 2

                    #edge post process
                    """
                    edges = edge(None,test_img)
                    edges = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
                    test_img = cv2.subtract(test_img, edges)
                    
                    
                    test_imgr90 = cv2.flip(cv2.transpose(test_img), 1)
                    test_imgl90 = cv2.flip(cv2.transpose(test_img), 0)
                    #test_imgr90 = cv2.flip(cv2.transpose(test_img), 1)
                    
                    test_imgr90 = cv2.resize(test_imgr90,(224,224))
                    test_imgl90 = cv2.resize(test_imgl90,(224,224))
                    
                    test_imgr90 = preprocess_input(test_imgr90)
                    test_imgl90 = preprocess_input(test_imgl90)
                    
                    
                    
                    
                    test_img = cv2.resize(test_img, (224,224))
                    test_img = preprocess_input(test_img)
                    #vgg submodel detection
                    ans1 = vggmodel.predict(test_img.reshape(1,224,224,3))
                    #ans2 = vggmodel.predict(test_imgr90.reshape(1,224,224,3))
                    #ans3 = vggmodel.predict(test_imgl90.reshape(1,224,224,3))
                    pos = [ans1[0][0]]
                    """

                    body_in = []
                    #for result in pos:
                    #    if result > 0.85: flag += 1
                    #print(flag)
                    cv2.rectangle(vimage, (xmin, ymin), (xmax, ymax),
                                  color=(255, 0, 0),
                                  thickness=2)
                    cv2.putText(vimage, "hand", (xmin, ymin - 3),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 255, 0), 1)
                    """
                    if flag == 0:
                        for result in pos:
                            result = result + 0.1 * (finum - 1)
                            if result > 0.7 and finum >= 2: flag += 1
                            if finum >= 3: flag += 1
                    """
                    flag = 0
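                    # Touch validation: look for a box from the previous frame
                    # (timestamp == lastTime) whose overlap with the current box
                    # exceeds 60% of the current box area; such a temporal match
                    # plus at least one raised fingertip counts as a touch.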
                    if flag == 0:
                        vect1 = [xmin, ymin, xmax, ymax]
                        pastTrue = 0
                        #print(frame_info)
                        for framebefore in range(len(frame_info)):
                            if frame_info[len(frame_info) - 1 - framebefore][0] == lastTime:
                                t = frame_info[len(frame_info) - 1 - framebefore]
                                vect2 = t[3:]
                                vwidth = np.min([xmax, vect2[2]]) - np.max([xmin, vect2[0]]) + 1
                                vheight = np.min([ymax, vect2[3]]) - np.max([ymin, vect2[1]]) + 1

                                if vwidth < 0 or vheight < 0: continue
                                nsq = (ymax - ymin + 1) * (xmax - xmin + 1)
                                print("overlap fration:",
                                      vwidth * vheight / nsq)
                                if vwidth * vheight / nsq > 0.6:
                                    pastTrue += 1

                            elif frame_info[len(frame_info) - 1 - framebefore][0] < lastTime:
                                break

                        if pastTrue > 0 and finum >= 1:
                            flag += 1

                    #flag = 1

                    if openposeJson:

                        video_file_name = os.listdir(openposeJson)
                        body_info = json.load(
                            open(openposeJson + video_file_name[num_frame],
                                 "r"))["people"]
                        for h in range(len(body_info)):
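                            # Build a coarse bounding box for each body part
                            # from pairs of BODY_25 keypoints; a hand box that
                            # overlaps one of these by more than 20% of its own
                            # area is tagged with that part name below.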
                            partsplit = {
                                "main body": [],
                                "left hand above": [],
                                "left hand below": [],
                                "right hand above": [],
                                "right hand below": [],
                                "left leg above": [],
                                "left leg below": [],
                                "right leg above": [],
                                "right leg below": [],
                                "head": []
                            }
                            detail = body_info[h]["pose_keypoints_2d"]
                            if detail[51] != 0 and detail[54] != 0 and detail[4] != 0:
                                xminpos = int(
                                    np.minimum(detail[54], detail[51])) - 5
                                yminpos = int(detail[52]) - 50
                                xmaxpos = int(
                                    np.maximum(detail[51], detail[54])) + 5
                                ymaxpos = int(detail[4])
                                partsplit["head"] = [
                                    xminpos, yminpos, xmaxpos, ymaxpos
                                ]

                            if detail[6] != 0 and detail[15] != 0:
                                xminpos = int(np.minimum(
                                    detail[15], detail[6]))
                                yminpos = int(np.minimum(
                                    detail[7], detail[16]))
                                xmaxpos = int(np.maximum(
                                    detail[6], detail[15]))
                                if detail[24] != 0:
                                    ymaxpos = int(detail[25])
                                else:
                                    ymaxpos = np.shape(vimage)[0]
                                partsplit["main body"] = [
                                    xminpos, yminpos, xmaxpos, ymaxpos
                                ]

                                if detail[9] != 0:
                                    xminpos = int(
                                        np.minimum(detail[6], detail[9]))
                                    yminpos = int(
                                        np.minimum(detail[7], detail[10]))
                                    xmaxpos = int(
                                        np.maximum(detail[6], detail[9]))
                                    ymaxpos = int(
                                        np.maximum(detail[7], detail[10]))
                                    partsplit["right hand above"] = [
                                        xminpos, yminpos, xmaxpos, ymaxpos
                                    ]

                                    if detail[12] != 0:
                                        xminpos = int(
                                            np.minimum(detail[12], detail[9]))
                                        yminpos = int(
                                            np.minimum(detail[13], detail[10]))
                                        xmaxpos = int(
                                            np.maximum(detail[12], detail[9]))
                                        ymaxpos = int(
                                            np.maximum(detail[13], detail[10]))
                                        partsplit["right hand below"] = [
                                            xminpos, yminpos, xmaxpos, ymaxpos
                                        ]

                                if detail[18] != 0:
                                    xminpos = int(
                                        np.minimum(detail[15], detail[18]))
                                    yminpos = int(
                                        np.minimum(detail[16], detail[19]))
                                    xmaxpos = int(
                                        np.maximum(detail[15], detail[18]))
                                    ymaxpos = int(
                                        np.maximum(detail[16], detail[19]))
                                    partsplit["left hand above"] = [
                                        xminpos, yminpos, xmaxpos, ymaxpos
                                    ]

                                    if detail[21] != 0:
                                        xminpos = int(
                                            np.minimum(detail[21], detail[18]))
                                        yminpos = int(
                                            np.minimum(detail[22], detail[19]))
                                        xmaxpos = int(
                                            np.maximum(detail[21], detail[18]))
                                        ymaxpos = int(
                                            np.maximum(detail[22], detail[19]))
                                        partsplit["left hand below"] = [
                                            xminpos, yminpos, xmaxpos, ymaxpos
                                        ]

                            if detail[27] != 0 and detail[30] != 0:
                                xminpos = int(
                                    np.minimum(detail[24], detail[30]))
                                yminpos = int(
                                    np.minimum(detail[28], detail[31]))
                                xmaxpos = int(
                                    np.maximum(detail[24], detail[30]))
                                ymaxpos = int(
                                    np.maximum(detail[28], detail[31]))
                                partsplit["right leg above"] = [
                                    xminpos, yminpos, xmaxpos, ymaxpos
                                ]

                                if detail[33] != 0:
                                    xminpos = int(
                                        np.minimum(detail[30], detail[33]))
                                    yminpos = int(
                                        np.minimum(detail[31], detail[34]))
                                    xmaxpos = int(
                                        np.maximum(detail[30], detail[33]))
                                    ymaxpos = int(
                                        np.maximum(detail[31], detail[34]))
                                    partsplit["right leg below"] = [
                                        xminpos, yminpos, xmaxpos, ymaxpos
                                    ]

                            if detail[36] != 0 and detail[39] != 0:
                                xminpos = int(
                                    np.minimum(detail[24], detail[39]))
                                yminpos = int(
                                    np.minimum(detail[37], detail[40]))
                                xmaxpos = int(
                                    np.maximum(detail[24], detail[39]))
                                ymaxpos = int(
                                    np.maximum(detail[37], detail[40]))
                                partsplit["left leg above"] = [
                                    xminpos, yminpos, xmaxpos, ymaxpos
                                ]

                                if detail[42] != 0:
                                    xminpos = int(
                                        np.minimum(detail[39], detail[42]))
                                    yminpos = int(
                                        np.minimum(detail[40], detail[43]))
                                    xmaxpos = int(
                                        np.maximum(detail[39], detail[42]))
                                    ymaxpos = int(
                                        np.maximum(detail[40], detail[43]))
                                    partsplit["left leg below"] = [
                                        xminpos, yminpos, xmaxpos, ymaxpos
                                    ]

                            for x in range(len(body_info[h]["pose_keypoints_2d"])):

                                if x % 3 == 0 and x / 3 != 4 and x / 3 != 7:
                                    xpos = int(body_info[h]["pose_keypoints_2d"][x])
                                    ypos = int(body_info[h]["pose_keypoints_2d"][x + 1])
                                    #print(xpos, ypos)
                                    if xmin <= xpos <= xmax and ymin <= ypos <= ymax:
                                        body_in.append(openpose_part[x // 3])

                            if True:
                                for keyname in partsplit.keys():
                                    if partsplit[keyname] != []:
                                        btemp = partsplit[keyname]
                                        #print(btemp)
                                        owidth = np.minimum(btemp[2], xmax) - np.maximum(xmin, btemp[0]) + 1
                                        oheight = np.minimum(btemp[3], ymax) - np.maximum(ymin, btemp[1]) + 1
                                        wholehand = (ymax - ymin + 1) * (xmax - xmin + 1)
                                        cv2.rectangle(vimage, (btemp[0], btemp[1]),
                                                      (btemp[2], btemp[3]), (0, 0, 255), 1)
                                        cv2.putText(vimage, keyname,
                                                    (int((btemp[2] + btemp[0]) / 2) - 1, btemp[1] - 3),
                                                    cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 255, 255), 1)
                                        #if keyname == "main body":
                                        #    cv2.putText(vimage,keyname,(btemp[0], btemp[3] + 3), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,255,255), 1)
                                        #    print("main body is", btemp,"hand is",[xmin,ymin,xmax,ymax])
                                        if owidth < 0 or oheight < 0: continue
                                        oarea = owidth * oheight
                                        print("keyname is", keyname)
                                        print("flag is", flag)
                                        print("btemp is", btemp, "hand is",
                                              [xmin, ymin, xmax, ymax])
                                        print("fraction is:",
                                              oarea / wholehand)
                                        if oarea / wholehand > 0.2:
                                            body_in.append(keyname)
                                            #print("body",btemp,"hand",[xmin,ymin,xmax,ymax])

                            #print((res))
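                            # Pairwise IoU between detected hand boxes; an IoU
                            # above 0.6 is interpreted as one hand touching the
                            # other.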
                            for i in range(len(res[0])):
                                if res[0][i][1] < conf_threshold: continue
                                for j in range(i + 1, len(res[0])):
                                    if res[0][j][1] < conf_threshold: continue
                                    temp1 = res[0][i]
                                    temp2 = res[0][j]
                                    width = (np.min([int(temp1[4]), int(temp2[4])]) -
                                             np.max([int(temp1[2]), int(temp2[2])]) + 1)
                                    height = (np.min([int(temp1[5]), int(temp2[5])]) -
                                              np.max([int(temp1[3]), int(temp2[3])]) + 1)
                                    if width < 0 or height < 0: continue
                                    area1 = (temp1[5] - temp1[3] +
                                             1) * (temp1[4] - temp1[2] + 1)
                                    area2 = (temp2[5] - temp2[3] +
                                             1) * (temp2[4] - temp2[2] + 1)
                                    overlap = width * height
                                    ratio = overlap / (area1 + area2 - overlap)
                                    if ratio > 0.6: body_in.append("hand")

                    print("body part is", body_in)
                    frame_info.append(
                        [milliseconds, flag, finum, xmin, ymin, xmax, ymax])
                    if flag > 0 and len(body_in) != 0:
                        cv2.putText(vimage, "touch", (xmax, ymin - 3),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                                    (0, 0, 255), 1)
                        temp["hand status"].append("touch")

                    else:
                        cv2.putText(vimage, "non - touch", (xmax, ymin - 3),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                                    (0, 0, 255), 1)
                        temp["hand status"].append("non - touch")

                    if temp["hand status"][-1] == "touch":
                        temp["body part"].append(body_in)
                    else:
                        temp["body part"].append([])

                if len(temp["hand status"]) == 0:
                    video_info[str(seconds)].append("hand not exist")
                    labelline.append("hand not exist")
                else:
                    video_info[str(seconds)].append(temp)
                    labelline.append("hand exist")

                if "touch" in temp["hand status"]:

                    handStatus.append("touch exist")
                else:

                    handStatus.append("no touch exist")

            else:
                video_info[str(seconds)].append("hand not exist")
                labelline.append("hand not exist")
                handStatus.append("no touch exist")

            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time += exec_time
            curr_fps = int(1 / exec_time)

            num_frame += 1
            lastTime = milliseconds
            #print(curr_time, res[0])
            fps = "FPS:" + str(curr_fps)
            curr_fps = 0
            cv2.rectangle(vimage, (0, 0), (50, 17), (255, 255, 255), -1)
            cv2.putText(vimage, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                        (0, 0, 0), 1)
            cv2.imshow("SSD result", vimage)
            out.write(vimage)
            cv2.waitKey(1)
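
A side note on the two label-denoising passes near the top of this example: the core idea is a sliding-window majority vote over per-frame labels. Here is a minimal, self-contained sketch of that idea; the name smooth_labels is illustrative rather than taken from the original code, and it votes over the original labels instead of updating them in place.

from collections import Counter

def smooth_labels(labels, radius=2):
    # Replace each label with the most common value in a window of up to
    # (2 * radius + 1) neighboring frames.
    out = list(labels)
    for i in range(len(labels)):
        window = labels[max(0, i - radius):i + radius + 1]
        out[i] = Counter(window).most_common(1)[0][0]
    return out

print(smooth_labels(["touch exist", "no touch exist", "touch exist",
                     "touch exist", "no touch exist"]))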
Code example #19
0
 def __init__(self, model, classes, input_shape):
     self.classes = classes
     self.num_class = len(classes) + 1
     self.model = model
     self.input_shape = input_shape
     self.bbox_util = BBoxUtility(self.num_class)
Code example #20
0
model_save_folder = "./logs/"
h5_file_name = "ssd_vgg_512"
########################################################

gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

if not os.path.exists(model_save_folder):
    os.mkdir(model_save_folder)

if __name__ == "__main__":

    priors = get_anchors_512((img_height, img_width))

    bbox_util = BBoxUtility(len(config.CLASSES), priors)

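    # Note: `anchors`, `variances`, `img_height`, and `img_width` are not
    # defined in this excerpt; they presumably come from the configuration
    # section elided above.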
    model = SSD512((img_height, img_width, img_channels),
                   n_classes=len(config.CLASSES),
                   anchors=anchors,
                   variances=variances)

    checkpoint = ModelCheckpoint(model_save_folder +
                                 "ssd_vgg_epoch{epoch:02d}.h5",
                                 monitor='val_loss',
                                 save_weights_only=True,
                                 save_best_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  patience=3,
                                  verbose=1)
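
For context, here is a minimal, self-contained illustration of how ModelCheckpoint and ReduceLROnPlateau callbacks like the ones above are typically wired into training; the toy model and random data are placeholders, not part of the original SSD project.

import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer='adam', loss='mse')

x, y = np.random.rand(64, 4), np.random.rand(64, 1)
callbacks = [
    ModelCheckpoint('toy_epoch{epoch:02d}.h5', monitor='val_loss',
                    save_weights_only=True, save_best_only=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1),
]
model.fit(x, y, validation_split=0.2, epochs=2, callbacks=callbacks)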
Code example #21
0
File: train.py Project: hablee/efficientdet-tf2
    #   A dimension-mismatch warning when training on your own dataset is normal:
    #   the predictions differ, so the dimensions naturally do not match.
    #------------------------------------------------------#
    model_path = "model_data/efficientdet-d0-voc.h5"

    #------------------------------------------------------#
    #   Build the EfficientDet model
    #------------------------------------------------------#
    model = Efficientdet(phi, num_classes=num_classes)
    model.load_weights(model_path, by_name=True, skip_mismatch=True)

    #-------------------------------#
    #   Get the anchor (prior) boxes
    #-------------------------------#
    priors = get_anchors(image_sizes[phi])
    bbox_util = BBoxUtility(num_classes, priors)

    #----------------------------------------------------------------------#
    #   The validation split is done here in train.py.
    #   It is normal for 2007_test.txt and 2007_val.txt to be empty; training does not use them.
    #   With the current split, the ratio of validation to training data is 1:9.
    #----------------------------------------------------------------------#
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val
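
The seeded shuffle above makes the 1:9 split reproducible across runs. A self-contained illustration with dummy annotation lines:

import numpy as np

lines = ["img_%02d.jpg 0,0,10,10,0" % i for i in range(10)]
np.random.seed(10101)      # fixed seed -> the same shuffle order every run
np.random.shuffle(lines)
np.random.seed(None)       # restore nondeterminism for later code

val_split = 0.1
num_val = int(len(lines) * val_split)
num_train = len(lines) - num_val
print(num_train, num_val)  # 9 1
print(lines[num_train:])   # the validation slice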
Code example #22
0
 def __init__(self, **kwargs):
     self.__dict__.update(self._defaults)
     self.class_names = self._get_class()
     self.sess = tf.compat.v1.keras.backend.get_session()
     self.generate()
     self.bbox_util = BBoxUtility(self.num_classes)
Code example #23
0
File: train.py Project: hangxzzz/faster-rcnn-tf2
    #   The video tutorial used 600,600,3; repeated training and testing showed 800,800,3 works better.
    #-----------------------------------------------------#
    input_shape = [800, 800, 3]

    model_rpn, model_all = get_model(config, NUM_CLASSES)
    #------------------------------------------------------#
    #   See the README for the weight file (download from Baidu Netdisk).
    #   A dimension-mismatch warning when training on your own dataset is normal:
    #   the predictions differ, so the dimensions naturally do not match.
    #------------------------------------------------------#
    base_net_weights = "model_data/voc_weights.h5"
    model_rpn.load_weights(base_net_weights, by_name=True)
    model_all.load_weights(base_net_weights, by_name=True)

    bbox_util = BBoxUtility(overlap_threshold=config.rpn_max_overlap,
                            ignore_threshold=config.rpn_min_overlap,
                            top_k=config.num_RPN_train_pre)

    #--------------------------------------------#
    #   Training parameter setup
    #--------------------------------------------#
    callback = tf.summary.create_file_writer("logs")
    loss_history = LossHistory("logs/")

    annotation_path = '2007_train.txt'
    #----------------------------------------------------------------------#
    #   The validation split is done here in train.py.
    #   It is normal for 2007_test.txt and 2007_val.txt to be empty; training does not use them.
    #   With the current split, the ratio of validation to training data is 1:9.
    #----------------------------------------------------------------------#
    val_split = 0.1
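
The overlap_threshold / ignore_threshold pair above typically partitions anchors into positive, negative, and ignored samples for RPN training. A hedged, self-contained sketch of that idea follows; the 0.7 / 0.3 defaults are common Faster R-CNN values, assumed here rather than taken from this project.

def iou(a, b):
    # IoU of two [xmin, ymin, xmax, ymax] boxes.
    w = min(a[2], b[2]) - max(a[0], b[0])
    h = min(a[3], b[3]) - max(a[1], b[1])
    if w <= 0 or h <= 0:
        return 0.0
    inter = w * h
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter)

def anchor_label(anchor, gt, max_overlap=0.7, min_overlap=0.3):
    score = iou(anchor, gt)
    if score >= max_overlap:
        return 1      # positive sample
    if score < min_overlap:
        return 0      # negative sample
    return -1         # ignored during training

print(anchor_label([0, 0, 10, 10], [0, 0, 10, 10]))  # 1 (IoU = 1.0)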
Code example #24
0
File: train.py Project: Hotsun2020/retinaface-tf2
        raise ValueError('Unsupported backbone - `{}`, Use mobilenet, resnet50.'.format(backbone))

    img_dim = cfg['image_size']

    #-------------------------------#
    #   Build the model
    #-------------------------------#
    model = RetinaFace(cfg, backbone=backbone)
    model_path = "model_data/retinaface_mobilenet025.h5"
    model.load_weights(model_path,by_name=True,skip_mismatch=True)

    #-------------------------------#
    #   Get the anchors and the box utility
    #-------------------------------#
    anchors = Anchors(cfg, image_size=(img_dim, img_dim)).get_anchors()
    bbox_util = BBoxUtility(anchors)

    # Training parameter setup
    logging = TensorBoard(log_dir="logs")
    checkpoint = ModelCheckpoint('logs/ep{epoch:03d}-loss{loss:.3f}.h5',
        monitor='loss', save_weights_only=True, save_best_only=False, period=1)
    reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=2, verbose=1)
    early_stopping = EarlyStopping(monitor='loss', min_delta=0, patience=6, verbose=1)

    for i in range(freeze_layers): model.layers[i].trainable = False
    print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model.layers)))

    #------------------------------------------------------#
    #   Backbone feature-extraction weights are generic, so freeze training speeds up training
    #   and also keeps the weights from being damaged early on.
    #   Init_Epoch is the starting epoch
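
A minimal, self-contained illustration of the layer-freezing pattern above; the two-layer toy model stands in for the RetinaFace backbone and head.

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, input_shape=(4,)),  # stand-in "backbone" layer
    tf.keras.layers.Dense(1),                    # stand-in "head" layer
])
freeze_layers = 1
for i in range(freeze_layers):
    model.layers[i].trainable = False
print('Freeze the first {} layers of total {} layers.'.format(
    freeze_layers, len(model.layers)))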
Code example #25
0
class EfficientDet(object):
    _defaults = {
        "model_path": 'model_data/efficientdet-d0-voc.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "phi": 0,
        "confidence": 0.4,
        "iou": 0.3,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize EfficientDet
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.model_image_size = [
            image_sizes[self.phi], image_sizes[self.phi], 3
        ]
        self.sess = K.get_session()
        self.generate()
        self.bbox_util = BBoxUtility(self.num_classes, nms_thresh=self.iou)
        self.prior = self._get_prior()

    #---------------------------------------------------#
    #   Get all the class names
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Get the anchor (prior) boxes
    #---------------------------------------------------#
    def _get_prior(self):
        data = get_anchors(image_sizes[self.phi])
        return data

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'
        #----------------------------------------#
        #   Count the number of classes
        #----------------------------------------#
        self.num_classes = len(self.class_names)

        #----------------------------------------#
        #   Build the EfficientDet model
        #----------------------------------------#
        self.Efficientdet = Efficientdet(self.phi, self.num_classes)
        self.Efficientdet.load_weights(self.model_path)

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign a distinct color to each class for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        #---------------------------------------------------------#
        #   Convert the image to RGB here so grayscale images do not error out during prediction.
        #---------------------------------------------------------#
        image = image.convert('RGB')

        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Pad the image with gray bars for a distortion-free resize
        #---------------------------------------------------------#
        crop_img = letterbox_image(
            image, [self.model_image_size[1], self.model_image_size[0]])

        #-----------------------------------------------------------#
        #   Preprocess and normalize; the resulting photo has shape [1, 512, 512, 3]
        #-----------------------------------------------------------#
        photo = np.array(crop_img, dtype=np.float32)
        photo = np.reshape(preprocess_input(photo), [
            1, self.model_image_size[0], self.model_image_size[1],
            self.model_image_size[2]
        ])

        preds = self.Efficientdet.predict(photo)
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
        results = self.bbox_util.detection_out(
            preds, self.prior, confidence_threshold=self.confidence)

        #--------------------------------------#
        #   If nothing is detected, return the original image
        #--------------------------------------#
        if len(results[0]) <= 0:
            return image
        results = np.array(results)

        det_label = results[0][:, 5]
        det_conf = results[0][:, 4]
        det_xmin, det_ymin = results[0][:, 0], results[0][:, 1]
        det_xmax, det_ymax = results[0][:, 2], results[0][:, 3]
        #-----------------------------------------------------------#
        #   Keep only boxes scoring above the confidence threshold
        #-----------------------------------------------------------#
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        #-----------------------------------------------------------#
        #   Remove the gray-bar padding
        #-----------------------------------------------------------#
        boxes = efficientdet_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max((np.shape(image)[0] + np.shape(image)[1]) //
                        self.model_image_size[0], 1)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for t in range(thickness):
                draw.rectangle([left + t, top + t, right - t, bottom - t],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image

    def get_FPS(self, image, test_interval):
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Pad the image with gray bars for a distortion-free resize
        #---------------------------------------------------------#
        crop_img = letterbox_image(
            image, [self.model_image_size[1], self.model_image_size[0]])
        #-----------------------------------------------------------#
        #   Preprocess and normalize; the resulting photo has shape [1, 512, 512, 3]
        #-----------------------------------------------------------#
        photo = np.array(crop_img, dtype=np.float32)
        photo = np.reshape(preprocess_input(photo), [
            1, self.model_image_size[0], self.model_image_size[1],
            self.model_image_size[2]
        ])

        preds = self.Efficientdet.predict(photo)
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
        results = self.bbox_util.detection_out(
            preds, self.prior, confidence_threshold=self.confidence)

        if len(results[0]) > 0:
            results = np.array(results)

            det_label = results[0][:, 5]
            det_conf = results[0][:, 4]
            det_xmin, det_ymin = results[0][:, 0], results[0][:, 1]
            det_xmax, det_ymax = results[0][:, 2], results[0][:, 3]
            #-----------------------------------------------------------#
            #   Keep only boxes scoring above the confidence threshold
            #-----------------------------------------------------------#
            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= self.confidence
            ]
            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = np.expand_dims(det_xmin[top_indices], -1)
            top_ymin = np.expand_dims(det_ymin[top_indices], -1)
            top_xmax = np.expand_dims(det_xmax[top_indices], -1)
            top_ymax = np.expand_dims(det_ymax[top_indices], -1)

            #-----------------------------------------------------------#
            #   Remove the gray-bar padding
            #-----------------------------------------------------------#
            boxes = efficientdet_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.model_image_size[0], self.model_image_size[1]]),
                image_shape)

        t1 = time.time()
        for _ in range(test_interval):
            preds = self.Efficientdet.predict(photo)
            #-----------------------------------------------------------#
            #   Decode the predictions
            #-----------------------------------------------------------#
            results = self.bbox_util.detection_out(
                preds, self.prior, confidence_threshold=self.confidence)
            if len(results[0]) > 0:
                results = np.array(results)

                det_label = results[0][:, 5]
                det_conf = results[0][:, 4]
                det_xmin, det_ymin = results[0][:, 0], results[0][:, 1]
                det_xmax, det_ymax = results[0][:, 2], results[0][:, 3]
                #-----------------------------------------------------------#
                #   Keep only boxes scoring above the confidence threshold
                #-----------------------------------------------------------#
                top_indices = [
                    i for i, conf in enumerate(det_conf)
                    if conf >= self.confidence
                ]
                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = np.expand_dims(det_xmin[top_indices], -1)
                top_ymin = np.expand_dims(det_ymin[top_indices], -1)
                top_xmax = np.expand_dims(det_xmax[top_indices], -1)
                top_ymax = np.expand_dims(det_ymax[top_indices], -1)

                #-----------------------------------------------------------#
                #   Remove the gray-bar padding
                #-----------------------------------------------------------#
                boxes = efficientdet_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array(
                        [self.model_image_size[0], self.model_image_size[1]]),
                    image_shape)

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time

    def close_session(self):
        self.sess.close()
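
A hypothetical usage of the wrapper above, assuming the default model files are in place: a PIL image goes in and an annotated PIL image comes out. The file names here are placeholders.

from PIL import Image

efficientdet = EfficientDet()
img = Image.open("street.jpg")
annotated = efficientdet.detect_image(img)
annotated.save("street_out.jpg")
print("seconds per inference:", efficientdet.get_FPS(img, test_interval=10))
efficientdet.close_session()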
Code example #26
0
class Retinaface(object):
    _defaults = {
        "model_path": 'model_data/retinaface_mobilenet025.h5',
        "backbone": 'mobilenet',
        "confidence": 0.5,
        "nms_iou": 0.45,
        #----------------------------------------------------------------------#
        #   Whether to constrain the input image size.
        #   When enabled, inputs are resized to input_shape; otherwise the original image is used for prediction.
        #   The tf2 code has a small bug when the backbone is mobilenet: if the input width and height
        #   are not multiples of 32, detections drift; the resnet50 backbone does not have this problem.
        #   Adjust input_shape to your input size, keeping it a multiple of 32, e.g. [640, 640, 3]
        #----------------------------------------------------------------------#
        "input_shape": [1280, 1280, 3],
        "letterbox_image": True
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.bbox_util = BBoxUtility(nms_thresh=self.nms_iou)
        self.generate()
        self.anchors = Anchors(self.cfg,
                               image_size=(self.input_shape[0],
                                           self.input_shape[1])).get_anchors()

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'tensorflow.keras model or weights must be a .h5 file.'

        #-------------------------------#
        #   Load the model and weights
        #-------------------------------#
        self.retinaface = RetinaFace(self.cfg, self.backbone)
        self.retinaface.load_weights(self.model_path)
        print('{} model, anchors loaded.'.format(self.model_path))

    @tf.function
    def get_pred(self, photo):
        preds = self.retinaface(photo, training=False)
        return preds

    #---------------------------------------------------#
    #   Detect faces in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        #---------------------------------------------------#
        #   Keep a copy of the input image for drawing later
        #---------------------------------------------------#
        old_image = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        #---------------------------------------------------#
        #   Compute scales to map predicted boxes back to the original image size
        #---------------------------------------------------#
        scale = [im_width, im_height, im_width, im_height]
        scale_for_landmarks = [im_width, im_height] * 5

        #---------------------------------------------------------#
        #   letterbox_image pads the image with gray bars for a distortion-free resize
        #---------------------------------------------------------#
        if self.letterbox_image:
            image = letterbox_image(image,
                                    [self.input_shape[1], self.input_shape[0]])
        else:
            self.anchors = Anchors(self.cfg,
                                   image_size=(im_height,
                                               im_width)).get_anchors()

        #-----------------------------------------------------------#
        #   Preprocess and normalize the image.
        #-----------------------------------------------------------#
        photo = np.expand_dims(preprocess_input(image), 0)

        preds = self.get_pred(photo)
        preds = [pred.numpy() for pred in preds]
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
        results = self.bbox_util.detection_out(
            preds, self.anchors, confidence_threshold=self.confidence)

        #--------------------------------------#
        #   If no face is detected, return the original image
        #--------------------------------------#
        if len(results) <= 0:
            return old_image

        results = np.array(results)
        #---------------------------------------------------------#
        #   If letterbox_image was used, strip the gray-bar padding.
        #---------------------------------------------------------#
        if self.letterbox_image:
            results = retinaface_correct_boxes(
                results, np.array([self.input_shape[0], self.input_shape[1]]),
                np.array([im_height, im_width]))

        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        for b in results:
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))

            # b[0]-b[3] are the face box coordinates, b[4] is the score
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                          2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                        0.5, (255, 255, 255))

            print(b[0], b[1], b[2], b[3], b[4])
            # b[5]-b[14] are the facial landmark coordinates
            cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)
        return old_image
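
A hypothetical usage of the wrapper above: detect_image takes an image array, draws boxes and landmarks onto a copy, and returns it. The file names and the BGR-to-RGB conversion are assumptions based on common usage of such predict scripts, not code from the original project.

import cv2

retinaface = Retinaface()
img = cv2.imread("face.jpg")                      # OpenCV loads BGR
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
result = retinaface.detect_image(rgb)
result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)  # back to BGR for saving
cv2.imwrite("face_out.jpg", result)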
Code example #27
0
import keras
import numpy as np
from keras.optimizers import Adam
from nets import retinanet  # assumed module path; `retinanet.resnet_retinanet` is used below
from nets.retinanet_training import Generator
from nets.retinanet_training import focal, smooth_l1
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from utils.utils import BBoxUtility
from utils.anchors import get_anchors

if __name__ == "__main__":
    NUM_CLASSES = 20
    input_shape = (600, 600, 3)
    annotation_path = '2007_train.txt'
    inputs = keras.layers.Input(shape=input_shape)
    model = retinanet.resnet_retinanet(NUM_CLASSES, inputs)
    priors = get_anchors(model)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)

    #-------------------------------------------#
    #   See the README for downloading the weight file
    #-------------------------------------------#
    model.load_weights("model_data/resnet50_coco_best_v2.1.0.h5",
                       by_name=True,
                       skip_mismatch=True)

    # 0.1 for validation, 0.9 for training
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
Code example #28
0
File: retinaface.py Project: ssunguotu/LeNet_face
class Retinaface(object):
    _defaults = {
        "model_path": 'model_data/retinaface_mobilenet025.h5',
        "backbone": "mobilenet",
        "confidence": 0.5,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize Retinaface
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        if self.backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50
        self.bbox_util = BBoxUtility()
        self.generate()

    #---------------------------------------------------#
    #   Load the model and weights
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'tensorflow.keras model or weights must be a .h5 file.'

        print('Loading weights into state dict...')
        # Load the model
        self.retinaface = RetinaFace(self.cfg, self.backbone)
        self.retinaface.load_weights(self.model_path)
        print('{} model, anchors loaded.'.format(self.model_path))

    @tf.function
    def get_pred(self, photo):
        preds = self.retinaface(photo, training=False)
        return preds

    #---------------------------------------------------#
    #   Detect an image
    #---------------------------------------------------#
    def detect_image(self, image):
        old_image = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        scale = [im_width, im_height, im_width, im_height]
        scale_for_landmarks = [
            im_width, im_height, im_width, im_height, im_width, im_height,
            im_width, im_height, im_width, im_height
        ]

        # Image preprocessing and normalization
        photo = np.expand_dims(preprocess_input(image), 0)
        anchors = Anchors(self.cfg,
                          image_size=(im_height, im_width)).get_anchors()

        preds = self.get_pred(photo)
        preds = [pred.numpy() for pred in preds]

        # Decode predictions and apply non-maximum suppression
        results = self.bbox_util.detection_out(
            preds, anchors, confidence_threshold=self.confidence)

        if len(results) <= 0:
            return old_image, 0, 0
        results = np.array(results)
        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        for b in results:
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                          2)

            #####################
            # Experimental face-cropping block (partly commented out)
            global cnt, t0, t1
            t1 = time.time()
            image_clip = old_image
            # if t1 - t0 > 1:
            #     t0 = t1
            # image_clip = old_image[b[1]-20:b[3]+20, b[0]-20:b[2]+20]
            # Crop the detected face region from the frame
            image_clip = old_image[b[1]:b[3], b[0]:b[2]]
            image_clip = cv2.cvtColor(image_clip, cv2.COLOR_RGB2BGR)

            # Save the cropped image
            # cv2.imshow("clip", image_clip)
            # cv2.imwrite("savedImg/wang/" + str(t1) + ".png", image_clip)
            # cnt += 1
            # print(cnt)
            #####################

            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                        0.5, (255, 255, 255))

            # landms
            cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)

        return old_image, image_clip, len(results)
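Note that detect_image here returns a tuple: the annotated frame, the last cropped face (or 0 when nothing was detected) and the number of faces, which suggests a video loop as the caller. A minimal webcam-loop sketch; the camera index, window name and quit key are assumptions, not from the source:

import cv2

if __name__ == "__main__":
    retinaface = Retinaface()
    capture = cv2.VideoCapture(0)  # assumed camera index
    while True:
        ret, frame = capture.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # detect_image expects RGB
        frame, face_clip, num_faces = retinaface.detect_image(frame)
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imshow("video", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    capture.release()
    cv2.destroyAllWindows()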
Code Example #29
class SSD(object):
    _defaults = {
        "model_path": 'model_data/essay_mobilenet_ssd_weights.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "input_shape": (300, 300, 3),
        "confidence": 0.4,
        "nms_iou": 0.45,
        'anchors_size': [30, 60, 111, 162, 213, 264, 315],
        #---------------------------------------------------------------------#
        #   Controls whether letterbox_image is used for a distortion-free
        #   resize of the input; repeated testing showed that a plain resize
        #   with letterbox_image disabled works better.
        #---------------------------------------------------------------------#
        "letterbox_image": False,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize SSD
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.sess = K.get_session()
        self.generate()
        self.bbox_util = BBoxUtility(self.num_classes, nms_thresh=self.nms_iou)

    #---------------------------------------------------#
    #   Get all the classes
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        #-------------------------------#
        #   Count the total number of classes (+1 for background)
        #-------------------------------#
        self.num_classes = len(self.class_names) + 1

        #-------------------------------#
        #   Load the model and weights
        #-------------------------------#
        self.ssd_model = ssd.SSD300(self.input_shape,
                                    self.num_classes,
                                    anchors_size=self.anchors_size)
        self.ssd_model.load_weights(self.model_path, by_name=True)

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign each class a different color for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Detect an image
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Add gray bars to the image for a distortion-free resize;
        #   a plain resize can also be used for recognition.
        #---------------------------------------------------------#
        if self.letterbox_image:
            crop_img = np.array(
                letterbox_image(image,
                                (self.input_shape[1], self.input_shape[0])))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize(
                (self.input_shape[1], self.input_shape[0]), Image.BICUBIC)
        photo = np.array(crop_img, dtype=np.float64)
        #-----------------------------------------------------------#
        #   Image preprocessing and normalization.
        #-----------------------------------------------------------#
        photo = preprocess_input(
            np.reshape(photo,
                       [1, self.input_shape[0], self.input_shape[1], 3]))
        preds = self.ssd_model.predict(photo)

        #-----------------------------------------------------------#
        #   Decode the prediction results
        #-----------------------------------------------------------#
        results = self.bbox_util.detection_out(
            preds, confidence_threshold=self.confidence)

        #--------------------------------------#
        #   If nothing is detected, return the original image
        #--------------------------------------#
        if len(results[0]) <= 0:
            return image

        #-----------------------------------------------------------#
        #   Keep only the boxes whose score is above confidence
        #-----------------------------------------------------------#
        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin = results[0][:, 2]
        det_ymin = results[0][:, 3]
        det_xmax = results[0][:, 4]
        det_ymax = results[0][:, 5]
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)

        #-----------------------------------------------------------#
        #   Remove the gray-bar padding
        #-----------------------------------------------------------#
        if self.letterbox_image:
            boxes = ssd_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.input_shape[0], self.input_shape[1]]),
                image_shape)
        else:
            top_xmin = top_xmin * image_shape[1]
            top_ymin = top_ymin * image_shape[0]
            top_xmax = top_xmax * image_shape[1]
            top_ymax = top_ymax * image_shape[0]
            boxes = np.concatenate([top_ymin, top_xmin, top_ymax, top_xmax],
                                   axis=-1)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0],
            1)

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c) - 1]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box and label
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for t in range(thickness):
                draw.rectangle([left + t, top + t, right - t, bottom - t],
                               outline=self.colors[int(c) - 1])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c) - 1])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image

    def close_session(self):
        self.sess.close()
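This SSD wrapper consumes and returns PIL images rather than OpenCV arrays. A minimal usage sketch; the image path is an assumption:

from PIL import Image

if __name__ == "__main__":
    ssd = SSD()
    image = Image.open("img/street.jpg")  # assumed test image path
    r_image = ssd.detect_image(image)
    r_image.show()
    ssd.close_session()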
Code Example #30
class FRCNN(object):
    _defaults = {
        "model_path": 'model_data/voc_weights.h5',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
        "iou": 0.3
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize Faster R-CNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.config = Config()
        self.generate()
        self.bbox_util = BBoxUtility()

    #---------------------------------------------------#
    #   Get all the classes
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith(
            '.h5'), 'Keras model or weights must be a .h5 file.'

        #-------------------------------#
        #   Count the total number of classes (+1 for background)
        #-------------------------------#
        self.num_classes = len(self.class_names) + 1

        #-------------------------------#
        #   Load the model and weights
        #-------------------------------#
        self.model_rpn, self.model_classifier = frcnn.get_predict_model(
            self.config, self.num_classes)
        self.model_rpn.load_weights(self.model_path, by_name=True)
        self.model_classifier.load_weights(self.model_path, by_name=True)

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Assign each class a different color for drawing boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   Compute the size of the shared feature map
    #---------------------------------------------------#
    def get_img_output_length(self, width, height):
        def get_output_length(input_length):
            # input_length += 6
            filter_sizes = [7, 3, 1, 1]
            padding = [3, 1, 0, 0]
            stride = 2
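            # Worked example: 600 -> 300 -> 150 -> 75 -> 38, an overall stride of roughly 16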
            for i in range(4):
                # input_length = (input_length - filter_size + stride) // stride
                input_length = (input_length + 2 * padding[i] -
                                filter_sizes[i]) // stride + 1
            return input_length

        return get_output_length(width), get_output_length(height)

    @tf.function(experimental_relax_shapes=True)
    def model_rpn_get_pred(self, photo):
        preds = self.model_rpn(photo, training=False)
        return preds

    @tf.function(experimental_relax_shapes=True)
    def model_classifier_get_pred(self, photo):
        preds = self.model_classifier(photo, training=False)
        return preds

    #---------------------------------------------------#
    #   Detect an image
    #---------------------------------------------------#
    def detect_image(self, image):
        #-------------------------------------#
        #   Convert to RGB so grayscale images can also be predicted.
        #-------------------------------------#
        image = image.convert("RGB")

        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        #---------------------------------------------------------#
        #   Resize the original image so that its short side is 600
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)
        photo = np.array(image, dtype=np.float64)

        #-----------------------------------------------------------#
        #   Image preprocessing and normalization.
        #-----------------------------------------------------------#
        photo = preprocess_input(np.expand_dims(photo, 0))
        rpn_pred = self.model_rpn_get_pred(photo)
        rpn_pred = [x.numpy() for x in rpn_pred]

        #-----------------------------------------------------------#
        #   Decode the region proposal network's predictions
        #-----------------------------------------------------------#
        base_feature_width, base_feature_height = self.get_img_output_length(
            width, height)
        anchors = get_anchors([base_feature_width, base_feature_height], width,
                              height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        #-------------------------------------------------------------#
        #   After obtaining the proposals and the shared feature map,
        #   pass both into the classifier for prediction
        #-------------------------------------------------------------#
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        # Swap the x/y ordering of the proposal coordinates for the classifier's ROI input
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier_get_pred(
            [base_layer, temp_ROIs])
        classifier_pred = [x.numpy() for x in classifier_pred]

        #-------------------------------------------------------------#
        #   Use the classifier's predictions to decode the proposals
        #   into the final boxes
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0]) == 0:
            return old_image

        results = np.array(results[0])
        boxes = results[:, :4]
        top_conf = results[:, 4]
        top_label_indices = results[:, 5]
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2,
            1)

        image = old_image
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            score = top_conf[i]

            left, top, right, bottom = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box and label
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for t in range(thickness):
                draw.rectangle([left + t, top + t, right - t, bottom - t],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
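As with the SSD wrapper above, this FRCNN class works on PIL images. A minimal usage sketch under the same assumptions (the image path is hypothetical):

from PIL import Image

if __name__ == "__main__":
    frcnn = FRCNN()
    image = Image.open("img/test.jpg")  # assumed test image path
    r_image = frcnn.detect_image(image)
    r_image.show()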