if __name__ == '__main__':
    SET_NAME = params['val_set']
    VAL_GT = f'datasets/{params["project_name"]}/annotations/instances_{SET_NAME}.json'
    VAL_IMGS = f'datasets/{params["project_name"]}/{SET_NAME}/'
    MAX_IMAGES = 10000
    coco_gt = COCO(VAL_GT)
    image_ids = coco_gt.getImgIds()[:MAX_IMAGES]

    if override_prev_results or not os.path.exists(
            f'{SET_NAME}_bbox_results.json'):
        model = EfficientDetBackbone(compound_coef=compound_coef,
                                     num_classes=len(obj_list),
                                     ratios=eval(params['anchors_ratios']),
                                     scales=eval(params['anchors_scales']))
        model.load_state_dict(
            torch.load(weights_path, map_location=torch.device('cpu')))
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model.cuda(gpu)

            if use_float16:
                model.half()

        image_ids = evaluate_coco(VAL_IMGS, SET_NAME, image_ids, coco_gt,
                                  model)

    _eval(coco_gt, image_ids, f'{SET_NAME}_bbox_results.json')
class Model:
    def __init__(self, compound_coef=0, force_input_size=512, threshold=0.2, iou_threshold=0.2):
        self.compound_coef = compound_coef
        self.force_input_size = force_input_size  # set None to use default size

        self.threshold = threshold
        self.iou_threshold = iou_threshold

        self.use_cuda = True
        self.use_float16 = False
        cudnn.fastest = True
        cudnn.benchmark = True

        self.obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                         'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                         'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                         'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                         'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                         'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                         'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                         'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                         'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                         'toothbrush']

        # tf bilinear interpolation is different from any other's, just make do
        self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_size = self.input_sizes[self.compound_coef] if self.force_input_size is None else self.force_input_size

        self.model = EfficientDetBackbone(
            compound_coef=self.compound_coef, num_classes=len(self.obj_list))
        self.model.load_state_dict(torch.load(
            f'weights/efficientdet-d{self.compound_coef}.pth'))
        self.model.requires_grad_(False)
        self.model.eval()

        if self.use_cuda:
            self.model = self.model.cuda()
        if self.use_float16:
            self.model = self.model.half()


    def predict(self, raw_img):
        self.ori_imgs, self.framed_imgs, self.framed_metas = preprocess_raw(raw_img, max_size=self.input_size)
        if self.use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in self.framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in self.framed_imgs], 0)
        x = x.to(torch.float32 if not self.use_float16 else torch.float16).permute(0, 3, 1, 2)

        with torch.no_grad():
            self.features, self.regression, self.classification, self.anchors = self.model(x)

            self.regressBoxes = BBoxTransform()
            self.clipBoxes = ClipBoxes()

            out = postprocess(x,
                              self.anchors, self.regression, self.classification,
                              self.regressBoxes, self.clipBoxes,
                              self.threshold, self.iou_threshold)
            pred = invert_affine(self.framed_metas, out)
            return pred



    def label_img(self, preds, imgs):
        for i in range(len(imgs)):
            if len(preds[i]['rois']) == 0:
                continue

            for j in range(len(preds[i]['rois'])):
                (x1, y1, x2, y2) = preds[i]['rois'][j].astype(int)
                cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0), 2)
                obj = self.obj_list[preds[i]['class_ids'][j]]
                score = float(preds[i]['scores'][j])

                cv2.putText(imgs[i], '{}, {:.3f}'.format(obj, score),
                            (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            (255, 255, 0), 1)
        
        return imgs
    

    def run(self, raw_img):
        pred_label = self.predict(raw_img)
        pred_img = self.label_img(pred_label, self.ori_imgs)
        return pred_img[0]
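A minimal usage sketch for the Model wrapper above; the constructor loads whatever weights/efficientdet-d{coef}.pth exists, while the image paths here are assumptions for illustration:

import cv2

detector = Model(compound_coef=0, threshold=0.3, iou_threshold=0.3)
frame = cv2.imread('test/street.jpg')             # hypothetical input image
labeled = detector.run(frame)                     # predict + draw boxes in one call
cv2.imwrite('test/street_detected.jpg', labeled)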
Example #3
def excuteModel(videoname):
    # Video's path
    # set int to use webcam, set str to read from a video file

    if videoname is not None:
        video_src = os.path.join(r'D:\GitHub\Detection\server\uploads', f"{videoname}.mp4")
    else:
        video_src = r'D:\GitHub\Detection\server\AImodel\videotest\default.mp4'

    compound_coef = 2
    trained_weights = 'D:\\GitHub\\Detection\\server\\AImodel\\weights\\efficientdet-video.pth'

    force_input_size = None  # set None to use default size

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(
        compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(trained_weights))

    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    # function for display

    # Box
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    writer = None
    # try to determine the total number of frames in the video file
    try:
        prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() \
            else cv2.CAP_PROP_FRAME_COUNT
        total = int(cap.get(prop))
        print("[INFO] {} total frames in video".format(total))

    # an error occurred while trying to determine the total
    # number of frames in the video file
    except Exception:
        print("[INFO] could not determine # of frames in video")
        total = -1

    path_out = os.path.join(os.path.dirname(
        os.path.abspath(__file__)), 'outvideo')

    path_result = r"D:\GitHub\Detection\server\AImodel\videotest\default.mp4"
    path_asset = r"D:\GitHub\Detection\client\src\assets"
    for i in range(0, length):
        ret, frame = cap.read()
        if not ret:
            break

        # frame preprocessing
        ori_imgs, framed_imgs, framed_metas = preprocess_video(
            frame, max_size=input_size)

        if use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda()
                             for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        # model predict
        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            out = postprocess(x,
                              anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)

        # result
        out = invert_affine(framed_metas, out)
        img_show = display(out, ori_imgs, obj_list)

        if writer is None:

            # initialize our video writer
            fourcc = 0x00000021  # integer fourcc commonly used for H.264 with OpenCV's ffmpeg backend
            #fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            if videoname is not None:
                path_result = os.path.join(path_out, f"{videoname}.mp4")
            else:
                path_result = os.path.join(path_out, "default.mp4")

            writer = cv2.VideoWriter(path_result, fourcc, 30, (img_show.shape[1], img_show.shape[0]), True)


        # write the output frame to disk
        writer.write(img_show)
        print("Processing data... " + str(round((i+1)/length, 3)*100) + " %")
        # show frame by frame
        #cv2.imshow('frame', img_show)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print("[INFO] cleaning up...")

    if writer is not None:
        writer.release()
    cap.release()
    cv2.destroyAllWindows()

    if videoname is not None:
        path_asset = os.path.join(path_asset, f"{videoname}.mp4")
    else:
        path_asset = os.path.join(path_asset, "default.mp4")
    copyfile(path_result, path_asset)
    return path_asset
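A hedged usage sketch for excuteModel; the video name is an assumption, and the hard-coded Windows paths above must exist for it to run:

# processes D:\GitHub\Detection\server\uploads\match1.mp4, writes the annotated
# video to outvideo/, copies it into the client assets folder and returns that path
asset_path = excuteModel('match1')
print('annotated video copied to', asset_path)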
Example #4
class EfficientDet(object):
    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
                'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench',
                'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
                'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
                'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '',
                'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
                'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '',
                'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
                'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors',
                'teddy bear', 'hair drier', 'toothbrush']

    def __init__(self, weightfile, score_thresh,
                 nms_thresh, is_xywh=True, use_cuda=True, use_float16=False):
        print('Loading weights from %s... Done!' % (weightfile))

        # constants
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.use_cuda = use_cuda
        self.is_xywh = is_xywh

        compound_coef = 0
        force_input_size = None  # set None to use default size

        self.use_float16 = False
        cudnn.fastest = True
        cudnn.benchmark = True

        # tf bilinear interpolation is different from any other's, just make do
        input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_size = input_sizes[compound_coef] if \
            force_input_size is None else force_input_size

        # load model
        self.model = EfficientDetBackbone(compound_coef=compound_coef,
                                          num_classes=len(self.obj_list))
        # f'weights/efficientdet-d{compound_coef}.pth'
        self.model.load_state_dict(torch.load(weightfile))
        self.model.requires_grad_(False)
        self.model.eval()

        if self.use_cuda:
            self.model = self.model.cuda()
        if self.use_float16:
            self.model = self.model.half()

        # Box
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()

    def __call__(self, imgs):
        # frame preprocessing
        _, framed_imgs, framed_metas = preprocess(imgs,
                                                  max_size=self.input_size)

        if self.use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        dtype = torch.float32 if not self.use_float16 else torch.float16
        x = x.to(dtype).permute(0, 3, 1, 2)

        # model predict
        with torch.no_grad():
            features, regression, classification, anchors = self.model(x)

            out = postprocess(x,
                              anchors, regression, classification,
                              self.regressBoxes, self.clipBoxes,
                              self.score_thresh, self.nms_thresh)

        # result
        out = invert_affine(framed_metas, out)

        if len(out) == 0:
            return None, None, None

        rois = [o['rois'] for o in out]
        scores = [o['scores'] for o in out]
        class_ids = [o['class_ids'] for o in out]
        if self.is_xywh:
            return xyxy_to_xywh(rois), scores, class_ids
        else:
            return rois, scores, class_ids
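A minimal sketch of driving the EfficientDet wrapper above; the weight path is an assumption, and whether __call__ takes an ndarray or an image path depends on the preprocess implementation in use:

import cv2

detector = EfficientDet('weights/efficientdet-d0.pth',  # hypothetical weight file
                        score_thresh=0.3, nms_thresh=0.5, is_xywh=True)
frame = cv2.imread('demo.jpg')                          # hypothetical sample image
boxes, scores, class_ids = detector(frame)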
Example #5
class PTVisionService(PTServingBaseService):
    def __init__(self, model_name, model_path):
        # call the parent class constructor
        super(PTVisionService, self).__init__(model_name, model_path)
        # load the model via a custom helper
        checkpoint_file = model_path
        params = yaml.safe_load(
            open(f'/home/mind/model/projects/{cfg.project}.yml'))
        self.model = EfficientDetBackbone(
            compound_coef=cfg.compound_coef,
            num_classes=len(cfg.category),
            ratios=eval(params['anchors_ratios']),
            scales=eval(params['anchors_scales']))
        self.model.load_state_dict(
            torch.load(checkpoint_file, map_location=torch.device('cpu')))
        self.model.requires_grad_(False)
        self.model.eval()
        # self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_sizes = [512, 896, 768, 896, 1024, 1280, 1280, 1536]
        self.class_dict = dict([val, key] for key, val in cfg.category.items())

    def _preprocess(self, data):
        # HTTPS requests arrive in two forms:
        # 1. form-data file upload: data = {"request key": {"file name": <file io>}}
        # 2. json body: data = json.loads("<json body passed in the request>")
        imgs_path = []
        for k, v in data.items():
            for file_name, file_content in v.items():
                imgs_path.append(file_content)

        return imgs_path

    def _inference(self, imgs_path):
        results = []
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        for img_path in imgs_path:
            ori_imgs, framed_imgs, framed_metas = preprocess(
                [img_path], max_size=self.input_sizes[cfg.compound_coef])
            x = torch.from_numpy(framed_imgs[0]).float()
            x = x.unsqueeze(0).permute(0, 3, 1, 2)

            features, regression, classification, anchors = self.model(x)
            preds = self._my_postprocess(x, anchors, regression,
                                         classification, regressBoxes,
                                         clipBoxes, cfg.threshold,
                                         cfg.nms_threshold)

            preds = invert_affine(framed_metas, preds)[0]
            scores = preds['scores']
            class_ids = preds['class_ids']
            rois = preds['rois']
            image_result = {
                'detection_classes': [],
                'detection_boxes': [],
                'detection_scores': []
            }
            if rois.shape[0] > 0:
                bbox_score = scores

                for roi_id in range(rois.shape[0]):
                    score = float(bbox_score[roi_id])
                    label = int(class_ids[roi_id])
                    box = rois[roi_id, :]
                    image_result['detection_classes'].append(
                        self.class_dict[label + 1])
                    image_result['detection_boxes'].append(box.tolist())
                    image_result['detection_scores'].append(score)

            results.append(image_result)

        return results

    def _postprocess(self, data):
        if len(data) == 1:
            return data[0]
        else:
            return data

    def _my_postprocess(self, x, anchors, regression, classification,
                        regressBoxes, clipBoxes, threshold, iou_threshold):
        transformed_anchors = regressBoxes(anchors, regression)
        transformed_anchors = clipBoxes(transformed_anchors, x)
        scores = torch.max(classification, dim=2, keepdim=True)[0]
        scores_over_thresh = (scores > threshold)[:, :, 0]
        out = []
        for i in range(x.shape[0]):
            if scores_over_thresh[i].sum() == 0:
                out.append({
                    'rois': np.array(()),
                    'class_ids': np.array(()),
                    'scores': np.array(()),
                })
                continue

            classification_per = classification[i, scores_over_thresh[i, :],
                                                ...].permute(1, 0)
            transformed_anchors_per = transformed_anchors[
                i, scores_over_thresh[i, :], ...]
            scores_per = scores[i, scores_over_thresh[i, :], ...]
            scores_, classes_ = classification_per.max(dim=0)
            anchors_nms_idx = batched_nms(transformed_anchors_per,
                                          scores_per[:, 0],
                                          classes_,
                                          iou_threshold=iou_threshold)

            if anchors_nms_idx.shape[0] != 0:
                classes_ = classes_[anchors_nms_idx]
                scores_ = scores_[anchors_nms_idx]
                boxes_ = transformed_anchors_per[anchors_nms_idx, :]
                boxes_ = boxes_[:, [1, 0, 3, 2]]

                out.append({
                    'rois': boxes_.numpy(),
                    'class_ids': classes_.numpy(),
                    'scores': scores_.numpy(),
                })
            else:
                out.append({
                    'rois': np.array(()),
                    'class_ids': np.array(()),
                    'scores': np.array(()),
                })

        return out
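Note the column swap boxes_[:, [1, 0, 3, 2]] in _my_postprocess above: each box is reordered from [x1, y1, x2, y2] to [y1, x1, y2, x2] before being returned, so the detection_boxes emitted by _inference come out in (ymin, xmin, ymax, xmax) order.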
Example #6
def main(i):
    compound_coef = i
    force_input_size = None  # set None to use default size

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']

    out_dict = dict()
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                 ratios=anchor_ratios, scales=anchor_scales)
    model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth', map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()

    base_dir = '/data/jiashenc/jackson/'

    print('Processing Det-' + str(i))

    for k in range(1000000, 1100000):
        if k % 1000 == 0:
            print('    Finish {} frames'.format(k + 1))
            
        img_path = os.path.join(base_dir, 'frame{}.jpg'.format(k))
        ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

        if use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x,
                              anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)


        out = invert_affine(framed_metas, out)
        to_json(out, out_dict)

    with open(os.path.join(base_dir, '10', 'res-{:d}.json'.format(i)), 'w') as f:
        json.dump(out_dict, f)
        out_dict = dict()
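A plausible driver for main(i) above (the coefficient range is an assumption; each call evaluates one EfficientDet variant over the same frame directory):

if __name__ == '__main__':
    for coef in range(3):  # e.g. sweep d0-d2
        main(coef)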
def batch_inference(args):
    input_size = input_sizes[args.compound_coef]
    model = EfficientDetBackbone(compound_coef=args.compound_coef,
                                 num_classes=len(obj_list),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)

    # load pth file
    model.load_state_dict(torch.load(args.pth, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if args.use_cuda:
        model = model.cuda(device=args.device)

    path = args.file_list
    imgpath = args.img_path
    content = []
    with open(path, 'r') as f_in:
        lines = f_in.readlines()
        for idx in range(len(lines)):
            line = lines[idx]
            line = line.strip().split(' ')
            content.append(line[0])

    for i in tqdm(range(len(content)), ncols=88):
        filebasename = content[i]
        img_path = os.path.join(imgpath, filebasename + '.jpg')
        try:
            ori_imgs, framed_imgs, framed_metas = eval_preprocess(
                img_path, max_size=input_size)
        except Exception:
            print(f'{img_path.split("/")[-1]} is not in {args.img_path}')
            continue

        if args.use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = Rotation_BBoxTransform()
            clipBoxes = ClipBoxes()
            addBoxes = BBoxAddScores()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, addBoxes,
                              args.score_threshold, args.iou_threshold)
        out = invert_affine(framed_metas, out)
        file_name = ['Task1_large-vehicle.txt', 'Task1_small-vehicle.txt']
        rois = out[0]['rois']
        class_ids = out[0]['class_ids']
        scores = out[0]['scores']

        filecontent = []
        for ii in range(len(scores)):
            xmin, ymin, xmax, ymax, theta = rois[ii]
            rect = OPENCV2xywh([xmin, ymin, xmax, ymax, theta])[0].tolist()
            x1, y1 = float(rect[0][0]), float(rect[0][1])
            x2, y2 = float(rect[1][0]), float(rect[1][1])
            x3, y3 = float(rect[2][0]), float(rect[2][1])
            x4, y4 = float(rect[3][0]), float(rect[3][1])
            single_filecontent = [
                int(class_ids[ii]), filebasename,
                float(scores[ii]), x1, y1, x2, y2, x3, y3, x4, y4
            ]
            filecontent.append(single_filecontent)

        write_into_txt(file_name, filecontent)
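A hedged argparse wiring for batch_inference, with flag names inferred from the args.* attributes the function reads (defaults are assumptions):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--compound_coef', type=int, default=0)
parser.add_argument('--pth', type=str, required=True)
parser.add_argument('--use_cuda', action='store_true')
parser.add_argument('--device', type=int, default=0)
parser.add_argument('--file_list', type=str, required=True)
parser.add_argument('--img_path', type=str, required=True)
parser.add_argument('--score_threshold', type=float, default=0.2)
parser.add_argument('--iou_threshold', type=float, default=0.2)
batch_inference(parser.parse_args())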
def efficientDet_video_inference(video_src, compound_coef=0, force_input_size=None,
                                 frame_skipping=3,
                                 threshold=0.2, out_path=None, imshow=False,
                                 display_fps=False):

    #deep-sort variables

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0


    model_filename = '/home/shaheryar/Desktop/Projects/Football-Monitoring/deep_sort/model_weights/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, n_init=5)

    # efficientDet-pytorch variables
    iou_threshold = 0.4
    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'MPEG')
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("Video fps",fps)
    if(out_path is not None):
        outp = cv2.VideoWriter(out_path, fourcc, fps, (frame_width, frame_height))
    i=0
    start= time.time()
    current_frame_fps=0
    while True:

        ret, frame = cap.read()

        if not ret:
            break
        t1 = time.time()
        if frame_skipping == 0 or i % frame_skipping == 0:


            # frame preprocessing (running detections)
            ori_imgs, framed_imgs, framed_metas, t1 = preprocess_video(frame, width=input_size, height=input_size)
            if use_cuda:
                x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            else:
                x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
            # model predict
            t1 = time.time()
            with torch.no_grad():
                features, regression, classification, anchors = model(x)

                out = postprocess(x,
                                  anchors, regression, classification,
                                  regressBoxes, clipBoxes,
                                  threshold, iou_threshold)
            # Post processing
            out = invert_affine(framed_metas, out)
            # decode bboxes, object names and scores
            boxes, classes, scores = decode_predictions(out[0])
            org_boxes = boxes.copy()
            t2 = time.time() - t1

            # feature extraction for deep sort
            boxes = [convert_bbox_to_deep_sort_format(frame.shape, b) for b in boxes]

            features = encoder(frame, boxes)
            detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxes, features)]
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            tracker.predict()
            tracker.update(detections)



        i = i + 1
        img_show = frame.copy()
        for j in range(len(org_boxes)):
            img_show = drawBoxes(img_show, org_boxes[j], (255, 255, 0), str(tracker.tracks[j].track_id))

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            roi = frame[y1:y2, x1:x2]
            cv2.rectangle(img_show, (x1, y1), (x2, y2), update_color_association(roi, track.track_id), 2)
            cv2.putText(img_show, str(track.track_id), (x1, y1), 0, 5e-3 * 100, (255, 255, 0), 1)


        if display_fps:
            current_frame_fps = 1 / t2
        else:
            current_frame_fps = 0

        cv2.putText(img_show, 'FPS: {0:.2f}'.format(current_frame_fps), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 0),
                    2, cv2.LINE_AA)
        if i % int(fps) == 0:
            print("Processed", str(int(i / fps)), "seconds")
            print("Time taken", time.time() - start)
            # print(color_dict)

        if imshow:
            img_show = cv2.resize(img_show, (0, 0), fx=0.75, fy=0.75)
            cv2.imshow('Frame',img_show)
            # Press Q on keyboard to  exit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        if out_path is not None:
            outp.write(img_show)

    cap.release()
    if out_path is not None:
        outp.release()
Example #9
def getImageDetections(imagePath, weights, nms_threshold, confidenceParam, coefficient):
    """
    Runs the detections and returns all detection into a single structure.

    Parameters
    ----------
    imagePath : str
        Path to all images.
    weights : str
        path to the weights.
    nms_threshold : float
        non-maximum supression threshold.
    confidenceParam : float
        confidence score for the detections (everything above this threshold is considered a valid detection).
    coefficient : int
        coefficient of the current efficientdet model (from d1 to d7).

    Returns
    -------
    detectionsList : List
        return a list with all predicted bounding-boxes.

    """
    compound_coef = coefficient
    force_input_size = None  # set None to use default size
    img_path  = imagePath

    threshold = confidenceParam
    iou_threshold = nms_threshold

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True
    obj_list = ['class_name']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                # replace this part with your project's anchor config
                                ratios=[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
                                scales=[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

    model.load_state_dict(torch.load(rootDir+'logs/' + project + '/' + weights))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x,
                          anchors, regression, classification,
                          regressBoxes, clipBoxes,
                          threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    detectionsList = []
    for i in range(len(ori_imgs)):
        if len(out[i]['rois']) == 0:
            continue
        for j in range(len(out[i]['rois'])):
            (x1, y1, x2, y2) = out[i]['rois'][j].astype(int)
            detectionsList.append((float(out[i]['scores'][j]), x1, y1, x2, y2))
    return detectionsList
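Per the docstring, a hedged call might look like this (paths and thresholds are assumptions; note the function also reads the module-level rootDir and project globals to locate the weights):

detections = getImageDetections(imagePath='datasets/val/0001.jpg',
                                weights='efficientdet-d2_best.pth',
                                nms_threshold=0.5,
                                confidenceParam=0.4,
                                coefficient=2)
# -> [(score, x1, y1, x2, y2), ...]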
    def infer(self, image):
        img = np.array(image)
        img = img[:, :, ::-1]  # RGB -> BGR
        anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
        anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]

        threshold = 0.25
        iou_threshold = 0.25

        force_input_size = None
        use_cuda = False
        use_float16 = False
        cudnn.fastest = False
        cudnn.benchmark = False

        input_size = 512
        ori_imgs, framed_imgs, framed_metas = preprocess(img,
                                                         max_size=input_size)

        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        model = EfficientDetBackbone(compound_coef=0,
                                     num_classes=len(self.labels),
                                     ratios=anchor_ratios,
                                     scales=anchor_scales)

        model.load_state_dict(torch.load(self.path, map_location='cpu'))
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model = model.cuda()
        if use_float16:
            model = model.half()

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold,
                              iou_threshold)

        pred = invert_affine(framed_metas, out)

        results = []

        for i in range(len(ori_imgs)):
            if len(pred[i]['rois']) == 0:
                continue

            ori_imgs[i] = ori_imgs[i].copy()

            for j in range(len(pred[i]['rois'])):
                xt1, yt1, xbr, ybr = pred[i]['rois'][j].astype(np.float64)
                xt1 = float(xt1)
                yt1 = float(yt1)
                xbr = float(xbr)
                ybr = float(ybr)
                obj = str(pred[i]['class_ids'][j])
                obj_label = self.labels.get(obj)
                obj_score = str(pred[i]['scores'][j])
                results.append({
                    "confidence": str(obj_score),
                    "label": obj_label,
                    "points": [xt1, yt1, xbr, ybr],
                    "type": "rectangle",
                })

        return results
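Each entry in the list infer returns has this shape (values illustrative):

# {'confidence': '0.87', 'label': 'person',
#  'points': [12.0, 34.5, 120.3, 240.8], 'type': 'rectangle'}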
Example #11
class ObjectDetectionService(PTServingBaseService):
    def __init__(self, model_name, model_path):
        # effdet
        self.model_name = model_name
        self.model_path = os.path.join(os.path.dirname(__file__),
                                       'models_best.pth')
        self.input_image_key = 'images'
        self.anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
        self.anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
        self.compound_coef = 0
        self.threshold = 0.2
        self.iou_threshold = 0.2
        self.obj_list = [
            '一次性快餐盒', '书籍纸张', '充电宝', '剩饭剩菜', '包', '垃圾桶', '塑料器皿', '塑料玩具',
            '塑料衣架', '大骨头', '干电池', '快递纸袋', '插头电线', '旧衣服', '易拉罐', '枕头', '果皮果肉',
            '毛绒玩具', '污损塑料', '污损用纸', '洗护用品', '烟蒂', '牙签', '玻璃器皿', '砧板', '筷子',
            '纸盒纸箱', '花盆', '茶叶渣', '菜帮菜叶', '蛋壳', '调料瓶', '软膏', '过期药物', '酒瓶',
            '金属厨具', '金属器皿', '金属食品罐', '锅', '陶瓷器皿', '鞋', '食用油桶', '饮料瓶', '鱼骨'
        ]
        self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_size = self.input_sizes[self.compound_coef]

        self.model = EfficientDetBackbone(compound_coef=self.compound_coef,
                                          num_classes=len(self.obj_list),
                                          ratios=self.anchor_ratios,
                                          scales=self.anchor_scales)
        self.model.load_state_dict(
            torch.load(self.model_path, map_location='cpu'))
        self.model.requires_grad_(False)
        self.model.eval()

    def _preprocess(self, data):
        preprocessed_data = {}
        for k, v in data.items():
            for file_name, file_content in v.items():
                ori_imgs, framed_imgs, framed_metas = preprocess(
                    file_content, max_size=self.input_size)
                preprocessed_data[k] = [framed_imgs, framed_metas]
        return preprocessed_data

    def _inference(self, data):
        """
        model inference function
        Here are a inference example of resnet, if you use another model, please modify this function
        """
        framed_imgs, framed_metas = data[self.input_image_key]
        if torch.cuda.is_available():
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            self.model = self.model.cuda()
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32).permute(0, 3, 1, 2)

        #if use_float16:
        #    model = model.half()

        with torch.no_grad():
            features, regression, classification, anchors = self.model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, self.threshold,
                              self.iou_threshold)

        out = invert_affine(framed_metas, out)
        result = OrderedDict()
        result['detection_classes'] = []
        result['detection_scores'] = []
        result['detection_boxes'] = []

        for i in range(len(out)):
            if len(out[i]['rois']) == 0:
                continue
            for j in range(len(out[i]['rois'])):
                x1, y1, x2, y2 = out[i]['rois'][j].astype(int)
                result['detection_boxes'].append([x1, y1, x2, y2])
                obj = self.obj_list[out[i]['class_ids'][j]]
                result['detection_classes'].append(obj)
                score = float(out[i]['scores'][j])
                result['detection_scores'].append(score)

        return result

    def _postprocess(self, data):
        return data

    def inference(self, data):
        '''
        Wrapper function to run the preprocess, inference and postprocess functions.

        Parameters
        ----------
        data : map of object
            Raw input from the request.

        Returns
        -------
        list of outputs to be sent back to the client.
        '''
        pre_start_time = time.time()
        data = self._preprocess(data)
        infer_start_time = time.time()
        # Update preprocess latency metric
        pre_time_in_ms = (infer_start_time - pre_start_time) * 1000
        logger.info('preprocess time: ' + str(pre_time_in_ms) + 'ms')

        if self.model_name + '_LatencyPreprocess' in MetricsManager.metrics:
            MetricsManager.metrics[self.model_name +
                                   '_LatencyPreprocess'].update(pre_time_in_ms)

        data = self._inference(data)
        infer_end_time = time.time()
        infer_in_ms = (infer_end_time - infer_start_time) * 1000

        logger.info('infer time: ' + str(infer_in_ms) + 'ms')
        data = self._postprocess(data)

        # Update inference and postprocess latency metrics
        post_time_in_ms = (time.time() - infer_end_time) * 1000
        logger.info('postprocess time: ' + str(post_time_in_ms) + 'ms')
        if self.model_name + '_LatencyInference' in MetricsManager.metrics:
            MetricsManager.metrics[self.model_name +
                                   '_LatencyInference'].update(infer_in_ms)

        # Update overall latency metric
        if self.model_name + '_LatencyOverall' in MetricsManager.metrics:
            MetricsManager.metrics[self.model_name +
                                   '_LatencyOverall'].update(pre_time_in_ms +
                                                             infer_in_ms +
                                                             post_time_in_ms)

        logger.info('latency: ' +
                    str(pre_time_in_ms + infer_in_ms + post_time_in_ms) + 'ms')
        data['latency_time'] = str(
            round(pre_time_in_ms + infer_in_ms + post_time_in_ms, 1)) + ' ms'
        return data
def EfficientDetNode():
    rospy.init_node('efficient_det_node', anonymous=True)
    rospy.Subscriber('input', String, image_callback, queue_size=1)
    pub = rospy.Publisher('/image_detections', Detection2DArray, queue_size=10)
    rate = rospy.Rate(1)  # 1 Hz

    path_list = os.listdir(path)
    path_list.sort(key=lambda x: int(x.split('.')[0]))

    stamp_file = open(stamp_path)
    stamp_lines = stamp_file.readlines()
    stamp_i = 0

    for filename in path_list:
        img_path = filename
        cur_frame = img_path[:-4]
        img_path = path + "/" + img_path

        cur_stamp = float(stamp_lines[stamp_i][-13:].strip('\n'))
        # cur_stamp = rospy.Time.from_sec(
        #     float(stamp_lines[stamp_i][-13:].strip('\n')))
        stamp_i += 1

        detection_results = Detection2DArray()

        # tf bilinear interpolation is different from any other's, just make do
        input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
        input_size = input_sizes[
            compound_coef] if force_input_size is None else force_input_size
        ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                         max_size=input_size)

        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        model = EfficientDetBackbone(compound_coef=compound_coef,
                                     num_classes=len(obj_list),
                                     ratios=anchor_ratios,
                                     scales=anchor_scales)
        model.load_state_dict(
            torch.load(f'weights/efficientdet-d{compound_coef}.pth',
                       map_location='cpu'))
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model = model.cuda()
        if use_float16:
            model = model.half()

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold,
                              iou_threshold)

        out = invert_affine(framed_metas, out)

        display(cur_frame, out, ori_imgs, imshow=False, imwrite=True)

        for i in range(len(out)):
            for j in range(len(out[i]['rois'])):
                x1, y1, x2, y2 = out[i]['rois'][j].astype(int)
                obj = obj_list[out[i]['class_ids'][j]]
                score = float(out[i]['scores'][j])

                result = ObjectHypothesisWithPose()
                result.score = score
                if obj == 'car':
                    result.id = 0
                elif obj == 'person':
                    result.id = 1
                elif obj == 'cyclist':
                    result.id = 2

                detection_msg = Detection2D()
                detection_msg.bbox.center.x = (x1 + x2) / 2
                detection_msg.bbox.center.y = (y1 + y2) / 2
                detection_msg.bbox.size_x = x2 - x1
                detection_msg.bbox.size_y = y2 - y1

                detection_msg.results.append(result)
                detection_results.detections.append(detection_msg)
                rospy.loginfo("%d: %lf", detection_msg.results[0].id,
                              detection_msg.results[0].score)

            detection_results.header.seq = int(cur_frame)
            #detection_results.header.stamp = cur_stamp
            rospy.loginfo(detection_results.header.stamp)
            pub.publish(detection_results)

            if not os.path.exists(txt_path):
                os.makedirs(txt_path)
            #with open(f'txt/{cur_frame}.txt', 'w') as f:
            with open(f'{txt_path}/{cur_frame}.txt', 'w') as f:
                #f.write(str((float)(stamp_lines[stamp_i][-13:].strip('\n'))) + "\n")
                f.write(str(cur_stamp) + "\n")
                for detection in detection_results.detections:
                    f.write(str(detection.bbox.center.x) + " ")
                    f.write(str(detection.bbox.center.y) + " ")
                    f.write(str(detection.bbox.size_x) + " ")
                    f.write(str(detection.bbox.size_y) + " ")
                    f.write(str(detection.results[0].id) + " ")
                    f.write(str(detection.results[0].score) + "\n")

            rate.sleep()

        print('running speed test...')
        with torch.no_grad():
            print('test1: model inferring and postprocessing')
            print('inferring image for 10 times...')
            t1 = time.time()
            for _ in range(10):
                _, regression, classification, anchors = model(x)

                out = postprocess(x, anchors, regression, classification,
                                  regressBoxes, clipBoxes, threshold,
                                  iou_threshold)
                out = invert_affine(framed_metas, out)

            t2 = time.time()
            tact_time = (t2 - t1) / 10
            print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
def test(opt):
    compound_coef = 2
    force_input_size = None  # set None to use default size
    img_id = opt.img_id
    img_path = opt.img_path
    img_path = img_path + str(img_id) + '.jpg'

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['02010001', '02010002']

    color_list = standard_to_bgr(STANDARD_COLORS)
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[
        compound_coef] if force_input_size is None else force_input_size
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                     max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
        0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    model.load_state_dict(torch.load(opt.weights, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification, regressBoxes,
                          clipBoxes, threshold, iou_threshold)

    def display(preds, imgs, imshow=True, imwrite=False, img_id=1):
        for i in range(len(imgs)):
            if len(preds[i]['rois']) == 0:
                continue

            imgs[i] = imgs[i].copy()
            imgs[i] = cv2.cvtColor(imgs[i], cv2.COLOR_BGR2RGB)

            for j in range(len(preds[i]['rois'])):
                x1, y1, x2, y2 = preds[i]['rois'][j].astype(int)
                obj = obj_list[preds[i]['class_ids'][j]]
                score = float(preds[i]['scores'][j])
                plot_one_box(imgs[i], [x1, y1, x2, y2],
                             label=obj,
                             score=score,
                             color=color_list[get_index_label(obj, obj_list)])

            if imshow:
                cv2.imshow('img', imgs[i])
                cv2.waitKey(0)

            if imwrite:

                str1 = 'test/' + str(img_id) + '.jpg'
                cv2.imwrite(str1, imgs[i])

    out = invert_affine(framed_metas, out)
    display(out, ori_imgs, imshow=False, imwrite=True, img_id=img_id)

    print('running speed test...')
    with torch.no_grad():
        print('test1: model inferring and postprocessing')
        print('inferring image for 10 times...')
        t1 = time.time()
        for _ in range(10):
            _, regression, classification, anchors = model(x)
            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold,
                              iou_threshold)
            out = invert_affine(framed_metas, out)
        tempList = []
        for j in range(len(out[0]['class_ids'])):
            tempout = {}
            tempout['image_id'] = img_id
            if out[0]['class_ids'][j] == 1:
                tempout['category_id'] = 2
            else:
                tempout['category_id'] = 1
            tempout['score'] = out[0]['scores'][j].astype(np.float64)
            tempout['bbox'] = [
                (out[0]['rois'][j][0]).astype(np.float64),
                (out[0]['rois'][j][1]).astype(np.float64),
                (out[0]['rois'][j][2]).astype(np.float64) -
                (out[0]['rois'][j][0]).astype(np.float64),
                (out[0]['rois'][j][3]).astype(np.float64) -
                (out[0]['rois'][j][1]).astype(np.float64),
            ]
            tempList.append(tempout)
        t2 = time.time()
        tact_time = (t2 - t1) / 10
        print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
        with open("test/" + str(img_id) + ".json", "w") as f:
            json.dump(tempList, f)
        print("生成标注后的图片(" + str(img_id) + ".jpg)和json(" + str(img_id) +
              ".json)到test文件夹中...")
Example #14
model_1.load_state_dict(
    torch.load(
        f'/data/efdet/logs/{project1}/weights/{save_time1}/efficientdet-d{compound_coef}_{number}.pth',
        map_location='cpu'))

# model 2
model_2 = EfficientDetBackbone(compound_coef=compound_coef,
                               num_classes=len(obj_list_2),
                               ratios=anchor_ratios,
                               scales=anchor_scales)
model_2.load_state_dict(
    torch.load(
        f'/data/efdet/logs/{project}/crop/weights/{save_time2}/efficientdet-d{compound_coef}_{number}.pth',
        map_location='cpu'))

model_1.requires_grad_(False)
model_1.eval()

model_2.requires_grad_(False)
model_2.eval()

if use_cuda:
    model_1 = model_1.cuda()
    model_2 = model_2.cuda()
if use_float16:
    model_1 = model_1.half()
    model_2 = model_2.half()


def display(out_1, out_2, imgs, imshow=True, showtime=0, imwrite=False):
    # if len(preds[i]['rois']) == 0:                    # if the model doesn't detect an object, don't show the image
Example #15
def effdet_detection(content, effdet):

    video_src = 0  # set int to use webcam, set str to read from a video file

    compound_coef = 0
    force_input_size = None  # set None to use default size

    threshold = 0.5
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', '', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
        'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass',
        'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
        'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
        'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '',
        'toilet', '', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
        'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
        '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
        'toothbrush'
    ]

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[
        compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list))
    model.load_state_dict(
        torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    # function for display
    def display(preds, imgs, content, effdet):
        for i in range(len(imgs)):
            if len(preds[i]['rois']) == 0:
                return imgs[i]

            for j in range(len(preds[i]['rois'])):
                (x1, y1, x2, y2) = preds[i]['rois'][j].astype(int)
                #cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0), 2)
                obj = obj_list[preds[i]['class_ids'][j]]
                score = float(preds[i]['scores'][j])

                if obj == content:
                    effdet.send_message_to_scratch(
                        (x1 + x2) * 0.5 * 0.625 - 200)  # send the selected class's box x-center to Scratch
                    print((x1 + x2) * 0.5 * 0.625 - 200)
                    cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0),
                                  2)

                    cv2.putText(imgs[i], '{}, {:.3f}'.format(obj, score),
                                (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 0), 1)

            return imgs[i]

    # Box
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # frame preprocessing
        ori_imgs, framed_imgs, framed_metas = preprocess_video(
            frame, max_size=input_size)

        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        # model predict
        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold,
                              iou_threshold)

        # result
        out = invert_affine(framed_metas, out)
        img_show = display(out, ori_imgs, content, effdet)

        # show frame by frame
        cv2.imshow('frame', img_show)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
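effdet_detection only calls send_message_to_scratch on its effdet argument, so a hypothetical stub is enough to exercise it locally (it will still try to open the webcam and load the d0 weights):

class ScratchStub:
    def send_message_to_scratch(self, x_center):
        # stand-in for the real Scratch bridge; just log the value
        print('x-center sent to Scratch:', x_center)

effdet_detection('person', ScratchStub())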
    def __init__(self,
                 video_src: str,
                 video_output: str,
                 text_output: str,
                 obj_list: list,
                 input_sizes: list,
                 reid_cpkt: str,
                 compound_coef: int,
                 force_input_size=None,
                 threshold=0.2,
                 iou_threshold=0.2,
                 use_cuda=True,
                 use_float16=False,
                 cudnn_fastest=True,
                 cudnn_benchmark=True,

                 max_dist=0.2,
                 min_confidence=0.3,
                 nms_max_overlap=0.5,
                 max_iou_distance=0.7,
                 max_age=70,
                 n_init=3,
                 nn_budget=100,

                 selected_target=None):

        # I/O
        # Video's path
        self.video_src = video_src  # set int to use webcam, set str to read from a video file
        self.video_output = video_output  # output to the specific position
        # text path
        self.text_output = text_output  # output to the file with the csv format

        # DETECTOR
        self.compound_coef = compound_coef
        self.force_input_size = force_input_size  # set None to use default size

        self.threshold = threshold
        self.iou_threshold = iou_threshold

        self.use_cuda = use_cuda
        self.use_float16 = use_float16
        cudnn.fastest = cudnn_fastest
        cudnn.benchmark = cudnn_benchmark

        # coco_name
        self.obj_list = obj_list

        # input size
        self.input_sizes = input_sizes
        self.input_size = input_sizes[self.compound_coef] if force_input_size is None else force_input_size

        # load detector model
        model = EfficientDetBackbone(compound_coef=self.compound_coef, num_classes=len(obj_list))
        model.load_state_dict(torch.load(f'weights/efficientdet-d{self.compound_coef}.pth'))
        model.requires_grad_(False)
        model.eval()

        if self.use_cuda and torch.cuda.is_available():
            model = model.cuda()
        if self.use_float16:
            model = model.half()
        self.detector = model

        # TRACKER
        self.reid_cpkt = reid_cpkt
        self.max_dist = max_dist
        self.min_confidence = min_confidence
        self.nms_max_overlap = nms_max_overlap
        self.max_iou_distance = max_iou_distance
        self.max_age = max_age
        self.n_init = n_init
        self.nn_budget = nn_budget

        # load tracker model,
        self.trackers = []
        self.selected_target = selected_target
        for num in range(0, len(self.selected_target)):
            self.trackers.append(build_tracker(reid_cpkt,
                                               max_dist,
                                               min_confidence,
                                               nms_max_overlap,
                                               max_iou_distance,
                                               max_age,
                                               n_init,
                                               nn_budget,
                                               use_cuda))
        # video frames
        self.frame_id = 0