Code example #1
def read_images():
    for filename in os.listdir(imgfile_path):
        ori_imgs, framed_imgs, framed_metas = preprocess(os.path.join(
            imgfile_path, filename),
                                                         max_size=input_size)
        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        # NOTE: building the model and reloading weights inside the per-image
        # loop is wasteful; normally this is done once before iterating.
        model = EfficientDetBackbone(compound_coef=7,
                                     num_classes=len(obj_list),
                                     ratios=anchor_ratios,
                                     scales=anchor_scales)
        model.load_state_dict(
            torch.load('weights/efficientdet-d7/efficientdet-d7.pth')
        )  # place your weight path here
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model = model.cuda()
        if use_float16:
            model = model.half()

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold,
                              iou_threshold)

        out = invert_affine(framed_metas, out)
        display(filename, out, ori_imgs, imshow=False, imwrite=True)

        print('running speed test...')
        with torch.no_grad():
            print('test1: model inferring and postprocessing')
            print('inferring image for 10 times...')
            t1 = time.time()
            for _ in range(10):
                _, regression, classification, anchors = model(x)

                out = postprocess(x, anchors, regression, classification,
                                  regressBoxes, clipBoxes, threshold,
                                  iou_threshold)
                out = invert_affine(framed_metas, out)

            t2 = time.time()
            tact_time = (t2 - t1) / 10
            print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
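Note: like most snippets on this page, example #1 depends on module-level imports and globals that are not shown. A minimal setup sketch, assuming the zylo117/Yet-Another-EfficientDet-Pytorch layout referenced in code example #14; the folder path and threshold values below are illustrative assumptions, not taken from the original script:

import os
import time

import torch

# helpers assumed to come from zylo117/Yet-Another-EfficientDet-Pytorch
from backbone import EfficientDetBackbone
from efficientdet.utils import BBoxTransform, ClipBoxes
from utils.utils import preprocess, invert_affine, postprocess

imgfile_path = 'datasets/test'   # assumed input image folder
use_cuda = torch.cuda.is_available()
use_float16 = False

# default anchor config of that repo
anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

threshold = 0.2       # assumed confidence threshold
iou_threshold = 0.2   # assumed NMS IoU threshold

input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
input_size = input_sizes[7]  # d7, matching compound_coef=7 above
obj_list = ['person', 'bicycle', 'car']  # replace with the real class list
# display(...) is the caller's own visualization helper and is not sketched here.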
Code example #2
    def predict(self, img_path, threshold=0.5):
        self.system_dict["params"]["threshold"] = threshold
        ori_imgs, framed_imgs, framed_metas = preprocess(
            img_path, max_size=self.system_dict["local"]["input_size"])

        if self.system_dict["params"]["use_cuda"]:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not self.system_dict["params"]["use_float16"]
                 else torch.float16).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = self.system_dict[
                "local"]["model"](x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              self.system_dict["params"]["threshold"],
                              self.system_dict["params"]["iou_threshold"])

        out = invert_affine(framed_metas, out)
        scores, labels, bboxes = self.display(out,
                                              ori_imgs,
                                              imshow=False,
                                              imwrite=True)
        return scores, labels, bboxes
Code example #3
def detect(image):
    # convert image to array
    frame = np.array(image)

    # convert RGB to BGR (OpenCV channel order)
    frames = frame[:, :, ::-1]

    ori_imgs, framed_imgs, framed_metas = image_preprocess(frames,
                                                           max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
        0, 3, 1, 2)

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification, regressBoxes,
                          clipBoxes, threshold, iou_threshold)

        out = invert_affine(framed_metas, out)
        render_frame = display(out, frame, imshow=True, imwrite=False)
        return render_frame
Code example #4
    def detect_image(self,
                     image_path,
                     use_cuda=False,
                     use_float16=False,
                     threshold=0.2,
                     iou_threshold=0.2):
        max_size = self.input_sizes[self.compound_coef]
        # replace this part with your project's anchor config
        anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
        anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
        ori_imgs, framed_imgs, framed_metas = preprocess(image_path,
                                                         max_size=max_size)
        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)
        features, regression, classification, anchors = self.forward(x)
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        out = postprocess(x, anchors, regression, classification, regressBoxes,
                          clipBoxes, threshold, iou_threshold)
        out = invert_affine(framed_metas, out)
        self.__save_image(out, ori_imgs, imwrite=True)
Code example #5
    def __call__(self, imgs):
        # frame preprocessing
        _, framed_imgs, framed_metas = preprocess(imgs,
                                                  max_size=self.input_size)

        if self.use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        dtype = torch.float32 if not self.use_float16 else torch.float16
        x = x.to(dtype).permute(0, 3, 1, 2)

        # model predict
        with torch.no_grad():
            features, regression, classification, anchors = self.model(x)

            out = postprocess(x,
                              anchors, regression, classification,
                              self.regressBoxes, self.clipBoxes,
                              self.score_thresh, self.nms_thresh)

        # result
        out = invert_affine(framed_metas, out)

        if len(out) == 0:
            return None, None, None

        rois = [o['rois'] for o in out]
        scores = [o['scores'] for o in out]
        class_ids = [o['class_ids'] for o in out]
        if self.is_xywh:
            return xyxy_to_xywh(rois), scores, class_ids
        else:
            return rois, scores, class_ids
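The xyxy_to_xywh helper used in example #5 is not shown. A plausible minimal sketch, assuming each element of rois is an (N, 4) array of [x1, y1, x2, y2] rows and that "xywh" means center-based boxes (the usual deep-sort convention); this is an assumption, not the module's actual code:

import numpy as np

def xyxy_to_xywh(rois_per_image):
    # convert corner boxes to [cx, cy, w, h] per image
    converted = []
    for rois in rois_per_image:
        rois = np.asarray(rois, dtype=np.float64)
        if rois.size == 0:
            converted.append(rois.reshape(0, 4))
            continue
        xywh = rois.copy()
        xywh[:, 2] = rois[:, 2] - rois[:, 0]      # width
        xywh[:, 3] = rois[:, 3] - rois[:, 1]      # height
        xywh[:, 0] = rois[:, 0] + xywh[:, 2] / 2  # center x
        xywh[:, 1] = rois[:, 1] + xywh[:, 3] / 2  # center y
        converted.append(xywh)
    return converted

For the top-left convention, drop the two "/ 2" terms.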
Code example #6
def main(img_path, base_name, checkpoint_path):
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                 ratios=anchor_ratios, scales=anchor_scales)
    # model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
    model.load_state_dict(torch.load(checkpoint_path))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x,
                          anchors, regression, classification,
                          regressBoxes, clipBoxes,
                          threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    display(out, ori_imgs, base_name, imshow=False, imwrite=True)
Code example #7
def detect(img_path):
    #------------------preprocessing------------------------
    ori_imgs, framed_imgs, framed_metas = preprocess(
        img_path, max_size=input_size)  #input_size: 512

    x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
        0, 3, 1, 2)

    with torch.no_grad():
        start = timeutil.get_epochtime_ms()
        t1 = time.time()

        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification, regressBoxes,
                          clipBoxes, threshold, iou_threshold)
    out = invert_affine(framed_metas, out)
    c1, c2 = display(out, ori_imgs, imshow=True, imwrite=False)
    t2 = time.time()
    tact_time = t2 - t1  # single inference here, so no division by 10
    print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
    print(f'elapsed: {(t2 - t1) * 1000:.1f} ms')
    print("Latency: %fms" % (timeutil.get_epochtime_ms() - start))

    return c1, c2
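timeutil in example #7 is a local helper module that is not reproduced here; judging by how its result is subtracted to report latency in milliseconds, it presumably wraps the epoch time. A one-line sketch under that assumption:

# hypothetical timeutil.py
import time

def get_epochtime_ms():
    # current Unix time in milliseconds
    return int(round(time.time() * 1000))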
Code example #8
def evaluate_coco(img_path, model, threshold=0.05):
    kag_res = ["image_id,PredictionString"]
    included_extensions = ['jpg', 'jpeg', 'bmp', 'png', 'gif']
    imgs_files = [os.path.join(img_path, fn) for fn in os.listdir(img_path)
                  if any(fn.endswith(ext) for ext in included_extensions)]
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for img_file in tqdm(imgs_files):
        ori_imgs, framed_imgs, framed_metas = preprocess(img_file, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            if use_float16:
                x = x.half()
            else:
                x = x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x,
                            anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            threshold, nms_threshold)

        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            kag_res.append(f"{os.path.basename(img_path).replace('.jpg', '')},{format_prediction_string(rois, scores)}")

    if len(kag_res) <= 1:  # only the header row means no valid detections
        raise Exception('the model does not provide any valid output, check model architecture and the data input')

    # write output
    filepath = '/kaggle/working/submission.csv'
    if os.path.exists(filepath):
        os.remove(filepath)
    with open(filepath, "w") as f:
        for line in kag_res:
            f.write(line)
            f.write("\n")
Code example #9
def single_img_test(img_path, input_size, model, use_cuda=True, use_float16=False):
    # tf bilinear interpolation is different from any other's, just make do
    threshold = 0.05
    iou_threshold = 0.5

    image_name = img_path.replace('\\', '/').split('/')[-1]

    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x,
                          anchors, regression, classification,
                          regressBoxes, clipBoxes,
                          threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    # display(out, ori_imgs, imshow=False, imwrite=True)

    # print('running speed test...')
    # with torch.no_grad():
    #     print('test1: model inferring and postprocessing')
    #     print('inferring image for 10 times...')
    #     t1 = time.time()
    #     for _ in range(10):
    #         _, regression, classification, anchors = model(x)
    #
    #         out = postprocess(x,
    #                           anchors, regression, classification,
    #                           regressBoxes, clipBoxes,
    #                           threshold, iou_threshold)
    #         out = invert_affine(framed_metas, out)
    #
    #     t2 = time.time()
    #     tact_time = (t2 - t1) / 10
    #     print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
    det_num = len(out[0]['class_ids'])
    det = []
    for i in range(det_num):
        det.append([image_name, out[0]['class_ids'][i], out[0]['scores'][i], tuple(out[0]['rois'][i])])
    return det
Code example #10
def evaluate_mAP(imgs, imgs_ids, framed_metas, regressions,
                 classifications, anchors, threshold=0.05, nms_threshold=0.5):
    '''
    Inputs: images, image IDs, framed metas (resizing stats), predictions
    Output: results
    '''
    results = []  # This is used for storing evaluation results.
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    preds = postprocess(imgs,
                        torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                        regressions.detach(), classifications.detach(),
                        regressBoxes, clipBoxes, threshold, nms_threshold)

    if not preds:
        return results

    preds = invert_affine(framed_metas, preds)
    for i, _ in enumerate(preds):
        scores = preds[i]['scores']
        class_ids = preds[i]['class_ids']
        rois = preds[i]['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                if score < threshold:
                    break

                image_result = {
                    'image_id': imgs_ids[i],
                    'category_id': label + 1,
                    'score': float(score),
                    'bbox': box.tolist(),
                }

                results.append(image_result)
    return results
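A sketch of how evaluate_mAP might be driven from a validation loop. val_loader, the batch field names, and the final COCOeval step are assumptions for illustration; only the call signature comes from the function above:

import torch

all_results = []
with torch.no_grad():
    for batch in val_loader:                  # assumed DataLoader
        imgs = batch['img']                   # (B, 3, H, W) tensor
        imgs_ids = batch['img_id']            # COCO image ids
        framed_metas = batch['framed_metas']  # resize stats from preprocess
        _, regressions, classifications, anchors = model(imgs)
        res = evaluate_mAP(imgs, imgs_ids, framed_metas,
                           regressions, classifications, anchors)
        if res:
            all_results.extend(res)
# all_results can then be passed to pycocotools' COCOeval via loadRes().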
Code example #11
    def predict(self, raw_img):
        self.ori_imgs, self.framed_imgs, self.framed_metas = preprocess_raw(raw_img, max_size=self.input_size)
        if self.use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in self.framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in self.framed_imgs], 0)
        x = x.to(torch.float32 if not self.use_float16 else torch.float16).permute(0, 3, 1, 2)

        with torch.no_grad():
            self.features, self.regression, self.classification, self.anchors = self.model(x)

            self.regressBoxes = BBoxTransform()
            self.clipBoxes = ClipBoxes()

            out = postprocess(x,
                              self.anchors, self.regression, self.classification,
                              self.regressBoxes, self.clipBoxes,
                              self.threshold, self.iou_threshold)
            pred = invert_affine(self.framed_metas, out)
            return pred
Code example #12
def detect():
    with torch.no_grad():
        t1 = time.time()

        features, regression, classification, anchors = model(x)
        # t1 = time.time()

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        # start = timeutil.get_epochtime_ms()
        out = postprocess(x, anchors, regression, classification, regressBoxes,
                          clipBoxes, threshold, iou_threshold)

    out = invert_affine(framed_metas, out)

    c1, c2 = display(out, ori_imgs, imshow=True, imwrite=False)
    # t2 = time.time()
    # tact_time = (t2 - t1) / 10
    # print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
    # print("Latency: %fms" % (timeutil.get_epochtime_ms() - start))

    return c1, c2
Code example #13
    def get_face_position(fn):
        _, fimg, meta = preprocess(fn, max_size=effdet_input_size)
        x = torch.from_numpy(fimg[0]).float().unsqueeze(0)
        x = x.permute(0, 3, 1, 2)
        if args.cuda:
            x = x.cuda()

        with torch.no_grad():
            _, reg, clss, anchors = model(x)

            rbox = BBoxTransform()
            cbox = ClipBoxes()

            out = postprocess(x, anchors, reg, clss, rbox, cbox,
                              effdet_thr, effdet_iou_thr)
            out = invert_affine(meta, out)
        
        lst_face_bbox = []
        for i_detect in range(len(out[0]["rois"])):
            lst_face_bbox.append(
                [int(val) for val in out[0]["rois"][i_detect]]
            )
        return lst_face_bbox
Code example #14
def predict_fn(data, model):
    """mostly copied from
    https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch/blob/master/efficientdet_test.py

    Args:
        data: tuple of inputs generated by custom input_fn above
        model: PyTorch model loaded in memory by model_fn

    Returns: a prediction

    """
    ori_imgs, framed_imgs, framed_metas, threshold, iou_threshold = data
    x = torch.stack([
        torch.from_numpy(fi).cuda() if USE_CUDA else torch.from_numpy(fi)
        for fi in framed_imgs
    ], 0)

    x = x.to(torch.float32 if not USE_FLOAT16 else torch.float16).permute(
        0, 3, 1, 2)

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regress_boxes = BBoxTransform()
        clip_boxes = ClipBoxes()

        out = postprocess(x,
                          anchors=anchors,
                          regression=regression,
                          classification=classification,
                          regressBoxes=regress_boxes,
                          clipBoxes=clip_boxes,
                          threshold=threshold,
                          iou_threshold=iou_threshold)
        out = invert_affine(framed_metas, out)
        return out
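The "input_fn above" that the docstring mentions is not reproduced on this page. Inferred from the five-element tuple unpacked at the top of predict_fn, a minimal sketch in the SageMaker serving style; the request field names and the INPUT_SIZE global are assumptions:

import json

def input_fn(request_body, request_content_type='application/json'):
    req = json.loads(request_body)
    img_path = req['img_path']                    # assumed field name
    threshold = req.get('threshold', 0.2)
    iou_threshold = req.get('iou_threshold', 0.2)
    ori_imgs, framed_imgs, framed_metas = preprocess(
        img_path, max_size=INPUT_SIZE)            # INPUT_SIZE assumed global
    return ori_imgs, framed_imgs, framed_metas, threshold, iou_threshold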
Code example #15
                x = x.to(torch.float32 if not use_float16 else
                         torch.float16).permute(0, 3, 1, 2)
                if use_cuda:
                    model = model.cuda()
                if use_float16:
                    model = model.half()

                with torch.no_grad():
                    features, regression, classification, anchors = model(x)
                    regressBoxes = BBoxTransform()
                    clipBoxes = ClipBoxes()
                    out = postprocess(x, anchors, regression, classification,
                                      regressBoxes, clipBoxes,
                                      config.threshold, config.iou_threshold)
                out = invert_affine(framed_metas, out)

                if opt.debug:
                    display(out,
                            ori_imgs,
                            config,
                            label=os.path.basename(img_id),
                            imshow=True,
                            imwrite=False)

                mark = []  # None for Unknown, True for Pass, False for Reject
                for roi, class_id, score in zip(out[0]['rois'],
                                                out[0]['class_ids'],
                                                out[0]['scores']):
                    iou = compute_overlaps(np.asarray([roi]),
                                           np.asarray([red_box]))
Code example #16
File: detect.py (project: clover978/vault)
def detect(model, dataset, args):
    use_cuda = not args.cpu
    threshold = args.threshold
    iou_threshold = args.iou_threshold
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[args.compound_coef]

    img_dir = os.path.join(dataset, dataset, 'images')
    bbox_dir = os.path.join(dataset, dataset, 'annotations', 'bboxes')
    vis_dir = os.path.join(dataset, 'det_vis')
    prepare_dirs(bbox_dir, vis_dir)

    img_paths = [os.path.join(img_dir, f) for f in os.listdir(img_dir)]
    for img_path in tqdm(img_paths):
        ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                         max_size=input_size)
        ori_img = ori_imgs[0]
        img_id = os.path.basename(img_path).split('.')[0]

        json_byhand = os.path.join(dataset, 'annotation_byhand',
                                   img_id + '.json')
        if os.path.exists(json_byhand):
            with open(json_byhand) as f:
                annotation_byhand = json.load(f)
                points = annotation_byhand['shapes'][0]['points']
                max_box = points[0] + points[1]
        else:
            if args.update:  # only process annotations by hand
                continue
            if use_cuda:
                x = torch.stack(
                    [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            else:
                x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs],
                                0)

            x = x.to(torch.float32).permute(0, 3, 1, 2)

            with torch.no_grad():
                features, regression, classification, anchors = model(x)

                regressBoxes = BBoxTransform()
                clipBoxes = ClipBoxes()

                preds = postprocess(x, anchors, regression, classification,
                                    regressBoxes, clipBoxes, threshold,
                                    iou_threshold)

                pred = invert_affine(framed_metas, preds)[0]

            max_area, max_box = 0, [0, 0, ori_img.shape[1], ori_img.shape[0]]
            for det, class_id in zip(pred['rois'], pred['class_ids']):
                if class_id != 0:
                    continue
                x1, y1, x2, y2 = det.astype(int)
                w, h = x2 - x1, y2 - y1
                area = w * h
                if area > max_area:
                    max_area = area
                    max_box = [x1, y1, x2, y2]

        plot_one_box(ori_img, max_box, color=[255, 0, 255], line_thickness=2)
        if args.vis:
            cv2.imwrite(os.path.join(vis_dir, img_id + '.jpg'), ori_img)

        bbox_file = os.path.join(bbox_dir, img_id + '.txt')
        with open(bbox_file, 'w') as f:
            bbox_info = ' '.join(map(str, max_box))
            f.write(bbox_info)
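prepare_dirs in example #16 is not shown; presumably it just creates the output directories. A short sketch under that assumption:

import os

def prepare_dirs(*dirs):
    # create each directory if it does not already exist
    for d in dirs:
        os.makedirs(d, exist_ok=True)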
Code example #17
with torch.no_grad():
    for indx in range((len(img_paths) + batch_eval - 1) // batch_eval):
        print(indx * batch_eval)
        batch_slice = slice(indx * batch_eval,
                            min((indx + 1) * batch_eval, len(img_paths)))
        ori_img_batch, framed_img_batch, metas_batch = preprocess(
            img_paths[batch_slice], max_size=input_size)
        img_names = img_paths[batch_slice]

        convert_coco(img_names)

        image_names = []
        for img_name in img_names:
            image_names.append(img_name.replace('/', '_')[:-4])

        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_img_batch], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_img_batch],
                            0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)
        features, regression, classification, anchors = model(x)
        out = postprocess(x, anchors, regression, classification, regressBoxes,
                          clipBoxes, threshold, iou_threshold)
        out = invert_affine(metas_batch, out)
        display(image_names, out, ori_img_batch, imshow=False, imwrite=True)
Code example #18
def efficientDet_video_inference(video_src, compound_coef=0, force_input_size=None,
                                 frame_skipping=3,
                                 threshold=0.2, out_path=None, imshow=False,
                                 display_fps=False):

    #deep-sort variables

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0


    model_filename = '/home/shaheryar/Desktop/Projects/Football-Monitoring/deep_sort/model_weights/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric,n_init=5)

    # efficientDet-pytorch variables
    iou_threshold = 0.4
    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)
    frame_width = int(cap.get(3))
    frame_height = int(cap.get(4))
    fourcc = cv2.VideoWriter_fourcc(*'MPEG')
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("Video fps",fps)
    if(out_path is not None):
        outp = cv2.VideoWriter(out_path, fourcc, fps, (frame_width, frame_height))
    i=0
    start= time.time()
    current_frame_fps=0
    while True:

        ret, frame = cap.read()

        if not ret:
            break
        t1=time.time()
        if frame_skipping == 0 or i % frame_skipping == 0:
            # frame preprocessing (running detections)
            ori_imgs, framed_imgs, framed_metas, t1 = preprocess_video(frame, width=input_size, height=input_size)
            if use_cuda:
                x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            else:
                x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
            # model predict
            t1=time.time()
            with torch.no_grad():
                features, regression, classification, anchors = model(x)

                out = postprocess(x,
                                  anchors, regression, classification,
                                  regressBoxes, clipBoxes,
                                  threshold, iou_threshold)
            # Post processing
            out = invert_affine(framed_metas, out)
            # decode bboxes, class names and scores
            boxes, classes, scores = decode_predictions(out[0])
            org_boxes = boxes.copy()
            t2 = time.time() - t1

            # feature extraction for deep sort
            boxes = [convert_bbox_to_deep_sort_format(frame.shape, b) for b in boxes]

            features = encoder(frame,boxes)
            detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxes, features)]
            boxes = np.array([d.tlwh for d in detections])
            # print(boxes)
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            tracker.predict()
            tracker.update(detections)



        i = i + 1
        img_show=frame.copy()
        for j in range(len(org_boxes)):
            img_show = drawBoxes(img_show, org_boxes[j], (255, 255, 0), str(tracker.tracks[j].track_id))

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            roi = frame[y1:y2, x1:x2]
            cv2.rectangle(img_show, (x1, y1), (x2, y2), update_color_association(roi, track.track_id), 2)
            cv2.putText(img_show, str(track.track_id), (x1, y1), 0, 5e-3 * 100, (255, 255, 0), 1)


        if display_fps:
            current_frame_fps=1/t2
        else:
            current_frame_fps=0

        cv2.putText(img_show, 'FPS: {0:.2f}'.format(current_frame_fps), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 0),
                    2, cv2.LINE_AA)
        if (i % int(fps) == 0):
            print("Processed ", str(int(i / fps)), "seconds")
            print("Time taken",time.time()-start)
            # print(color_dict)

        if imshow:
            img_show=cv2.resize(img_show,(0,0),fx=0.75,fy=0.75)
            cv2.imshow('Frame',img_show)
            # Press Q on keyboard to  exit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        if out_path is not None:
            outp.write(img_show)

    cap.release()
    if out_path is not None:
        outp.release()
Code example #19
def inference():
    compound_coef = 0
    force_input_size = None  # set None to use default size
    img_path = 'test/original_img.jpg'
    
    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']


    color_list = standard_to_bgr(STANDARD_COLORS)
    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[2] if force_input_size is None else force_input_size
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDet_semanticBackbone(compound_coef=1, num_classes=len(obj_list),
                                          ratios=anchor_ratios, scales=anchor_scales)

    model.load_state_dict(torch.load('model_weight/model_1_epoch_80.pth'))

    if use_cuda:
        model = model.cuda()

    with torch.no_grad():
        features, regression, classification, anchors, sem_out = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x,
                          anchors, regression, classification,
                          regressBoxes, clipBoxes,
                          threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    out = box(out, ori_imgs, color_list, obj_list, imshow=False, imwrite=False)

    outputs = sem_out.data.cpu().numpy() # (shape: (batch_size, num_classes, img_h, img_w))
    pred_label_imgs = np.argmax(outputs, axis=1) # (shape: (batch_size, img_h, img_w))
    pred_label_imgs = pred_label_imgs.astype(np.uint8)

    z = cv2.resize(pred_label_imgs[0], (ori_imgs[0].shape[1], ori_imgs[0].shape[0]))

    from semantic_utils.utils import label_img_to_color
    pred_label_img_color = label_img_to_color(z)
    overlayed_img = 0.35*out + 0.65*pred_label_img_color

    flag = cv2.imwrite('test/semantic_img_1.jpg', overlayed_img)
    return flag
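label_img_to_color is imported from the project's own semantic_utils module and is not shown. A hypothetical stand-in that maps each class id in the (H, W) label image to an RGB color through a fixed palette; the palette itself is an assumption:

import numpy as np

def label_img_to_color(label_img, num_classes=20):
    rng = np.random.RandomState(0)  # deterministic pseudo-random palette
    palette = rng.randint(0, 255, size=(num_classes, 3), dtype=np.uint8)
    return palette[label_img]       # fancy indexing -> (H, W, 3) image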
Code example #20
def getImageDetections(imagePath, weights, nms_threshold, confidenceParam, coefficient):
    """
    Runs the detections and returns all detection into a single structure.

    Parameters
    ----------
    imagePath : str
        Path to all images.
    weights : str
        path to the weights.
    nms_threshold : float
        non-maximum suppression threshold.
    confidenceParam : float
        confidence score for the detections (everything above this threshold is considered a valid detection).
    coefficient : int
        coefficient of the current efficientdet model (d0 to d7).

    Returns
    -------
    detectionsList : List
        return a list with all predicted bounding-boxes.

    """
    compound_coef = coefficient
    force_input_size = None  # set None to use default size
    img_path  = imagePath

    threshold = confidenceParam
    iou_threshold = nms_threshold

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True
    obj_list = ['class_name']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                # replace this part with your project's anchor config
                                ratios=[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
                                scales=[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

    model.load_state_dict(torch.load(rootDir+'logs/' + project + '/' + weights))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x,
                          anchors, regression, classification,
                          regressBoxes, clipBoxes,
                          threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
     
    detectionsList = []
    for i in range(len(ori_imgs)):
        if len(out[i]['rois']) == 0:
            continue
        for j in range(len(out[i]['rois'])):
            (x1, y1, x2, y2) = out[i]['rois'][j].astype(int)
            detectionsList.append((float(out[i]['scores'][j]), x1, y1, x2, y2))
    return detectionsList
Code example #21
def evaluate_voc(gt_dict, img_paths, model, max_size, config):
    results = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for idx, image_path in enumerate(tqdm(img_paths)):
        
        ori_imgs, framed_imgs, framed_metas = preprocess(image_path, max_size=max_size)
        x = torch.from_numpy(framed_imgs[0])

        if config.eval_use_cuda:
            x = x.cuda(config.eval_gpu)
            if config.eval_use_float16:
                x = x.half()
            else:
                x = x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)
        

        preds = postprocess(x,
                            anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            config.eval_threshold, config.eval_nms_threshold,
                            anchor_free_mode=config.anchor_free_mode)
        
        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # # x1,y1,x2,y2 -> x1,y1,w,h
            # rois[:, 2] -= rois[:, 0]
            # rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                image_result = [idx, box[0], box[1], box[2], box[3], score, label]

                results.append(image_result)

    if not len(results):
        raise Exception('the model does not provide any valid output, check model architecture and the data input')
    voc_certs = []
    for idx in range(len(config.obj_list)):
        npos, nd, rec, prec, ap = voc_eval(gt_dict, results, idx, iou_thres=0.5, use_07_metric=False)
        voc_certs.append([prec, rec, ap])
    return voc_certs
Code example #22
def excuteModel(videoname):
    # Video's path
    # set int to use webcam, set str to read from a video file

    if videoname is not None:
        video_src = os.path.join(r'D:\GitHub\Detection\server\uploads', f"{videoname}.mp4")
    else:
        video_src = 'D:\\GitHub\\Detection\\server\\AImodel\\videotest\\default.mp4'

    compound_coef = 2
    trained_weights = 'D:\\GitHub\\Detection\\server\\AImodel\\weights\\efficientdet-video.pth'

    force_input_size = None  # set None to use default size

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(
        compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(trained_weights))

    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    # function for display

    # Box
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    writer = None
    # try to determine the total number of frames in the video file
    try:
        prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() \
            else cv2.CAP_PROP_FRAME_COUNT
        total = int(cap.get(prop))
        print("[INFO] {} total frames in video".format(total))

    # an error occurred while trying to determine the total
    # number of frames in the video file
    except Exception:
        print("[INFO] could not determine # of frames in video")
        total = -1

    path_out = os.path.join(os.path.dirname(
        os.path.abspath(__file__)), 'outvideo')

    path_result = r"D:\GitHub\Detection\server\AImodel\videotest\default.mp4"
    path_asset = r"D:\GitHub\Detection\client\src\assets"
    for i in range(0, length):
        ret, frame = cap.read()
        if not ret:
            break

        # frame preprocessing
        ori_imgs, framed_imgs, framed_metas = preprocess_video(
            frame, max_size=input_size)

        if use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda()
                             for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        # model predict
        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            out = postprocess(x,
                              anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)

        # result
        out = invert_affine(framed_metas, out)
        img_show = display(out, ori_imgs, obj_list)

        if writer is None:

            # initialize our video writer
            fourcc = 0x00000021
            #fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            if videoname is not None:
                path_result = os.path.join(path_out, f"{videoname}.mp4")
            else:
                path_result = os.path.join(path_out, "default.mp4")

            writer = cv2.VideoWriter(path_result, fourcc, 30, (img_show.shape[1], img_show.shape[0]), True)


        # write the output frame to disk
        writer.write(img_show)
        print("Processing data... " + str(round((i+1)/length, 3)*100) + " %")
        # show frame by frame
        #cv2.imshow('frame', img_show)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print("[INFO] cleaning up...")

    writer.release()
    cap.release()
    cv2.destroyAllWindows()

    if videoname is not None:
        path_asset = os.path.join(path_asset, f"{videoname}.mp4")
    else:
        path_asset = os.path.join(path_asset, "default.mp4")
    copyfile(path_result, path_asset)
    return path_asset
Code example #23
def main(i):
    compound_coef = i
    force_input_size = None  # set None to use default size

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']

    out_dict = dict()
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                 ratios=anchor_ratios, scales=anchor_scales)
    model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth', map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()

    base_dir = '/data/jiashenc/jackson/'

    print('Processing Det-' + str(i))

    for k in range(1000000, 1100000):
        if k % 1000 == 0:
            print('    Finish {} frames'.format(k + 1))
            
        img_path = os.path.join(base_dir, 'frame{}.jpg'.format(k))
        ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

        if use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x,
                              anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)


        out = invert_affine(framed_metas, out)
        to_json(out, out_dict)

    with open(os.path.join(base_dir, '10', 'res-{:d}.json'.format(i)), 'w') as f:
        json.dump(out_dict, f)
        out_dict = dict()
Code example #24
def main(compound_coef=0, model_dir=MODEL_DIR, nms_threshold=0.5, use_cuda=False, use_float16=False,
         image_batch_size=2):
    threshold = 0.05

    cudnn.fastest = True
    cudnn.benchmark = True

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[compound_coef]

    model = model_fn(model_dir=model_dir, compound_coef=compound_coef, use_cuda=use_cuda,
                     use_float16=use_float16)

    image_paths = glob.glob(os.path.join(DATA, '*.jpg'))

    L = len(image_paths)
    print(f'processing {L} images in batches of {image_batch_size}')
    results = {}
    loop_start = datetime.datetime.now()
    for image_batch in image_path_batches(image_paths, image_batch_size):
        batch_start = datetime.datetime.now()
        ori_images, framed_images, framed_metas = preprocess(*image_batch, max_size=input_size)

        # build tensor from framed images
        x = torch.stack([(torch.from_numpy(fi).cuda()
                          if use_cuda
                          else torch.from_numpy(fi))
                         for fi in framed_images],
                        0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x,
                              anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, nms_threshold)

            out = invert_affine(framed_metas, out)

        batch_end = datetime.datetime.now()
        batch_time = (batch_end - batch_start).total_seconds()
        print(f"batch_time = {batch_time} (s)")
        print(f"batch_size = {image_batch_size}")
        print(f"FPS = {image_batch_size / batch_time:0.4f}")
        print(f"SPF = {batch_time / image_batch_size:0.4f}")

        results.update(dict(zip(image_batch, out)))

    loop_end = datetime.datetime.now()
    loop_time = (loop_end - loop_start).total_seconds()
    print('\nfinal summary:')
    print(f"total processing time: {loop_time} (s)")
    print(f"number of frames processed: {len(image_paths)}")
    print(f"batch_size = {image_batch_size}")
    print(f"FPS: {L / loop_time:0.4f}")
    print(f"SPF: {loop_time / L:0.4f}")

    with open(f'results.{compound_coef}.pkl', 'wb') as fp:
        pickle.dump(results, fp)
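image_path_batches in example #24 is not defined on this page; from its use it is just a chunking generator over the path list. A sketch:

def image_path_batches(paths, batch_size):
    # yield successive batch_size-sized chunks of the path list
    for i in range(0, len(paths), batch_size):
        yield paths[i:i + batch_size]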
Code example #25
def img_detect(file, img_dir, model, input_size, regressBoxes, clipBoxes,
               prior_mask, threshold):
    fname, ext = os.path.splitext(file)
    image_id = int(fname.split("_")[-1])

    img_path = os.path.join(img_dir, file)
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                     max_size=input_size)
    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
        0, 3, 1, 2)

    if args.flip_test:
        ids = torch.arange(x.shape[-1] - 1, -1, -1).long().cuda()
        x_flip = x[..., ids]
        x_cat = torch.cat([x, x_flip], 0)

    with torch.no_grad():
        if args.flip_test:
            (features, union_act_cls, union_sub_reg, union_obj_reg,
             inst_act_cls, inst_obj_cls, inst_bbox_reg, anchors) = model(x_cat)

            anchors = torch.cat([anchors, anchors], 0)
            preds_union = postprocess_dense_union_flip(
                x_cat, anchors, union_act_cls, union_sub_reg, union_obj_reg,
                regressBoxes, clipBoxes, 0.5, 1)
            preds_inst = postprocess_hoi_flip(x_cat,
                                              anchors,
                                              inst_bbox_reg,
                                              inst_obj_cls,
                                              inst_act_cls,
                                              regressBoxes,
                                              clipBoxes,
                                              threshold,
                                              nms_threshold,
                                              mode="object",
                                              classwise=True)
        else:
            (features, union_act_cls, union_sub_reg, union_obj_reg,
             inst_act_cls, inst_obj_cls, inst_bbox_reg, anchors) = model(x)

            preds_union = postprocess_dense_union(x,
                                                  anchors,
                                                  union_act_cls,
                                                  union_sub_reg,
                                                  union_obj_reg,
                                                  regressBoxes,
                                                  clipBoxes,
                                                  0.5,
                                                  1,
                                                  classwise=True)
            preds_inst = postprocess_hoi(x,
                                         anchors,
                                         inst_bbox_reg,
                                         inst_obj_cls,
                                         inst_act_cls,
                                         regressBoxes,
                                         clipBoxes,
                                         threshold,
                                         nms_threshold,
                                         mode="object",
                                         classwise=True)

        preds_inst = invert_affine(framed_metas, preds_inst)[0]
        preds_union = invert_affine(framed_metas, preds_union)[0]

        dets = hoi_match(image_id, preds_inst, preds_union, prior_mask)

    return dets
Code example #26
    def infer(self, image):
        img = np.array(image)
        img = img[:, :, ::-1]  # RGB to BGR
        anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
        anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]

        threshold = 0.25
        iou_threshold = 0.25

        force_input_size = None
        use_cuda = False
        use_float16 = False
        cudnn.fastest = False
        cudnn.benchmark = False

        input_size = 512
        ori_imgs, framed_imgs, framed_metas = preprocess(img,
                                                         max_size=input_size)

        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        model = EfficientDetBackbone(compound_coef=0,
                                     num_classes=len(self.labels),
                                     ratios=anchor_ratios,
                                     scales=anchor_scales)

        model.load_state_dict(torch.load(self.path, map_location='cpu'))
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model = model.cuda()
        if use_float16:
            model = model.half()

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold,
                              iou_threshold)

        pred = invert_affine(framed_metas, out)

        results = []

        for i in range(len(ori_imgs)):
            if len(pred[i]['rois']) == 0:
                continue

            ori_imgs[i] = ori_imgs[i].copy()

            for j in range(len(pred[i]['rois'])):
                xt1, yt1, xbr, ybr = pred[i]['rois'][j].astype(np.float64)
                xt1 = float(xt1)
                yt1 = float(yt1)
                xbr = float(xbr)
                ybr = float(ybr)
                obj = str(pred[i]['class_ids'][j])
                obj_label = self.labels.get(obj)
                obj_score = str(pred[i]['scores'][j])
                results.append({
                    "confidence": obj_score,
                    "label": obj_label,
                    "points": [xt1, yt1, xbr, ybr],
                    "type": "rectangle",
                })

        return results
Code example #27
def evaluate_coco_show_res_jss(img_path,
                               set_name,
                               image_ids,
                               coco,
                               model,
                               threshold=0.05):
    results = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    count = 0
    for image_id in tqdm(image_ids):
        count = count + 1
        if count > 21:
            break
        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + image_info['file_name']
        print('image path:', image_path)

        ori_imgs, framed_imgs, framed_metas = preprocess(
            image_path, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            if use_float16:
                x = x.half()
            else:
                x = x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x, anchors, regression, classification,
                            regressBoxes, clipBoxes, threshold, nms_threshold)

        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                image_result = {
                    'image_id': image_id,
                    'category_id': label + 1,
                    'score': float(score),
                    'bbox': box.tolist(),
                }
                category_id = label + 1
                box = box.tolist()
                xmin, ymin, w, h = (int(v) for v in box)
                if score > 0.2:
                    cv2.rectangle(ori_imgs[0], (xmin, ymin),
                                  (xmin + w, ymin + h), (0, 255, 0), 6)
                    cv2.putText(ori_imgs[0],
                                '{}:{:.2f}'.format(category_id, score),
                                (xmin, ymin), cv2.FONT_HERSHEY_SIMPLEX,
                                4.0, (0, 255, 0), 6)
                results.append(image_result)
        # NOTE: the output directory must already exist; cv2.imwrite does not create it.
        cv2.imwrite(
            f'./test_result/zhongchui_d3_epoch200_1124/tmp{count}.jpeg',
            ori_imgs[0])

    if not results:
        raise Exception(
            'The model did not produce any valid output; check the model architecture and the input data.'
        )

    # write output
    # filepath = f'{set_name}_bbox_results.json'
    filepath = det_save_json
    if os.path.exists(filepath):
        os.remove(filepath)
    json.dump(results, open(filepath, 'w'), indent=4)
Code example #28
def predict(images: List[Union[str, os.PathLike]],
            model: EfficientDetBackbone,
            compound_coef: int,
            resize: Optional[Union[int, Tuple[int, int]]] = None,
            confidence: Optional[float] = 0.5,
            nms_threshold: Optional[float] = 0.5,
            output_path: Union[str, os.PathLike] = "../") -> None:
    """Generate Predictions on test images in a folder.

    Args:
        images (List[Union[str, os.PathLike]]): List of test image path to run predictions.
        model (EfficientDetBackbone): EfficientDet model.
        compound_coef (float): Compund scaling coefficient.
        resize (Optional[Union[int, Tuple[int, int]]], optional): Resize of test images. Defaults to None.
        confidence (Optional[float], optional): confidence score to filter detections. Defaults to 0.5.
        nms_threshold (Optional[float], optional): IOU threshold to filter duplicate detections. Defaults to 0.5.
        output_path (Union[str, os.PathLike], optional): Output path/file where final output needs to be stored. Defaults to "../".

    Raises:
        IOError: Raises when output_path do not exist.
    """      

    # Initialize results
    results = {}
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Iterate over all images
    for image_path in tqdm(images):
        # Initialize this image's result list and get the image name.
        img_result = []
        img_name = os.path.basename(image_path)

        # Preprocess the image
        ori_imgs, framed_imgs, framed_metas = preprocess(image_path, max_size=INPUT_SIZES[compound_coef])
        x = torch.from_numpy(framed_imgs[0])
        
        # Move to GPU if available, then cast to float32.
        if USE_CUDA:
            x = x.cuda()
        x = x.float()

        # Add batch dimension and reorder to NCHW
        x = x.unsqueeze(0).permute(0, 3, 1, 2)

        # Run model (no autograd graph is needed at inference time)
        with torch.no_grad():
            features, regression, classification, anchors = model(x)

        # Apply the confidence threshold and NMS to the raw predictions
        preds = postprocess(x,
                            anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            confidence, nms_threshold)

        # Record an empty result if there are no predictions for this image.
        if not preds:
            results[img_name] = img_result
            continue
        
        # Map boxes back to the original image coordinates.
        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        # Convert bbox and others to the required format.
        if rois.shape[0] > 0:
            # Boxes stay in x1,y1,x2,y2; uncomment to convert to x1,y1,w,h:
            # rois[:, 2] -= rois[:, 0]
            # rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                img_result.append({
                    'class_index': label,
                    'bbox': box.tolist(),
                    'confidence': float(score)
                })

        # Store this image's detections.
        results[img_name] = img_result

    if not results:
        print('The model did not produce any valid output; check the model architecture and the input data.')

    # Write output
    if output_path.endswith(".json"):
        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        output_file = output_path
    elif os.path.isdir(output_path):
        output_file = os.path.join(
            output_path,
            "yolov5_predictions_" + str(time.time()).split(".")[0] + ".json"
        )
    else:
        raise IOError(
            f"{Fore.RED} no such directory {os.path.dirname(output_path)} {Style.RESET_ALL}"
        )

    with open(output_file, "w") as f:
        json.dump(results, f, indent=2)
    print(f"Detections are written to {output_file}.")
Code example #29
def evaluate_coco(img_path, set_name, image_ids, coco, model, threshold=0.05):
    results = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for image_id in tqdm(image_ids):
        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + '/' + image_info['file_name']
        ori_imgs, framed_imgs, framed_metas = preprocess(
            image_path, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            if use_float16:
                x = x.half()
            else:
                x = x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x, anchors, regression, classification,
                            regressBoxes, clipBoxes, threshold, nms_threshold)

        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            bbox_score = scores
            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                image_result = {
                    'image_id': image_id,
                    'category_id': label + 1,
                    'score': float(score),
                    'bbox': box.tolist(),
                }

                results.append(image_result)

    if not results:
        raise Exception(
            'The model did not produce any valid output; check the model architecture and the input data.'
        )

    # write output
    filepath = f'{set_name}_bbox_results.json'
    if os.path.exists(filepath):
        os.remove(filepath)
    json.dump(results, open(filepath, 'w'), indent=4)
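
The JSON written here is in the standard COCO detection-results format, so it can be scored directly with pycocotools. A minimal sketch, reusing the coco, set_name, and image_ids arguments passed to evaluate_coco:

from pycocotools.cocoeval import COCOeval

# `coco` is the ground-truth COCO object passed to evaluate_coco above.
coco_dt = coco.loadRes(f'{set_name}_bbox_results.json')
coco_eval = COCOeval(coco, coco_dt, 'bbox')
coco_eval.params.imgIds = image_ids
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()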
Code example #30


with torch.no_grad():
    features, regression, classification, anchors = model(x)

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    out = postprocess(x,
                      anchors, regression, classification,
                      regressBoxes, clipBoxes,
                      threshold, iou_threshold)


preds = invert_affine(framed_metas, out)

if savefig:
    imgs = ori_imgs
    i = 0
    for j in range(len(preds[i]['rois'])):
        # np.int was removed in NumPy 1.24; use the builtin int instead
        x1, y1, x2, y2 = preds[i]['rois'][j].astype(int)
        obj = obj_list[preds[i]['class_ids'][j]]
        score = float(preds[i]['scores'][j])
        if score >= threshold and obj in classes:
            plot_one_box(imgs[i], [x1, y1, x2, y2], label=obj, score=score,
                         color=color_list[get_index_label(obj, obj_list)])

    cv2.imwrite(output_name, imgs[i])