Example #1
def detect(image):
    # convert image to array
    frame = np.array(image)

    # convert RGB to BGR for OpenCV
    frames = frame[:, :, ::-1]

    ori_imgs, framed_imgs, framed_metas = image_preprocess(frames,
                                                           max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
        0, 3, 1, 2)

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification, regressBoxes,
                          clipBoxes, threshold, iou_threshold)

        out = invert_affine(framed_metas, out)
        render_frame = display(out, frame, imshow=True, imwrite=False)
        return render_frame
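The detect() snippet above, like most examples on this page, relies on module-level globals (model, input_size, use_cuda, use_float16, threshold, iou_threshold). A minimal setup sketch, assuming the import layout of zylo117/Yet-Another-EfficientDet-Pytorch; the weights path and num_classes are illustrative:

import numpy as np
import torch
from backbone import EfficientDetBackbone
from efficientdet.utils import BBoxTransform, ClipBoxes
from utils.utils import preprocess, invert_affine, postprocess

compound_coef = 0
input_size = [512, 640, 768, 896, 1024, 1280, 1280, 1536][compound_coef]
threshold, iou_threshold = 0.2, 0.2
use_cuda, use_float16 = torch.cuda.is_available(), False

# build the detector once and reuse it for every call to detect()
model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=90)
model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))  # illustrative path
model.requires_grad_(False)
model.eval()
if use_cuda:
    model = model.cuda()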
Example #2
    def forward(self, inputs):
        max_size = inputs.shape[-1]
        # print("is", inputs.shape)

        _, p3, p4, p5 = self.backbone_net(inputs)

        features = (p3, p4, p5)
        features = self.bifpn(features)

        regression = self.regressor(features)
        classification = self.classifier(features)
        objectness = self.objectness(features)
        anchors = self.anchors(inputs, inputs.dtype)
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        out = process_metadata(features, anchors, regression, classification,
                               objectness, regressBoxes, clipBoxes,
                               inputs.shape)
        # print("AS", anchors.shape)
        # print("FS", features[1].shape)
        # print("FS", features[2].shape)
        # print("FS", features[4].shape)
        # print("OS", out[0]["features"].shape)
        # batch_feats = torch.stack([img["features"] for img in out], dim=0)
        # batch_emb_idx = torch.stack([img["emb_idx"] for img in out], dim=0)
        # print("BEIS", batch_emb_idx.shape)
        # embeddings = self.embedder(batch_feats)
        # print("ES", embeddings.shape)
        # emb_idx = out[0]["emb_idx"]

        return features, regression, classification, anchors, objectness, None, None  # embeddings, batch_emb_idx
Example #3
    def forward(self, inputs):
        max_size = inputs.shape[-1]

        _, p3, p4, p5 = self.backbone_net(inputs)

        features = (p3, p4, p5)
        features = self.bifpn(features)

        regression = self.regressor(features)
        classification = self.classifier(features)

        # if you just want to convert to ONNX, uncomment the commented lines below
        # or, to convert to TVM, just return regression and classification

        anchors = self.anchors(inputs, inputs.dtype)
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        #features, regression, classification, anchors
        nms_threshold = 0.4
        threshold = 0.4
        preds = postprocess(inputs, anchors, regression, classification,
                            regressBoxes, clipBoxes, threshold, nms_threshold)

        return preds
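As the comments above suggest, ONNX export works best on a forward() trimmed to return only regression and classification, since postprocess() contains data-dependent control flow that tracing handles poorly. A hedged export sketch, assuming such a trimmed model; the opset, file name, and tensor names are illustrative:

dummy = torch.randn(1, 3, 512, 512)  # one 512x512 RGB image in NCHW
torch.onnx.export(model, dummy, 'efficientdet-d0.onnx',
                  opset_version=11,
                  input_names=['input'],
                  output_names=['regression', 'classification'])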
Example #4
def detect(img_path):
    #------------------preprocessing------------------------
    ori_imgs, framed_imgs, framed_metas = preprocess(
        img_path, max_size=input_size)  #input_size: 512

    x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
        0, 3, 1, 2)

    with torch.no_grad():
        start = timeutil.get_epochtime_ms()
        t1 = time.time()

        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification, regressBoxes,
                          clipBoxes, threshold, iou_threshold)
    out = invert_affine(framed_metas, out)
    c1, c2 = display(out, ori_imgs, imshow=True, imwrite=False)
    t2 = time.time()
    tact_time = t2 - t1  # a single inference was timed, so no averaging over 10 runs
    print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
    print('elapsed seconds: ' + str(t2 - t1))
    print("Latency: %fms" % (timeutil.get_epochtime_ms() - start))

    return c1, c2
Example #5
    def predict(self, img_path, threshold=0.5):
        self.system_dict["params"]["threshold"] = threshold
        ori_imgs, framed_imgs, framed_metas = preprocess(
            img_path, max_size=self.system_dict["local"]["input_size"])

        if self.system_dict["params"]["use_cuda"]:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not self.system_dict["params"]["use_float16"]
                 else torch.float16).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = self.system_dict[
                "local"]["model"](x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              self.system_dict["params"]["threshold"],
                              self.system_dict["params"]["iou_threshold"])

        out = invert_affine(framed_metas, out)
        scores, labels, bboxes = self.display(out,
                                              ori_imgs,
                                              imshow=False,
                                              imwrite=True)
        return scores, labels, bboxes
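A possible call site for predict(), assuming `detector` is an instance of the surrounding class with weights already loaded (the name and image path are illustrative):

scores, labels, bboxes = detector.predict('test/img.jpg', threshold=0.5)
for s, l, b in zip(scores, labels, bboxes):
    print(f'class {l}: {s:.2f} at {b}')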
Example #6
def main(img_path, base_name, checkpoint_path):
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                ratios=anchor_ratios, scales=anchor_scales)
    # model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
    model.load_state_dict(torch.load(checkpoint_path))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    display(out, ori_imgs, base_name, imshow=False, imwrite=True)
Example #7
    def detect_image(self,
                     image_path,
                     use_cuda=False,
                     use_float16=False,
                     threshold=0.2,
                     iou_threshold=0.2):
        # replace this part with your project's anchor config
        max_size = self.input_sizes[self.compound_coef]
        anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
        anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
        ori_imgs, framed_imgs, framed_metas = preprocess(image_path,
                                                         max_size=max_size)
        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)
        features, regression, classification, anchors = self.forward(x)
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        out = postprocess(x, anchors, regression, classification, regressBoxes,
                          clipBoxes, threshold, iou_threshold)
        out = invert_affine(framed_metas, out)
        self.__save_image(out, ori_imgs, imwrite=True)
Example #8
def read_images():
    for filename in os.listdir(imgfile_path):
        ori_imgs, framed_imgs, framed_metas = preprocess(os.path.join(
            imgfile_path, filename),
                                                         max_size=input_size)
        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

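        # NOTE: the model below is rebuilt and its weights reloaded for every
        # image in the loop; in practice, hoist this setup out of read_images()
        # and construct the model once.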
        model = EfficientDetBackbone(compound_coef=7,
                                     num_classes=len(obj_list),
                                     ratios=anchor_ratios,
                                     scales=anchor_scales)
        model.load_state_dict(
            torch.load('weights/efficientdet-d7/efficientdet-d7.pth')
        )  # place weight path here
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model = model.cuda()
        if use_float16:
            model = model.half()

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold,
                              iou_threshold)

        out = invert_affine(framed_metas, out)
        display(filename, out, ori_imgs, imshow=False, imwrite=True)

        print('running speed test...')
        with torch.no_grad():
            print('test1: model inferring and postprocessing')
            print('inferring image for 10 times...')
            t1 = time.time()
            for _ in range(10):
                _, regression, classification, anchors = model(x)

                out = postprocess(x, anchors, regression, classification,
                                  regressBoxes, clipBoxes, threshold,
                                  iou_threshold)
                out = invert_affine(framed_metas, out)

            t2 = time.time()
            tact_time = (t2 - t1) / 10
            print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
Example #9
def test(threshold=0.2):
    with open("datasets/vcoco/new_prior_mask.pkl", "rb") as file:
        prior_mask = pickle.load(file, encoding="bytes")

    model = EfficientDetBackbone(num_classes=len(eval(params["obj_list"])),
                                 num_union_classes=25,
                                 num_inst_classes=51,
                                 compound_coef=args.compound_coef,
                                 ratios=eval(params["anchors_ratios"]),
                                 scales=eval(params["anchors_scales"]))
    model.load_state_dict(
        torch.load(weights_path, map_location=torch.device('cpu')))
    model.requires_grad_(False)
    model.eval()

    if args.cuda:
        model = model.cuda()
    if args.float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    img_dir = os.path.join(data_dir, "vcoco/coco/images/%s" % "val2014")

    with open(os.path.join(data_dir, 'vcoco/data/splits/vcoco_test.ids'),
              'r') as f:
        image_ids = f.readlines()
    image_ids = [int(id) for id in image_ids]

    _t = {'im_detect': Timer(), 'misc': Timer()}
    detection = []

    for i, image_id in enumerate(image_ids):

        _t['im_detect'].tic()

        file = "COCO_val2014_" + (str(image_id)).zfill(12) + '.jpg'

        img_detection = img_detect(file,
                                   img_dir,
                                   model,
                                   input_size,
                                   regressBoxes,
                                   clipBoxes,
                                   prior_mask,
                                   threshold=threshold)
        detection.extend(img_detection)
        if need_visual:
            visual(img_detection, image_id)
        _t['im_detect'].toc()

        print('im_detect: {:d}/{:d}, average time: {:.3f}s'.format(
            i + 1, len(image_ids), _t['im_detect'].average_time))

    with open(detection_path, "wb") as file:
        pickle.dump(detection, file)
Example #10
def test(threshold=0.2):
    model = EfficientDetBackbone(num_classes=num_objects,
                                 num_union_classes=num_union_actions,
                                 num_inst_classes=num_inst_actions,
                                 compound_coef=args.compound_coef,
                                 ratios=eval(params["anchors_ratios"]),
                                 scales=eval(params["anchors_scales"]))
    model.load_state_dict(
        torch.load(weights_path, map_location=torch.device('cpu')))
    model.requires_grad_(False)
    model.eval()

    if args.cuda:
        model = model.cuda()
    if args.float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    img_dir = os.path.join(data_dir,
                           "hico_20160224_det/images/%s" % "test2015")

    _t = {'im_detect': Timer(), 'misc': Timer()}
    detection = {}

    count = 0
    for line in glob.iglob(img_dir + '/' + '*.jpg'):
        count += 1

        _t['im_detect'].tic()
        image_id = int(line[-9:-4])

        file = "HICO_test2015_" + (str(image_id)).zfill(8) + ".jpg"

        # if file != "COCO_val2014_000000001987.jpg":
        #     continue

        dets = img_detect(file,
                          img_dir,
                          model,
                          input_size,
                          regressBoxes,
                          clipBoxes,
                          threshold=threshold)

        detection[image_id] = dets
        # detection.extend(img_detection)
        _t['im_detect'].toc()

        print('im_detect: {:d}/{:d}, average time: {:.3f}s'.format(
            count, 9658, _t['im_detect'].average_time))

    with open(detection_path, "wb") as file:
        pickle.dump(detection, file)
Example #11
def evaluate_coco(img_path, model, threshold=0.05):
    kag_res = ["image_id,PredictionString"]
    included_extensions = ['jpg', 'jpeg', 'bmp', 'png', 'gif']
    imgs_files = [os.path.join(img_path, fn) for fn in os.listdir(img_path)
                  if any(fn.endswith(ext) for ext in included_extensions)]
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for img_path in tqdm(imgs_files):
        ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            if use_float16:
                x = x.half()
            else:
                x = x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x,
                            anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            threshold, nms_threshold)

        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            kag_res.append(f"{os.path.basename(img_path).replace('.jpg', '')},{format_prediction_string(rois, scores)}")

    if len(kag_res) <= 1:  # only the CSV header row, i.e. no detections at all
        raise Exception('the model does not provide any valid output, check model architecture and the data input')

    # write output
    filepath = '/kaggle/working/submission.csv'
    if os.path.exists(filepath):
        os.remove(filepath)
    with open(filepath, "w") as f:
        for line in kag_res:
            f.write(line)
            f.write("\n")
Example #12
def single_img_test(img_path, input_size, model, use_cuda=True, use_float16=False):
    # tf bilinear interpolation is different from any other's, just make do
    threshold = 0.05
    iou_threshold = 0.5

    image_name = img_path.replace('\\', '/').split('/')[-1]

    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x,
                          anchors, regression, classification,
                          regressBoxes, clipBoxes,
                          threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    # display(out, ori_imgs, imshow=False, imwrite=True)

    # print('running speed test...')
    # with torch.no_grad():
    #     print('test1: model inferring and postprocessing')
    #     print('inferring image for 10 times...')
    #     t1 = time.time()
    #     for _ in range(10):
    #         _, regression, classification, anchors = model(x)
    #
    #         out = postprocess(x,
    #                           anchors, regression, classification,
    #                           regressBoxes, clipBoxes,
    #                           threshold, iou_threshold)
    #         out = invert_affine(framed_metas, out)
    #
    #     t2 = time.time()
    #     tact_time = (t2 - t1) / 10
    #     print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
    det_num = len(out[0]['class_ids'])
    det = []
    for i in range(det_num):
        det.append([image_name, out[0]['class_ids'][i], out[0]['scores'][i], tuple(out[0]['rois'][i])])
    return det
Example #13
def evaluate_mAP(imgs, imgs_ids, framed_metas, regressions,
                 classifications, anchors, threshold=0.05, nms_threshold=0.5):
    '''
    Inputs: images, image IDs, framed metas (resizing stats), predictions
    Output: results
    '''
    results = []  # This is used for storing evaluation results.
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    preds = postprocess(imgs,
                        torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                        regressions.detach(), classifications.detach(),
                        regressBoxes, clipBoxes, threshold, nms_threshold)

    if not preds:
        return

    preds = invert_affine(framed_metas, preds)
    for i, _ in enumerate(preds):
        scores = preds[i]['scores']
        class_ids = preds[i]['class_ids']
        rois = preds[i]['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                if score < threshold:
                    break

                image_result = {
                    'image_id': imgs_ids[i],
                    'category_id': label + 1,
                    'score': float(score),
                    'bbox': box.tolist(),
                }

                results.append(image_result)
    return results
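The in-place ROI edit above converts corner boxes (x1, y1, x2, y2) into COCO's (x, y, width, height) before the results are serialized. A quick numeric illustration:

import numpy as np

rois = np.array([[10., 20., 50., 80.]])  # x1, y1, x2, y2
rois[:, 2] -= rois[:, 0]
rois[:, 3] -= rois[:, 1]
print(rois)  # [[10. 20. 40. 60.]] -> x, y, w, h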
Example #14
    def _inference(self, data):
        """
        model inference function
        Here are a inference example of resnet, if you use another model, please modify this function
        """
        framed_imgs, framed_metas = data[self.input_image_key]
        if torch.cuda.is_available():
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            self.model = self.model.cuda()
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32).permute(0, 3, 1, 2)

        #if use_float16:
        #    model = model.half()

        with torch.no_grad():
            features, regression, classification, anchors = self.model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, self.threshold,
                              self.iou_threshold)

        out = invert_affine(framed_metas, out)
        result = OrderedDict()
        result['detection_classes'] = []
        result['detection_scores'] = []
        result['detection_boxes'] = []

        for i in range(len(out)):
            if len(out[i]['rois']) == 0:
                continue
            for j in range(len(out[i]['rois'])):
                x1, y1, x2, y2 = out[i]['rois'][j].astype(int)  # np.int is removed in NumPy >= 1.24
                result['detection_boxes'].append([x1, y1, x2, y2])
                obj = self.obj_list[out[i]['class_ids'][j]]
                result['detection_classes'].append(obj)
                score = float(out[i]['scores'][j])
                result['detection_scores'].append(score)

        return result
Example #15
    def predict(self, raw_img):
        self.ori_imgs, self.framed_imgs, self.framed_metas = preprocess_raw(raw_img, max_size=self.input_size)
        if self.use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in self.framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in self.framed_imgs], 0)
        x = x.to(torch.float32 if not self.use_float16 else torch.float16).permute(0, 3, 1, 2)

        with torch.no_grad():
            self.features, self.regression, self.classification, self.anchors = self.model(x)

            self.regressBoxes = BBoxTransform()
            self.clipBoxes = ClipBoxes()

            out = postprocess(x, self.anchors, self.regression,
                              self.classification, self.regressBoxes,
                              self.clipBoxes, self.threshold,
                              self.iou_threshold)
            pred = invert_affine(self.framed_metas, out)
            return pred
Example #16
def scores_loss(model, img):
    # img is a CUDA tensor
    img = img / 255
    img = img.unsqueeze(0).permute(0, 3, 1, 2)
    mean = (0.406, 0.456, 0.485)
    std = (0.225, 0.224, 0.229)
    for i in range(3):
        img[:, i, :, :] -= mean[i]
        img[:, i, :, :] /= std[i]
    x = resize(img)
    features, regression, classification, anchors = model(x)  # inference outputs
    regressBoxes = BBoxTransform()  # box decoder
    clipBoxes = ClipBoxes()  # clips boxes to the image
    # post-process the detections
    scores = post_YAN(x, anchors, regression, classification, regressBoxes,
                      clipBoxes, 0.25, 0.2)
    if len(scores) > 0:
        loss = torch.sum(scores)  # sum of detection scores
    else:
        loss = 0.0
    return loss
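The in-place loop above normalizes the three channels with BGR-ordered ImageNet statistics. A hedged equivalent using torchvision, assuming the same channel order and an input already scaled to [0, 1]:

import torch
from torchvision.transforms.functional import normalize

img = torch.rand(512, 512, 3)                # HWC image in [0, 1]
x = normalize(img.permute(2, 0, 1),          # to CHW
              mean=[0.406, 0.456, 0.485],    # BGR-ordered, as above
              std=[0.225, 0.224, 0.229]).unsqueeze(0)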
Example #17
    def _inference(self, imgs_path):
        results = []
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        for img_path in imgs_path:
            ori_imgs, framed_imgs, framed_metas = preprocess(
                [img_path], max_size=self.input_sizes[cfg.compound_coef])
            x = torch.from_numpy(framed_imgs[0]).float()
            x = x.unsqueeze(0).permute(0, 3, 1, 2)

            features, regression, classification, anchors = self.model(x)
            preds = self._my_postprocess(x, anchors, regression,
                                         classification, regressBoxes,
                                         clipBoxes, cfg.threshold,
                                         cfg.nms_threshold)

            preds = invert_affine(framed_metas, preds)[0]
            scores = preds['scores']
            class_ids = preds['class_ids']
            rois = preds['rois']
            image_result = {
                'detection_classes': [],
                'detection_boxes': [],
                'detection_scores': []
            }
            if rois.shape[0] > 0:
                bbox_score = scores

                for roi_id in range(rois.shape[0]):
                    score = float(bbox_score[roi_id])
                    label = int(class_ids[roi_id])
                    box = rois[roi_id, :]
                    image_result['detection_classes'].append(
                        self.class_dict[label + 1])
                    image_result['detection_boxes'].append(box.tolist())
                    image_result['detection_scores'].append(score)

            results.append(image_result)

        return results
Example #18
def detect():
    with torch.no_grad():
        t1 = time.time()

        features, regression, classification, anchors = model(x)
        # t1 = time.time()

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        # start = timeutil.get_epochtime_ms()
        out = postprocess(x, anchors, regression, classification, regressBoxes,
                          clipBoxes, threshold, iou_threshold)

    out = invert_affine(framed_metas, out)

    c1, c2 = display(out, ori_imgs, imshow=True, imwrite=False)
    # t2 = time.time()
    # tact_time = (t2 - t1) / 10
    # print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
    # print("Latency: %fms" % (timeutil.get_epochtime_ms() - start))

    return c1, c2
Example #19
    def __init__(self, weightfile, score_thresh,
                 nms_thresh, is_xywh=True, use_cuda=True, use_float16=False):
        print('Loading weights from %s...' % weightfile)

        # constants
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.use_cuda = use_cuda
        self.is_xywh = is_xywh

        compound_coef = 0
        force_input_size = None  # set None to use default size

        self.use_float16 = use_float16  # was hard-coded to False, ignoring the argument
        cudnn.fastest = True
        cudnn.benchmark = True

        # tf bilinear interpolation is different from any other's, just make do
        input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_size = input_sizes[compound_coef] if \
            force_input_size is None else force_input_size

        # load model
        self.model = EfficientDetBackbone(compound_coef=compound_coef,
                                          num_classes=len(self.obj_list))
        # f'weights/efficientdet-d{compound_coef}.pth'
        self.model.load_state_dict(torch.load(weightfile))
        self.model.requires_grad_(False)
        self.model.eval()

        if self.use_cuda:
            self.model = self.model.cuda()
        if self.use_float16:
            self.model = self.model.half()

        # Box
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
Example #20
    def get_face_position(fn):
        _, fimg, meta = preprocess(fn, max_size=effdet_input_size)
        x = torch.from_numpy(fimg[0]).float().unsqueeze(0)
        x = x.permute(0, 3, 1, 2)
        if args.cuda:
            x = x.cuda()

        with torch.no_grad():
            _, reg, clss, anchors = model(x)

            rbox = BBoxTransform()
            cbox = ClipBoxes()

            out = postprocess(x, anchors, reg, clss, rbox, cbox,
                              effdet_thr, effdet_iou_thr)
            out = invert_affine(meta, out)
        
        lst_face_bbox = []
        for i_detect in range(len(out[0]["rois"])):
            lst_face_bbox.append(
                [int(val) for val in out[0]["rois"][i_detect]]
            )
        return lst_face_bbox
Example #21
def eval(pretrained_weights: Path, inputs_splitted_into_lists: list,
         compound_coef: int, use_cuda: bool, use_float16: bool = False) -> list:
    threshold = 0.2
    iou_threshold = 0.2
    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]

    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=1,
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    model.load_state_dict(torch.load(pretrained_weights, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    predictions = []

    for inputs_split in inputs_splitted_into_lists:
        with torch.no_grad():
            features, regression, classification, anchors = model(inputs_split)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(inputs_split, anchors, regression,
                              classification, regressBoxes, clipBoxes,
                              threshold, iou_threshold)

            predictions += out

    return predictions
Example #22
def predict_fn(data, model):
    """mostly copied from
    https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch/blob/master/efficientdet_test.py

    Args:
        data: tuple of inputs generated by custom input_fn above
        model: PyTorch model loaded in memory by model_fn

    Returns: a prediction

    """
    ori_imgs, framed_imgs, framed_metas, threshold, iou_threshold = data
    x = torch.stack([
        torch.from_numpy(fi).cuda() if USE_CUDA else torch.from_numpy(fi)
        for fi in framed_imgs
    ], 0)

    x = x.to(torch.float32 if not USE_FLOAT16 else torch.float16).permute(
        0, 3, 1, 2)

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regress_boxes = BBoxTransform()
        clip_boxes = ClipBoxes()

        out = postprocess(x,
                          anchors=anchors,
                          regression=regression,
                          classification=classification,
                          regressBoxes=regress_boxes,
                          clipBoxes=clip_boxes,
                          threshold=threshold,
                          iou_threshold=iou_threshold)
        out = invert_affine(framed_metas, out)
        return out
Example #23
def evaluate_coco(img_path, set_name, image_ids, coco, model, threshold=0.05):
    results = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for image_id in tqdm(image_ids):
        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + '/' + image_info['file_name']
        ori_imgs, framed_imgs, framed_metas = preprocess(
            image_path, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            if use_float16:
                x = x.half()
            else:
                x = x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x, anchors, regression, classification,
                            regressBoxes, clipBoxes, threshold, nms_threshold)

        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            bbox_score = scores
            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                image_result = {
                    'image_id': image_id,
                    'category_id': label + 1,
                    'score': float(score),
                    'bbox': box.tolist(),
                }

                results.append(image_result)

    if not len(results):
        raise Exception(
            'the model does not provide any valid output, check model architecture and the data input'
        )

    # write output
    filepath = f'{set_name}_bbox_results.json'
    if os.path.exists(filepath):
        os.remove(filepath)
    with open(filepath, 'w') as f:
        json.dump(results, f, indent=4)
Example #24
model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                             ratios=anchor_ratios, scales=anchor_scales)
model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
model.requires_grad_(False)
model.eval()

if use_cuda:
    model = model.cuda()
if use_float16:
    model = model.half()

with torch.no_grad():
    features, regression, classification, anchors = model(x)

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    out = postprocess(x,
                      anchors, regression, classification,
                      regressBoxes, clipBoxes,
                      threshold, iou_threshold)


def display(preds, imgs, imshow=True, imwrite=False):
    for i in range(len(imgs)):
        if len(preds[i]['rois']) == 0:
            continue

        for j in range(len(preds[i]['rois'])):
            (x1, y1, x2, y2) = preds[i]['rois'][j].astype(int)
            cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0), 2)
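The display() above stops at box drawing. A hedged extension that also renders class names and scores, assuming an obj_list lookup like the other snippets use:

import cv2

def display_with_labels(preds, imgs, obj_list, imshow=True, imwrite=False):
    for i in range(len(imgs)):
        for j in range(len(preds[i]['rois'])):
            x1, y1, x2, y2 = preds[i]['rois'][j].astype(int)
            cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0), 2)
            obj = obj_list[preds[i]['class_ids'][j]]
            score = float(preds[i]['scores'][j])
            cv2.putText(imgs[i], f'{obj}, {score:.3f}', (x1, y1 + 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)
        if imshow:
            cv2.imshow('img', imgs[i])
            cv2.waitKey(0)
        if imwrite:
            cv2.imwrite(f'img_inferred_{i}.jpg', imgs[i])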
Example #25
def train(opt):
    params = Params(f'projects/{opt.project}_crop.yml')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # disable CUDA; '1-' was a typo

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    save_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    opt.saved_path = opt.saved_path + f'/{params.project_name}/crop/weights/{save_time}'
    opt.log_path = opt.log_path + f'/{params.project_name}/crop/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)
    print('save_path :', opt.saved_path)
    print('log_path :', opt.log_path)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    training_set = Project42Dataset(root_dir=os.path.join(
        opt.data_path, params.project_name, 'crop'),
                                    set=params.train_set,
                                    params=params,
                                    transform=transforms.Compose([
                                        Normalizer(mean=params.mean,
                                                   std=params.std),
                                        Augmenter(),
                                        Resizer(input_sizes[opt.compound_coef])
                                    ]))
    training_generator = DataLoader(training_set, **training_params)

    val_set = Project42Dataset(root_dir=os.path.join(opt.data_path,
                                                     params.project_name,
                                                     'crop'),
                               set=params.val_set,
                               params=params,
                               transform=transforms.Compose([
                                   Normalizer(mean=params.mean,
                                              std=params.std),
                                   Resizer(input_sizes[opt.compound_coef])
                               ]))
    val_generator = DataLoader(val_set, **val_params)

    # labels
    labels = training_set.labels
    print('label:', labels)

    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except Exception:
            last_step = 0

        try:
            ret = model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] froze backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpus and the batch_size per gpu is lower than 4;
    # this is useful when gpu memory is limited.
    # with tiny per-gpu batches, bn statistics are unreliable and training becomes
    # unstable or slow to converge; sync_bn solves this by normalizing over the
    # mini-batches of all gpus as one batch, then sending the result back to each gpu.
    # it slows training down slightly, though.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(opt.log_path + f'/{save_time}/')

    # wrap the model with the loss function, to reduce the memory usage on gpu0 and speed up
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    opt.lr,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for iter, data in enumerate(progress_bar):
                if iter < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    ## train image show
                    # for idx in range(len(imgs)):
                    #     showshow = imgs[idx].numpy()
                    #     print(showshow.shape)
                    #     showshow = showshow.transpose(1, 2, 0)
                    #     a = annot[idx].numpy().reshape(5, )
                    #     img_show = cv2.rectangle(showshow, (a[0],a[1]), (a[2],a[3]), (0, 0, 0), 3)
                    #     cv2.imshow(f'{idx}_{params.obj_list[int(a[4])]}', img_show)
                    #     cv2.waitKey(1000)
                    #     cv2.destroyAllWindows()

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss, regression, classification, anchors = model(
                        imgs, annot, obj_list=params.obj_list)

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    # loss
                    epoch_loss.append(float(loss))

                    # mAP
                    threshold = 0.2
                    iou_threshold = 0.2

                    regressBoxes = BBoxTransform()
                    clipBoxes = ClipBoxes()

                    out = postprocess(imgs, anchors, regression,
                                      classification, regressBoxes, clipBoxes,
                                      threshold, iou_threshold)

                    mAP = mAP_score(annot, out, labels)
                    mAP = mAP.results['mAP']

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}. mAP: {:.2f}'
                        .format(step, epoch + 1, opt.num_epochs, iter + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item(), mAP))

                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classification_loss',
                                       {'train': cls_loss}, step)
                    writer.add_scalars('mAP', {'train': mAP}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}.pth')
                        print('checkpoint...')

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []

                for iter, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss, regression, classification, anchors = model(
                            imgs, annot, obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                # mAP
                threshold = 0.2
                iou_threshold = 0.2

                regressBoxes = BBoxTransform()
                clipBoxes = ClipBoxes()

                out = postprocess(imgs, anchors, regression, classification,
                                  regressBoxes, clipBoxes, threshold,
                                  iou_threshold)

                mAP = mAP_score(annot, out, labels)
                mAP = mAP.results['mAP']

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}. mAP: {:.2f}'
                    .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss,
                            loss, mAP))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classification_loss', {'val': cls_loss},
                                   step)
                writer.add_scalars('mAP', {'val': mAP}, step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    )

                model.train()

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        save_checkpoint(
            model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
        writer.close()
    writer.close()
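train() expects the opt namespace produced by the repo's argparse front end. A minimal hand-built stand-in for experimentation; the field names are inferred from the usages above and the values are illustrative:

import argparse

opt = argparse.Namespace(
    project='coco', compound_coef=0, batch_size=8, num_workers=4,
    lr=1e-4, optim='adamw', num_epochs=100, head_only=False,
    load_weights=None, debug=False, val_interval=1, save_interval=500,
    es_min_delta=0.0, es_patience=0,
    data_path='datasets/', log_path='logs/', saved_path='logs/')
train(opt)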
Example #26
    def forward(self, classifications, regressions, anchors, annotations,
                **kwargs):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]  # assuming all image sizes in the batch are the same, which they are
        dtype = anchors.dtype

        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = alpha_factor.cuda(self.gpu_id)
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(
                        focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(
                        torch.tensor(0).to(dtype).cuda(self.gpu_id))
                    classification_losses.append(cls_loss.sum())
                else:

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(
                        focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(torch.tensor(0).to(dtype))
                    classification_losses.append(cls_loss.sum())

                continue

            IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            # compute the loss for classification
            targets = torch.ones_like(classification) * -1
            if torch.cuda.is_available():
                targets = targets.cuda(self.gpu_id)

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices,
                    assigned_annotations[positive_indices, 4].long()] = 1

            alpha_factor = torch.ones_like(targets) * alpha
            if torch.cuda.is_available():
                alpha_factor = alpha_factor.cuda(self.gpu_id)

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce

            zeros = torch.zeros_like(cls_loss)
            if torch.cuda.is_available():
                zeros = zeros.cuda(self.gpu_id)
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # efficientdet style
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dy, targets_dx, targets_dh, targets_dw))
                targets = targets.t()

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(
                        torch.tensor(0).to(dtype).cuda(self.gpu_id))
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))

        # debug
        imgs = kwargs.get('imgs', None)
        if imgs is not None:
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            obj_list = kwargs.get('obj_list', None)
            out = postprocess(
                imgs.detach(),
                torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                regressions.detach(), classifications.detach(), regressBoxes,
                clipBoxes, 0.5, 0.3)
            imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
            imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) *
                    255).astype(np.uint8)
            imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
            display(out, imgs, obj_list, imshow=False, imwrite=True)

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
               torch.stack(regression_losses).mean(dim=0, keepdim=True) * 50  # https://github.com/google/automl/blob/6fdd1de778408625c1faf368a327fe36ecd41bf7/efficientdet/hparams_config.py#L233
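The regression branch above is smooth L1 with beta = 1/9: quadratic below the threshold, linear above it. A small numeric check of both branches (values illustrative):

import torch

diff = torch.tensor([0.05, 0.5])  # |target - prediction|
beta = 1.0 / 9.0
loss = torch.where(diff <= beta,
                   0.5 * diff ** 2 / beta,   # same as 0.5 * 9.0 * diff**2
                   diff - 0.5 * beta)        # same as diff - 0.5 / 9.0
print(loss)  # tensor([0.0113, 0.4444])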
Example #27
def evaluate_coco_show_res_jss(img_path,
                               set_name,
                               image_ids,
                               coco,
                               model,
                               threshold=0.05):
    results = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    count = 0
    for image_id in tqdm(image_ids):
        count = count + 1
        if count > 21:
            break
        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + image_info['file_name']
        print('image path:', image_path)

        ori_imgs, framed_imgs, framed_metas = preprocess(
            image_path, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            if use_float16:
                x = x.half()
            else:
                x = x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x, anchors, regression, classification,
                            regressBoxes, clipBoxes, threshold, nms_threshold)

        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                image_result = {
                    'image_id': image_id,
                    'category_id': label + 1,
                    'score': float(score),
                    'bbox': box.tolist(),
                }
                score = float(score)
                category_id = label + 1
                box = box.tolist()
                # print('box:', box)
                xmin, ymin, w, h = int(box[0]), int(box[1]), int(box[2]), int(box[3])
                if score > 0.2:
                    cv2.rectangle(ori_imgs[0], (xmin, ymin),
                                  (xmin + w, ymin + h), (0, 255, 0), 6)
                    cv2.putText(ori_imgs[0],
                                '{}:{:.2f}'.format(category_id,
                                                   score), (xmin, ymin),
                                cv2.FONT_HERSHEY_SIMPLEX, 4.0, (0, 255, 0), 6)
                results.append(image_result)
        cv2.imwrite(
            './test_result/zhongchui_d3_epoch200_1124/tmp{}.jpeg'.format(count),
            ori_imgs[0])

    if not results:
        raise Exception(
            'the model did not produce any valid output; check the model architecture and the input data'
        )

    # write output
    # filepath = f'{set_name}_bbox_results.json'
    filepath = det_save_json
    if os.path.exists(filepath):
        os.remove(filepath)
    json.dump(results, open(filepath, 'w'), indent=4)
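
A minimal sketch of how the evaluator above might be driven, assuming pycocotools is installed and the module-level names the function relies on (input_sizes, compound_coef, use_cuda, gpu, use_float16, nms_threshold, det_save_json) are defined as in the surrounding script; the paths and split name are hypothetical:

from pycocotools.coco import COCO

set_name = 'val2017'
coco_gt = COCO(f'annotations/instances_{set_name}.json')
image_ids = coco_gt.getImgIds()
evaluate_coco_show_res_jss(f'datasets/{set_name}/', set_name, image_ids,
                           coco_gt, model, threshold=0.05)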
Example #28
def excuteModel(videoname):
    # Video's path
    # set int to use webcam, set str to read from a video file

    if videoname is not None:
        video_src = os.path.join(r'D:\GitHub\Detection\server\uploads', f"{videoname}.mp4")
    else:
        video_src = r'D:\GitHub\Detection\server\AImodel\videotest\default.mp4'

    compound_coef = 2
    trained_weights = 'D:\\GitHub\\Detection\\server\\AImodel\\weights\\efficientdet-video.pth'

    force_input_size = None  # set None to use default size

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(
        compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(trained_weights))

    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    # box decoding helpers
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
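    # CAP_PROP_FRAME_COUNT is only a container hint and can be wrong for some codecs,
    # so the read loop below still checks the flag returned by cap.read()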

    writer = None
    # try to determine the total number of frames in the video file
    try:
        prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() \
            else cv2.CAP_PROP_FRAME_COUNT
        total = int(cap.get(prop))
        print("[INFO] {} total frames in video".format(total))

    # an error occurred while trying to determine the total
    # number of frames in the video file
    except Exception:
        print("[INFO] could not determine # of frames in video")
        total = -1

    path_out = os.path.join(os.path.dirname(
        os.path.abspath(__file__)), 'outvideo')

    path_result = r"D:\GitHub\Detection\server\AImodel\videotest\default.mp4"
    path_asset = r"D:\GitHub\Detection\client\src\assets"
    for i in range(0, length):
        ret, frame = cap.read()
        if not ret:
            break

        # frame preprocessing
        ori_imgs, framed_imgs, framed_metas = preprocess_video(
            frame, max_size=input_size)

        if use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda()
                             for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        # model predict
        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            out = postprocess(x,
                              anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)

        # result
        out = invert_affine(framed_metas, out)
        img_show = display(out, ori_imgs, obj_list)

        if writer is None:

            # initialize our video writer
            fourcc = 0x00000021
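            # 0x00000021 is a numeric codec tag that some OpenCV/FFmpeg builds accept
            # for writing H.264 into an .mp4 container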
            #fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            if videoname is not None:
                path_result = os.path.join(path_out, f"{videoname}.mp4")
            else:
                path_result = os.path.join(path_out, "default.mp4")

            writer = cv2.VideoWriter(path_result, fourcc, 30, (img_show.shape[1], img_show.shape[0]), True)


        # write the output frame to disk
        writer.write(img_show)
        print("Processing data... " + str(round((i+1)/length, 3)*100) + " %")
        # show frame by frame
        #cv2.imshow('frame', img_show)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print("[INFO] cleaning up...")

    writer.release()
    cap.release()
    cv2.destroyAllWindows()

    if videoname is not None:
        path_asset = os.path.join(path_asset, f"{videoname}.mp4")
    else:
        path_asset = os.path.join(path_asset, "default.mp4")
    copyfile(path_result, path_asset)
    return path_asset
Example #29
    def forward(self, classifications, regressions, anchors, annotations,
                **kwargs):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[
            0, :, :]  # assuming all image sizes are the same, which they are here
        dtype = anchors.dtype

        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = alpha_factor.cuda()
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(
                        focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                    classification_losses.append(cls_loss.sum())
                else:

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(
                        focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(torch.tensor(0).to(dtype))
                    classification_losses.append(cls_loss.sum())

                continue

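            # IoU of every anchor (stored as y1, x1, y2, x2 here) against every GT box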
            IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            # compute the loss for classification
            targets = torch.ones_like(classification) * -1
            if torch.cuda.is_available():
                targets = targets.cuda()

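            # assignment rule: IoU < 0.4 -> background, IoU >= 0.5 -> positive for the
            # matched class; anchors in between keep -1 and are masked out of the loss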
            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices,
                                                           4].long()] = 1

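            # focal loss: FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t)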
            alpha_factor = torch.ones_like(targets) * alpha
            if torch.cuda.is_available():
                alpha_factor = alpha_factor.cuda()

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce

            zeros = torch.zeros_like(cls_loss)
            if torch.cuda.is_available():
                zeros = zeros.cuda()
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # efficientdet style
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

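                # stacked as (dy, dx, dh, dw) to match the anchor/regression channel order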
                targets = torch.stack(
                    (targets_dy, targets_dx, targets_dh, targets_dw))
                targets = targets.t()

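                # smooth-L1 with beta = 1/9 (the usual sigma = 3 setting)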
                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))

        # debug
        imgs = kwargs.get('imgs', None)
        if imgs is not None:
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            obj_list = kwargs.get('obj_list', None)
            out = postprocess(
                imgs.detach(),
                torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                regressions.detach(), classifications.detach(), regressBoxes,
                clipBoxes, 0.5, 0.3)
            imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
            imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) *
                    255).astype(np.uint8)
            # imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
            # Uncomment the above line if you're storing the images using opencv.

            for i, _ in enumerate(imgs):
                if len(out[i]['rois']) == 0:
                    continue

                for j in range(len(out[i]['rois'])):
                    (x1, y1, x2, y2) = out[i]['rois'][j].astype(int)  # np.int is removed in NumPy >= 1.24
                    cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0),
                                  2)
                    obj = obj_list[out[i]['class_ids'][j]]
                    score = float(out[i]['scores'][j])

                    cv2.putText(imgs[i], '{}, {:.3f}'.format(obj, score),
                                (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 0), 1)

            return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
               torch.stack(regression_losses).mean(dim=0, keepdim=True), imgs

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
               torch.stack(regression_losses).mean(dim=0, keepdim=True)
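
To see what the focal weighting above actually does, here is a tiny standalone sketch applying the same formula to one easy and one hard positive prediction; the probabilities are illustrative only:

import torch

alpha, gamma = 0.25, 2.0
p = torch.tensor([0.95, 0.30])  # well-classified vs. poorly-classified positive
focal_loss = -alpha * (1.0 - p) ** gamma * torch.log(p)
print(focal_loss)  # the (1 - p)^gamma factor strongly down-weights the easy example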
Example #30
def detect(model, dataset, args):
    use_cuda = not args.cpu
    threshold = args.threshold
    iou_threshold = args.iou_threshold
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[args.compound_coef]

    img_dir = os.path.join(dataset, dataset, 'images')
    bbox_dir = os.path.join(dataset, dataset, 'annotations', 'bboxes')
    vis_dir = os.path.join(dataset, 'det_vis')
    prepare_dirs(bbox_dir, vis_dir)

    img_paths = [os.path.join(img_dir, f) for f in os.listdir(img_dir)]
    for img_path in tqdm(img_paths):
        ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                         max_size=input_size)
        ori_img = ori_imgs[0]
        img_id = os.path.basename(img_path).split('.')[0]

        json_byhand = os.path.join(dataset, 'annotation_byhand',
                                   img_id + '.json')
        if os.path.exists(json_byhand):
            with open(json_byhand) as f:
                annotation_byhand = json.load(f)
                points = annotation_byhand['shapes'][0]['points']
                max_box = points[0] + points[1]
        else:
            if args.update:  # only process annotations by hand
                continue
            if use_cuda:
                x = torch.stack(
                    [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            else:
                x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs],
                                0)

            x = x.to(torch.float32).permute(0, 3, 1, 2)

            with torch.no_grad():
                features, regression, classification, anchors = model(x)

                regressBoxes = BBoxTransform()
                clipBoxes = ClipBoxes()

                preds = postprocess(x, anchors, regression, classification,
                                    regressBoxes, clipBoxes, threshold,
                                    iou_threshold)

                pred = invert_affine(framed_metas, preds)[0]

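            # keep the largest class-0 detection; fall back to the full frame if none is found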
            max_area, max_box = 0, [0, 0, ori_img.shape[1], ori_img.shape[0]]
            for det, class_id in zip(pred['rois'], pred['class_ids']):
                if class_id != 0:
                    continue
                x1, y1, x2, y2 = det.astype(int)
                w, h = x2 - x1, y2 - y1
                area = w * h
                if area > max_area:
                    max_area = area
                    max_box = [x1, y1, x2, y2]

        plot_one_box(ori_img, max_box, color=[255, 0, 255], line_thickness=2)
        if args.vis:
            cv2.imwrite(os.path.join(vis_dir, img_id + '.jpg'), ori_img)

        bbox_file = os.path.join(bbox_dir, img_id + '.txt')
        with open(bbox_file, 'w') as f:
            bbox_info = ' '.join(map(str, max_box))
            f.write(bbox_info)
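
Each bbox file written above holds a single line of four space-separated pixel coordinates in x1 y1 x2 y2 order; reading one back is a one-liner (the file name is hypothetical):

with open('bboxes/some_image.txt') as f:
    x1, y1, x2, y2 = map(float, f.read().split())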