Exemple #1
0
    def find_heads(self, img_path: str, cfg: dict) -> list:
        """Detect heads in the image stored at ``img_path``.

        A new ``Head_Detector_VGG16`` is built and its weights are loaded from
        ``MODEL_PATH`` on every call; the image is then run through the
        detector on the GPU.

        Args:
            img_path: Path of the image to analyse.
            cfg: Not read by this method.

        Returns:
            One ``[xmin, ymin, xmax, ymax]`` list of ints per predicted box,
            each coordinate divided by the scale reported by ``read_img``.
        """
        # NOTE(review): the returned id is not used here. The call is kept in
        # case get_file_id validates the path -- confirm and drop if it is pure.
        utils.get_file_id(img_path)
        img, _, scale = read_img(img_path)

        head_detector = Head_Detector_VGG16(ratios=[1], anchor_scales=[2, 4])
        trainer = Head_Detector_Trainer(head_detector).cuda()
        trainer.load(MODEL_PATH)

        # Add a leading axis of size 1 before moving the tensor to the GPU.
        img = at.totensor(img)[None, :, :, :].cuda().float()
        pred_bboxes_, _ = head_detector.predict(img,
                                                scale,
                                                mode='evaluate',
                                                thresh=THRESH)

        # Each predicted row is unpacked as (ymin, xmin, ymax, xmax) and
        # re-ordered to x-first; dividing by ``scale`` undoes the resize.
        return [
            [
                int(xmin / scale),
                int(ymin / scale),
                int(xmax / scale),
                int(ymax / scale),
            ]
            for ymin, xmin, ymax, xmax in pred_bboxes_
        ]
    def processFrame(self, f):
        """Detect heads in one frame, save an annotated copy and count them.

        The frame is converted from BGR to RGB, preprocessed, and passed to
        ``self.head_detector.predict``. Every predicted box is drawn on the
        RGB frame, which is converted back to BGR and written to
        ``processed_frames//<self.current_qid>.jpg``.

        Args:
            f: Frame accepted by ``cv2.cvtColor`` with ``COLOR_BGR2RGB``.

        Returns:
            Number of predicted boxes (``pred_bboxes_.shape[0]``).
        """
        f = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
        img = np.asarray(f, dtype=np.float32)
        # The array is still height x width x channels at this point (the
        # (2, 0, 1) transpose below moves the channel axis to the front), so
        # the height is axis 0. Unpacking ``_, H, W`` here bound the width to
        # H and produced a wrong scale for non-square frames.
        H = img.shape[0]
        img = img.transpose((2, 0, 1))
        img = preprocess(img)
        _, o_H, _ = img.shape
        scale = o_H / H
        # Add a leading axis of size 1 before moving the tensor to the GPU.
        img = at.totensor(img)[None, :, :, :].cuda().float()

        pred_bboxes_, _ = self.head_detector.predict(img,
                                                     scale,
                                                     mode='evaluate',
                                                     thresh=THRESH)

        # Ratio of frame height to preprocessed height: maps box coordinates
        # from the network input back onto the frame.
        k = f.shape[0] / img.shape[2]
        for ymin, xmin, ymax, xmax in pred_bboxes_:
            utils.draw_bounding_box_on_image_array(f, ymin * k, xmin * k,
                                                   ymax * k, xmax * k)
        f = cv2.cvtColor(f, cv2.COLOR_RGB2BGR)
        cv2.imwrite(f'processed_frames//{self.current_qid}.jpg', f)

        return pred_bboxes_.shape[0]
Exemple #3
0
    def forward(self, imgs, bboxs, scale):
        """Compute the RPN losses for a single image.

        Args:
            imgs: 4-D tensor; its last two sizes are used as the image size.
            bboxs: 3-D tensor of ground-truth boxes whose first size must be 1.
            scale: Passed through to ``self.head_detector.rpn``.

        Returns:
            A 3-tuple ``(LossTuple, rois, rois_scores)`` where the
            ``LossTuple`` is built from the localisation loss, the
            classification loss and their sum, and ``rois`` /
            ``rois_scores`` come straight from the RPN.

        Raises:
            ValueError: If the first size of ``bboxs`` is not 1.
        """
        batch_size, _, _ = bboxs.size()
        if batch_size != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, height, width = imgs.size()
        img_size = (height, width)

        feature_map = self.head_detector.extractor(imgs)
        rpn_out = self.head_detector.rpn(feature_map, img_size, scale)
        rpn_locs, rpn_scores, rois, rois_scores, anchor = rpn_out

        # Only one image per batch is supported, so take element 0 everywhere.
        gt_box = bboxs[0]
        pred_score = rpn_scores[0]
        pred_loc = rpn_locs[0]

        # ------------------ RPN targets and losses ------------------ #
        target_loc, target_label = self.anchor_target_creator(
            at.tonumpy(gt_box), anchor, img_size)
        target_label = at.tovariable(target_label).long()
        target_loc = at.tovariable(target_loc)

        loc_loss = head_detector_loss(pred_loc, target_loc,
                                      target_label.data, self.rpn_sigma)
        # Label -1 is excluded from the classification loss.
        cls_loss = F.cross_entropy(pred_score,
                                   target_label.cuda(),
                                   ignore_index=-1)

        # Feed the confusion meter with the entries whose label is > -1.
        kept_labels = target_label[target_label > -1]
        kept_scores = at.tonumpy(pred_score)[at.tonumpy(target_label) > -1]
        self.rpn_cm.add(at.totensor(kept_scores, False),
                        kept_labels.data.long())

        total_loss = sum([loc_loss, cls_loss])
        return LossTuple(loc_loss, cls_loss, total_loss), rois, rois_scores
Exemple #4
0
    def predict(self, x, scale=1., mode='evaluate', thresh=0.01):
        """Run the detector on ``x`` and return the suppressed boxes and scores.

        Args:
            x: 4-D tensor; its last two sizes are used to clamp the boxes.
            scale: Passed through to ``self.forward``.
            mode: Either ``'evaluate'`` or ``'visualize'``. Both currently use
                the same thresholds.
            thresh: Score threshold handed to ``self._suppress``.

        Returns:
            The ``(bbox, score)`` pair returned by ``self._suppress``.

        Raises:
            ValueError: If ``mode`` is not one of the supported values.
                Previously an unknown mode ran the whole forward pass and
                then failed with ``UnboundLocalError``.
        """
        if mode not in ('evaluate', 'visualize'):
            raise ValueError(
                f"mode must be 'evaluate' or 'visualize', got {mode!r}")
        nms_thresh = 0.3
        score_thresh = thresh

        _, _, rois, rois_scores, _ = self.forward(x, scale=scale)
        roi = at.totensor(rois)
        # NOTE(review): softmax is called without an explicit ``dim``, which
        # PyTorch deprecates; confirm the shape of rois_scores and pass
        # ``dim`` explicitly.
        probabilities = at.tonumpy(F.softmax(at.tovariable(rois_scores)))
        _, _, H, W = x.size()
        size = (H, W)
        # Even columns are clamped to [0, H] and odd columns to [0, W].
        roi[:, 0::2] = (roi[:, 0::2]).clamp(min=0, max=size[0])
        roi[:, 1::2] = (roi[:, 1::2]).clamp(min=0, max=size[1])
        roi_raw = at.tonumpy(roi)
        probabilities = np.squeeze(probabilities)
        bbox, score = self._suppress(roi_raw, probabilities, nms_thresh,
                                     score_thresh)
        return bbox, score