def find_heads(self, img_path: str, cfg: dict) -> list:
    """Detect heads in the image at ``img_path`` and return their boxes.

    A fresh ``Head_Detector_VGG16`` is built on every call, its checkpoint
    is loaded from ``MODEL_PATH`` through a ``Head_Detector_Trainer``, and
    the image is run through ``predict`` in ``'evaluate'`` mode with the
    module-level ``THRESH`` score threshold.

    Args:
        img_path: Path of the image to analyse; handed to ``read_img``.
        cfg: Not read by this method.

    Returns:
        A list with one ``[xmin, ymin, xmax, ymax]`` entry per predicted
        box. Every coordinate is divided by the ``scale`` returned by
        ``read_img`` and truncated with ``int``.
    """
    img, _, scale = read_img(img_path)

    head_detector = Head_Detector_VGG16(ratios=[1], anchor_scales=[2, 4])
    # The trainer is only used to load the checkpoint; prediction goes
    # through ``head_detector`` directly.
    # NOTE(review): presumably ``load`` fills the wrapped detector's
    # weights - confirm against Head_Detector_Trainer.
    trainer = Head_Detector_Trainer(head_detector).cuda()
    trainer.load(MODEL_PATH)

    # Add a leading batch axis before moving the input to the GPU.
    batch = at.totensor(img)[None, :, :, :].cuda().float()
    pred_bboxes_, _ = head_detector.predict(
        batch, scale, mode='evaluate', thresh=THRESH)

    # Rows arrive as (ymin, xmin, ymax, xmax); callers get x-first boxes
    # divided by ``scale``.
    return [
        [int(xmin / scale), int(ymin / scale),
         int(xmax / scale), int(ymax / scale)]
        for ymin, xmin, ymax, xmax in pred_bboxes_
    ]
def processFrame(self, f):
    """Detect heads in one video frame, draw them, and save the result.

    The frame is converted from BGR to RGB, preprocessed, and passed to
    ``self.head_detector.predict`` in ``'evaluate'`` mode with the
    module-level ``THRESH``. Each predicted box is drawn on the frame,
    which is then converted back to BGR and written to
    ``processed_frames//{self.current_qid}.jpg``.

    Args:
        f: Frame to process.
            NOTE(review): assumed to be an H x W x 3 BGR array as produced
            by OpenCV - confirm against the caller.

    Returns:
        The number of boxes predicted for the frame.
    """
    f = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)

    # Go channel-first BEFORE reading the shape: on the channel-last array
    # the second axis is the width, which previously ended up in ``H`` and
    # produced a wrong ``scale`` for non-square frames.
    img = np.asarray(f, dtype=np.float32).transpose((2, 0, 1))
    _, H, _ = img.shape
    img = preprocess(img)
    _, o_H, _ = img.shape
    scale = o_H / H

    img = at.totensor(img)
    img = img[None, :, :, :]  # add the batch axis
    img = img.cuda().float()

    pred_bboxes_, _ = self.head_detector.predict(
        img, scale, mode='evaluate', thresh=THRESH)

    # Ratio between the frame height and the network input height, used to
    # map the predicted boxes back onto the frame.
    k = f.shape[0] / img.shape[2]
    for i in range(pred_bboxes_.shape[0]):
        ymin, xmin, ymax, xmax = (coord * k for coord in pred_bboxes_[i, :])
        utils.draw_bounding_box_on_image_array(f, ymin, xmin, ymax, xmax)

    # Convert back to BGR once, after all boxes are drawn, for imwrite.
    f = cv2.cvtColor(f, cv2.COLOR_RGB2BGR)
    cv2.imwrite(f'processed_frames//{self.current_qid}.jpg', f)
    return pred_bboxes_.shape[0]
def forward(self, imgs, bboxs, scale):
    """Run one training forward pass and compute the RPN losses.

    Args:
        imgs: Batch of images; its ``size()`` is unpacked into four values
            and the last two are used as the image size.
        bboxs: Ground-truth boxes; its ``size()`` is unpacked into three
            values and the first one must equal 1.
        scale: Passed through unchanged to ``self.head_detector.rpn``.

    Returns:
        A tuple ``(losses, rois, rois_scores)``. ``losses`` is a
        ``LossTuple`` built from the localisation loss, the classification
        loss and their sum; ``rois`` and ``rois_scores`` are returned as
        produced by the RPN.

    Raises:
        ValueError: If the first dimension of ``bboxs`` is not 1.
    """
    batch_size, _, _ = bboxs.size()
    if batch_size != 1:
        raise ValueError('Currently only batch size 1 is supported.')

    _, _, height, width = imgs.size()
    img_size = (height, width)

    feature_map = self.head_detector.extractor(imgs)
    rpn_output = self.head_detector.rpn(feature_map, img_size, scale)
    rpn_locs, rpn_scores, rois, rois_scores, anchor = rpn_output

    # Only the first (and only) sample of the batch is used from here on.
    gt_box, score, loc = bboxs[0], rpn_scores[0], rpn_locs[0]

    # ------------------ RPN losses -------------------#
    target_loc, target_label = self.anchor_target_creator(
        at.tonumpy(gt_box), anchor, img_size)
    target_label = at.tovariable(target_label).long()
    target_loc = at.tovariable(target_loc)

    loc_loss = head_detector_loss(
        loc, target_loc, target_label.data, self.rpn_sigma)
    # Label -1 is excluded from the classification loss.
    cls_loss = F.cross_entropy(score, target_label.cuda(), ignore_index=-1)

    # Feed ``self.rpn_cm`` only with entries whose label is greater than -1.
    kept_labels = target_label[target_label > -1]
    kept_scores = at.tonumpy(score)[at.tonumpy(target_label) > -1]
    self.rpn_cm.add(at.totensor(kept_scores, False), kept_labels.data.long())

    partial_losses = [loc_loss, cls_loss]
    all_losses = partial_losses + [sum(partial_losses)]
    return LossTuple(*all_losses), rois, rois_scores
def predict(self, x, scale=1., mode='evaluate', thresh=0.01):
    """Predict boxes and scores for the input batch ``x``.

    Args:
        x: Input batch; its ``size()`` is unpacked into four values and the
            last two (``H``, ``W``) bound the returned box coordinates.
        scale: Forwarded to ``self.forward``.
        mode: Either ``'evaluate'`` or ``'visualize'``. Both currently use
            the same thresholds.
        thresh: Score threshold handed to ``self._suppress``.

    Returns:
        The ``(bbox, score)`` pair returned by ``self._suppress``.

    Raises:
        ValueError: If ``mode`` is not one of the supported values. This
            used to surface as an ``UnboundLocalError`` only after the
            whole forward pass had already run.
    """
    if mode not in ('evaluate', 'visualize'):
        raise ValueError(
            f"Unknown mode {mode!r}; expected 'evaluate' or 'visualize'.")
    nms_thresh = 0.3
    score_thresh = thresh

    _, _, rois, rois_scores, _ = self.forward(x, scale=scale)
    roi = at.totensor(rois)
    # NOTE(review): softmax is called without an explicit ``dim``; the shape
    # of ``rois_scores`` is not visible here, so the axis is left as-is.
    probabilities = at.tonumpy(F.softmax(at.tovariable(rois_scores)))

    # Clip boxes to the input: even columns against H, odd columns against W.
    _, _, H, W = x.size()
    roi[:, 0::2] = roi[:, 0::2].clamp(min=0, max=H)
    roi[:, 1::2] = roi[:, 1::2].clamp(min=0, max=W)

    roi_raw = at.tonumpy(roi)
    probabilities = np.squeeze(probabilities)
    bbox, score = self._suppress(
        roi_raw, probabilities, nms_thresh, score_thresh)
    return bbox, score