def eval(dataloader, resnet, test_num=10000):
    """Run ``resnet`` over ``dataloader`` and score it with ``eval_detection_voc``.

    Each batch's images are moved to the GPU, cast to float and passed to
    ``resnet``, which must return ``(nms_scores, sorted_labels,
    sorted_cls_bboxes)``.  When ``nms_scores`` is ``None`` an empty prediction
    is recorded for that batch; otherwise only detections with a score above
    0.5 are kept.

    Args:
        dataloader: iterable yielding 5-tuples
            ``(imgs, sizes, gt_bboxes, gt_labels, gt_difficults)``.
        resnet: callable detector (see above for its return contract).
        test_num: iteration stops after the batch whose index equals this
            value.

    Returns:
        The value returned by ``eval_detection_voc`` (called with
        ``use_07_metric=True``).
    """
    pred_bboxes, pred_labels, pred_scores = [], [], []
    gt_bboxes, gt_labels, gt_difficults = [], [], []

    for batch_idx, batch in enumerate(dataloader):
        (imgs, sizes, batch_gt_bboxes, batch_gt_labels,
         batch_gt_difficults) = batch
        scores, det_labels, det_bboxes = resnet(imgs.cuda().float())

        if scores is None:
            # Nothing detected: keep the per-image lists aligned with empties.
            pred_bboxes.append(np.array([]))
            pred_labels.append(np.array([]))
            pred_scores.append(np.array([]))
        else:
            # Indices of the detections that pass the 0.5 score threshold.
            confident = np.reshape(np.argwhere(scores > 0.5), -1)
            scores = scores[confident]
            det_labels = det_labels[confident]
            det_bboxes = det_bboxes[confident]
            pred_bboxes.append(
                np.reshape(tonumpy(det_bboxes), (-1, 4)).copy())
            pred_labels.append(np.reshape(tonumpy(det_labels), (-1)).copy())
            pred_scores.append(np.reshape(tonumpy(scores), (-1)).copy())

        gt_bboxes.extend(list(batch_gt_bboxes.numpy()))
        gt_labels.extend(list(batch_gt_labels.numpy()))
        gt_difficults.extend(list(batch_gt_difficults.numpy()))

        if batch_idx == test_num:
            break

    return eval_detection_voc(
        pred_bboxes, pred_labels, pred_scores,
        gt_bboxes, gt_labels, gt_difficults,
        use_07_metric=True)
def forward(self, inputs, scale=1.):
    """Run the detector; return losses when training, detections otherwise.

    Args:
        inputs: in training mode a 4-tuple ``(img_batch, bboxes, labels, _)``
            (the fourth element is ignored); in inference mode the image
            batch itself.  The image batch must expose a 4-element ``shape``
            whose last two entries are taken as height and width.
        scale: forwarded unchanged to ``self.rpn``.

    Returns:
        Training: the list returned by ``self.Loss``; when the duplicate
        remover produced scores, ``self.nmsLoss(...)`` is added to element 4.
        Inference: ``(nms_scores, sorted_labels, sorted_cls_bboxes)`` as
        returned by ``self.duplicate_remover``.
    """
    if self.training:
        img_batch, bboxes, labels, _ = inputs
    else:
        img_batch = inputs

    _, _, H, W = img_batch.shape
    img_size = (H, W)

    # Stem, then the four backbone stages.
    stem = self.maxpool(self.relu(self.bn1(self.conv1(img_batch))))
    x4 = self.layer4(self.layer3(self.layer2(self.layer1(stem))))
    # The FPN path (self.fpn over stages 2-4) is disabled in this variant;
    # only the last stage is used, via conv2.
    features = self.conv2(x4)

    rpn_locs, rpn_scores, rois, roi_indices, anchor = self.rpn(
        features, img_size, scale)

    if not self.training:
        roi_cls_loc, roi_score, appearance_features = self.roi_head(
            features, rois, roi_indices)
        nms_scores, sorted_labels, sorted_cls_bboxes = self.duplicate_remover(
            rois, roi_cls_loc, roi_score, appearance_features, img_size)
        return nms_scores, sorted_labels, sorted_cls_bboxes

    # Training: only the first element of the batch is used for targets.
    gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
        at.tonumpy(bboxes[0]), anchor, img_size)
    sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
        rois,
        at.tonumpy(bboxes[0]),
        at.tonumpy(labels[0]),
        self.loc_normalize_mean,
        self.loc_normalize_std)
    sample_roi_index = t.zeros(len(sample_roi))
    roi_cls_loc, roi_score, appearance_features = self.roi_head(
        features, sample_roi, sample_roi_index)
    nms_scores, sorted_labels, sorted_cls_bboxes = self.duplicate_remover(
        sample_roi, roi_cls_loc, roi_score, appearance_features, img_size)

    losses = self.Loss(
        gt_rpn_loc, gt_rpn_label, gt_roi_loc, gt_roi_label,
        roi_cls_loc, roi_score, rpn_locs, rpn_scores)
    if nms_scores is not None:
        # Fold the duplicate-removal loss into slot 4 of the loss list.
        losses[4] += self.nmsLoss(
            bboxes, labels, nms_scores, sorted_labels, sorted_cls_bboxes)
    return losses
def predict(self, imgs, sizes=None, visualize=False):
    """Detect objects in ``imgs`` and return per-image boxes, labels, scores.

    The model is switched to eval mode for the duration of the call and is
    put back into training mode (``self.train()``) before returning.

    Args:
        imgs: iterable of images; each is indexed as ``img[None]`` and the
            result's 4th axis is treated as width (i.e. images look like
            ``(C, H, W)``).
        sizes: iterable of ``(height, width)`` pairs giving the size the
            boxes should be reported in.  When ``None``, each image's own
            ``shape[1:]`` is used, so the scale factor is 1.
        visualize: accepted for interface compatibility; not used here.

    Returns:
        ``(bboxes, labels, scores)``: three lists with one entry per image,
        each entry being what ``self._suppress`` returned for that image.
    """
    self.eval()
    prepared_imgs = imgs
    if sizes is None:
        # Previously this crashed in zip(); fall back to the images' own
        # spatial size.  Materialise first so one-shot iterables survive.
        prepared_imgs = list(imgs)
        sizes = [img.shape[1:] for img in prepared_imgs]

    bboxes = []
    labels = []
    scores = []
    # Inference only: no autograd graph is needed (outputs were already
    # detached through .data).
    with torch.no_grad():
        for img, size in zip(prepared_imgs, sizes):
            img = totensor(img[None]).float()
            scale = img.shape[3] / size[1]
            # We are assuming that batch size is 1.
            roi_cls_loc, roi_scores, rois = self(img, scale=scale)
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            # Column 0 of rois is dropped; the remaining four columns are
            # rescaled to the requested output size.
            roi = totensor(rois[:, 1:]) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            # Keep the normalisation constants on the same device as the
            # predictions (the old fallback was "cuda:1" when CUDA was
            # unavailable, which can never work).
            device = roi_cls_loc.device
            mean = torch.Tensor(self.loc_normalize_mean).to(device) \
                .repeat(self.n_class)[None]
            std = torch.Tensor(self.loc_normalize_std).to(device) \
                .repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(tonumpy(roi).reshape((-1, 4)),
                                tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # Clip bounding boxes: even columns against size[1], odd columns
            # against size[0].
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[1])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[0])

            prob = tonumpy(F.softmax(totensor(roi_score), dim=1))

            raw_cls_bbox = tonumpy(cls_bbox)
            raw_prob = tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

    self.train()
    return bboxes, labels, scores
def forward(self, inputs, scale=1.):
    """Run the FPN-based detector; losses when training, raw head outputs otherwise.

    Args:
        inputs: in training mode a 4-tuple ``(img_batch, bboxes, labels,
            scale)`` whose last element replaces the ``scale`` argument; in
            inference mode the image batch itself.  The image batch must
            expose a 4-element ``shape`` whose last two entries are taken as
            height and width.
        scale: forwarded to ``self.rpn`` (inference mode only, see above).

    Returns:
        Training: whatever ``self.Loss`` returns.
        Inference: ``(roi_cls_loc, roi_score, rois, roi_indices)``.
    """
    if self.training:
        img_batch, bboxes, labels, scale = inputs
    else:
        img_batch = inputs

    _, _, H, W = img_batch.shape
    img_size = (H, W)

    # Stem, then the backbone stages; stages 2-4 feed the FPN.
    stem = self.maxpool(self.relu(self.bn1(self.conv1(img_batch))))
    x2 = self.layer2(self.layer1(stem))
    x3 = self.layer3(x2)
    x4 = self.layer4(x3)
    features = self.fpn([x2, x3, x4])

    rpn_locs, rpn_scores, rois, roi_indices, anchor = self.rpn(
        features, img_size, scale)

    if not self.training:
        roi_cls_loc, roi_score = self.roi_head(features, rois, roi_indices)
        return roi_cls_loc, roi_score, rois, roi_indices

    # Training: only the first element of the batch is used for targets.
    gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
        at.tonumpy(bboxes[0]), anchor, img_size)
    sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
        rois,
        at.tonumpy(bboxes[0]),
        at.tonumpy(labels[0]),
        self.loc_normalize_mean,
        self.loc_normalize_std)
    sample_roi_index = t.zeros(len(sample_roi))
    roi_cls_loc, roi_score = self.roi_head(
        features, sample_roi, sample_roi_index)
    return self.Loss(
        gt_rpn_loc, gt_rpn_label, gt_roi_loc, gt_roi_label,
        roi_cls_loc, roi_score, rpn_locs, rpn_scores)
def forward(self, gt_bboxes, gt_labels, nms_scores, sorted_labels,
            sorted_cls_bboxes):
    """Binary cross-entropy between duplicate-removal scores and GT matches.

    Detections are re-ordered by descending ``nms_scores``.  For every
    ground-truth box (first batch element only), the highest-scoring
    detection with IoU above 0.5 and the same label gets target 1; every
    other detection gets target 0.

    Args:
        gt_bboxes: ground-truth boxes; only ``gt_bboxes[0]`` is used.
        gt_labels: ground-truth labels; only row 0 is used.
        nms_scores: 1-D tensor of per-detection scores.
        sorted_labels: per-detection labels, indexable by the sort order.
        sorted_cls_bboxes: per-detection boxes, indexable by the sort order.

    Returns:
        The mean negative log-likelihood as a tensor.
    """
    eps = 1e-8

    ranked_scores, order = t.sort(nms_scores, descending=True)
    ranked_bboxes = sorted_cls_bboxes[order]
    ranked_labels = tonumpy(sorted_labels[order])
    gt_label_array = tonumpy(gt_labels)

    targets = t.zeros_like(ranked_scores)
    overlaps = bbox_iou(tonumpy(gt_bboxes[0]), tonumpy(ranked_bboxes))

    for gt_idx, gt_overlaps in enumerate(overlaps):
        # Detections overlapping this GT box enough to count as a match...
        candidates = np.reshape(np.argwhere(gt_overlaps > 0.5), -1)
        # ...restricted to those predicting the GT's class.
        same_class = np.reshape(
            np.argwhere(
                ranked_labels[candidates] == gt_label_array[0][gt_idx]),
            -1)
        if len(same_class) != 0:
            # Scores are sorted, so the first hit is the best-scoring one.
            targets[candidates[same_class[0]]] = 1.

    log_likelihood = (targets * (ranked_scores + eps).log()
                      + (1 - targets) * (1 - ranked_scores + eps).log())
    return -log_likelihood.mean()
def predict(self, imgs, visualize, sizes=None):
    """Detect objects in ``imgs`` and return per-image boxes, labels, scores.

    Args:
        imgs: iterable of images.  With ``visualize`` truthy each image is
            run through ``preprocess`` first; otherwise images are used as
            given.
        visualize: when truthy, ``self.training`` is set to ``False``, the
            images are preprocessed here and their pre-preprocessing
            ``shape[1:]`` is used as the output size (any ``sizes`` argument
            is ignored).
        sizes: optional iterable of per-image sizes (indexed ``[0]`` and
            ``[1]``) used when ``visualize`` is falsy.  When omitted, each
            image's own ``shape[1:]`` is used, so the scale factor is 1.
            (Previously this path failed because ``sizes`` was never bound.)

    Returns:
        ``(bboxes, labels, scores)``: three lists with one entry per image,
        each entry being what ``self._suppress`` returned for that image.

    Side effects:
        Calls ``self.use_preset`` before and after, sets ``self.n_class`` to
        21, and leaves the model in training mode via ``self.train()``.
    """
    self.use_preset(isTraining=False)
    if visualize:
        self.training = False
        prepared_imgs = []
        sizes = []
        for img in imgs:
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)
    elif sizes is None:
        # Materialise first so a one-shot iterable is not consumed twice.
        prepared_imgs = list(imgs)
        sizes = [img.shape[1:] for img in prepared_imgs]
    else:
        prepared_imgs = imgs

    bboxes = []
    labels = []
    scores = []
    for img, size in zip(prepared_imgs, sizes):
        # NOTE(review): `volatile` is a legacy autograd flag; kept so the
        # call stays compatible with whatever torch version the file targets.
        img = t.autograd.Variable(at.totensor(img).float()[None],
                                  volatile=True)
        scale = img.shape[3] / size[1]
        # We are assuming that batch size is 1.
        roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
        roi_score = roi_scores.data
        roi_cls_loc = roi_cls_loc.data
        roi = at.totensor(rois) / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        self.n_class = 21
        mean = t.Tensor(self.loc_normalize_mean).cuda() \
            .repeat(self.n_class)[None]
        std = t.Tensor(self.loc_normalize_std).cuda() \
            .repeat(self.n_class)[None]

        roi_cls_loc = (roi_cls_loc * std + mean)
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)
        # Clip bounding boxes: even columns against size[0], odd columns
        # against size[1].
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

        raw_cls_bbox = at.tonumpy(cls_bbox)
        raw_prob = at.tonumpy(prob)

        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    self.use_preset('evaluate')
    self.train()
    return bboxes, labels, scores
def _suppress(self, raw_cls_bbox, raw_prob):
    """Apply score thresholding and per-class NMS to raw predictions.

    Args:
        raw_cls_bbox: array reshaped here to ``(-1, self.n_class, 4)``.
        raw_prob: array indexed as ``raw_prob[:, class_id]``.

    Returns:
        ``(bbox, label, score)`` as float32 / int32 / float32 arrays
        concatenated over all foreground classes.  Labels are shifted down
        by one, i.e. they lie in ``[0, self.n_class - 2]``.
    """
    boxes_by_class = raw_cls_bbox.reshape((-1, self.n_class, 4))

    kept_bboxes = []
    kept_labels = []
    kept_scores = []
    # Class 0 is background, so start at 1.
    for cls_id in range(1, self.n_class):
        cls_scores = raw_prob[:, cls_id]
        confident = cls_scores > self.score_thresh
        cls_boxes = boxes_by_class[:, cls_id, :][confident]
        cls_scores = cls_scores[confident]

        keep = tonumpy(
            nms(totensor(cls_boxes), totensor(cls_scores), self.nms_thresh))

        kept_bboxes.append(cls_boxes[keep])
        kept_labels.append((cls_id - 1) * np.ones((len(keep),)))
        kept_scores.append(cls_scores[keep])

    bbox = np.concatenate(kept_bboxes, axis=0).astype(np.float32)
    label = np.concatenate(kept_labels, axis=0).astype(np.int32)
    score = np.concatenate(kept_scores, axis=0).astype(np.float32)
    return bbox, label, score
def forward(self, imgs, bboxes, labels, scale):
    """Compute the RPN and RoI-head training losses for a single image.

    Args:
        imgs: image batch with a 4-element ``shape``; the last two entries
            are used as height and width.
        bboxes: ground-truth boxes; ``bboxes.shape[0]`` must be 1.
        labels: ground-truth labels; only ``labels[0]`` is used.
        scale: forwarded to the RPN.

    Returns:
        ``[rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss, total]``
        where ``total`` is the sum of the first four.

    Raises:
        ValueError: if the batch size is not 1.
    """
    batch_size = bboxes.shape[0]
    if batch_size != 1:
        raise ValueError('Currently only batch size 1 is supported.')

    _, _, H, W = imgs.shape
    img_size = (H, W)

    features = self.faster_rcnn.extractor(imgs)
    # Original author's notes (not verifiable from this block): rpn_locs is
    # (hh*ww*9, 4), rpn_scores is (hh*ww*9, 2), rois is (2000, 4) and anchor
    # is (hh*ww*9, 4), with H and W being the post-preprocessing size; the
    # RPN scores every anchor, keeps the top 12000 and reduces them to about
    # 2000 proposals through NMS.
    rpn_locs, rpn_scores, rois, anchor = self.faster_rcnn.rpn(
        features, img_size, scale)

    # Batch size is fixed to 1, so drop the batch axis for convenience.
    bbox = bboxes[0]
    label = labels[0]
    rpn_score = rpn_scores[0]
    rpn_loc = rpn_locs[0]
    roi = rois

    # Sample RoIs and forward.
    # It's fine to break the computation graph of rois;
    # consider them as constant input.
    sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
        roi,
        tonumpy(bbox),
        tonumpy(label),
        self.loc_normalize_mean,
        self.loc_normalize_std)
    # Because of how ProposalTargetCreator is designed, a leading index
    # column (all zeros) has to be inserted here.
    sample_roi_index = np.zeros(len(sample_roi))
    sample_roi = np.insert(sample_roi, 0, values=sample_roi_index, axis=1)
    roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi)

    # ------------------ RPN losses -------------------#
    gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
        tonumpy(bbox), anchor, img_size)
    gt_rpn_label = totensor(gt_rpn_label).long()
    gt_rpn_loc = totensor(gt_rpn_loc)
    rpn_loc_loss = _fast_rcnn_loc_loss(
        rpn_loc, gt_rpn_loc, gt_rpn_label.data, self.rpn_sigma)
    # NOTE: the default ignore_index is -100, hence the explicit -1.
    rpn_cls_loss = F.cross_entropy(
        rpn_score, gt_rpn_label.to(device), ignore_index=-1)

    # ------------------ ROI losses (fast rcnn loss) -------------------#
    n_sample = roi_cls_loc.shape[0]
    roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
    gt_roi_label = totensor(gt_roi_label).long()
    gt_roi_loc = totensor(gt_roi_loc)
    # For every sampled RoI pick the 4 offsets predicted for its GT class.
    sample_rows = torch.arange(0, n_sample).long().to(device)
    roi_loc = roi_cls_loc[sample_rows, gt_roi_label]
    roi_loc_loss = _fast_rcnn_loc_loss(
        roi_loc.contiguous(), gt_roi_loc, gt_roi_label.data, self.roi_sigma)

    # Original author flagged this call as puzzling; kept as-is.
    self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())
    roi_cls_loss = F.cross_entropy(roi_score, gt_roi_label.to(device))

    losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
    losses.append(sum(losses))
    return losses
def forward(self, sample_roi, roi_cls_loc, roi_score, appearance_features,
            size):
    """Re-score detections with the relation module (learned duplicate removal).

    The per-class box offsets are de-normalised and decoded against
    ``sample_roi``, clipped to the image, and for every RoI the highest
    probability class is selected.  RoIs whose best class is 0 are dropped;
    the rest are sorted by probability and passed through the rank /
    appearance / position embeddings and ``self.relation_module`` to obtain
    a sigmoid factor that multiplies the class probability.

    Args:
        sample_roi: RoIs, ``shape[0]`` is the number of RoIs.
        roi_cls_loc: predicted offsets, viewed as ``(-1, n_class, 4)``.
        roi_score: class scores; softmax is taken over ``dim=1``.
        appearance_features: per-RoI features, indexed along axis 0.
        size: pair of clipping limits; even box coordinates are clipped to
            ``size[0]`` and odd ones to ``size[1]``.

    Returns:
        ``(nms_scores, sorted_labels - 1, sorted_cls_bboxes)``, or
        ``(None, None, None)`` when every RoI's best class is 0.
    """
    N = sample_roi.shape[0]
    roi_score = roi_score.data
    roi_cls_loc = roi_cls_loc.data
    roi = at.totensor(sample_roi)

    mean = t.Tensor(self.loc_normalize_mean).cuda() \
        .repeat(self.n_class)[None]
    std = t.Tensor(self.loc_normalize_std).cuda() \
        .repeat(self.n_class)[None]

    roi_cls_loc = (roi_cls_loc * std + mean)
    roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
    roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
    cls_bbox = loc2bbox(
        at.tonumpy(roi).reshape((-1, 4)),
        at.tonumpy(roi_cls_loc).reshape((-1, 4)))
    cls_bbox = at.totensor(cls_bbox)
    cls_bbox = cls_bbox.view(-1, self.n_class, 4)
    # Clip bounding boxes.  cls_bbox is 3-D here, so the coordinate stride
    # must be applied to the LAST axis; slicing axis 1 (as before) strided
    # over classes and clipped whole boxes against the wrong dimension.
    cls_bbox[:, :, 0::2] = (cls_bbox[:, :, 0::2]).clamp(min=0, max=size[0])
    cls_bbox[:, :, 1::2] = (cls_bbox[:, :, 1::2]).clamp(min=0, max=size[1])

    prob = F.softmax(at.tovariable(roi_score), dim=1)
    prob, prob_argmax = torch.max(prob, dim=-1)
    # Keep, for every RoI, only the box of its most probable class.
    cls_bbox = cls_bbox[np.arange(start=0, stop=N), prob_argmax]

    # Drop RoIs whose best class is 0.
    nonzero_idx = torch.nonzero(prob_argmax)
    if nonzero_idx.size()[0] == 0:
        return None, None, None

    nonzero_idx = nonzero_idx[:, 0]
    prob_argmax = prob_argmax[nonzero_idx]
    prob = prob[nonzero_idx]
    cls_bbox = cls_bbox[nonzero_idx]
    appearance_features_nobg = appearance_features[nonzero_idx]

    # The sorted values returned by torch.sort equal prob[prob_argsort].
    sorted_prob, prob_argsort = torch.sort(prob, descending=True)
    sorted_cls_bboxes = cls_bbox[prob_argsort]
    sorted_labels = prob_argmax[prob_argsort]
    sorted_features = appearance_features_nobg[prob_argsort]

    nms_rank_embedding = RankEmbedding(sorted_prob.size()[0],
                                       self.appearance_feature_dim)
    nms_rank = self.nms_rank_fc(nms_rank_embedding)
    roi_feat_embedding = self.roi_feat_embedding_fc(sorted_features)
    nms_embedding_feat = nms_rank + roi_feat_embedding

    position_embedding = PositionalEmbedding(
        sorted_cls_bboxes, dim_g=self.geo_feature_dim)
    nms_logit = self.relation_module(nms_embedding_feat, position_embedding)
    nms_logit = self.nms_logit_fc(nms_logit)
    s1 = self.sigmoid(nms_logit).view(-1)

    nms_scores = s1 * sorted_prob
    # Shift labels down by one so that they no longer count class 0.
    return nms_scores, sorted_labels - 1, sorted_cls_bboxes