def __call__(self, anchors: torch.Tensor, truths):
    """Score how well ``anchors`` cover ``truths`` as an IoU-based loss.

    Ground-truth boxes whose width or height is below ``self.igs`` are
    discarded.  For every remaining truth the best IoU over all anchors is
    taken, and the loss combines the negative logs of three statistics of
    those best IoUs.  While ``self.decay`` is above 0.01 the result is
    blended with a term built from the best IoU of every anchor, and
    ``self.decay`` is multiplied by 0.9 (a side effect of each call).

    Args:
        anchors: Anchor boxes; passed through ``point_form`` before the IoU
            computation (presumably center-size form -- confirm against
            ``point_form``).
        truths: Ground-truth boxes, indexed here as corner form
            ``(x_min, y_min, x_max, y_max)``.

    Returns:
        Scalar loss tensor.
    """
    # Filter out objects that are too small in either dimension.
    truths_wh = truths[:, 2:] - truths[:, :2]
    too_small = (truths_wh[:, 0] < self.igs) | (truths_wh[:, 1] < self.igs)
    truths = truths[~too_small]

    # IoU matrix, size [num_truths, num_priors].
    overlaps = jaccard(truths, point_form(anchors))
    # Best prior for each ground truth.
    best_prior_overlap, _ = overlaps.max(1, keepdim=False)

    l1 = best_prior_overlap.mean()
    # Mean over the poorly covered truths only (best IoU <= 0.5).  When every
    # truth is covered the selection is empty and mean() would be NaN, which
    # poisons the whole loss; use 1 so that log() contributes nothing.
    poorly_covered = best_prior_overlap[best_prior_overlap <= 0.5]
    if poorly_covered.numel() > 0:
        l2 = poorly_covered.mean()
    else:
        l2 = best_prior_overlap.new_tensor(1.)
    l3 = (best_prior_overlap ** (1. / 3)).mean()

    # NOTE: an additional width/height regression term (an approximation of
    # the SSD localisation loss, weighted by self.l) used to be added here
    # and is currently disabled.
    loss = -(l1.log() + l2.log() + l3.log() * 3) / 3.

    if self.decay > 0.01:
        # Early on, also reward anchors for overlapping some truth at all.
        best_truth_overlap, _ = overlaps.max(0, keepdim=False)
        l0 = -best_truth_overlap.mean().log()
        loss = l0 * self.decay + loss * (1. - self.decay)
        self.decay *= 0.9
    return loss
def forward(self, locs: torch.Tensor, params: torch.Tensor, truths, variance=(0.1, 0.2)):
    """Return the IoU-weighted loss over the sigmoid gates of the priors.

    Priors are assembled from ``locs`` and the first two columns of
    ``params``; the last column of ``params`` is squashed with a sigmoid
    into one gate per prior.  A per-prior weight is 1 where the prior's best
    IoU exceeds ``self.thresh`` and ``self.k`` where the prior is the best
    match of some truth; priors with zero weight are dropped.

    Args:
        locs: First two columns of every prior, size [num_priors, 2].
        params: Per-prior parameters; columns ``[:2]`` complete the prior
            box and column ``-1`` is the gate logit.
        truths: Ground-truth boxes handed to ``jaccard``.
        variance: Unused here; kept for interface compatibility.

    Returns:
        Scalar tensor ``(sum(gate * weight * iou) + beta * sum(gate)) /
        sum(weight)``.
    """
    sigmoid_alphas = params[:, -1].sigmoid()  # size [num_priors]
    priors = torch.cat([locs, params[:, :2]], dim=1)  # size [num_priors, 4]

    # The matching is bookkeeping only; gradients flow through the gates.
    with torch.no_grad():
        overlaps = jaccard(truths, point_form(priors))  # size [num_truths, num_priors]
        # [num_priors] best ground truth for each prior
        best_truth_overlap, _ = overlaps.max(0, keepdim=False)
        # [num_truths] best prior for each ground truth
        best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=False)
        # A prior that is the best match of a truth takes that truth's IoU.
        best_truth_overlap[best_prior_idx] = best_prior_overlap

        # Build the weights on the same device as the overlaps: a CPU tensor
        # cannot be indexed with the CUDA masks derived from them.
        x_filter = torch.zeros(best_truth_overlap.size(),
                               device=best_truth_overlap.device)
        x_filter[best_truth_overlap > self.thresh] = 1.
        x_filter[best_prior_idx] = self.k

        # Keep only priors with a non-zero weight.
        msk = x_filter > 1e-7
        x_filter = x_filter[msk]
        best_truth_overlap = best_truth_overlap[msk]

    weighted = (sigmoid_alphas[msk] * x_filter * best_truth_overlap).sum()
    return (weighted + self.beta * sigmoid_alphas.sum()) / x_filter.sum()
def forward(self, locs: torch.Tensor, params: torch.Tensor, truths, variance=(0.1, 0.2)):
    """Return the gated smooth-L1 localisation loss of the priors.

    Priors are assembled from ``locs`` and the first two columns of
    ``params``; the last column of ``params`` is squashed with a sigmoid
    into one gate per prior.  Every prior is matched to its best truth
    (priors that are the best match of some truth are forced onto that
    truth), the match is encoded with ``encode`` and penalised with a
    smooth-L1 term.

    Args:
        locs: First two columns of every prior, size [num_priors, 2].
        params: Per-prior parameters; columns ``[:2]`` complete the prior
            box and column ``-1`` is the gate logit.
        truths: Ground-truth boxes handed to ``jaccard`` and ``encode``.
        variance: Forwarded unchanged to ``encode``.

    Returns:
        Scalar tensor ``(sum(gate * weight * smooth_l1) + beta * sum(gate))
        / sum(weight)``, where the weight is 1 for priors whose best IoU
        exceeds ``self.thresh`` and ``self.k`` for best-matching priors.
    """
    sigmoid_alphas = params[:, -1].sigmoid()  # size [num_priors]
    priors = torch.cat([locs, params[:, :2]], dim=1)  # size [num_priors, 4]

    # The matching is bookkeeping only and must not be differentiated.
    with torch.no_grad():
        overlaps = jaccard(truths, point_form(priors))  # size [num_truths, num_priors]
        # [num_priors] best ground truth for each prior
        best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=False)
        # [num_truths] best prior for each ground truth
        _, best_prior_idx = overlaps.max(1, keepdim=False)
        # Force every truth's best prior to be matched with that truth.  The
        # overlap values themselves are deliberately left untouched here.
        best_truth_idx[best_prior_idx] = torch.arange(
            best_prior_idx.size(0), dtype=torch.long, device=best_truth_idx.device)

        # Build the weights on the same device as the overlaps: a CPU tensor
        # cannot be indexed with the CUDA masks derived from them.
        x_filter = torch.zeros(best_truth_overlap.size(),
                               device=best_truth_overlap.device)
        x_filter[best_truth_overlap > self.thresh] = 1.
        x_filter[best_prior_idx] = self.k

    # Encode the matched truths relative to the priors, then smooth L1.
    encoded_dis = torch.abs(encode(truths[best_truth_idx], priors, variance))
    l1_tensor = torch.where(encoded_dis < 1.,
                            0.5 * encoded_dis ** 2,
                            encoded_dis - 0.5).sum(dim=1)

    weighted = (sigmoid_alphas * x_filter * l1_tensor).sum()
    return (weighted + self.beta * sigmoid_alphas.sum()) / x_filter.sum()
def __mk_iou_tensor(self, prior_boxes):
    """Return, for every box in ``self.gts``, its best IoU over ``prior_boxes``.

    ``prior_boxes`` may also be an int key: the bits above the low 16 select
    a group and the low 16 bits select an entry inside it, and the boxes are
    then looked up in ``self.prior_groups``.  The ground truths are moved to
    CUDA and processed one ``self.intervals`` slice at a time.
    """
    if isinstance(prior_boxes, int):
        group, item = prior_boxes >> 16, prior_boxes & 0xffff
        prior_boxes = self.prior_groups[group][item]

    best_ious = torch.zeros(self.gts.size(0))
    for lo, hi in self.intervals:
        chunk = self.gts[lo:hi].cuda()
        # Row-wise maximum: the best prior for each truth of the chunk.
        best_ious[lo:hi] = jaccard(chunk, prior_boxes).max(1, keepdim=False)[0]
    return best_ious
def match(truths, priors, all_priors, mask_t, idx):
    """Mark the priors that overlap any ground-truth box and store the mask.

    A prior is marked when its IoU with at least one truth exceeds 0.35.
    ``truths`` is scanned in order and the scan stops at the first row that
    sums to zero (presumably zero padding -- confirm against the caller).

    Args:
        truths: Ground-truth boxes, one per row.
        priors: Priors to label; converted with ``point_form`` before the
            IoU computation.
        all_priors: Unused; kept for interface compatibility (it only fed a
            disabled adaptive-threshold experiment).
        mask_t: Output tensor; ``mask_t[idx]`` receives the per-prior mask.
        idx: Index of the current image inside ``mask_t``.
    """
    iou_threshold = 0.35
    iou_map = jaccard(point_form(priors), truths)  # size [num_priors, num_truths]
    feature_size = int(iou_map.shape[0])

    # Count, per prior, how many truths it overlaps.  Allocate on the IoU
    # map's device so the accumulation works wherever the inputs live.
    mask_per_img = torch.zeros([feature_size], dtype=torch.int64,
                               device=iou_map.device)
    for k in range(truths.shape[0]):
        if torch.sum(truths[k]) == 0.:
            break
        mask_per_img += (iou_map[:, k] > iou_threshold).long()

    mask_t[idx] = mask_per_img > 0
def fast_nms(box_thre, coef_thre, class_thre, cfg, second_threshold=False):
    """Run class-wise Fast NMS and return the surviving detections.

    Args:
        box_thre: Candidate boxes, one row of 4 values per candidate.
        coef_thre: Per-candidate coefficients, one row per candidate.
        class_thre: Scores, size [num_classes, num_candidates].
        cfg: Supplies ``top_k``, ``nms_iou_thre``, ``nms_score_thresh`` and
            ``max_detections``.
        second_threshold: Also drop detections whose score is not above
            ``cfg.nms_score_thresh``.

    Returns:
        ``(boxes, coefficients, class_ids, scores)`` sorted by descending
        score and truncated to ``cfg.max_detections`` entries.
    """
    # Rank the candidates of every class and keep the top_k best of each.
    ranked_scores, order = class_thre.sort(1, descending=True)
    order = order[:, :cfg.top_k].contiguous()
    ranked_scores = ranked_scores[:, :cfg.top_k]

    n_classes, n_dets = order.size()
    flat_order = order.reshape(-1)
    boxes = box_thre[flat_order, :].reshape(n_classes, n_dets, 4)
    coefs = coef_thre[flat_order, :].reshape(n_classes, n_dets, -1)

    # Pairwise IoU inside each class; the strict upper triangle keeps, for
    # every detection, only its overlaps with higher-scoring ones.
    pair_iou = jaccard(boxes, boxes)
    pair_iou.triu_(diagonal=1)
    highest_iou = pair_iou.max(dim=1)[0]

    # Drop everything that overlaps a better detection too much.
    keep = highest_iou <= cfg.nms_iou_thre
    # The score threshold is optional: skipping it costs little compute and
    # was reported to give slightly better mAP.
    if second_threshold:
        keep = keep * (ranked_scores > cfg.nms_score_thresh)

    # Label every kept detection with the class row it came from.
    class_ids = torch.arange(n_classes, device=boxes.device)[:, None].expand_as(keep)[keep]
    kept_boxes = boxes[keep]
    kept_coefs = coefs[keep]
    kept_scores = ranked_scores[keep]

    # Keep only the cfg.max_detections best scores across all classes.
    kept_scores, rank = kept_scores.sort(0, descending=True)
    rank = rank[:cfg.max_detections]
    kept_scores = kept_scores[:cfg.max_detections]

    return kept_boxes[rank], kept_coefs[rank], class_ids[rank], kept_scores
def build_targets(self, target, anchors, w, h, ignore_threshold):
    """Build the per-cell training targets for one detection scale.

    Args:
        target: Tensor whose last dimension is read as
            ``(x, y, w, h, class)``; the first four values are multiplied by
            the grid size, so they are presumably normalised to [0, 1] --
            confirm against the data loader.  Rows whose first four values
            sum to zero are skipped.
        anchors: Sequence of ``(width, height)`` anchor sizes, in grid units.
        w: Grid width.
        h: Grid height.
        ignore_threshold: Anchors whose shape IoU with a target exceeds this
            value are removed from the no-object mask at the target's cell.

    Returns:
        ``(obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls)``.  All but
        ``tcls`` have size [batch, anchors, h, w]; ``tcls`` has an extra
        trailing class dimension holding a one-hot label.
    """
    grid_shape = (self.batch_size, self.num_anchors, h, w)
    obj_mask = torch.zeros(*grid_shape).byte()
    noobj_mask = torch.ones(*grid_shape).byte()
    tx = torch.zeros(*grid_shape)
    ty = torch.zeros(*grid_shape)
    tw = torch.zeros(*grid_shape)
    th = torch.zeros(*grid_shape)
    tconf = torch.zeros(*grid_shape)
    tcls = torch.zeros(*grid_shape, self.classes)

    # Work on a copy so the caller's targets are not rescaled in place.
    target_bbox = target.clone()
    target_bbox[..., :-1:2] *= w   # x and width
    target_bbox[..., 1:-1:2] *= h  # y and height
    gwh = target_bbox[..., 2:-1]

    # Shape-only boxes (0, 0, w, h): the IoU then compares sizes, not positions.
    anchor_shape = torch.FloatTensor(
        np.concatenate((np.zeros((self.num_anchors, 2)), np.array(anchors)), 1))
    gwh_shape = torch.FloatTensor(
        np.concatenate((np.zeros_like(gwh), np.array(gwh)), 2)).contiguous()

    for b in range(target_bbox.size(0)):
        for t in range(target_bbox.size(1)):
            if target_bbox[b, t][:-1].sum() == 0:
                continue
            gx = target_bbox[b, t, 0]
            gy = target_bbox[b, t, 1]
            gw = target_bbox[b, t, 2]
            gh = target_bbox[b, t, 3]

            # Grid cell holding the box centre.
            gi = int(gx)
            gj = int(gy)
            # NOTE(review): cell 0 is remapped to cell 1 on both axes, which
            # makes the offsets below negative -- confirm this is intended.
            gi = 1 if gi == 0 else gi
            gj = 1 if gj == 0 else gj

            # Shape IoU between this target and every anchor.
            anchor_ious = jaccard(gwh_shape[b, t].unsqueeze(0), anchor_shape).squeeze()

            # Anchors that overlap well enough are not penalised as background.
            # The grids are laid out (batch, anchor, row, col), i.e. [gj, gi],
            # exactly like every other write below.
            noobj_mask[b, anchor_ious > ignore_threshold, gj, gi] = 0

            # The best matching anchor becomes responsible for the target.
            best_n = np.argmax(anchor_ious)
            obj_mask[b, best_n, gj, gi] = 1
            noobj_mask[b, best_n, gj, gi] = 0

            # Centre offsets inside the cell and log-scale size ratios.
            tx[b, best_n, gj, gi] = gx - gi
            ty[b, best_n, gj, gi] = gy - gj
            tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][0] + 1e-16)
            th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][1] + 1e-16)

            # Objectness and one-hot class label.
            tconf[b, best_n, gj, gi] = 1
            tcls[b, best_n, gj, gi, int(target_bbox[b, t, -1])] = 1

    return obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls
def compare(bbox, other):
    """Print how well the box sizes of ``bbox`` are matched by ``other``.

    The two arguments are walked in lockstep, one pair of size tensors per
    layer.  Sizes are placed in columns 2-3 of otherwise zero boxes, so the
    IoU depends on the sizes only.  For each layer the mean, over the boxes
    of ``bbox``, of the best IoU with any box of ``other`` is printed,
    followed by the mean of those per-layer values.
    """
    layer_means = []
    for layer, (sizes_a, sizes_b) in enumerate(zip(bbox, other)):
        boxes_a = torch.zeros(sizes_a.size(0), 4)
        boxes_b = torch.zeros(sizes_b.size(0), 4)
        boxes_a[:, 2:] = sizes_a
        boxes_b[:, 2:] = sizes_b

        # size [num_a, num_b]
        iou = jaccard(point_form(boxes_a), point_form(boxes_b))
        layer_mean = iou.max(1)[0].mean().item()
        layer_means.append(layer_mean)
        print("Layer %d avg overlap = %.4f" % (layer, layer_mean))

    print("Mean Avg Overlap = %.4f" % (sum(layer_means) / len(layer_means)))
def mk_iou_tensor(anchors: torch.Tensor, gts: torch.Tensor, interval=512, ret_idx=False):
    """Return the best IoU over ``anchors`` for every ground-truth box.

    The ground truths are processed in chunks of ``interval`` rows so that
    only one chunk-sized IoU matrix exists at a time.

    Args:
        anchors: Anchor boxes in the form expected by ``jaccard``.
        gts: Ground-truth boxes, one per row.
        interval: Number of ground truths handled per chunk.
        ret_idx: Also return the index of the best anchor for each truth.

    Returns:
        A CPU float tensor of size [num_gts] with the best IoUs, or the
        tuple ``(best_ious, best_anchor_idx)`` when ``ret_idx`` is true.

    Raises:
        AssertionError: If the IoU matrix of a chunk contains NaN.
    """
    num_gts = gts.size(0)
    best_ious = torch.zeros(num_gts)
    best_idx = torch.zeros(num_gts, dtype=torch.long) if ret_idx else None

    for start in range(0, num_gts, interval):
        end = min(start + interval, num_gts)
        # Follow the anchors' device rather than assuming CUDA, so the
        # helper also works on CPU-only machines and on non-default GPUs.
        truths = gts[start:end].to(anchors.device)
        overlaps = jaccard(truths, anchors)
        assert torch.isnan(overlaps).sum().item() == 0, "IoU matrix contains NaN"

        chunk_best, chunk_idx = overlaps.max(1, keepdim=False)
        best_ious[start:end] = chunk_best
        if ret_idx:
            best_idx[start:end] = chunk_idx
        # Release the chunk-sized temporaries before the next allocation.
        del overlaps, chunk_best, chunk_idx

    if ret_idx:
        return best_ious, best_idx
    return best_ious
def get_conf_gt(detection, h, w, annopath, classes=HELMET_CLASSES, cls_to_ind=None):
    """Compute per-class IoUs between decoded detections and annotations.

    Args:
        detection: Raw detection output, decoded by ``decode_raw_detection``.
        h: Image height forwarded to ``decode_raw_detection``.
        w: Image width forwarded to ``decode_raw_detection``.
        annopath: Annotation file parsed by ``parse_rec``; every record must
            provide ``'name'`` and ``'bbox'``.
        classes: Class names; one decoded detection set is expected per class.
        cls_to_ind: Optional mapping from class name to class index; built
            from ``classes`` when omitted.

    Returns:
        ``(cls_ious, max_ious)``.  ``cls_ious[c]`` is the
        [num_detections, num_truths] IoU matrix of class ``c``, or an empty
        tensor when the class has no detections or no truths.
        ``max_ious[c]`` holds the best IoU of every detection, or ``None``
        for an empty class.

    Raises:
        AssertionError: If the number of decoded detection sets differs from
            the number of classes.
    """
    # Honour the ``classes`` argument instead of the module default, so a
    # custom class list stays consistent with ``cls_to_ind``.
    num_classes = len(classes)
    dets = decode_raw_detection(detection, h, w)
    assert num_classes == len(dets)
    if cls_to_ind is None:
        cls_to_ind = {name: index for index, name in enumerate(classes)}
    rec = parse_rec(annopath)

    # Ground-truth boxes per class, N * 4.
    bbgt = [
        torch.tensor([x['bbox'] for x in rec if cls_to_ind[x['name']] == cls_idx],
                     dtype=torch.float)
        for cls_idx in range(num_classes)
    ]
    # Detected boxes per class, K * 4 (column 0 is dropped).
    bbdet = [d[:, 1:] if d.size(0) > 0 else torch.tensor([]) for d in dets]

    cls_ious = [torch.tensor([]) for _ in range(num_classes)]
    for cls_idx, (bb, gt) in enumerate(zip(bbdet, bbgt)):
        if gt.size(0) == 0 or bb.size(0) == 0:
            continue
        # Transposed so that rows are detections and columns are truths.
        cls_ious[cls_idx] = jaccard(gt, bb).t()

    max_ious = [x.max(1)[0] if x.size(0) > 0 else None for x in cls_ious]
    return cls_ious, max_ious
def forward(self, locs: torch.Tensor, params: torch.Tensor, truths, variance=(0.1, 0.2)):
    """Return the IoU-weighted gate loss and print matching diagnostics.

    Priors are assembled from ``locs`` and the first two columns of
    ``params``; the last column of ``params`` is squashed with a sigmoid
    into one gate per prior.  A per-prior weight is 1 where the prior's best
    IoU exceeds ``self.thresh`` and ``self.k`` where the prior is the best
    match of some truth; priors with zero weight are dropped.

    Args:
        locs: First two columns of every prior, size [num_priors, 2].
        params: Per-prior parameters; columns ``[:2]`` complete the prior
            box and column ``-1`` is the gate logit.
        truths: Ground-truth boxes handed to ``jaccard``.
        variance: Unused here; kept for interface compatibility.

    Returns:
        Scalar tensor ``(sum(gate * weight * iou) + beta * sum(gate)) /
        sum(weight)``.  Three diagnostic lines are printed on every call.
    """
    sigmoid_alphas = params[:, -1].sigmoid()  # size [num_priors]
    priors = torch.cat([locs, params[:, :2]], dim=1)  # size [num_priors, 4]

    # The matching is bookkeeping only; gradients flow through the gates.
    with torch.no_grad():
        overlaps = jaccard(truths, point_form(priors))  # size [num_truths, num_priors]
        # [num_priors] best ground truth for each prior
        best_truth_overlap, _ = overlaps.max(0, keepdim=False)
        # [num_truths] best prior for each ground truth
        best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=False)
        # A prior that is the best match of a truth takes that truth's IoU.
        best_truth_overlap[best_prior_idx] = best_prior_overlap

        # Build the weights on the same device as the overlaps: a CPU tensor
        # cannot be indexed with the CUDA masks derived from them.
        x_filter = torch.zeros(best_truth_overlap.size(),
                               device=best_truth_overlap.device)
        x_filter[best_truth_overlap > self.thresh] = 1.
        x_filter[best_prior_idx] = self.k

        # Keep only priors with a non-zero weight.
        msk = x_filter > 1e-7
        x_filter = x_filter[msk]
        best_truth_overlap = best_truth_overlap[msk]

    # Log info.
    print('%d best truths after filtering' % (x_filter > 1e-4).sum().item())
    print('%d best priors, of which %d priors fail to meet iou threshold'
          % (best_prior_idx.size(0), (best_prior_overlap <= self.thresh).sum().item()))

    first_term = (sigmoid_alphas[msk] * x_filter * best_truth_overlap).sum()
    weight_sum = x_filter.sum()
    ret = (first_term + self.beta * sigmoid_alphas.sum()) / weight_sum
    print("loss fn: (%.2f(1st term) + %.2f(2nd term)) / %.2f(3rd term) = %.2f"
          % (first_term.item(), self.beta * sigmoid_alphas.sum().item(),
             weight_sum.item(), ret.item()))

    # Return loss value.
    return ret
def forward(self, output, labels):
    """Collect per-image detection statistics for a later AP computation.

    Args:
        output: Post-NMS predictions indexed as ``output[image, row]``; each
            row is ``[box (4), score, class]`` and rows whose score is not
            above 0 are ignored.
        labels: Per-image ground truths; each row is ``[box (4), class]``.
            Boxes of both inputs are multiplied by ``self.width`` and
            ``self.height``, so they are presumably normalised -- confirm
            against the caller.

    Returns:
        One tuple ``(correct, scores, predicted_classes, target_classes)``
        per image.  ``correct[i]`` is 1 when prediction ``i`` matches a not
        yet claimed target of the same class with IoU above
        ``self.iou_thresh``, else 0.
    """
    stats = []
    for img_idx in range(len(output)):
        targets = labels[img_idx]
        # Boolean-mask indexing copies, so scaling ``preds`` below does not
        # touch ``output``.  An image without detections simply yields an
        # empty tensor (never ``None``) and falls through with empty stats.
        preds = output[img_idx, output[img_idx, :, 4].gt(0)]
        num_gt = len(targets)  # number of targets
        tcls = targets[:, 4].tolist() if num_gt else []  # target classes

        # Assign all predictions as incorrect.
        correct = [0] * len(preds)
        if num_gt:
            detected = set()  # indices of the targets already matched
            tcls_tensor = targets[:, 4]

            # Clone before scaling: the slice is a view, and scaling it in
            # place would silently rescale the caller's labels on each call.
            tboxes = targets[:, :4].clone()
            tboxes[:, [0, 2]] *= self.width
            tboxes[:, [1, 3]] *= self.height
            preds[:, [0, 2]] *= self.width
            preds[:, [1, 3]] *= self.height

            for ii, pred in enumerate(preds):
                # Stop once every target of the image has been located.
                if len(detected) == num_gt:
                    break
                pbox = pred[:4].unsqueeze(0)
                pcls = pred[5]
                # Skip predictions whose class is absent from the image.
                if pcls.item() not in tcls:
                    continue

                # Best IoU among the targets of the predicted class.
                m = (pcls == tcls_tensor).nonzero().view(-1)
                iou, bi = jaccard(pbox, tboxes[m]).max(1)
                target_idx = int(m[bi])

                # Correct when the overlap is high enough and the target
                # has not been claimed by an earlier prediction.
                if iou > self.iou_thresh and target_idx not in detected:
                    correct[ii] = 1
                    detected.add(target_idx)

        # (correct, pconf, pcls, tcls)
        stats.append((correct, preds[:, 4].cpu(), preds[:, 5].cpu(), tcls))
    return stats
def _bbox_iou(bbox1, bbox2, iscrowd=False):
    """Return ``jaccard(bbox1, bbox2, iscrowd)`` moved to the CPU."""
    return jaccard(bbox1, bbox2, iscrowd).cpu()
def fast_nms(box_thre, coef_thre, class_thre, second_threshold: bool = False):
    """Run class-wise Fast NMS using the module-level ``cfg``.

    Args:
        box_thre: Candidate boxes, one row of 4 values per candidate.
        coef_thre: Per-candidate coefficients, one row per candidate.
        class_thre: Scores, size [num_classes, num_candidates].
        second_threshold: Also drop detections whose score is not above
            ``cfg.conf_thresh``.

    Returns:
        ``(boxes, coefficients, class_ids, scores)`` sorted by descending
        score and truncated to ``cfg.max_detections`` entries.
    """
    # Rank the candidates of every class and keep the top_k best of each.
    ranked_scores, order = class_thre.sort(1, descending=True)
    order = order[:, :cfg.top_k].contiguous()
    ranked_scores = ranked_scores[:, :cfg.top_k]

    n_classes, n_dets = order.size()
    flat_order = order.view(-1)
    boxes = box_thre[flat_order, :].view(n_classes, n_dets, 4)
    coefs = coef_thre[flat_order, :].view(n_classes, n_dets, -1)

    # Pairwise IoU inside each class; the strict upper triangle keeps, for
    # every detection, only its overlaps with higher-scoring ones.
    pair_iou = jaccard(boxes, boxes)
    pair_iou.triu_(diagonal=1)
    highest_iou = pair_iou.max(dim=1)[0]

    # Drop everything that overlaps a better detection too much.
    keep = highest_iou <= cfg.nms_thre
    # The score threshold is optional: skipping it costs little compute and
    # was reported to give slightly better mAP.
    if second_threshold:
        keep = keep * (ranked_scores > cfg.conf_thresh)

    # Label every kept detection with the class row it came from.
    class_ids = torch.arange(n_classes, device=boxes.device)[:, None].expand_as(keep)[keep]
    kept_boxes = boxes[keep]
    kept_coefs = coefs[keep]
    kept_scores = ranked_scores[keep]

    # Keep only the cfg.max_detections best scores across all classes.
    kept_scores, rank = kept_scores.sort(0, descending=True)
    rank = rank[:cfg.max_detections]
    kept_scores = kept_scores[:cfg.max_detections]

    # NOTE: an experiment that kept only the highest-scoring class when one
    # box was predicted for several classes lowered mAP slightly and is
    # therefore not applied here.
    return kept_boxes[rank], kept_coefs[rank], class_ids[rank], kept_scores
# anchor = priors.numpy() # min_xy = torch.max(gt[:,None,:2], priors[:,:2]) # max_xy = torch.min(gt[:,None,2:], priors[:,2:]) # inter = torch.clamp(max_xy - min-xy, min=0, max=1) from utils.box_utils import jaccard # iou2 = jaccard(gt.double(), priors.double()) # iou2_2_max = torch.max(jaccard(gt.double(), priors.double(),), 1)[0] iou_static = {} iou_param = [(1, 1), (1.25, .8), (1.5, .65), (2, .5)] param_name_list = [] for alpha, beta in iou_param: param_name_list.append("{}_{}".format(alpha, beta)) iou_static[iou_param] = torch.max(jaccard(gt.double(), priors.double(), alpha, beta)[0], dim=1)[0] iou_static['gt_area'] = (gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1]) iou_df = pd.DataFrame.from_dict(iou_static) bin = np.concatenate( (np.arange(0, 0.1 - 1e-5, 0.01), np.arange(0.1, 0.5 - 1e-5, 0.05), np.arange(0.5, 1 + 1e-5, 0.1))) bin = [0, .02, .06, .2, .4, 1] labels = [np.mean(bin[i:i + 2]).round(3) for i in range(len(bin) - 1)] iou_df['size_index'] = pd.cut(iou_df['gt_area'], bin, labels=labels) iou_df['size_range'] = pd.cut(iou_df['gt_area'], bin) iou_des = {} for ii in iou_df.columns[:5]:
def eval_net(net, cuda, dataset, transform, top_k):
    """Evaluate ``net`` on part of ``dataset`` and return the mean AP.

    Every evaluated image is resized to 300x300, mean-subtracted and passed
    through the network.  Per class, detections with a score of at least
    0.01 are matched against the ground truths at an IoU threshold of 0.5,
    and recall, precision and AP (via ``voc_ap``) are printed.

    Args:
        net: Detection network; its output is indexed as
            ``[batch, class, detection, 5]`` with the score in column 0.
        cuda: Move the input to the GPU before the forward pass.
        dataset: Must provide ``pull_image(i)`` and ``pull_anno(i)``.
        transform: Unused; kept for interface compatibility.
        top_k: Unused; kept for interface compatibility.

    Returns:
        Mean of the per-class APs, or 0.0 when no class could be scored.
    """
    num_images = len(dataset)
    ovthresh = 0.5
    confidence_threshold = 0.01
    num_classes = 0

    # Per-class running lists, one entry per processed detection/truth.
    fp = defaultdict(list)
    tp = defaultdict(list)
    gts = defaultdict(list)
    precision = Counter()
    recall = Counter()
    ap = Counter()

    # NOTE(review): only the first 1% of the images is evaluated while the
    # progress message reports the full count -- confirm this is intended.
    for i in range(num_images // 100):
        if i % 10 == 0:
            print('Evaluating image {:d}/{:d}....'.format(i + 1, num_images))
        t1 = time.time()

        img = dataset.pull_image(i)
        img_id, anno = dataset.pull_anno(i)
        anno = torch.Tensor(anno).long()

        x = cv2.resize(np.array(img), (300, 300)).astype(np.float32)
        x -= (104, 117, 123)
        x = x.transpose(2, 0, 1)
        x = torch.from_numpy(x).unsqueeze(0)
        if cuda:
            x = x.cuda()
        # ``Variable(..., volatile=True)`` no longer disables autograd;
        # no_grad() is the supported way to run inference without a graph.
        with torch.no_grad():
            y = net(x)  # forward pass
        detections = y.data

        # Scale used to bring the annotations into the detections' range.
        scale = torch.Tensor([img.size[0], img.size[1],
                              img.size[0], img.size[1]])

        if num_classes == 0:
            num_classes = detections.size(1)

        for cl in range(1, detections.size(1)):
            dets = detections[0, cl, :]
            # All detections with a score of at least 0.01 for this class.
            dets = dets[dets[:, 0].ge(confidence_threshold)]
            # All ground truths of this class (labels are offset by one).
            truths = anno[anno[:, 4].eq(cl - 1)]

            if truths.numel() == 0:
                # No truths of this class: every detection is a false positive.
                if dets.numel() > 0:
                    fp[cl].extend([1] * dets.size(0))
                    tp[cl].extend([0] * dets.size(0))
                    gts[cl].extend([0] * dets.size(0))
                continue

            truths = truths[:, :-1]
            if dets.numel() < 1:
                # Truths but no detections: only the truth count grows.
                fp[cl].extend([0] * truths.size(0))
                tp[cl].extend([0] * truths.size(0))
                gts[cl].extend([1] * truths.size(0))
                continue

            preds = dets[:, 1:]
            # size [num_truths, num_preds]
            overlaps = jaccard(
                truths.float() / scale.unsqueeze(0).expand_as(truths), preds)

            # found[g] tells whether truth g has already been matched.
            found = [False] * overlaps.size(0)
            # max(0) already drops the reduced dimension.  Squeezing again
            # would turn a single-detection result into a 0-dim tensor that
            # cannot be indexed.
            maxes, max_ids = overlaps.max(0)
            for max_overlap, gt in zip(maxes.tolist(), max_ids.tolist()):
                if max_overlap > ovthresh and not found[gt]:
                    # First sufficiently overlapping hit on this truth.
                    tp[cl].append(1)
                    fp[cl].append(0)
                    gts[cl].append(1)
                    found[gt] = True
                else:
                    # Duplicate hit or insufficient overlap.
                    fp[cl].append(1)
                    tp[cl].append(0)
                    gts[cl].append(0)

        if i % 10 == 0:
            print('Timer: %.4f' % (time.time() - t1))

    for cl in range(1, num_classes):
        if len(gts[cl]) < 1:
            continue
        # Recall, precision and AP of this class.
        tp_cumsum = torch.cumsum(torch.Tensor(tp[cl]), 0)
        fp_cumsum = torch.cumsum(torch.Tensor(fp[cl]), 0)
        gt_cumsum = torch.cumsum(torch.Tensor(gts[cl]), 0)
        rec_cumsum = tp_cumsum.float() / gt_cumsum[-1]
        prec_cumsum = tp_cumsum / (tp_cumsum + fp_cumsum).clamp(min=1e-6)
        ap[cl] = voc_ap(rec_cumsum, prec_cumsum)
        recall[cl] = rec_cumsum[-1]
        precision[cl] = prec_cumsum[-1]
        print('class %d rec %.4f prec %.4f AP %.4f tp %.4f fp %.4f, '
              'gt %.4f' % (cl, recall[cl], precision[cl], ap[cl],
                           sum(tp[cl]), sum(fp[cl]), sum(gts[cl])))

    # mAP = mean of the APs of all scored classes; guard the empty case.
    mAP = sum(ap.values()) / len(ap) if ap else 0.0
    print('mAP', mAP)
    return mAP