def get_target(self, target, anchors, in_w, in_h, pred_box, ignore_threshold):
    """Build per-cell training targets and masks for one detection scale.

    Args:
        target: Label tensor indexed as ``target[b, t, k]``; ``k == 0`` is the
            class id and ``k == 1..4`` are box ``x, y, w, h``, which are
            multiplied by the grid size here (presumably normalised to
            ``[0, 1]`` -- confirm against the dataset). Rows whose values sum
            to zero are treated as padding and skipped.
        anchors: Sequence of ``(w, h)`` pairs, one per ``self.num_anchors``
            (assumed to be in grid units, since they are compared directly
            with the scaled box size -- confirm against the caller).
        in_w: Grid width.
        in_h: Grid height.
        pred_box: Predicted boxes; ``pred_box[b]`` must be viewable as
            ``(num_anchors * in_h * in_w, 4)``.
        ignore_threshold: Cells whose predicted box overlaps a ground truth
            by at least this IoU are removed from the no-object mask.

    Returns:
        Tuple ``(n_obj, mask, noobj_mask, tx, ty, tw, th, tconf, tcls,
        scales)``. ``n_obj`` counts the targets that were actually assigned to
        a cell; the tensors have shape ``(bs, num_anchors, in_h, in_w)``
        (``tcls`` has an extra trailing ``num_classes`` axis).
    """
    bs = target.size(0)
    n_obj = 0
    mask = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    noobj_mask = torch.ones(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    tx = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    ty = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    tw = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    th = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    tconf = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    tcls = torch.zeros(bs, self.num_anchors, in_h, in_w, self.num_classes,
                       requires_grad=False)
    scales = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)

    # Anchor boxes placed at the origin; identical for every target, so
    # build them once instead of once per ground-truth box.
    anchor_shapes = torch.FloatTensor(
        np.concatenate((np.zeros((self.num_anchors, 2)), np.array(anchors)), 1))

    for b in range(bs):
        box_p = pred_box[b].view(-1, 4)
        for t in range(target.shape[1]):
            if target[b, t].sum() == 0:
                continue
            # Convert to position relative to box
            gx = target[b, t, 1] * in_w
            gy = target[b, t, 2] * in_h
            gw = target[b, t, 3] * in_w
            gh = target[b, t, 4] * in_h
            # Get grid box indices
            gi = int(gx)
            gj = int(gy)
            # A centre on the right/bottom border maps to index == grid size
            # (IndexError) and a negative centre would silently wrap around
            # to the opposite side of the grid, so skip such targets.
            if not (0 <= gi < in_w and 0 <= gj < in_h):
                continue
            n_obj += 1
            # Ground-truth box at the origin (shape only) and at its real position
            gt_box_match = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
            gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0)
            # IoU between the gt shape and each anchor shape, and between the
            # gt box and every predicted box of this sample.
            # NOTE(review): the meaning of the third positional argument of
            # bbox_iou is not visible here -- confirm against its definition.
            anch_ious = bbox_iou(gt_box_match, anchor_shapes, True)
            pred_ious = bbox_iou(gt_box, box_p, True).view(self.num_anchors, in_h, in_w)
            # Where the overlap is larger than threshold set mask to zero (ignore)
            noobj_mask[b][pred_ious >= ignore_threshold] = 0
            # Find the best matching anchor box
            best_n = np.argmax(anch_ious)
            best_conf = pred_ious[best_n, gj, gi]
            # Masks
            mask[b, best_n, gj, gi] = 1
            noobj_mask[b, best_n, gj, gi] = 0
            # Coordinates: offset of the centre inside its cell
            tx[b, best_n, gj, gi] = gx - gi
            ty[b, best_n, gj, gi] = gy - gj
            # Width and height: log-ratio to the matched anchor
            tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][0] + 1e-16)
            th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][1] + 1e-16)
            # Objectness target is the IoU of the prediction in the matched cell
            tconf[b, best_n, gj, gi] = best_conf
            # One-hot encoding of label
            tcls[b, best_n, gj, gi, int(target[b, t, 0])] = 1
            # Weight that grows as the (unscaled) box area shrinks
            scales[b, best_n, gj, gi] = 2 - target[b, t, 3] * target[b, t, 4]
    return n_obj, mask, noobj_mask, tx, ty, tw, th, tconf, tcls, scales
def get_target(self,pred_boxes, target, anchors, in_w, in_h, ignore_threshold,conf_mask, noobj_mask , tx, ty, tw, th, tconf,tcls):
    """Fill caller-supplied target tensors, processing the whole batch at once.

    Unlike a per-sample loop, this variant iterates only over the target
    slot ``t`` and handles every batch element of that slot together.
    ``conf_mask``, ``noobj_mask``, ``tx``, ``ty``, ``tw``, ``th``, ``tconf``
    and ``tcls`` are written in place; they are not allocated here.

    Args:
        pred_boxes: Predicted boxes, indexed as ``pred_boxes[best_n, gj, gi]``.
        target: Labels indexed ``target[:, t, k]`` with ``k == 0`` the class
            and ``k == 1..4`` the box values that are scaled by ``in_w``.
        anchors: Anchor ``(w, h)`` pairs.
        in_w: Grid width; used to scale all four box values.
        in_h: Grid height.
            NOTE(review): never referenced in the body, so y and h are
            scaled by ``in_w`` as well -- only equivalent on square grids.
        ignore_threshold: IoU above which ``noobj_mask`` entries are zeroed.

    Returns:
        ``(nGT, nCorrect)``: ``nGT`` grows by ``self.batch_size`` for every
        non-empty target slot; ``nCorrect`` counts IoU values above 0.8.
    """
    # NOTE(review): bs is assigned but self.batch_size is what the body uses;
    # presumably both are the same value -- confirm.
    bs = target.size(0)
    # (Target tensors used to be allocated here; they are now passed in.)
    nGT = 0
    nCorrect = 0
    # Box values scaled to the grid; gt_box is the same data with the
    # centre zeroed so that only the shape is compared with the anchors.
    gx_ = target[:, :, 1:5]* in_w
    gt_box = target[:, :, 1:5]* in_w
    gt_box[:, :, 0:2] = 0
    # Anchors as origin-centred boxes, repeated for every batch element.
    targetbox = torch.FloatTensor(
        np.concatenate((np.zeros((1, self.num_anchors, 2)), np.array([anchors])), 2)).repeat(self.batch_size, 1, 1)
    batch_anchor=torch.FloatTensor(anchors).repeat(self.batch_size, 1,1).cuda()
    for t in range(target.shape[1]):
        # Skip a slot only when it is empty for the whole batch.
        if target[:, t].sum() == 0:
            continue
        # Integer cell indices for every batch element.
        gi= np.array(gx_[:, t, 0].int())
        gj= np.array(gx_[:, t, 1].int())
        # Basic slicing: r_gt_box is a view, so later writes also change gt_box.
        r_gt_box= gt_box[:, t, :]
        anch_ious= bbox_iou(r_gt_box.view(self.batch_size,1,4),targetbox.cuda()) # original author's shape note: "80 3,11,11"
        noobj_mask[anch_ious > ignore_threshold] = 0
        # Best anchor per sample: find where the IoU equals the row maximum.
        values = torch.max(anch_ious, 1, keepdim=True)[0]
        # Zero IoUs are pushed to a huge value so they cannot equal the maximum.
        anch_ious[anch_ious == 0] = 1e+16
        c = anch_ious - values
        best_n=(c==0)
        conf_mask[best_n, gj, gi]=1
        # Coordinates: fractional offset inside the cell
        tx[best_n,gj, gi] = (gx_[:, t, 0] - gx_[:, t, 0].int().float())
        ty[best_n,gj, gi] = (gx_[:, t, 1] - gx_[:, t, 1].int().float())
        # Width and height: log-ratio to the selected anchor
        tw[best_n,gj, gi] =torch.log(gx_[:,t, 2]/batch_anchor[best_n][:,0]+1e-16)
        th[best_n,gj, gi] =torch.log(gx_[:,t, 3]/batch_anchor[best_n][:,1]+1e-16)
        # object
        tconf[best_n,gj, gi] = 1
        # One-hot encoding of label
        tcls[best_n, gj, gi, np.array(target[:, t, 0])] = 1
        # Restore the real centre (in place, through the view) before
        # comparing with the predicted boxes.
        r_gt_box[:,0:2]=gx_[:, t, 0:2]
        pred_box = pred_boxes[best_n,gj, gi]
        iou = bbox_iou(r_gt_box.cuda(), pred_box,
                       x1y1x2y2=False)
        nGT=nGT+self.batch_size
        nCorrect=nCorrect+int(sum(iou > 0.8))
    return nGT, nCorrect
def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
    """Build training targets using only the first label of each sample.

    Args:
        target: Label tensor; only ``target[b, 0, :]`` is read. Index 0 is
            the class id, indices 1..4 are box ``x, y, w, h``, scaled here by
            the grid size (presumably normalised to ``[0, 1]`` -- confirm).
        anchors: Sequence of ``(w, h)`` pairs, one per ``self.num_anchors``.
        in_w: Grid width.
        in_h: Grid height.
        ignore_threshold: Anchors whose shape IoU with the ground truth
            exceeds this value are cleared from ``noobj_mask`` in the
            ground-truth cell.

    Returns:
        Tuple ``(mask, noobj_mask, tx, ty, tw, th, tconf, tcls)`` of tensors
        shaped ``(bs, num_anchors, in_h, in_w)`` (``tcls`` has an extra
        ``num_classes`` axis).
    """
    bs = target.size(0)
    mask = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    noobj_mask = torch.ones(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    tx = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    ty = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    tw = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    th = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    tconf = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    tcls = torch.zeros(bs, self.num_anchors, in_h, in_w, self.num_classes,
                       requires_grad=False)

    # Anchor boxes at the origin do not depend on the sample: build once.
    anchor_shapes = torch.FloatTensor(
        np.concatenate((np.zeros((self.num_anchors, 2)), np.array(anchors)), 1))

    for b in range(bs):
        # Convert to position relative to box
        gx = target[b, 0, 1] * in_w
        gy = target[b, 0, 2] * in_h
        gw = target[b, 0, 3] * in_w
        gh = target[b, 0, 4] * in_h
        # Get grid box indices
        gi = int(gx)
        gj = int(gy)
        # A centre exactly on the right/bottom border gives index == grid
        # size (IndexError); a negative one would wrap around. Leave such
        # samples without a positive target instead of crashing.
        if not (0 <= gi < in_w and 0 <= gj < in_h):
            continue
        # Get shape of gt box
        gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
        # Calculate iou between gt and anchor shapes
        anch_ious = bbox_iou(gt_box, anchor_shapes)
        noobj_mask[b, anch_ious > ignore_threshold, gj, gi] = 0
        # Find the best matching anchor box
        best_n = np.argmax(anch_ious)
        # Masks
        mask[b, best_n, gj, gi] = 1
        # Coordinates: offset of the centre inside its cell
        tx[b, best_n, gj, gi] = gx - gi
        ty[b, best_n, gj, gi] = gy - gj
        # Width and height: log-ratio to the matched anchor
        tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
        th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
        # object
        tconf[b, best_n, gj, gi] = 1
        # One-hot encoding of label
        tcls[b, best_n, gj, gi, int(target[b, 0, 0])] = 1
    return mask, noobj_mask, tx, ty, tw, th, tconf, tcls
def validate(net):
    """Run ``net`` over the validation loader and log the match rate.

    Relies on ``dataloader``, ``yolo_losses`` and ``config`` being defined
    outside this function (they are not visible in this block). For every
    ground-truth box, the first same-class prediction whose IoU reaches
    ``config["iou_thres"]`` counts as correct. The ratio ``correct / n_gt`` is
    logged after every batch and once at the end (under the label "mAP" /
    "Mean Average Precision" used by the log messages).

    If one batch takes more than 5 whole seconds for forward pass plus NMS,
    evaluation is aborted and ``n_gt`` is forced to 1, so the final figure is
    then just the raw ``correct`` count.

    Args:
        net: Callable model; ``net(images)`` must return three outputs.
    """
    n_gt = 0
    correct = 0
    for step, samples in enumerate(dataloader):
        images, labels = samples["image"], samples["label"]
        labels = labels.cuda()
        with torch.no_grad():
            time1 = datetime.datetime.now()
            outputs = net(images)
            output = torch.cat([yolo_losses[i](outputs[i]) for i in range(3)], 1)
            output = non_max_suppression(output, 1, conf_thres=0.5)
            if (datetime.datetime.now() - time1).seconds > 5:
                logging.info('Batch %d time is too long ' % (step))
                n_gt = 1
                break
            # calculate
            for sample_i in range(labels.size(0)):
                # Get labels for sample where width is not zero (dummies)
                target_sample = labels[sample_i, labels[sample_i, :, 3] != 0]
                sample_pred = output[sample_i]
                for obj_cls, tx, ty, tw, th in target_sample:
                    # Get rescaled gt coordinates
                    tx1 = config["img_w"] * (tx - tw / 2)
                    tx2 = config["img_w"] * (tx + tw / 2)
                    ty1 = config["img_h"] * (ty - th / 2)
                    ty2 = config["img_h"] * (ty + th / 2)
                    n_gt += 1
                    box_gt = torch.stack([tx1, ty1, tx2, ty2]).view(1, -1)
                    if sample_pred is None:
                        continue
                    # Iterate through predictions where the class predicted
                    # is same as gt; column 6 holds the predicted class.
                    for pred in sample_pred[sample_pred[:, 6] == obj_cls]:
                        box_pred = pred[:4].view(1, -1)
                        iou = bbox_iou(box_pred, box_gt)
                        if iou >= config["iou_thres"]:
                            correct += 1
                            break
        if n_gt:
            logging.info('Batch [%d/%d] mAP: %.5f' %
                         (step, len(dataloader), float(correct) / n_gt))
    # Guard the final ratio: with no ground truths at all (e.g. an empty
    # loader) the unguarded division raised ZeroDivisionError.
    final_rate = float(correct) / n_gt if n_gt else 0.0
    logging.info('Mean Average Precision: %.5f' % final_rate)
def myloss(self, anchors, y_pred, y_true):
    """Compute the detection loss terms for one scale without a Python loop.

    Side effects: overwrites ``self.reso`` (fixed to 352) and
    ``self.anchors``, and prints the time spent building the targets.

    Args:
        anchors: Anchor ``(w, h)`` pairs; divided by the stride below.
        y_pred: Raw network output, reshaped below from
            ``(bs, nA * (5 + nC), gs, gs)``.
        y_true: Labels indexed ``y_true[:, :, k]`` with ``k == 0..3`` the box
            and ``k == 4`` the class (presumably normalised box values, since
            they are multiplied by the grid size -- confirm).

    Returns:
        Tuple ``(total_loss, x, y, w, h, cls, conf, non_conf)``.

    NOTE(review): this looks like an unfinished vectorisation. Several lines
    below are flagged; ``IoU`` and ``bbox_iou`` are defined elsewhere.
    """
    self.reso = 352
    self.anchors = anchors
    loss = dict()
    # 1. Prepare
    # 1.1 re-organize y_pred
    # [bs, (5+nC)*nA, gs, gs] => [bs, num_anchors, gs, gs, 5+nC]
    bs, _, gs, _ = y_pred.size()
    nA = len(self.anchors)
    nC = self.num_classes
    y_pred = y_pred.view(bs, nA, 5 + nC, gs, gs)
    y_pred = y_pred.permute(0, 1, 3, 4, 2)
    # 1.3 prepare anchor boxes
    stride = self.reso // gs
    anchors = [(a[0] / stride, a[1] / stride) for a in self.anchors]
    # NOTE(review): 3 rows are hard-coded here although nA = len(anchors).
    anchor_bboxes = torch.zeros(3, 4).cuda()
    anchor_bboxes[:, 2:] = torch.Tensor(anchors)
    anchor_bboxes = anchor_bboxes.repeat(bs, 1, 1)
    # 2. Build gt [tx, ty, tw, th] and masks
    # TODO: f1 score implementation
    gt_tx = torch.zeros(bs, nA, gs, gs, requires_grad=False)
    gt_ty = torch.zeros(bs, nA, gs, gs, requires_grad=False)
    gt_tw = torch.zeros(bs, nA, gs, gs, requires_grad=False)
    gt_th = torch.zeros(bs, nA, gs, gs, requires_grad=False)
    obj_mask = torch.zeros(bs, nA, gs, gs, requires_grad=False)
    non_obj_mask = torch.ones(bs, nA, gs, gs, requires_grad=False)
    cls_mask = torch.zeros(bs, nA, gs, gs, nC, requires_grad=False)
    start = time.time()
    # (A per-sample, per-box loop used to live here; the code below works
    # on the whole label tensor instead.)
    gt_bbox = y_true[:, :, :4] * gs # scale bbox relative to feature map
    gt_cls_label = y_true[:, :, 4].int()
    gt_xc = gt_bbox[:, :, 0]
    gt_yc = gt_bbox[:, :, 1]
    gt_w = gt_bbox[:, :, 2]
    gt_h = gt_bbox[:, :, 3]
    gt_i, gt_j = gt_xc.int(), gt_yc.int()
    # Same boxes with the centre zeroed: only the shape is compared.
    gt_box_shape = y_true[:, :, :4] * gs
    gt_box_shape[:, :, 0:2] = 0
    # NOTE(review): anch_ious is never read after this line.
    anch_ious = bbox_iou(gt_box_shape.view(self.batch_size, 1, 4), anchor_bboxes.cuda())
    anchor_ious = IoU(gt_box_shape, anchor_bboxes, format='center')
    # NOTE(review): np.argmax without an axis yields one flat index for the
    # whole batch, not one anchor per box -- confirm this is intended.
    best_anchor = np.argmax(anchor_ious)
    anchor_w, anchor_h = anchors[best_anchor]
    # NOTE(review): targets are indexed [.., gt_i, gt_j] (x index first),
    # while y_pred was laid out by view/permute above -- confirm the axis order.
    gt_tw[:, best_anchor, gt_i, gt_j] = torch.log(gt_w / anchor_w + 1e-16)
    gt_th[:, best_anchor, gt_i, gt_j] = torch.log(gt_h / anchor_h + 1e-16)
    gt_tx[:, best_anchor, gt_i, gt_j] = gt_xc - gt_i
    gt_ty[:, best_anchor, gt_i, gt_j] = gt_yc - gt_j
    obj_mask[:, best_anchor, gt_i, gt_j] = 1
    non_obj_mask[:, anchor_ious > 0.5] = 0 # FIXME: 0.5 as variable
    cls_mask[:, best_anchor, gt_i, gt_j, gt_cls_label] = 1
    # 3. activate raw y_pred
    end = time.time()
    print("yolo_losses", bs, len(y_true), end - start)
    pred_tx = torch.sigmoid(y_pred[..., 0]) # gt tx/ty are not deactivated
    pred_ty = torch.sigmoid(y_pred[..., 1])
    pred_tw = y_pred[..., 2]
    pred_th = y_pred[..., 3]
    pred_conf = y_pred[..., 4]
    pred_cls = y_pred[..., 5:]
    # 4. Compute loss
    obj_mask = obj_mask.cuda()
    non_obj_mask = non_obj_mask.cuda()
    cls_mask = cls_mask.cuda()
    gt_tx, gt_ty = gt_tx.cuda(), gt_ty.cuda()
    gt_tw, gt_th = gt_tw.cuda(), gt_th.cuda()
    # average over batch
    MSELoss = nn.MSELoss()
    BCEWithLogitsLoss = nn.BCEWithLogitsLoss()
    # NOTE(review): BCELoss is instantiated but not used below.
    BCELoss = nn.BCELoss()
    CrossEntropyLoss = nn.CrossEntropyLoss()
    loss['x'] = MSELoss(pred_tx[obj_mask == 1], gt_tx[obj_mask == 1])
    loss['y'] = MSELoss(pred_ty[obj_mask == 1], gt_ty[obj_mask == 1])
    loss['w'] = MSELoss(pred_tw[obj_mask == 1], gt_tw[obj_mask == 1])
    loss['h'] = MSELoss(pred_th[obj_mask == 1], gt_th[obj_mask == 1])
    loss['cls'] = CrossEntropyLoss(
        pred_cls[obj_mask == 1], torch.argmax(cls_mask[obj_mask == 1], 1))
    loss['conf'] = BCEWithLogitsLoss(pred_conf[obj_mask == 1], obj_mask[obj_mask == 1])
    # NOTE(review): the target selected here is all ones (mask values where
    # the mask equals 1), so no-object cells are also pushed towards
    # confidence 1 -- confirm; a zero target looks more likely intended.
    loss['non_conf'] = BCEWithLogitsLoss(pred_conf[non_obj_mask == 1], non_obj_mask[non_obj_mask == 1])
    loss['total_loss'] = loss['x'] + loss['y'] + loss['w'] + loss[
        'h'] + loss['cls'] + loss['conf'] + loss['non_conf']
    # Return order: total_loss, x, y, w, h, cls, conf, non_conf
    return loss['total_loss'], loss['x'], loss['y'], loss['w'], loss[
        'h'], loss['cls'], loss['conf'], loss['non_conf']
def evaluate(config):
    """Evaluate a snapshot on ``config["val_path"]`` and log the match rate.

    A ground-truth box counts as correct when a same-class prediction
    (after NMS with 80 classes and ``conf_thres=0.2``) reaches
    ``config["iou_thres"]`` IoU. The running ratio ``correct / n_gt`` is logged
    after each batch and once at the end; the log messages call it "mAP".

    Args:
        config: Mapping read for ``pretrain_snapshot``, ``yolo.anchors``,
            ``yolo.classes``, ``img_w``, ``img_h``, ``val_path``,
            ``batch_size`` and ``iou_thres``.
    """
    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        logging.warning("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales
    yolo_losses = [
        YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"],
                 (config["img_w"], config["img_h"]))
        for i in range(3)
    ]

    # DataLoader
    dataloader = torch.utils.data.DataLoader(
        COCODataset(config["val_path"], (config["img_w"], config["img_h"]),
                    is_training=False),
        batch_size=config["batch_size"],
        shuffle=False,
        num_workers=16,
        pin_memory=False)

    # Start the eval loop
    logging.info("Start eval.")
    n_gt = 0
    correct = 0
    for step, samples in enumerate(dataloader):
        images, labels = samples["image"], samples["label"]
        labels = labels.cuda()
        with torch.no_grad():
            outputs = net(images)
            output = torch.cat([yolo_losses[i](outputs[i]) for i in range(3)], 1)
            output = non_max_suppression(output, 80, conf_thres=0.2)
            # calculate
            for sample_i in range(labels.size(0)):
                # Get labels for sample where width is not zero (dummies)
                target_sample = labels[sample_i, labels[sample_i, :, 3] != 0]
                sample_pred = output[sample_i]
                for obj_cls, tx, ty, tw, th in target_sample:
                    # Get rescaled gt coordinates
                    tx1 = config["img_w"] * (tx - tw / 2)
                    tx2 = config["img_w"] * (tx + tw / 2)
                    ty1 = config["img_h"] * (ty - th / 2)
                    ty2 = config["img_h"] * (ty + th / 2)
                    n_gt += 1
                    box_gt = torch.stack([tx1, ty1, tx2, ty2]).view(1, -1)
                    if sample_pred is None:
                        continue
                    # Iterate through predictions where the class predicted
                    # is same as gt; column 6 holds the predicted class.
                    for pred in sample_pred[sample_pred[:, 6] == obj_cls]:
                        box_pred = pred[:4].view(1, -1)
                        iou = bbox_iou(box_pred, box_gt)
                        if iou >= config["iou_thres"]:
                            correct += 1
                            break
        if n_gt:
            logging.info('Batch [%d/%d] mAP: %.5f' %
                         (step, len(dataloader), float(correct) / n_gt))

    # Guard the final ratio: an empty dataset (or one without labels)
    # previously ended in ZeroDivisionError here.
    final_rate = float(correct) / n_gt if n_gt else 0.0
    logging.info('Mean Average Precision: %.5f' % final_rate)
def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
    """
    Build the training targets and masks for one detection scale.

    :param target: label tensor indexed ``target[b, t, k]``; ``k == 0`` is the
        class id and ``k == 1..4`` the box ``x, y, w, h`` that are scaled by
        the grid size here (presumably normalised to ``[0, 1]`` -- confirm).
        Rows summing to zero are skipped as padding.
    :param anchors: sequence of ``(w, h)`` pairs, one per ``self.num_anchors``
    :param in_w: grid width
    :param in_h: grid height
    :param ignore_threshold: anchors whose shape IoU with a ground truth
        exceeds this value are cleared from ``noobj_mask`` in that cell
    :return: ``(obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls)``, each of
        shape ``(bs, num_anchors, in_h, in_w)`` (``tcls`` has an extra
        ``num_classes`` axis)
    """
    bs = target.size(0)

    obj_mask = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    noobj_mask = torch.ones(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    tx = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    ty = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    tw = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    th = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    tconf = torch.zeros(bs, self.num_anchors, in_h, in_w, requires_grad=False)
    tcls = torch.zeros(bs, self.num_anchors, in_h, in_w, self.num_classes,
                       requires_grad=False)

    # Anchor boxes at the origin are the same for every target: build once.
    anchor_shapes = torch.FloatTensor(
        np.concatenate((np.zeros((self.num_anchors, 2)), np.array(anchors)), 1))

    for b in range(bs):
        for t in range(target.shape[1]):
            if target[b, t].sum() == 0:
                continue
            # Convert to position relative to box
            gx = target[b, t, 1] * in_w
            gy = target[b, t, 2] * in_h
            gw = target[b, t, 3] * in_w
            gh = target[b, t, 4] * in_h
            # Get grid box indices
            gi = int(gx)
            gj = int(gy)
            # Skip targets whose centre falls outside the grid. The lower
            # bound matters too: a negative index would not fail but wrap
            # around and mark a cell on the opposite side of the grid.
            if not (0 <= gi < in_w and 0 <= gj < in_h):
                continue
            # Get shape of gt box
            gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
            # Calculate iou between gt and anchor shapes
            anch_ious = bbox_iou(gt_box, anchor_shapes)
            # If the overlap is larger than threshold set mask to zero (ignore)
            noobj_mask[b, anch_ious > ignore_threshold, gj, gi] = 0
            # Find the best matching anchor box
            best_n = np.argmax(anch_ious)
            # Masks
            obj_mask[b, best_n, gj, gi] = 1
            # Coordinates: offset of the centre inside its cell
            tx[b, best_n, gj, gi] = gx - gi
            ty[b, best_n, gj, gi] = gy - gj
            # Width and height: log-ratio to the matched anchor
            tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
            th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
            # object
            tconf[b, best_n, gj, gi] = 1
            # One-hot encoding of label
            tcls[b, best_n, gj, gi, int(target[b, t, 0])] = 1

    return obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls
def get_target(self, target, anchors, in_w, in_h):
    """Build training targets for one scale (variant without a no-object mask).

    Every non-empty label row ``target[b, t]`` is assigned to the grid cell
    holding its centre and to the anchor whose shape overlaps it best.
    Labels whose cell index is at or beyond the grid size are reported on
    stdout and skipped.

    Returns:
        Tuple ``(mask, tx, ty, tw, th, tconf, tcls)``; all tensors have shape
        ``(bs, num_anchors, in_h, in_w)`` and ``tcls`` carries an extra
        ``num_classes`` axis.
    """
    batch = target.size(0)

    def _grid(*trailing):
        # Zero tensor over (batch, anchor, row, col) plus optional extra axes.
        return torch.zeros(batch, self.num_anchors, in_h, in_w, *trailing,
                           requires_grad=False)

    mask, tx, ty, tw, th, tconf = (_grid() for _ in range(6))
    tcls = _grid(self.num_classes)

    for b in range(batch):
        for t in range(target.shape[1]):
            label = target[b, t]
            # All-zero rows are padding.
            if label.sum() == 0:
                continue

            # Box in grid units and the cell containing its centre.
            gx, gy = label[1] * in_w, label[2] * in_h
            gw, gh = label[3] * in_w, label[4] * in_h
            gi, gj = int(gx), int(gy)

            # Compare shapes only: ground truth and anchors both sit at the origin.
            gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
            origin_anchors = np.concatenate(
                (np.zeros((self.num_anchors, 2)), np.array(anchors)), 1)
            anchor_shapes = torch.FloatTensor(origin_anchors)
            anch_ious = bbox_iou(gt_box, anchor_shapes)
            best_n = np.argmax(anch_ious)

            if gj >= in_h or gi >= in_w:
                print('Step {0} out of bound'.format(b))
                print('gj: {0}, height: {1} | gi: {2}, width: {3}'.format(
                    gj, in_h, gi, in_w))
                continue

            cell = (b, best_n, gj, gi)
            mask[cell] = 1
            # Centre offset inside the cell.
            tx[cell] = gx - gi
            ty[cell] = gy - gj
            # Size as log-ratio to the chosen anchor.
            tw[cell] = math.log(gw / anchors[best_n][0] + 1e-16)
            th[cell] = math.log(gh / anchors[best_n][1] + 1e-16)
            tconf[cell] = 1
            # One-hot class label.
            tcls[cell + (int(label[0]),)] = 1

    return mask, tx, ty, tw, th, tconf, tcls
def eval(config):
    """
    Evaluate a proposal network and print precision, recall and F-score.

    A ground-truth box is counted as found when at least one proposal kept
    by NMS overlaps it with IoU >= 0.5. Progress is printed every 100
    batches; the final summary also prints the recall averaged per image.

    :param config: mapping read for ``pretrain_snapshot``, ``yolo.anchors``,
        ``img_w``, ``img_h`` and ``val_path``
    :return: None
    """
    is_training = False
    # Load and initialize network
    net = ProposalAttention(config, is_training=is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)

    # Proposal loss with 3 scales (used here to decode the raw outputs)
    val_losses = [
        ProposalLoss(config["yolo"]["anchors"][i],
                     (config["img_w"], config["img_h"]))
        for i in range(3)
    ]

    # DataLoader
    val_loader = torch.utils.data.DataLoader(
        COCOvalDataset(config["val_path"], (config["img_w"], config["img_h"])),
        batch_size=16,
        shuffle=False,
        num_workers=2,
        pin_memory=False)

    # VALIDATION
    total = 0.0
    proposal = 0.0
    correct = 0.0
    net.eval()
    img_cnt = 0
    recall_cnt = 0.0
    for step, samples in enumerate(val_loader):
        images, labels = samples["image"], samples["label"]
        with torch.no_grad():
            outputs = net(images)
            output = torch.cat([val_losses[i](outputs[i]) for i in range(3)], 1)
            batch_detections = non_max_suppression(output)

        # one image at a time
        for label_i in range(labels.size(0)):
            correct_avg = 0
            # calculate total: rows with zero width are padding
            targets = labels[label_i]
            total_avg = sum(1 for _tx, _ty, tw, _th in targets if tw > 0)
            total += total_avg

            # calculate proposal
            predictions = batch_detections[label_i]
            if predictions is None:
                continue
            proposal += predictions.size(0)
            if total_avg == 0:
                # No real target in this image: there is nothing to recall,
                # and dividing by total_avg below used to raise
                # ZeroDivisionError.
                continue
            img_cnt += 1

            # calculate correct
            for tx, ty, tw, th in targets:
                if tw <= 0:
                    # Padding rows are not targets; do not score them.
                    continue
                x1, x2 = config["img_w"] * (tx - tw / 2.0), config["img_w"] * (
                    tx + tw / 2.0)
                y1, y2 = config["img_h"] * (ty - th / 2.0), config["img_h"] * (
                    ty + th / 2.0)
                box_gt = [x1, y1, x2, y2, 1.0]
                box_gt = torch.from_numpy(np.array(box_gt)).float().cuda()
                best_iou = 0.0
                for pred_i in range(predictions.size(0)):
                    iou = bbox_iou(predictions[pred_i].unsqueeze(0),
                                   box_gt.unsqueeze(0)).item()
                    best_iou = max(iou, best_iou)
                if best_iou >= 0.5:
                    correct += 1
                    correct_avg += 1
            recall_cnt += float(correct_avg) / float(total_avg)

        if (step + 1) % 100 == 0:
            # Function-call form prints the same text on Python 2 and 3.
            print('Total: %d\tProposal: %d\tCorrect: %d\tPrecision: %.4f\tRecall: %.4f' % (
                total, proposal, correct, correct / (proposal + 1e-6),
                correct / (total + 1e-6)))

    # The small epsilons keep the ratios defined when a count is zero.
    precision = correct / (proposal + 1e-6)
    recall = correct / (total + 1e-6)
    fscore = (2.0 * precision * recall) / (precision + recall + 1e-6)
    print("Precision: %.4f\tRecall: %.4f\tFscore: %.4f" % (precision, recall, fscore))
    print("Avg Recall: %.4f" % (recall_cnt / float(img_cnt + 1e-6)))
def GetIOU(bbox1, bbox2):
    """Return the IoU of two boxes as a Python float.

    Each box is turned into a one-row tensor and handed to
    ``common.utils.bbox_iou`` with ``x1y1x2y2=True``.
    """
    from common.utils import bbox_iou

    first, second = (torch.Tensor(box).unsqueeze(0) for box in (bbox1, bbox2))
    return bbox_iou(first, second, x1y1x2y2=True).item()
def evaluate(config):
    """Evaluate on ``config["test_path"]`` and log per-class precision/recall.

    Three per-class counters are kept across the whole run: predictions
    after NMS (``pred_histro``), ground truths (``gt_histro``) and
    ground truths matched by a same-class prediction with IoU of at least
    ``config["iou_thresh"]`` (``correct_histro``). The logged percentages are
    ``correct / pred`` ("Precision") and ``correct / gt`` ("Recall").

    NOTE(review): the source arrived with its whitespace collapsed, so the
    nesting of the final ``if n_gt:`` report is reconstructed (placed once per
    batch, as in the sibling evaluate functions) -- confirm against the
    original file.
    """
    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)
    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()
    # Restore pretrain model
    if config["pretrain_snapshot"]:
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        logging.warning("missing pretrain_snapshot!!!")
    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"], (config["img_w"], config["img_h"])))
    # DataLoader
    dataloader = torch.utils.data.DataLoader(dataset=COCODataset(config["test_path"], config["img_w"]), batch_size=config["batch_size"], shuffle=False, num_workers=8, pin_memory=False)
    # Start the eval loop
    n_gt = 0
    correct = 0
    # Per-class counters. gt/pred start at 1 so the percentage divisions
    # below can never divide by zero.
    gt_histro={}
    pred_histro = {}
    correct_histro = {}
    for i in range(config["yolo"]["classes"]):
        gt_histro[i] = 1
        pred_histro[i] = 1
        correct_histro[i] = 0
    # images holds every image of the batch, labels every label of the batch
    for step, (images, labels) in enumerate(dataloader):
        labels = labels.cuda()
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))
            # Concatenate the predictions of the three scales along dim 1
            # (dim 0: images of the batch, dim 1: predicted boxes of one
            # image, dim 2: the values of each prediction)
            output = torch.cat(output_list, dim=1)
            # Apply non-maximum suppression (class_nms replaced an earlier
            # call to non_max_suppression with the same arguments)
            output = class_nms(prediction=output, num_classes=config["yolo"]["classes"],conf_thres=config["conf_thresh"], nms_thres=config["nms_thresh"])
            # calculate
            for sample_i in range(labels.size(0)):
                # Count every prediction by its class (column 6)
                sample_pred = output[sample_i]
                if sample_pred is not None:
                    for i in range(sample_pred.shape[0]):
                        pred_histro[int(sample_pred[i,6])] += 1
                # Get labels for sample where width is not zero (dummies)
                target_sample = labels[sample_i, labels[sample_i, :, 3] != 0]
                # NOTE(review): both counters are reset for every sample, so
                # after the loops they describe only the last sample seen --
                # confirm this is intended.
                n_gt=0
                correct=0
                # Ground truth: class id obj_cls, relative centre x, relative
                # centre y, relative width w, relative height h
                for obj_cls, tx, ty, tw, th in target_sample:
                    # Get rescaled gt coordinates: convert to input-pixel
                    # corners, top-left (tx1, ty1) and bottom-right (tx2, ty2)
                    tx1, tx2 = config["img_w"] * (tx - tw / 2), config["img_w"] * (tx + tw / 2)
                    ty1, ty2 = config["img_h"] * (ty - th / 2), config["img_h"] * (ty + th / 2)
                    # Count ground truths for the statistics
                    n_gt += 1
                    gt_histro[int(obj_cls)] += 1
                    # Reshape into a (1, 4) tensor for the IoU computation
                    box_gt = torch.cat([coord.unsqueeze(0) for coord in [tx1, ty1, tx2, ty2]]).view(1, -1)
                    sample_pred = output[sample_i]
                    if sample_pred is not None:
                        # Iterate through predictions where the class predicted is same as gt;
                        # the first one reaching the IoU threshold marks this
                        # ground truth as found
                        for x1, y1, x2, y2, conf, obj_conf, obj_pred in sample_pred[sample_pred[:, 6] == obj_cls]:
                            box_pred = torch.cat([coord.unsqueeze(0) for coord in [x1, y1, x2, y2]]).view(1, -1)
                            iou = bbox_iou(box_pred, box_gt)
                            if iou >= config["iou_thresh"]:
                                correct += 1
                                correct_histro[int(obj_pred)] += 1
                                break
        # Report only when the last processed sample had ground truths
        # (see the reset note above).
        if n_gt:
            types = config["types"]
            reverse_types = {}
            # Build the inverse mapping of types (value -> key), used to
            # label each class index in the log output
            for key in types.keys():
                reverse_types[types[key]] = key
            logging.info('Precision:%s' % str([reverse_types[i] +':'+ str(int(100 * correct_histro[i] / pred_histro[i])) for i in range(config["yolo"]["classes"]) ]))
            logging.info('Recall :%s' % str([reverse_types[i] +':'+ str(int(100 * correct_histro[i] / gt_histro[i])) for i in range(config["yolo"]["classes"])]))
def voc():
    """Score pre-computed ROI proposals against the VOC label files.

    The sorted ROI files and the sorted label files are paired by position.
    Label rows are ``class, x_centre, y_centre, width, height`` relative to
    the image size (the class column is dropped and the rest is scaled by
    the image width/height below). A ground-truth box counts as found when
    some proposal overlaps it with IoU >= 0.5. Running counts are printed
    per image, precision/recall/F-score at the end. For the first ten
    images a side-by-side debug plot (ground truth blue, proposals red) is
    saved.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = '1'
    gnd_dir = '/home/yz/cde/ProposalYOLO/data/voc/Labels'
    roi_dir = '/home/yz/cde/MxRCNN/roi/voc100'
    img_dir = '/home/yz/cde/ProposalYOLO/data/voc/JPEGImages'

    rois = sorted(os.listdir(roi_dir))
    gnds = sorted(os.listdir(gnd_dir))
    assert len(rois) == len(gnds)

    total = 0.0
    proposal = 0.0
    correct = 0.0

    for i, (roi_name, gnd_name) in enumerate(zip(rois, gnds)):
        # 1 Prediction
        pred_boxes = np.loadtxt(os.path.join(roi_dir, roi_name))
        if pred_boxes.ndim == 1 and pred_boxes.size:
            # A file with a single proposal loads as a flat vector; treat
            # it as one row instead of crashing when indexing its "rows".
            pred_boxes = pred_boxes.reshape(1, -1)

        # 2 Ground-truth (drop the leading class column)
        cords = np.loadtxt(os.path.join(gnd_dir, gnd_name))
        if cords.ndim == 1:
            # Single label row: loadtxt returns a flat vector.
            cords = cords[1:]
            cords = cords.reshape(1, cords.shape[0])
        else:
            cords = cords[:, 1:]
        if cords.shape[1] < 4:
            # Empty label file: nothing to score for this image.
            continue

        # 3 Height & Width
        stem = gnd_name.split('.')[0]
        img = os.path.join(img_dir, stem + '.jpg')
        print(img)
        im = cv2.imread(img, cv2.IMREAD_COLOR)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        height, width = im.shape[:2]

        # Centre/size relative coordinates -> absolute corner coordinates
        gt_boxes = np.zeros(cords.shape)
        gt_boxes[:, 0] = (cords[:, 0] - cords[:, 2] / 2) * width
        gt_boxes[:, 1] = (cords[:, 1] - cords[:, 3] / 2) * height
        gt_boxes[:, 2] = (cords[:, 0] + cords[:, 2] / 2) * width
        gt_boxes[:, 3] = (cords[:, 1] + cords[:, 3] / 2) * height

        if i < 10:  # Debug purpose
            fig, (ax1, ax2) = plt.subplots(1, 2)
            for ax, boxes, color in ((ax1, gt_boxes, 'blue'),
                                     (ax2, pred_boxes, 'red')):
                ax.imshow(im)
                for box in boxes:
                    ax.add_patch(patches.Rectangle(
                        (box[0], box[1]), box[2] - box[0], box[3] - box[1],
                        linewidth=2, edgecolor=color, facecolor='none'))
                ax.axis('off')
            fig.savefig('/home/yz/cde/ProposalYOLO/eval/RPN/voc100/{}'.format(stem),
                        bbox_inches='tight', pad_inches=0.0)
            # Close exactly the figure created above so none is leaked.
            plt.close(fig)

        total += gt_boxes.shape[0]
        proposal += pred_boxes.shape[0]

        # Convert every proposal once per image, not once per (gt, proposal) pair.
        pred_tensors = [torch.from_numpy(box).float().cuda().unsqueeze(0)
                        for box in pred_boxes]
        for gt_row in gt_boxes:
            gt = torch.from_numpy(gt_row).float().cuda().unsqueeze(0)
            best_iou = 0.0
            for pred in pred_tensors:
                best_iou = max(bbox_iou(pred, gt).item(), best_iou)
            if best_iou >= 0.5:
                correct += 1

        # Formatted explicitly so the line is identical on Python 2 and 3.
        print('%s %s %s %s' % (total, proposal, correct, correct / total))

    # Guard the ratios so an empty run reports zeros instead of raising.
    precision = correct / proposal if proposal else 0.0
    recall = correct / total if total else 0.0
    pr_sum = precision + recall
    fscore = (2.0 * precision * recall) / pr_sum if pr_sum else 0.0
    print("Precision: %.4f\tRecall: %.4f\tFscore: %.4f" % (precision, recall, fscore))
def coco():
    """Score pre-computed ROI proposals against the COCO 5k label files.

    Image names are read line by line from the list file, in step with the
    sorted ROI files. Label files are used as loaded (rows are taken to be
    absolute ``x1, y1, x2, y2`` boxes, as the debug plot draws them that
    way -- confirm against the label format). A ground-truth box counts as
    found when some proposal overlaps it with IoU >= 0.5. Running counts
    are printed per image, precision/recall/F-score at the end. For the
    first ten ROI files a side-by-side debug plot (ground truth blue,
    proposals red) is saved.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = '4'
    gnd_dir = '/home/yz/cde/ProposalYOLO/data/coco/5kxLabels'
    roi_dir = '/home/yz/cde/MxRCNN/roi/coco10'
    img_dir = '/home/yz/cde/ProposalYOLO/data/coco/images/val2014'
    fn = '/home/yz/cde/ProposalYOLO/data/coco/5kx.txt'

    rois = sorted(os.listdir(roi_dir))
    gnds = sorted(os.listdir(gnd_dir))
    assert len(rois) == len(gnds)

    total = 0.0
    proposal = 0.0
    correct = 0.0

    # The context manager closes the list file even if an image fails.
    with open(fn, 'r') as fn_strm:
        for i in range(len(rois)):
            # 0 Name
            line = fn_strm.readline()
            name = line.split('\n')[0]

            # 1 Prediction
            pred_boxes = np.loadtxt(os.path.join(roi_dir, rois[i]))
            if pred_boxes.ndim == 1 and pred_boxes.size:
                # A file with a single proposal loads as a flat vector;
                # treat it as one row instead of crashing later.
                pred_boxes = pred_boxes.reshape(1, -1)

            # 2 Ground-truth
            cords = np.loadtxt(os.path.join(gnd_dir, name.replace('jpg', 'txt')))
            if cords.ndim == 1:
                # Single (or empty) label file: make it a one-row matrix.
                cords = cords.reshape(1, cords.shape[0])

            # 3 Image
            img = os.path.join(img_dir, name)
            print(img)
            im = cv2.imread(img, cv2.IMREAD_COLOR)
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

            gt_boxes = cords
            if gt_boxes.shape == (1, 0):
                # Empty label file: nothing to score for this image.
                continue

            if i < 10:  # Debug purpose
                fig, (ax1, ax2) = plt.subplots(1, 2)
                for ax, boxes, color, line_width in ((ax1, gt_boxes, 'blue', 2),
                                                     (ax2, pred_boxes, 'red', 1)):
                    ax.imshow(im)
                    for box in boxes:
                        ax.add_patch(patches.Rectangle(
                            (box[0], box[1]), box[2] - box[0], box[3] - box[1],
                            linewidth=line_width, edgecolor=color,
                            facecolor='none'))
                    ax.axis('off')
                # NOTE(review): the plot is named after gnds[i] although the
                # labels were loaded via the list file -- the two orders may
                # differ; confirm which name is wanted.
                fig.savefig('/home/yz/cde/ProposalYOLO/eval/RPN/test10/{}'.format(
                    gnds[i].split('.')[0]), bbox_inches='tight', pad_inches=0.0)
                # Close exactly the figure created above so none is leaked.
                plt.close(fig)

            total += gt_boxes.shape[0]
            proposal += pred_boxes.shape[0]

            # Convert every proposal once per image, not once per pair.
            pred_tensors = [torch.from_numpy(box).float().cuda().unsqueeze(0)
                            for box in pred_boxes]
            for gt_row in gt_boxes:
                gt = torch.from_numpy(gt_row).float().cuda().unsqueeze(0)
                best_iou = 0.0
                for pred in pred_tensors:
                    best_iou = max(bbox_iou(pred, gt).item(), best_iou)
                if best_iou >= 0.5:
                    correct += 1

            # Formatted explicitly so the line is identical on Python 2 and 3.
            print('%s %s %s %s' % (total, proposal, correct, correct / total))

    # Guard the ratios so an empty run reports zeros instead of raising.
    precision = correct / proposal if proposal else 0.0
    recall = correct / total if total else 0.0
    pr_sum = precision + recall
    fscore = (2.0 * precision * recall) / pr_sum if pr_sum else 0.0
    print("Precision: %.4f\tRecall: %.4f\tFscore: %.4f" % (precision, recall, fscore))
def evaluate(config):
    """Evaluate on ``config["val_path"]``, drawing predictions and logging rates.

    For every sample the predictions kept by NMS are drawn onto a BGR copy
    of the image through ``draw_prediction``. Each ground-truth box is then
    compared with the same-class predictions; the first one reaching
    ``config["iou_thresh"]`` IoU marks it as correct. After every batch the
    running ratio ``correct / n_gt`` and two per-class percentage lists are
    logged; the final ratio is logged once more at the end.

    NOTE(review): ``pred_histro`` is incremented once per prediction examined
    while matching (not once per prediction), and the per-class lists are
    labelled "mAP Histro" (``correct / gt``) and "Recall His"
    (``correct / pred``) -- confirm the counting and the labels are intended.
    """
    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        logging.warning("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales
    num_classes = config["yolo"]["classes"]
    yolo_losses = [
        YOLOLoss(config["yolo"]["anchors"][i], num_classes,
                 (config["img_w"], config["img_h"]))
        for i in range(3)
    ]

    # DataLoader
    dataloader = torch.utils.data.DataLoader(
        dataset=COCODataset(config["val_path"], config["img_w"]),
        batch_size=config["batch_size"],
        shuffle=True,
        num_workers=1,
        pin_memory=False)

    # Start the eval loop
    logging.info("Start eval.")
    n_gt = 0
    correct = 0
    logging.info('%s' % str(dataloader))
    # Per-class counters; gt/pred start at 1 so the percentage divisions
    # below can never divide by zero.
    gt_histro = {i: 1 for i in range(num_classes)}
    pred_histro = {i: 1 for i in range(num_classes)}
    correct_histro = {i: 0 for i in range(num_classes)}

    # images holds every image of the batch, labels every label of the batch
    for step, (images, labels) in enumerate(dataloader):
        labels = labels.cuda()
        with torch.no_grad():
            outputs = net(images)
            # Concatenate the predictions of the three scales along dim 1
            # (dim 0: images of the batch, dim 1: predicted boxes of one
            # image, dim 2: the values of each prediction)
            batch_output = torch.cat(
                [yolo_losses[i](outputs[i]) for i in range(3)], dim=1)
            logging.info('%s' % str(batch_output.shape))
            # Apply non-maximum suppression
            batch_output = non_max_suppression(
                prediction=batch_output, num_classes=num_classes,
                conf_thres=config["conf_thresh"], nms_thres=config["nms_thresh"])
            # calculate
            for sample_index_in_batch in range(labels.size(0)):
                # Image tensor is C(RGB) x H x W; convert to the cv2 layout
                # H x W x C(BGR) for drawing.
                sample_image = images[sample_index_in_batch].numpy()
                sample_image = np.transpose(sample_image, (1, 2, 0))
                sample_image = cv2.cvtColor(sample_image, cv2.COLOR_RGB2BGR)
                logging.debug("fetched img %d size %s" %
                              (sample_index_in_batch, sample_image.shape))
                # Get labels for sample where width is not zero (dummies;
                # label arrays are initialised to zeros)
                target_sample = labels[sample_index_in_batch,
                                       labels[sample_index_in_batch, :, 3] != 0]
                # Predictions of this sample (None when NMS kept nothing)
                sample_pred = batch_output[sample_index_in_batch]
                if sample_pred is not None:
                    # Draw every predicted box
                    for x1, y1, x2, y2, conf, obj_conf, obj_pred in sample_pred:
                        sample_image = draw_prediction(
                            sample_image, conf, obj_conf, int(obj_pred),
                            (x1, y1, x2, y2), config)
                # Ground truth: class id obj_cls, relative centre x, relative
                # centre y, relative width w, relative height h
                for obj_cls, tx, ty, tw, th in target_sample:
                    # Get rescaled gt coordinates: input-pixel corners,
                    # top-left (tx1, ty1) and bottom-right (tx2, ty2)
                    tx1 = config["img_w"] * (tx - tw / 2)
                    tx2 = config["img_w"] * (tx + tw / 2)
                    ty1 = config["img_h"] * (ty - th / 2)
                    ty2 = config["img_h"] * (ty + th / 2)
                    # Count ground truths for the statistics
                    n_gt += 1
                    gt_histro[int(obj_cls)] += 1
                    # (1, 4) tensor for the IoU computation
                    box_gt = torch.stack([tx1, ty1, tx2, ty2]).view(1, -1)
                    if sample_pred is None:
                        continue
                    # Iterate through predictions where the class predicted
                    # is same as gt; column 6 holds the predicted class.
                    for pred in sample_pred[sample_pred[:, 6] == obj_cls]:
                        pred_cls = int(pred[6])
                        box_pred = pred[:4].view(1, -1)
                        pred_histro[pred_cls] += 1
                        iou = bbox_iou(box_pred, box_gt)
                        if iou >= config["iou_thresh"]:
                            correct += 1
                            correct_histro[pred_cls] += 1
                            break
        if n_gt:
            types = config["types"]
            # Inverse mapping of types (value -> key) to label class indices
            reverse_types = {types[key]: key for key in types}
            logging.info('Batch [%d/%d] mAP: %.5f' %
                         (step, len(dataloader), float(correct) / n_gt))
            logging.info('mAP Histro:%s' % str([
                reverse_types[i] + ':' + str(int(100 * correct_histro[i] / gt_histro[i]))
                for i in range(num_classes)]))
            logging.info('Recall His:%s' % str([
                reverse_types[i] + ':' + str(int(100 * correct_histro[i] / pred_histro[i]))
                for i in range(num_classes)]))

    # Guard the final ratio: an empty dataset (or one without labels)
    # previously ended in ZeroDivisionError here.
    final_rate = float(correct) / n_gt if n_gt else 0.0
    logging.info('Mean Average Precision: %.5f' % final_rate)
def evaluate(config):
    """Poll the checkpoint directories and score each new ``.weights`` file.

    Each pass of the outer ``while 1`` loop lists a checkpoint directory,
    picks the weights file at the current index, loads it into the network,
    runs the whole data loader through the model and counts ground-truth
    boxes that have a same-class prediction with IoU >=
    ``config["iou_thres"]``. The resulting ratio and a list of image paths
    are handed to ``write_ini`` under ``<checkpoint_path>/result/``.

    Args:
        config: mapping read for the keys ``train_path``, ``img_w``,
            ``img_h``, ``batch_size``, ``pretrain_snapshot`` and
            ``iou_thres``; it is also passed to ``build_yolov3``.

    NOTE(review): the source layout was collapsed onto single lines, so the
    nesting below is a reconstruction. The owner of the ``else`` after the
    prediction loop and of the final ``break`` should be confirmed.
    """
    # Alternative checkpoint locations kept from the original source:
    # checkpoint_paths = {'58': r'\\192.168.25.58\Team-CV\checkpoints\torch_yolov3'}
    checkpoint_paths = {'39': r'F:\Team-CV\checkpoints\shuffle_v2/'}
    # checkpoint_paths = {'68': r'E:\github\YOLOv3_PyTorch\evaluate\weights'}
    # Per-directory state: name of the last weights file picked up, and the
    # index of the next entry to look at in the directory listing.
    post_weights = {k: 0 for k in checkpoint_paths.keys()}
    weight_index = {k: 0 for k in checkpoint_paths.keys()}
    time_inter = 10  # value passed to time.sleep() between polls
    dataloader = torch.utils.data.DataLoader(COCODataset(
        config["train_path"], (config["img_w"], config["img_h"]),
        is_training=False, is_scene=True),
        batch_size=config["batch_size"],
        shuffle=False, num_workers=0, pin_memory=False,
        drop_last=True)
    net, yolo_losses = build_yolov3(config)
    while 1:
        for key, checkpoint_path in checkpoint_paths.items():
            os.makedirs(checkpoint_path + '/result', exist_ok=True)
            # NOTE(review): os.listdir() order is not sorted, yet the listing
            # is indexed by position (weight_index) - confirm this is intended.
            checkpoint_weights = os.listdir(checkpoint_path)
            checkpoint_result = os.listdir(checkpoint_path + '/result')
            # Third "_"-separated field of each result file name, minus the
            # last four characters (the ".ini" suffix).
            checkpoint_result = [
                cweight.split("_")[2][:-4] for cweight in checkpoint_result
                if cweight.endswith('ini')
            ]
            checkpoint_weights = [
                cweight for cweight in checkpoint_weights
                if cweight.endswith('weights')
            ]
            # Nothing new to look at yet: wait and poll again.
            if weight_index[key] >= len(checkpoint_weights):
                print('weight_index[key]', weight_index[key],
                      len(checkpoint_weights))
                time.sleep(time_inter)
                continue
            # Same file as the one picked up last time: wait and poll again.
            if post_weights[key] == checkpoint_weights[weight_index[key]]:
                print('post_weights[key]', post_weights[key])
                time.sleep(time_inter)
                continue
            post_weights[key] = checkpoint_weights[weight_index[key]]
            if post_weights[key].endswith("_.weights"):
                # Check whether the weights file has finished being saved.
                print("post_weights[key].split('_')",
                      post_weights[key].split('_'))
                time.sleep(time_inter)
                continue
            # Skip weights whose second name field already has a result file.
            if checkpoint_weights[weight_index[key]].split(
                    "_")[1][:-8] in checkpoint_result:
                print('weight_index[key] +', weight_index[key])
                weight_index[key] += 1
                # With time_inter == 10 this is 10 // 20 == 0, i.e. sleep(0).
                time.sleep(time_inter // 20)
                continue
            weight_index[key] += 1
            try:
                if config["pretrain_snapshot"]:
                    # Restore pretrain model
                    state_dict = torch.load(config["pretrain_snapshot"])
                    logging.info("loading model from %s" %
                                 config["pretrain_snapshot"])
                    net.load_state_dict(state_dict)
                else:
                    state_dict = torch.load(
                        os.path.join(checkpoint_path, post_weights[key]))
                    logging.info(
                        "loading model from %s" %
                        os.path.join(checkpoint_path, post_weights[key]))
                    net.load_state_dict(state_dict)
            except Exception as E:
                # Any load failure is printed and the file is retried/skipped
                # on a later poll.
                print(E)
                time.sleep(time_inter)
                continue
            logging.info("Start eval.")
            # Start the eval loop
            n_gt = 0
            correct = 0
            imagepath_list = []
            for step, samples in enumerate(dataloader):
                images, labels, image_paths = samples["image"], samples[
                    "label"], samples["img_path"]
                labels = labels.cuda()
                with torch.no_grad():
                    time1 = datetime.datetime.now()
                    outputs = net(images)
                    output_list = []
                    for i in range(3):
                        output_list.append(yolo_losses[i](outputs[i]))
                    output = torch.cat(output_list, 1)
                    output = non_max_suppression(output, 1, conf_thres=0.5)
                    # Abort this checkpoint when one batch took more than 5 s;
                    # n_gt is forced to 1 so the division after the loop
                    # cannot divide by zero.
                    if ((datetime.datetime.now() - time1).seconds > 5):
                        logging.info('Batch %d time is too long ' % (step))
                        n_gt = 1
                        break
                    # Elapsed milliseconds; note the clock is read twice here.
                    print(
                        "time2",
                        (datetime.datetime.now() - time1).seconds * 1000 +
                        (datetime.datetime.now() - time1).microseconds // 1000)
                    # calculate
                    for sample_i in range(labels.size(0)):
                        # Get labels for sample where width is not zero (dummies)
                        target_sample = labels[sample_i, labels[sample_i, :,
                                                                3] != 0]
                        for obj_cls, tx, ty, tw, th in target_sample:
                            # Get rescaled gt coordinates
                            tx1, tx2 = config["img_w"] * (
                                tx - tw / 2), config["img_w"] * (tx + tw / 2)
                            ty1, ty2 = config["img_h"] * (
                                ty - th / 2), config["img_h"] * (ty + th / 2)
                            n_gt += 1
                            box_gt = torch.cat([
                                coord.unsqueeze(0)
                                for coord in [tx1, ty1, tx2, ty2]
                            ]).view(1, -1)
                            sample_pred = output[sample_i]
                            if sample_pred is not None:
                                # Iterate through predictions where the class predicted is same as gt
                                for x1, y1, x2, y2, conf, obj_conf, obj_pred in sample_pred[
                                        sample_pred[:, 6] == obj_cls.cuda()]:
                                    box_pred = torch.cat([
                                        coord.unsqueeze(0)
                                        for coord in [x1, y1, x2, y2]
                                    ]).view(1, -1)
                                    iou = bbox_iou(box_pred, box_gt)
                                    if iou >= config["iou_thres"]:
                                        correct += 1
                                        break
                                else:
                                    # NOTE(review): read as a for/else (no
                                    # prediction matched this ground truth);
                                    # confirm against the original layout.
                                    if image_paths[
                                            sample_i] not in imagepath_list:
                                        imagepath_list.append(
                                            image_paths[sample_i])
                            else:
                                # No predictions at all for this sample.
                                if image_paths[sample_i] not in imagepath_list:
                                    imagepath_list.append(
                                        image_paths[sample_i])
                if n_gt:
                    logging.info('Batch [%d/%d] err_count:%d mAP: %.5f' %
                                 (step, len(dataloader), len(imagepath_list),
                                  float(correct / n_gt)))
            # NOTE(review): n_gt is still 0 here when the loader produced no
            # ground-truth boxes, which makes the divisions below raise
            # ZeroDivisionError.
            logging.info('Mean Average Precision: %.5f' %
                         float(correct / n_gt))
            Mean_Average = float(correct / n_gt)
            # Result file name: mean of the first "_"-separated field of the
            # weights name (parsed as float) and this run's score, followed
            # by the weights name without its ".weights" suffix.
            ini_name = os.path.join(
                checkpoint_path + '/result/', '%.4f_%s.ini' %
                ((float(post_weights[key].split("_")[0]) +
                  float(correct / n_gt)) / 2,
                 post_weights[key].replace(".weights", "")))
            write_ini(ini_name, Mean_Average, imagepath_list)
            # NOTE(review): placed at the end of the per-directory loop body;
            # the collapsed source does not show its indentation - confirm.
            break
def get_target(self, target, anchors, in_w, in_h, ignore_threshold, pred_boxes):
    """Build the per-cell training targets for one detection scale.

    Args:
        target: ground-truth tensor read as ``target[b, t, 0..4]`` =
            (class id, centre x, centre y, width, height). The box values are
            multiplied by the grid size here, so they are presumably
            normalised to [0, 1] - confirm against the data loader. Rows that
            sum to zero are treated as padding and skipped.
        anchors: sequence of ``(w, h)`` pairs, one per anchor.
        in_w: grid width.
        in_h: grid height.
        ignore_threshold: a predicted box in the target's cell whose IoU with
            the ground truth exceeds this value is removed from the
            no-object mask.
        pred_boxes: predicted boxes read as ``pred_boxes[b, :, gj, gi]`` and
            compared with ``bbox_iou(..., x1y1x2y2=False)``.

    Returns:
        Tuple ``(mask, noobj_mask, tx, ty, tw, th, gwxh, tconf, tcls)``; all
        have shape ``(bs, num_anchors, in_h, in_w)`` except ``tcls``, which
        has a trailing ``num_classes`` axis.
    """
    bs = target.size(0)
    grid_shape = (bs, self.num_anchors, in_h, in_w)
    mask = torch.zeros(*grid_shape, requires_grad=False)
    noobj_mask = torch.ones(*grid_shape, requires_grad=False)
    tx = torch.zeros(*grid_shape, requires_grad=False)
    ty = torch.zeros(*grid_shape, requires_grad=False)
    tw = torch.zeros(*grid_shape, requires_grad=False)
    th = torch.zeros(*grid_shape, requires_grad=False)
    gwxh = torch.zeros(*grid_shape, requires_grad=False)
    tconf = torch.zeros(*grid_shape, requires_grad=False)
    tcls = torch.zeros(*grid_shape, self.num_classes, requires_grad=False)

    # Anchor shapes as (0, 0, w, h) rows. They do not depend on the target,
    # so they are built once instead of once per ground-truth box.
    anchor_shapes = torch.FloatTensor(
        np.concatenate((np.zeros((self.num_anchors, 2)), np.array(anchors)), 1))

    for b in range(bs):
        for t in range(target.shape[1]):
            if target[b, t].sum() == 0:
                continue
            # Convert to position relative to box
            gx = target[b, t, 1] * in_w
            gy = target[b, t, 2] * in_h
            gw = target[b, t, 3] * in_w
            gh = target[b, t, 4] * in_h
            # Grid cell indices, clamped to the grid: a centre lying exactly
            # on the right/bottom border gives int(gx) == in_w (or
            # int(gy) == in_h), which would otherwise raise IndexError below.
            gi = min(max(int(gx), 0), in_w - 1)
            gj = min(max(int(gy), 0), in_h - 1)
            # Shape-only ground-truth box (origin at 0, 0)
            gt_box = torch.FloatTensor([0, 0, gw, gh]).unsqueeze(0)
            # Calculate iou between gt and anchor shapes
            anch_ious = bbox_iou(gt_box, anchor_shapes, x1y1x2y2=False)
            # Where the predicted box overlaps the gt by more than the
            # threshold, drop it from the no-object mask (ignore)
            pred_ious = bbox_iou(
                torch.FloatTensor([gx, gy, gw, gh]).unsqueeze(0),
                pred_boxes[b, :, gj, gi].cpu(),
                x1y1x2y2=False)
            noobj_mask[b, pred_ious > ignore_threshold, gj, gi] = 0
            # NOTE(review): the best anchor's own cell is not cleared from
            # noobj_mask unless its predicted IoU passes the threshold -
            # confirm that the loss expects this.
            # Find the best matching anchor box
            best_n = int(np.argmax(anch_ious))
            # Masks
            mask[b, best_n, gj, gi] = 1
            # Coordinates (offset of the centre inside its cell)
            tx[b, best_n, gj, gi] = gx - gi
            ty[b, best_n, gj, gi] = gy - gj
            # Width and height, as log-ratio to the matched anchor
            tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
            th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
            gwxh[b, best_n, gj, gi] = torch.sigmoid(gw) * torch.sigmoid(gh)
            # object
            tconf[b, best_n, gj, gi] = 1
            # One-hot encoding of label
            tcls[b, best_n, gj, gi, int(target[b, t, 0])] = 1
    return mask, noobj_mask, tx, ty, tw, th, gwxh, tconf, tcls
def eval_voc(self, val_dataset, classes, iou_thresh=0.5):
    """Run the detector over ``val_dataset`` and log a VOC-style mAP.

    Every detection is recorded as (correct flag, confidence, predicted
    class). These records and the per-class ground-truth counts are written
    to ``results.json`` in the current working directory, read back, and
    turned into per-class AP values whose mean is logged.

    Args:
        val_dataset: iterable of batches, each indexed with the keys
            ``'image'`` and ``'label'``; ``len(val_dataset)`` is used for
            progress logging. Label rows are read as
            ``(class id, box...)`` and an all-zero row ends the list.
        classes: sequence of class names; a class id is an index into it.
        iou_thresh: IoU a detection must exceed to count as a true positive.

    Returns:
        None. The mAP is only logged.
    """
    logging.info('Start Evaling')

    def voc_ap(rec, prec, use_07_metric=False):
        """Compute VOC AP from cumulative recall and precision arrays.

        With ``use_07_metric`` the VOC07 11-point average is used; otherwise
        the area under the monotone precision envelope is returned.
        """
        if use_07_metric:
            # 11 point metric
            ap = 0.
            for t in np.arange(0., 1.1, 0.1):
                if np.sum(rec >= t) == 0:
                    p = 0
                else:
                    p = np.max(prec[rec >= t])
                ap = ap + p / 11.
            return ap
        # correct AP calculation
        # first append sentinel values at the end
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))
        # compute the precision envelope
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
        # to calculate area under PR curve, look for points
        # where X axis (recall) changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]
        # and sum (\Delta recall) * prec
        return np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])

    def caculate_ap(correct, conf, pred_cls, total, classes):
        """Return ``(mAP, {class name: AP})`` from the flat detection records."""
        correct, conf, pred_cls = np.array(correct), np.array(
            conf), np.array(pred_cls)
        # Rank all detections by descending confidence.
        index = np.argsort(-conf)
        correct, conf, pred_cls = correct[index], conf[index], pred_cls[index]
        ap = []
        AP = {}
        for i, c in enumerate(classes):
            k = pred_cls == i
            n_gt = total[c]
            n_p = int(k.sum())
            if n_gt == 0 and n_p == 0:
                # Class absent from both labels and predictions: not scored.
                continue
            elif n_p == 0 or n_gt == 0:
                ap.append(0)
                AP[c] = 0
            else:
                fpc = np.cumsum(1 - correct[k])
                tpc = np.cumsum(correct[k])
                rec = tpc / n_gt
                prec = tpc / (tpc + fpc)
                _ap = voc_ap(rec, prec)
                ap.append(_ap)
                AP[c] = _ap
        # The mean of an empty list would be NaN (with a RuntimeWarning).
        mAP = float(np.mean(ap)) if ap else 0.0
        return mAP, AP

    total = {cls: 0 for cls in classes}
    correct = []
    conf_list = []
    pred_list = []
    for step, samples in enumerate(val_dataset):
        images, labels = samples['image'], samples['label']
        logging.info("Now have finished [%.3d/%.3d]" %
                     (step, len(val_dataset)))
        with torch.no_grad():
            outputs = self.net(images)
            output_list = []
            for i in range(3):
                output_list.append(self.yolo_loss[i](outputs[i]))
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output,
                                                   self.config.num_classes,
                                                   conf_thres=0.001,
                                                   nms_thres=0.4)
        for idx, detections in enumerate(batch_detections):
            label = labels[idx]
            # Labels are zero-padded: cut at the first all-zero row.
            for t in range(label.size(0)):
                if label[t, :].sum() == 0:
                    label = label[:t, :]
                    break
            label_cls = np.array(label[:, 0])
            for cls_id in label_cls:
                total[classes[int(cls_id)]] += 1
            if detections is None:
                if label.size(0) != 0:
                    # NOTE(review): a missed image is recorded as one wrong
                    # prediction with confidence 1 per labelled class -
                    # confirm this penalty is intended.
                    label_cls = np.unique(label_cls)
                    for cls_id in label_cls:
                        correct.append(0)
                        conf_list.append(1)
                        pred_list.append(int(cls_id))
                continue
            if label.size(0) == 0:
                # No ground truth: every detection is a false positive.
                for *pred_box, conf, cls_conf, cls_pred in detections:
                    correct.append(0)
                    # Store a plain float: a tensor element cannot be
                    # serialised by json.dump below.
                    conf_list.append(float(conf))
                    pred_list.append(int(cls_pred))
            else:
                detections = detections[np.argsort(-detections[:, 4])]
                detected = set()
                for *pred_box, conf, cls_conf, cls_pred in detections:
                    pred_box = torch.FloatTensor(pred_box).view(1, -1)
                    # Corner form (x1, y1, x2, y2) -> (cx, cy, w, h), then
                    # scale by the configured image size.
                    pred_box[:, 2:] = pred_box[:, 2:] - pred_box[:, :2]
                    pred_box[:, :2] = pred_box[:, :2] + pred_box[:, 2:] / 2
                    pred_box = pred_box / self.config.image_size
                    ious = bbox_iou(pred_box, label[:, 1:])
                    best_i = int(np.argmax(ious))
                    # A ground-truth box can be claimed only once.
                    if ious[best_i] > iou_thresh and int(cls_pred) == int(
                            label[best_i, 0]) and best_i not in detected:
                        correct.append(1)
                        detected.add(best_i)
                    else:
                        correct.append(0)
                    pred_list.append(int(cls_pred))
                    conf_list.append(float(conf))

    results = {
        'correct': correct,
        'conf': conf_list,
        'pred_cls': pred_list,
        'total': total,
    }
    with open('results.json', 'w') as f:
        json.dump(results, f)
    logging.info('Having saved to results.json')
    logging.info('Begin calculating....')
    with open('results.json', 'r') as result_file:
        results = json.load(result_file)
    mAP, AP_class = caculate_ap(correct=results['correct'],
                                conf=results['conf'],
                                pred_cls=results['pred_cls'],
                                total=results['total'],
                                classes=classes)
    logging.info('mAP(IoU=0.5):{:.1f}'.format(mAP * 100))
def get_target(self, pred_boxes, target, anchors, in_w, in_h, ignore_threshold):
    """Fill the target buffers held on ``self`` for one detection scale.

    Writes into ``self.noobj_mask``, ``self.conf_mask``, ``self.tx``,
    ``self.ty``, ``self.tw``, ``self.th``, ``self.tconf`` and ``self.tcls``,
    all indexed as ``[b, anchor, gj, gi]``. They are presumably allocated and
    reset by the caller - confirm.

    Args:
        pred_boxes: predicted boxes read as ``pred_boxes[b, anchor, gj, gi]``;
            its dimensions 2 and 3 are used as the grid height and width.
        target: ground-truth tensor read as ``target[b, t, 0..4]`` =
            (class id, centre x, centre y, width, height). Rows that sum to
            zero are treated as padding and skipped.
        anchors: sequence of ``(w, h)`` pairs, one per anchor.
        in_w: grid width used to scale the target x and width.
        in_h: grid height used to scale the target y and height.
        ignore_threshold: anchors whose shape IoU with the ground truth
            exceeds this value are cleared from ``self.noobj_mask``.

    Returns:
        ``(nGT, nCorrect)``: number of ground-truth boxes seen, and how many
        of them have IoU > 0.8 with the prediction at their assigned cell
        and anchor.
    """
    bs = target.size(0)
    grid_h, grid_w = pred_boxes.shape[2], pred_boxes.shape[3]
    # Anchor shapes as (0, 0, w, h) rows. They do not depend on the target,
    # so they are built once instead of once per ground-truth box.
    anchor_shapes = torch.FloatTensor(
        np.concatenate((np.zeros((self.num_anchors, 2)), np.array(anchors)), 1))
    nGT = 0
    nCorrect = 0
    for b in range(bs):
        for t in range(target.shape[1]):
            if target[b, t].sum() == 0:
                continue
            nGT += 1
            # Convert to position relative to box
            gx = target[b, t, 1] * in_w
            gy = target[b, t, 2] * in_h
            gw = target[b, t, 3] * in_w
            gh = target[b, t, 4] * in_h
            # Get grid box indices
            gi = int(gx)
            gj = int(gy)
            # Shape-only ground-truth box (origin at 0, 0)
            gt_shape = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
            # Calculate iou between gt and anchor shapes
            anch_ious = bbox_iou(gt_shape, anchor_shapes)
            # Find the best matching anchor box
            best_n = int(np.argmax(anch_ious))
            # Clamp indices that fall on the right/bottom border BEFORE any
            # buffer is indexed; the clamp previously ran after noobj_mask
            # had already been indexed with the out-of-range values.
            if gi >= grid_w:
                print(pred_boxes.shape, b, best_n, gj, gi)
                gi = grid_w - 1
            if gj >= grid_h:
                print(pred_boxes.shape, b, best_n, gj, gi)
                gj = grid_h - 1
            # Where the overlap is larger than threshold set mask to zero (ignore)
            self.noobj_mask[b, anch_ious > ignore_threshold, gj, gi] = 0
            gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0)
            pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0)
            # Masks
            self.conf_mask[b, best_n, gj, gi] = 1
            # Coordinates (offset of the centre inside its cell)
            self.tx[b, best_n, gj, gi] = gx - gi
            self.ty[b, best_n, gj, gi] = gy - gj
            # Width and height, as log-ratio to the matched anchor
            self.tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
            self.th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
            # object
            self.tconf[b, best_n, gj, gi] = 1
            # One-hot encoding of label
            self.tcls[b, best_n, gj, gi, int(target[b, t, 0])] = 1
            # Count predictions that already overlap their ground truth well.
            iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)
            if iou > 0.8:
                nCorrect = nCorrect + 1
    return nGT, nCorrect