def test_calc_iou(self):
    # Identical boxes -> IoU = 1.
    a1 = torch.zeros(3, 4)
    a1[:, :2] = 0
    a1[:, 2:] = 100
    b1 = torch.zeros(4, 4)
    b1[:, :2] = 0
    b1[:, 2:] = 100
    c1 = torch.zeros(3, 4)
    c1[:, :] = 1

    # 50x50 box contained in a 100x100 box -> IoU = 2500 / 10000 = 0.25.
    a2 = torch.zeros(3, 4)
    a2[:, :2] = 0
    a2[:, 2:] = 99
    b2 = torch.zeros(4, 4)
    b2[:, :2] = 0
    b2[:, 2:] = 49
    c2 = torch.zeros(3, 4)
    c2[:, :] = 0.25

    # 100x100 box contained in a 200x200 box -> IoU = 10000 / 40000 = 0.25.
    a3 = torch.zeros(3, 4)
    a3[:, :2] = 0
    a3[:, 2:] = 99
    b3 = torch.zeros(4, 4)
    b3[:, :2] = 0
    b3[:, 2:] = 199
    c3 = torch.zeros(3, 4)
    c3[:, :] = 0.25

    # Partial overlap -> IoU = 5000 / 25000 = 0.2.
    a4 = torch.zeros(3, 4)
    a4[:, :2] = 50
    a4[:, 2:] = 149
    b4 = torch.zeros(4, 4)
    b4[:, 0] = 100
    b4[:, 1] = 0
    b4[:, 2:] = 199
    c4 = torch.zeros(3, 4)
    c4[:, :] = 0.2

    # Mixed case: the first b box is identical to every a box (IoU = 1),
    # the remaining b boxes are the contained 50x50 case (IoU = 0.25).
    a5 = torch.zeros(3, 4)
    a5[:1, :2] = 0
    a5[:1, 2:] = 99
    b5 = torch.zeros(4, 4)
    b5[:1, :2] = 0
    b5[:1, 2:] = 99
    c5 = torch.zeros(3, 4)
    c5[:, :1] = 1
    a5[1:, :2] = 0
    a5[1:, 2:] = 99
    b5[1:, :2] = 0
    b5[1:, 2:] = 49
    c5[:, 1:] = 0.25

    self.assertTrue(calc_iou(a1, b1).equal(c1))
    self.assertTrue(calc_iou(a2, b2).equal(c2))
    self.assertTrue(calc_iou(a3, b3).equal(c3))
    self.assertTrue(calc_iou(a4, b4).equal(c4))
    self.assertTrue(calc_iou(a5, b5).equal(c5))
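# The test above pins down the convention calc_iou must follow: pairwise IoU
# between an [N, 4] and an [M, 4] box set, with inclusive corner coordinates
# (width = x2 - x1 + 1; the 99-vs-49 cases only give exactly 0.25 under that
# convention). A minimal sketch that satisfies the test, under those
# assumptions (hypothetical name; the implementation under test may differ):
import torch

def calc_iou_sketch(boxes_a, boxes_b):
    # boxes_a: [N, 4], boxes_b: [M, 4], both (x1, y1, x2, y2); returns [N, M].
    area_a = (boxes_a[:, 2] - boxes_a[:, 0] + 1) * (boxes_a[:, 3] - boxes_a[:, 1] + 1)
    area_b = (boxes_b[:, 2] - boxes_b[:, 0] + 1) * (boxes_b[:, 3] - boxes_b[:, 1] + 1)
    # Broadcast to all N x M pairs: intersection corners, clamped so that
    # non-overlapping pairs yield zero area.
    lt = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])
    rb = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])
    wh = (rb - lt + 1).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_a[:, None] + area_b[None, :] - inter)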
def nms(y_pred_conf, y_pred_loc, prob):
    """Non-Maximum Suppression (NMS).

    :param y_pred_conf: class predictions, numpy array of shape (num_feature_map_cells * num_default_boxes,)
    :param y_pred_loc: bounding box coordinates, numpy array of shape (num_feature_map_cells * num_default_boxes * 4,)
    :param prob: class probabilities, numpy array of shape (num_feature_map_cells * num_default_boxes,)
    :return: boxes kept after NMS, numpy array of shape (num_boxes, 6): [x1, y1, x2, y2, class, probability]
    """
    # Keep track of boxes for each class.
    class_boxes = {}  # class -> [(x1, y1, x2, y2, class, prob), ...]
    with open('signnames.csv', 'r') as f:
        for line in f:
            cls, _ = line.split(',')
            class_boxes[float(cls)] = []

    # Go through all possible boxes and perform class-based greedy NMS
    # (greedy based on class prediction confidence).
    y_idx = 0
    for fm_size in FM_SIZES:
        fm_h, fm_w = fm_size  # feature map height and width
        for row in range(fm_h):
            for col in range(fm_w):
                for db in DEFAULT_BOXES:
                    if prob[y_idx] > CONF_THRESH and y_pred_conf[y_idx] > 0:
                        # Predicted box coordinates in feature map space ...
                        xc, yc = col + 0.5, row + 0.5
                        center_coords = np.array([xc, yc, xc, yc])
                        abs_box_coords = center_coords + y_pred_loc[y_idx * 4: y_idx * 4 + 4]

                        # ... scaled up to actual image coordinates.
                        scale = np.array([IMG_W / fm_w, IMG_H / fm_h, IMG_W / fm_w, IMG_H / fm_h])
                        box_coords = abs_box_coords * scale
                        box_coords = [int(round(x)) for x in box_coords]

                        # Compare this box to all previously kept boxes of this class.
                        cls = y_pred_conf[y_idx]
                        cls_prob = prob[y_idx]
                        box = (*box_coords, cls, cls_prob)
                        if len(class_boxes[cls]) == 0:
                            class_boxes[cls].append(box)
                        else:
                            suppressed = False  # did this box suppress other box(es)?
                            overlapped = False  # did this box overlap with other box(es)?
                            # Iterate over a copy: removing entries from the list
                            # while iterating over it would skip elements.
                            for other_box in class_boxes[cls][:]:
                                iou = calc_iou(box[:4], other_box[:4])
                                if iou > NMS_IOU_THRESH:
                                    overlapped = True
                                    # Keep the current box only if it is more confident.
                                    if box[5] > other_box[5]:
                                        class_boxes[cls].remove(other_box)
                                        suppressed = True
                            if suppressed or not overlapped:
                                class_boxes[cls].append(box)
                    y_idx += 1

    # Gather all the surviving boxes and return them.
    boxes = []
    for cls in class_boxes.keys():
        for class_box in class_boxes[cls]:
            boxes.append(class_box)
    boxes = np.array(boxes)
    return boxes
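# A hypothetical invocation, for shape orientation only. FM_SIZES,
# DEFAULT_BOXES, CONF_THRESH, NMS_IOU_THRESH, IMG_W and IMG_H are module-level
# constants assumed by nms, and signnames.csv must list every class id used.
import numpy as np

num_preds = sum(h * w * len(DEFAULT_BOXES) for h, w in FM_SIZES)
y_pred_conf = np.random.randint(0, 3, size=num_preds)  # class id per default box
y_pred_loc = 0.1 * np.random.randn(num_preds * 4)      # center offsets, 4 per box
prob = np.random.rand(num_preds)                       # confidence per prediction
boxes = nms(y_pred_conf, y_pred_loc, prob)             # -> (num_boxes, 6)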
def _generate_targets(self, proposals, gt_classes, gt_bboxes, gt_masks, mask_size=(28, 28)):
    """Generate Mask R-CNN targets and corresponding rois.

    Args:
        proposals(Tensor): [N, a, (idx, x1, y1, x2, y2)], proposals from the RPN,
            where idx is the batch index.
        gt_classes(Tensor): [N, b], ground truth class ids.
        gt_bboxes(Tensor): [N, b, (x1, y1, x2, y2)], ground truth bounding boxes.
        gt_masks(Tensor): [N, b, 1, H, W], ground truth masks, where H and W are
            the original image height and width.

    Returns:
        sampled_rois(Tensor): [N, c, (idx, x1, y1, x2, y2)], sampled proposals to
            feed to RoIAlign.
        cls_targets(Variable): [N*c], training targets for classification.
        bbox_targets(Variable): [N*c, (dx, dy, dw, dh)], training targets for
            bounding box regression; see the R-CNN paper for details.
        mask_targets(Variable): [N*c, 28, 28], training targets for mask prediction.

    Notes:
        N: batch size, a: number of proposals from the RPN, b: number of ground
        truth objects, c: number of rois to train on.
    """
    rois_sample_size = int(self.config['TRAIN']['ROIS_SAMPLE_SIZE'])
    rois_pos_ratio = float(self.config['TRAIN']['ROIS_POS_RATIO'])
    rois_pos_thresh = float(self.config['TRAIN']['ROIS_POS_THRESH'])
    rois_neg_thresh = float(self.config['TRAIN']['ROIS_NEG_THRESH'])

    batch_size = proposals.size(0)
    # TODO: add support for batch_size > 1.
    assert batch_size == 1, "support for batch_size >= 2 will be added later."

    # Get rid of the batch dim; needs to change when batch_size > 1 is supported.
    proposals = proposals.squeeze(0)
    gt_classes = gt_classes.squeeze(0)
    gt_bboxes = gt_bboxes.squeeze(0)
    gt_masks = gt_masks.squeeze(0)

    iou = calc_iou(proposals[:, 1:], gt_bboxes[:, :])
    max_iou, max_iou_idx_gt = torch.max(iou, dim=1)
    pos_index_prop = torch.nonzero(max_iou >= rois_pos_thresh).view(-1)
    neg_index_prop = torch.nonzero(max_iou < rois_neg_thresh).view(-1)

    # If there are no positives or no negatives, return a single background target.
    if pos_index_prop.numel() == 0 or neg_index_prop.numel() == 0:
        cls_targets = gt_classes.new([0])
        bbox_targets = MaskRCNN._get_bbox_targets(proposals[:1, 1:], proposals[:1, 1:])
        mask_targets = gt_masks.new(1, mask_size[0], mask_size[1]).zero_()
        sampled_rois = proposals[:1, :]
        sampled_rois = sampled_rois.view(batch_size, -1, 5)
        cls_targets = Variable(cls_targets, requires_grad=False)
        bbox_targets = Variable(bbox_targets, requires_grad=False)
        mask_targets = Variable(mask_targets, requires_grad=False)
        return sampled_rois, cls_targets, bbox_targets, mask_targets

    pos_index_gt = max_iou_idx_gt[pos_index_prop]
    assert pos_index_prop.size() == pos_index_gt.size()

    sample_size_pos = int(rois_pos_ratio * rois_sample_size)
    pos_num = pos_index_prop.size(0)
    neg_num = neg_index_prop.size(0)
    sample_size_pos = min(sample_size_pos, pos_num)
    # Keep the positive/negative ratio when there are not enough positives.
    sample_size_neg = int((sample_size_pos / rois_pos_ratio) * (1 - rois_pos_ratio) + 1)
    sample_size_neg = min(sample_size_neg, neg_num)

    sample_index_pos = random.sample(range(pos_num), sample_size_pos)
    sample_index_neg = random.sample(range(neg_num), sample_size_neg)
    pos_index_sampled_prop = pos_index_prop[sample_index_pos]
    neg_index_sampled_prop = neg_index_prop[sample_index_neg]
    pos_index_sampled_gt = pos_index_gt[sample_index_pos]

    index_proposal = torch.cat([pos_index_sampled_prop, neg_index_sampled_prop])
    sampled_rois = proposals[index_proposal, :]

    # Classification targets: positive rois use the gt class id, negatives use 0 (background).
    cls_targets_pos = gt_classes[pos_index_sampled_gt]
    cls_targets_neg = gt_classes.new([0 for _ in range(sample_size_neg)])
    cls_targets = torch.cat([cls_targets_pos, cls_targets_neg])

    # Bounding box regression targets are defined on positive rois only.
    bboxes = proposals[:, 1:]
    bbox_targets = MaskRCNN._get_bbox_targets(bboxes[pos_index_sampled_prop, :],
                                              gt_bboxes[pos_index_sampled_gt, :])

    # Mask targets are defined on positive rois only.
    mask_targets = MaskRCNN._get_mask_targets(bboxes[pos_index_sampled_prop, :],
                                              gt_masks[pos_index_sampled_gt, :, :],
                                              mask_size)

    sampled_rois = sampled_rois.view(batch_size, -1, 5)
    return sampled_rois, Variable(cls_targets), Variable(bbox_targets), Variable(mask_targets)
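# The docstring above refers to the R-CNN (dx, dy, dw, dh) parameterization.
# For reference, a plausible sketch of what MaskRCNN._get_bbox_targets computes
# (hypothetical free function; the real method may use a different corner
# convention or normalize the targets):
import torch

def get_bbox_targets_sketch(proposals, gt_bboxes):
    # proposals, gt_bboxes: [n, (x1, y1, x2, y2)] tensors, matched row-wise.
    pw = proposals[:, 2] - proposals[:, 0] + 1
    ph = proposals[:, 3] - proposals[:, 1] + 1
    px = proposals[:, 0] + 0.5 * pw
    py = proposals[:, 1] + 0.5 * ph
    gw = gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1
    gh = gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1
    gx = gt_bboxes[:, 0] + 0.5 * gw
    gy = gt_bboxes[:, 1] + 0.5 * gh
    # Center translation scaled by proposal size, plus log-space scale factors.
    dx = (gx - px) / pw
    dy = (gy - py) / ph
    dw = torch.log(gw / pw)
    dh = torch.log(gh / ph)
    return torch.stack([dx, dy, dw, dh], dim=1)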
def main():
    global args, logger, writer
    args = get_parser().parse_args()
    logger_train = get_logger()
    random.seed(20170624)
    logger_train.info(args)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    utils.mkdir(args.snapshot_dir, args.sample_dir)

    # Set up the model.
    model = AGSSVOS(init_atn=args.init_atn, freeze=args.freeze).cuda()
    model = torch.nn.DataParallel(model).cuda()
    model.train()
    # Keep the encoder's BatchNorm layers in eval mode (frozen statistics).
    for m in model.module.Encoder.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()
            if args.set_bn_no_update:
                for p in m.parameters():
                    p.requires_grad = False

    if args.restore is not None:
        assert os.path.isfile(args.restore), "no restore file found at %s" % args.restore
        logger_train.info("loading from %s" % args.restore)
        state = model.state_dict()
        checkpoint = torch.load(args.restore)
        if args.finetune:
            checkpoint = checkpoint['seg']
        # Only load weights whose names match the current model.
        checkpoint = {k: v for k, v in checkpoint.items() if k in state}
        state.update(checkpoint)
        model.load_state_dict(state)
        del checkpoint
        torch.cuda.empty_cache()

    if args.finetune:
        flow_infer = Inference_flow(args, train_flow=True, resume=args.restore)
    else:
        flow_infer = Inference_flow(args, train_flow=True)

    # Build per-parameter options; attention-related layers get a 10x learning rate.
    params = []
    scale_lr = []
    assert args.lr_atn != args.lr_after_atn
    for key, value in dict(model.module.named_parameters()).items():
        if args.lr_atn and ('atn' in key or 'pred2' in key or 'RF2' in key) and not args.finetune:
            flag = True
        elif args.lr_after_atn and ('atn' in key or 'pred2' in key or 'RF2' in key) and not args.finetune:
            flag = True
        else:
            flag = False
        if value.requires_grad:
            if flag:
                scale_lr.append(True)
                print('lrx10', key)
            else:
                scale_lr.append(False)
            params += [{'params': [value],
                        'lr': args.lr * 10 if flag else args.lr,
                        'weight_decay': 4e-5}]
    optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=4e-5)

    spec_vid = None
    spec_obj_ind = None
    trainloader = data.DataLoader(
        Trainset(root_data=args.root_data, json_meta_list=args.meta_list,
                 sample_size=args.sample_size, test_mode=False,
                 spec_vid=spec_vid, spec_obj_ind=spec_obj_ind, step=1,
                 fix_size=False, half_size=False, random_ref=args.random_ref,
                 random_skip=args.random_skip),
        batch_size=args.batch_size, shuffle=True, num_workers=1, pin_memory=True)

    # Training.
    tot_iter = len(trainloader)
    logger_train.info("Total iterations per epoch: %d" % tot_iter)
    tot_time = []
    loss_set = []
    iou_set = []
    optimizer.zero_grad()

    for epoch in range(args.start_epoch, args.epoch):
        for i_iter, batch in enumerate(trainloader):
            start_time = timeit.default_timer()
            img, lab, ori_img = batch
            img = img[0].cuda().float()
            lab = lab[0].cuda().float()
            ori_img = ori_img.numpy()
            # img: [K, T, 3, H, W], lab: [K, T, H, W], ori_img: [K, T, H, W, 3]

            # It may be better to move this augmentation into the dataset preprocessing.
            if random.uniform(0, 1) > 0.5 and args.random_crop:
                ### random resize ###
                # Find the bounding region of the labels across all frames.
                coord = [1e4, 1e4, 0, 0]
                lab_agno = lab.sum(0)
                val_cnt = 0
                for i in range(lab_agno.shape[0]):
                    idx = torch.nonzero(lab_agno[i] > 0)
                    if idx.shape[0] == 0:
                        continue
                    val_cnt += 1
                    h0 = idx[:, 0].min().item()
                    w0 = idx[:, 1].min().item()
                    h1 = idx[:, 0].max().item()
                    w1 = idx[:, 1].max().item()
                    coord[0] = min(coord[0], h0)
                    coord[1] = min(coord[1], w0)
                    coord[2] = max(coord[2], h1)
                    coord[3] = max(coord[3], w1)
                if val_cnt < 2:
                    logger_train.info('The number of frames that have a label is less than 2, continue..')
                    continue
                ori_shape = lab.shape[-2:]
                # Interpolate the crop window between the label region (scale=0)
                # and the full frame (scale=1).
                rand_coord = [0] * 4
                if random.uniform(0, 1) > 0.3:
                    scale = random.uniform(0, 1)
                else:
                    scale = 1
                rand_coord[0] = coord[0] * scale
                rand_coord[1] = coord[1] * scale
                rand_coord[2] = (ori_shape[0] - coord[2] - 1) * (1 - scale) + coord[2] + 1
                rand_coord[3] = (ori_shape[1] - coord[3] - 1) * (1 - scale) + coord[3] + 1
                for j in range(4):
                    rand_coord[j] = int(rand_coord[j])

                old_img = img.clone()
                old_lab = lab.clone()
                ori_img = torch.FloatTensor(ori_img).cuda().transpose(-1, -2).transpose(-2, -3)
                old_ori_img = ori_img.clone()
                # Crop, then resize back to the original resolution.
                old_lab = old_lab[:, :, rand_coord[0]:rand_coord[2] + 1, rand_coord[1]:rand_coord[3] + 1]
                lab = F.upsample(old_lab, ori_shape, mode='bilinear', align_corners=True)
                lab = (lab > 0.5).float()
                for i in range(img.shape[0]):
                    img_obj = old_img[i, :, :, rand_coord[0]:rand_coord[2] + 1, rand_coord[1]:rand_coord[3] + 1]
                    img[i] = F.upsample(img_obj, ori_shape, mode='bilinear', align_corners=True)
                img_obj = old_ori_img[0, :, :, rand_coord[0]:rand_coord[2] + 1, rand_coord[1]:rand_coord[3] + 1]
                ori_img[0] = F.upsample(img_obj, ori_shape, mode='bilinear', align_corners=True)
                ori_img = ori_img.transpose(-2, -3).transpose(-1, -2).cpu().numpy().astype(np.uint8)
                ### end of random resize ###

            if lab.shape[1] == 1:
                logger_train.info('lab.shape[1] (vid_len) == 1, continue..')
                continue

            lr = utils.lr_poly(args.lr, i_iter, tot_iter, epoch, args.epoch)
            utils.adjust_optim_all(optimizer, lr, scale_lr)

            # The first two frames are seeded with ground truth; later frames are
            # predicted one by one, warping the previous prediction with optical
            # flow as guidance.
            preds = []
            prev_labs = []
            preds.append(lab[:, 0:1].contiguous())
            preds.append(lab[:, 1:2].contiguous())
            merge_preds_ref = lab[:, 0:1].contiguous().sum(0)
            for i in range(2, img.shape[1], 1):
                ms = model.forward(img[:, 0], merge_preds_ref)
                flow = flow_infer.infer(ori_img[0, i], ori_img[0, i - 1])
                prev_lab = utils.flow_warp_tensor(preds[i - 1], flow)
                prev_labs.append(prev_lab.detach())
                merge_preds = prev_lab.max(0)[0]
                output, _ = model.forward(img[:, i], merge_preds, prev_lab.squeeze(1), ref=ms)
                cur_lab = lab[:, i].contiguous()
                if args.loss_iou_maxmin:
                    cur_loss = utils.loss_calc_iou(output, cur_lab.unsqueeze(1),
                                                   unify=False, optim_hard=False, square=False)
                else:
                    cur_loss = utils.loss_calc_iou_v2(output, cur_lab.unsqueeze(1),
                                                      unify=False, optim_hard=False, square=False)
                loss_set.append(cur_loss.item())
                iou = utils.calc_iou(output.data, cur_lab.long(), merge=False)
                iou_set.append(np.mean(iou))

                optimizer.zero_grad()
                cur_loss.backward()
                optimizer.step()

                # Feed the prediction forward only when it is good enough;
                # otherwise fall back to the ground truth label.
                if args.iou_thr_per_obj:
                    output = output.detach()
                    new_output = torch.zeros_like(output).cuda().float()
                    for j in range(new_output.shape[0]):
                        if iou[j] > 0.5:
                            new_output[j] = output[j]
                        else:
                            new_output[j] = lab[j:j + 1, i]
                    new_output = new_output.contiguous()
                    preds.append(new_output.detach())
                else:
                    if np.mean(iou) > 0.5:
                        preds.append(output.detach())
                    else:
                        preds.append(cur_lab.unsqueeze(1).detach())

            end_time = timeit.default_timer()
            tot_time.append(end_time - start_time)

            if i_iter % 200 == 0:
                logger_train.info('show at %s' % args.sample_dir)
                try:
                    preds = torch.cat(preds, dim=1)
                    prev_labs = torch.cat(prev_labs, dim=1)
                except Exception as e:
                    print(e)
                    print('Ignore.. Continue..')
                    continue
                if args.show_img:
                    show(img.data.cpu().numpy(), lab.data.cpu().numpy(),
                         preds.data.cpu().numpy().astype(np.float32),
                         prev_labs.data.cpu().numpy().astype(np.float32))

            if i_iter % 20 == 0:
                run_time = np.mean(tot_time)
                rem_time = utils.calc_remain_time(run_time, i_iter, tot_iter, epoch, args.epoch)
                logger_train.info('iter = %d of %d in epoch = %d of %d, remain_time = %s'
                                  % (i_iter, tot_iter, epoch, args.epoch, rem_time))
                tot_time = []
                logger_train.info('lr = %f, loss = %f, iou = %f'
                                  % (lr, np.mean(loss_set), np.mean(iou_set)))
                loss_set = []
                iou_set = []

        if epoch % (args.epoch // 5) == 0 or epoch == args.epoch - 1:
            path = os.path.join(args.snapshot_dir, 'model_' + str(epoch) + '.pth')
            logger_train.info('save model at %s' % path)
            torch.save({'seg': model.state_dict(), 'flow': flow_infer.model.state_dict()}, path)
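# utils.lr_poly is not shown here; its signature suggests the usual polynomial
# decay over the whole run (power 0.9 is a common DeepLab-style default). A
# sketch under that assumption, with a hypothetical name; the real helper may
# differ:
def lr_poly_sketch(base_lr, i_iter, tot_iter, epoch, tot_epoch, power=0.9):
    # Decay from base_lr to 0 as training progresses through all epochs.
    cur_iter = epoch * tot_iter + i_iter
    max_iter = tot_epoch * tot_iter
    return base_lr * (1 - float(cur_iter) / max_iter) ** power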
def find_gt_boxes(data_raw, image_file):
    """Given (global) feature map sizes and a single training example, find all
    default boxes that exceed the Jaccard overlap threshold.

    :param data_raw: dict mapping image file name to its raw ground truth annotations
    :param image_file: image file name to look up in data_raw
    :return: y_true arrays that flag the matching default boxes with a class ID
        (0 means nothing there), plus the match count
    """
    # Pre-process ground truth data.
    data = data_raw[image_file]

    class_labels = []
    box_coords = []  # relative coordinates
    for obj in data:
        class_label = obj['class']
        class_labels.append(class_label)

        # Calculate relative coordinates:
        # (x1, y1, x2, y2), where 1 denotes the upper-left corner and 2 the lower-right corner.
        abs_box_coords = obj['box_coords']
        scale = np.array([IMG_W, IMG_H, IMG_W, IMG_H])
        box_coord = np.array(abs_box_coords) / scale
        box_coords.append(box_coord)

    y_true_len = 0
    for fm_size in FM_SIZES:
        y_true_len += fm_size[0] * fm_size[1] * NUM_DEFAULT_BOXES
    y_true_conf = np.zeros(y_true_len)
    y_true_loc = np.zeros(y_true_len * 4)

    # For each GT box, for each feature map, for each feature map cell, for each default box:
    # 1) Calculate the Jaccard overlap (IOU) and annotate the class label
    # 2) Count how many box matches we got
    # 3) If we got a match, calculate normalized box coordinates and update y_true_loc
    match_counter = 0
    for i, gt_box_coords in enumerate(box_coords):
        y_true_idx = 0
        for fm_size in FM_SIZES:
            fm_h, fm_w = fm_size  # feature map height and width
            for row in range(fm_h):
                for col in range(fm_w):
                    for db in DEFAULT_BOXES:
                        # Calculate relative box coordinates for this default box.
                        x1_offset, y1_offset, x2_offset, y2_offset = db
                        abs_db_box_coords = np.array([
                            max(0, col + x1_offset),
                            max(0, row + y1_offset),
                            min(fm_w, col + 1 + x2_offset),
                            min(fm_h, row + 1 + y2_offset)
                        ])
                        scale = np.array([fm_w, fm_h, fm_w, fm_h])
                        db_box_coords = abs_db_box_coords / scale

                        # Calculate the Jaccard overlap (i.e. Intersection Over Union, IOU)
                        # of the GT box and this default box.
                        iou = calc_iou(gt_box_coords, db_box_coords)

                        # If the IOU threshold is met, the box matches.
                        if iou >= IOU_THRESH:
                            # Update y_true_conf to reflect the match, and increment match_counter.
                            y_true_conf[y_true_idx] = class_labels[i]
                            match_counter += 1

                            # Calculate normalized box coordinates and update y_true_loc.
                            # Absolute coordinates of the feature map cell center.
                            abs_box_center = np.array([col + 0.5, row + 0.5])
                            # Absolute ground truth box coordinates, in feature map grid units.
                            abs_gt_box_coords = gt_box_coords * scale
                            norm_box_coords = abs_gt_box_coords - np.concatenate((abs_box_center, abs_box_center))
                            y_true_loc[y_true_idx * 4:y_true_idx * 4 + 4] = norm_box_coords

                        y_true_idx += 1

    return y_true_conf, y_true_loc, match_counter
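# find_gt_boxes and nms above walk the predictions in the same flat order:
# feature map, then row, then column, then default box. A small helper that
# makes this layout explicit; the FM_SIZES / NUM_DEFAULT_BOXES values here are
# hypothetical stand-ins for the real config constants.
FM_SIZES = [(10, 10), (5, 5)]
NUM_DEFAULT_BOXES = 4

def flat_index(fm_idx, row, col, db_idx):
    # Flat index into y_true_conf / y_pred_conf for one (cell, default box) pair.
    base = sum(h * w * NUM_DEFAULT_BOXES for h, w in FM_SIZES[:fm_idx])
    fm_h, fm_w = FM_SIZES[fm_idx]
    return base + (row * fm_w + col) * NUM_DEFAULT_BOXES + db_idx

# The 4 location targets for that box then occupy
# y_true_loc[idx * 4 : idx * 4 + 4], with idx = flat_index(...).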