def test_calc_iou(self):
        """Check calc_iou on five hand-built box batches using exact tensor equality.

        Every case feeds a 3x4 and a 4x4 box tensor to ``calc_iou`` and expects
        a 3x4 result. Rows are presumably ``(x1, y1, x2, y2)`` corners -- confirm
        against ``calc_iou``.
        """

        def make_boxes(count, columns):
            # Build a (count, 4) tensor in which every row equals `columns`.
            boxes = torch.zeros(count, 4)
            for k, value in enumerate(columns):
                boxes[:, k] = value
            return boxes

        def make_expected(value):
            # Build the 3x4 expected matrix filled with a single value.
            grid = torch.zeros(3, 4)
            grid.fill_(value)
            return grid

        # Case 5 mixes two situations: the first box of the second batch matches
        # the first batch exactly, the remaining ones are smaller.
        b5 = make_boxes(4, (0, 0, 49, 49))
        b5[0, 2:] = 99
        c5 = make_expected(0.25)
        c5[:, 0] = 1

        cases = [
            # identical boxes
            (make_boxes(3, (0, 0, 100, 100)), make_boxes(4, (0, 0, 100, 100)), make_expected(1)),
            # second batch is smaller
            (make_boxes(3, (0, 0, 99, 99)), make_boxes(4, (0, 0, 49, 49)), make_expected(0.25)),
            # second batch is larger
            (make_boxes(3, (0, 0, 99, 99)), make_boxes(4, (0, 0, 199, 199)), make_expected(0.25)),
            # partial overlap
            (make_boxes(3, (50, 50, 149, 149)), make_boxes(4, (100, 0, 199, 199)), make_expected(0.2)),
            # per-column mixture built above
            (make_boxes(3, (0, 0, 99, 99)), b5, c5),
        ]

        for boxes_a, boxes_b, wanted in cases:
            self.assertTrue(calc_iou(boxes_a, boxes_b).equal(wanted))
Esempio n. 2
0
def nms(y_pred_conf, y_pred_loc, prob):
    """
    Class-wise greedy Non-Maximum Suppression (NMS) over all default boxes.

    The flat inputs are walked in the order feature map -> row -> column -> default box,
    one entry (four entries for ``y_pred_loc``) per default box.

    :param y_pred_conf: predicted class per default box, flat array indexed by box;
        entries that are not > 0 are skipped (class 0 is presumably background -- confirm)
    :param y_pred_loc: predicted box offsets relative to the cell center, flat array with
        4 consecutive values per default box
    :param prob: class probability per default box, flat array indexed by box;
        only boxes with prob > CONF_THRESH are considered
    :return: numpy array built from the surviving boxes, one row per box:
        [x1, y1, x2, y2, class, probability] (coordinates are rounded image pixels)
    """

    # Keep track of boxes for each class; the class ids are read from the first CSV column
    class_boxes = {}  # class -> [(x1, y1, x2, y2, cls, prob), (...), ...]
    with open('signnames.csv', 'r') as f:
        for line in f:
            cls, _ = line.split(',')
            class_boxes[float(cls)] = []

    # Go through all possible boxes and perform class-based greedy NMS
    # (greedy based on class prediction confidence)
    y_idx = 0

    for fm_size in FM_SIZES:
        fm_h, fm_w = fm_size  # feature map height and width
        # Factor from feature-map grid units to image pixels; constant per feature map
        scale = np.array([IMG_W / fm_w, IMG_H / fm_h, IMG_W / fm_w, IMG_H / fm_h])
        for row in range(fm_h):
            for col in range(fm_w):
                # Only the number of default boxes matters here, not their geometry
                for _ in DEFAULT_BOXES:
                    if prob[y_idx] > CONF_THRESH and y_pred_conf[y_idx] > 0:
                        xc, yc = col + 0.5, row + 0.5
                        center_coords = np.array([xc, yc, xc, yc])
                        abs_box_coords = center_coords + y_pred_loc[y_idx * 4: y_idx * 4 + 4]

                        # Calculate predicted box coordinates in actual image
                        box_coords = [int(round(x)) for x in abs_box_coords * scale]

                        cls = y_pred_conf[y_idx]
                        cls_prob = prob[y_idx]
                        box = (*box_coords, cls, cls_prob)

                        # Compare this box to all previous boxes of this class.
                        # The survivors are collected in a new list instead of calling
                        # remove() on the list being iterated, which skipped the element
                        # following every removal.
                        suppressed = False  # did this box suppress other box(es)?
                        overlapped = False  # did this box overlap with other box(es)?
                        survivors = []
                        for other_box in class_boxes[cls]:
                            iou = calc_iou(box[:4], other_box[:4])
                            if iou > NMS_IOU_THRESH:
                                overlapped = True
                                # Current box has higher confidence: drop the other box
                                if box[5] > other_box[5]:
                                    suppressed = True
                                    continue
                            survivors.append(other_box)
                        if suppressed or not overlapped:
                            survivors.append(box)
                        class_boxes[cls] = survivors

                    y_idx += 1

    # Gather all the pruned boxes and return them. This must happen after ALL feature
    # maps were processed (it used to sit inside the feature-map loop and returned
    # after the first one).
    boxes = []
    for kept in class_boxes.values():
        boxes.extend(kept)

    return np.array(boxes)
Esempio n. 3
0
    def _generate_targets(self, proposals, gt_classes, gt_bboxes, gt_masks, mask_size=(28, 28)):
        """Sample RoIs from the proposals and build the matching Mask R-CNN training targets.

        Args:
            proposals(Tensor): [N, a, (idx, x1, y1, x2, y2)], proposals from RPN, idx is the
                batch index.
            gt_classes(Tensor): [N, b], ground truth class ids.
            gt_bboxes(Tensor): [N, b, (x1, y1, x2, y2)], ground truth bounding boxes.
            gt_masks(Tensor): [N, b, 1, H, W], ground truth masks, H and W being the original
                image height and width.
            mask_size(tuple): (height, width) of the mask targets.

        Returns:
            sampled_rois(Tensor): [N, c, (idx, x1, y1, x2, y2)], sampled proposals to feed
                RoIAlign.
            cls_targets(Variable): [(Nxc)], training targets for classification.
            bbox_targets(Variable): [(Nxc), (dx, dy, dw, dh)], training targets for bounding
                box regression, see the R-CNN paper for details.
            mask_targets(Variable): [(Nxc), 28, 28], training targets for mask prediction.

        Notes: N: batch_size, a: number of proposals from RPN, b: number of ground truth
            objects, c: number of rois to train. Only batch_size == 1 is supported.

        """
        train_cfg = self.config['TRAIN']
        total_budget = int(train_cfg['ROIS_SAMPLE_SIZE'])
        pos_ratio = float(train_cfg['ROIS_POS_RATIO'])
        pos_thresh = float(train_cfg['ROIS_POS_THRESH'])
        neg_thresh = float(train_cfg['ROIS_NEG_THRESH'])

        batch_size = proposals.size(0)
        # Todo: add support to use batch_size >= 1
        assert batch_size == 1, "batch_size >= 2 will add support later."

        # Drop the batch dimension; needs to change once batch_size >= 1 is supported.
        proposals, gt_classes, gt_bboxes, gt_masks = (
            tensor.squeeze(0) for tensor in (proposals, gt_classes, gt_bboxes, gt_masks))

        # For every proposal: best IoU over all ground truth boxes and which box achieved it.
        iou = calc_iou(proposals[:, 1:], gt_bboxes[:, :])
        best_iou, best_gt = torch.max(iou, dim=1)
        pos_rows = torch.nonzero(best_iou >= pos_thresh).view(-1)
        neg_rows = torch.nonzero(best_iou < neg_thresh).view(-1)

        # Without at least one positive AND one negative proposal, fall back to a single
        # background roi built from the first proposal.
        if not (pos_rows.numel() and neg_rows.numel()):
            background_cls = gt_classes.new([0])
            self_delta = MaskRCNN._get_bbox_targets(proposals[:1, 1:],
                                                    proposals[:1, 1:])
            empty_mask = gt_masks.new(1, mask_size[0], mask_size[1]).zero_()
            sampled_rois = proposals[:1, :].view(batch_size, -1, 5)

            return (sampled_rois,
                    Variable(background_cls, requires_grad=False),
                    Variable(self_delta, requires_grad=False),
                    Variable(empty_mask, requires_grad=False))

        matched_gt = best_gt[pos_rows]
        assert pos_rows.size() == matched_gt.size()

        pos_available = pos_rows.size(0)
        neg_available = neg_rows.size(0)
        pos_count = min(int(pos_ratio * total_budget), pos_available)
        # Keep the positive/negative ratio when there are not enough positives.
        neg_count = min(int((pos_count / pos_ratio) * (1 - pos_ratio) + 1), neg_available)

        # Positives are drawn first, then negatives (the order matters for a seeded RNG).
        picked_pos = random.sample(range(pos_available), pos_count)
        picked_neg = random.sample(range(neg_available), neg_count)

        pos_rows_kept = pos_rows[picked_pos]
        neg_rows_kept = neg_rows[picked_neg]
        gt_rows_kept = matched_gt[picked_pos]

        sampled_rois = proposals[torch.cat([pos_rows_kept, neg_rows_kept]), :]

        # Classification targets: positives take their gt class id, negatives the
        # background id 0.
        background_ids = gt_classes.new([0] * neg_count)
        cls_targets = torch.cat([gt_classes[gt_rows_kept], background_ids])

        # Box regression and mask targets are defined on the positive proposals only.
        boxes = proposals[:, 1:]
        bbox_targets = MaskRCNN._get_bbox_targets(boxes[pos_rows_kept, :],
                                                  gt_bboxes[gt_rows_kept, :])
        mask_targets = MaskRCNN._get_mask_targets(boxes[pos_rows_kept, :],
                                                  gt_masks[gt_rows_kept, :, :], mask_size)
        sampled_rois = sampled_rois.view(batch_size, -1, 5)

        return sampled_rois, Variable(cls_targets), Variable(bbox_targets), Variable(mask_targets)
Esempio n. 4
0
def main():
    """Train the AGSSVOS segmentation model; entry point of the training script.

    In order: parse the command line, seed ``random``, build the model (BatchNorm2d layers
    of the encoder are kept in eval mode), optionally restore weights from
    ``args.restore``, create the flow inference helper, build one Adam parameter group
    per trainable parameter (``atn``/``pred2``/``RF2`` parameters get 10x the base
    learning rate when ``args.lr_atn`` or ``args.lr_after_atn`` is set and not
    fine-tuning), run the epoch loop and write snapshots to ``args.snapshot_dir``.
    """
    global args, logger, writer
    args = get_parser().parse_args()
    logger_train = get_logger()
    random.seed(20170624)
    logger_train.info((args))

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    utils.mkdir(args.snapshot_dir, args.sample_dir)

    # setting up model
    model = AGSSVOS(init_atn=args.init_atn, freeze=args.freeze).cuda()
    model = torch.nn.DataParallel(model).cuda()
    model.train()

    # Keep the encoder's BatchNorm layers in eval mode; optionally freeze their parameters.
    for m in model.module.Encoder.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()
            if args.set_bn_no_update:
                for p in m.parameters():
                    p.requires_grad = False

    if args.restore is not None:
        assert os.path.isfile(args.restore), "no restore file found at %s" % (args.restore)
        logger_train.info("loading from %s" % (args.restore))

        state = model.state_dict()
        checkpoint = torch.load(args.restore)
        if args.finetune:
            checkpoint = checkpoint['seg']
        # Only load the entries that exist in the current model.
        checkpoint = {k: v for k, v in checkpoint.items() if k in state}
        state.update(checkpoint)
        model.load_state_dict(state)

        del checkpoint
        torch.cuda.empty_cache()

    if args.finetune:
        flow_infer = Inference_flow(args, train_flow=True, resume=args.restore)
    else:
        flow_infer = Inference_flow(args, train_flow=True)

    # One parameter group per trainable parameter; scale_lr[k] tells whether group k
    # uses the 10x learning rate.
    params = []
    scale_lr = []
    assert args.lr_atn != args.lr_after_atn
    for key, value in dict(model.module.named_parameters()).items():
        if args.lr_atn and ('atn' in key or 'pred2' in key or 'RF2' in key) and not args.finetune:
            flag = True
        elif args.lr_after_atn and ('atn' in key or 'pred2' in key or 'RF2' in key) and not args.finetune:
            flag = True
        else:
            flag = False
        if value.requires_grad:
            if flag:
                scale_lr.append(True)
                print('lrx10', key)
            else:
                scale_lr.append(False)
            params += [{'params':[value],'lr':args.lr*10 if flag else args.lr , 'weight_decay': 4e-5}]
    optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=4e-5)
    spec_vid = None
    spec_obj_ind = None

    trainloader = data.DataLoader(
            Trainset(root_data=args.root_data, json_meta_list=args.meta_list,
                         sample_size=args.sample_size, test_mode=False, spec_vid=spec_vid, spec_obj_ind=spec_obj_ind,
                         step=1, fix_size=False, half_size=False, random_ref=args.random_ref, random_skip=args.random_skip),
            batch_size=args.batch_size, shuffle=True, num_workers=1, pin_memory=True)

    # training
    tot_iter = len(trainloader)
    logger_train.info("Total iteration per epoch is %d" % (tot_iter))
    tot_time = []
    loss_set = []
    iou_set = []
    optimizer.zero_grad()

    # Snapshot roughly five times per run. args.epoch // 5 is 0 when fewer than five
    # epochs are requested, which made the modulo below raise ZeroDivisionError, so the
    # interval is clamped to at least 1.
    save_interval = max(1, args.epoch // 5)

    for epoch in range(args.start_epoch, args.epoch):
        for i_iter, batch in enumerate(trainloader):
            start_time = timeit.default_timer()

            img, lab, ori_img = batch

            img = img[0].cuda().float()
            lab = lab[0].cuda().float()
            ori_img = ori_img.numpy()
            # img KT3HW, lab KTHW, ori_img, KTHW3#

            ### It may be better to move this augmentation into the dataset preprocessing ##
            if random.uniform(0,1)>0.5 and args.random_crop:
                ### random resize ###
                # coord = [h_min, w_min, h_max, w_max] of the labelled area over all frames
                coord = [1e4,1e4,0,0]
                lab_agno = lab.sum(0)
                val_cnt = 0
                for i in range(lab_agno.shape[0]):
                    idx = torch.nonzero(lab_agno[i]>0)
                    if idx.shape[0] == 0:
                        continue
                    val_cnt += 1
                    h0 = idx[:,0].min().item()
                    w0 = idx[:,1].min().item()
                    h1 = idx[:,0].max().item()
                    w1 = idx[:,1].max().item()
                    coord[0] = min(coord[0], h0)
                    coord[1] = min(coord[1], w0)
                    coord[2] = max(coord[2], h1)
                    coord[3] = max(coord[3], w1)
                if val_cnt < 2:
                    logger_train.info(('The number of frames that have label is less than 2, continue..'))
                    continue
                ori_shape = lab.shape[-2:]
                rand_coord = [0]*4

                # scale == 1 crops tightly around the labelled area, scale == 0 keeps the
                # full frame.
                if random.uniform(0,1) > 0.3:
                    scale = random.uniform(0,1)
                else:
                    scale = 1
                rand_coord[0] = coord[0] * scale
                rand_coord[1] = coord[1] * scale
                rand_coord[2] = (ori_shape[0]-coord[2]-1)*(1-scale)+coord[2]+1
                rand_coord[3] = (ori_shape[1]-coord[3]-1)*(1-scale)+coord[3]+1
                for j in range(4):
                    rand_coord[j] = int(rand_coord[j])

                old_img = img.clone()
                old_lab = lab.clone()
                # channels-last -> channels-first so the frames can be resized
                ori_img = torch.FloatTensor(ori_img).cuda().transpose(-1,-2).transpose(-2,-3)
                old_ori_img = ori_img.clone()

                # Crop labels and images to the random window, then resize back to the
                # original resolution.
                # NOTE(review): F.upsample is deprecated in favor of F.interpolate.
                old_lab = old_lab[:,:,rand_coord[0]:rand_coord[2]+1,rand_coord[1]:rand_coord[3]+1]
                lab = F.upsample(old_lab, ori_shape, mode='bilinear', align_corners=True)
                lab = (lab>0.5).float()
                for i in range(img.shape[0]):
                    img_obj = old_img[i,:,:,rand_coord[0]:rand_coord[2]+1,rand_coord[1]:rand_coord[3]+1]
                    img[i] = F.upsample(img_obj, ori_shape, mode='bilinear', align_corners=True)
                img_obj = old_ori_img[0,:,:,rand_coord[0]:rand_coord[2]+1,rand_coord[1]:rand_coord[3]+1]
                ori_img[0] = F.upsample(img_obj, ori_shape, mode='bilinear', align_corners=True)
                ori_img = ori_img.transpose(-2,-3).transpose(-1,-2).cpu().numpy().astype(np.uint8)

                ### end of random resize ###

            if lab.shape[1] == 1:
                logger_train.info('lab.shape[1](vid_len) == 1, continue..')
                continue

            lr = utils.lr_poly(args.lr, i_iter, tot_iter, epoch, args.epoch)
            utils.adjust_optim_all(optimizer, lr, scale_lr)
            preds = []
            prev_labs = []
            # The first two frames use the ground truth labels as "predictions".
            preds.append(lab[:,0:1].contiguous())
            preds.append(lab[:,1:2].contiguous())
            merge_preds_ref = lab[:,0:1].contiguous().sum(0)
            for i in range(2, img.shape[1], 1):
                ms = model.forward(img[:,0], merge_preds_ref)
                flow = flow_infer.infer(ori_img[0,i], ori_img[0,i-1])
                # Warp the previous frame's prediction with the estimated flow.
                prev_lab = utils.flow_warp_tensor(preds[i-1], flow)

                prev_labs.append(prev_lab.detach())
                merge_preds = prev_lab.max(0)[0]

                output, _ = model.forward(img[:,i], merge_preds, prev_lab.squeeze(1), ref=ms)

                cur_lab = lab[:,i].contiguous()

                if args.loss_iou_maxmin:
                    cur_loss = utils.loss_calc_iou(output, cur_lab.unsqueeze(1), unify=False, optim_hard=False,
                                                      square=False) # try this
                else:
                    cur_loss = utils.loss_calc_iou_v2(output, cur_lab.unsqueeze(1), unify=False, optim_hard=False,
                                                      square=False) # try this

                loss_set.append(cur_loss.item())

                iou = utils.calc_iou(output.data, cur_lab.long(), merge=False)
                iou_set.append(np.mean(iou))

                # One optimizer step per frame.
                optimizer.zero_grad()
                cur_loss.backward()
                optimizer.step()

                # Propagate the prediction to the next frame only where its IoU is above
                # 0.5; otherwise fall back to the ground truth label.
                if args.iou_thr_per_obj:
                    output = output.detach()
                    new_output = torch.zeros_like(output).cuda().float()
                    for j in range(new_output.shape[0]):
                        if iou[j] > 0.5:
                            new_output[j] = output[j]
                        else:
                            new_output[j] = lab[j:j+1,i]
                    new_output = new_output.contiguous()
                    preds.append(new_output.detach())
                else:
                    if np.mean(iou) > 0.5:
                        preds.append(output.detach())
                    else:
                        preds.append(cur_lab.unsqueeze(1).detach())

            end_time = timeit.default_timer()
            tot_time.append(end_time - start_time)

            if i_iter % 200 == 0:
                logger_train.info('show at %s' % args.sample_dir)
                try:
                    preds = torch.cat(preds, dim=1)
                    prev_labs = torch.cat(prev_labs, dim=1)
                except Exception as e:
                    print(e)
                    print('Ignore.. Continue..')
                    continue
                if args.show_img:
                    # np.float was only an alias of the builtin float (float64) and was
                    # removed in NumPy 1.24; np.float64 is the exact equivalent.
                    show(img.data.cpu().numpy(), lab.data.cpu().numpy(), preds.data.cpu().numpy().astype(np.float64),
                        prev_labs.data.cpu().numpy().astype(np.float32))

            if i_iter % 20 == 0:
                run_time = np.mean(tot_time)
                rem_time = utils.calc_remain_time(run_time, i_iter, tot_iter, epoch, args.epoch)
                logger_train.info('iter = %d of %d in epoch = %d of %d, remain_time = %s' %
                            (i_iter, tot_iter, epoch, args.epoch, rem_time))
                tot_time = []
                logger_train.info('lr = %f, loss = %f, iou = %f' % (lr, np.mean(loss_set), np.mean(iou_set)))
                loss_set = []
                iou_set = []

        if epoch % save_interval == 0 or epoch == args.epoch - 1:
            path = os.path.join(args.snapshot_dir, 'model_' + str(epoch) + '.pth')
            logger_train.info('save model at %s' % path)
            torch.save({'seg':model.state_dict(), 'flow':flow_infer.model.state_dict()}, path)
Esempio n. 5
0
def find_gt_boxes(data_raw, image_file):
    """
    Match every default box of every feature map against the ground-truth boxes of one image.

    A default box matches a ground-truth box when their IOU is at least IOU_THRESH.
    All ground-truth boxes write into the same flat arrays, so a later box overwrites
    an earlier match on the same default box.

    :param data_raw: container indexed by image file; each entry is an iterable of objects
        providing 'class' and 'box_coords' (x1, y1, x2, y2 in image pixels)
    :param image_file: key of the training example inside data_raw
    :return: tuple (y_true_conf, y_true_loc, match_counter):
        y_true_conf holds the class label of the matched ground-truth box per default box
        (0 where nothing matched), y_true_loc holds 4 values per default box (ground-truth
        corners relative to the cell center, in feature-map grid units), match_counter is
        the total number of matches found
    """

    # Pre-process the ground truth: collect class labels and image-relative coordinates
    # (x1, y1, x2, y2), where 1 denotes upper left corner, 2 denotes lower right corner
    image_scale = np.array([IMG_W, IMG_H, IMG_W, IMG_H])
    class_labels = []
    box_coords = []
    for obj in data_raw[image_file]:
        class_labels.append(obj['class'])
        box_coords.append(np.array(obj['box_coords']) / image_scale)

    # One confidence slot (and four location slots) per default box over all feature maps
    y_true_len = sum(fm_size[0] * fm_size[1] * NUM_DEFAULT_BOXES for fm_size in FM_SIZES)
    y_true_conf = np.zeros(y_true_len)
    y_true_loc = np.zeros(y_true_len * 4)

    match_counter = 0
    for class_label, gt_box in zip(class_labels, box_coords):
        slot = 0  # flat index of the current default box; restarts for every GT box
        for fm_h, fm_w in FM_SIZES:
            grid_scale = np.array([fm_w, fm_h, fm_w, fm_h])
            for row in range(fm_h):
                for col in range(fm_w):
                    for x1_offset, y1_offset, x2_offset, y2_offset in DEFAULT_BOXES:
                        # Default box in grid units, clipped to the feature map, then
                        # normalised to relative coordinates
                        default_box = np.array([
                            max(0, col + x1_offset),
                            max(0, row + y1_offset),
                            min(fm_w, col + 1 + x2_offset),
                            min(fm_h, row + 1 + y2_offset)
                        ]) / grid_scale

                        # Jaccard overlap (Intersection Over Union) of GT box and default box
                        if calc_iou(gt_box, default_box) >= IOU_THRESH:
                            # Flag the match with the class label and count it
                            y_true_conf[slot] = class_label
                            match_counter += 1

                            # GT corners in grid units, relative to the center of the cell
                            center_x, center_y = col + 0.5, row + 0.5
                            cell_center = np.array([center_x, center_y, center_x, center_y])
                            y_true_loc[slot * 4:slot * 4 + 4] = gt_box * grid_scale - cell_center

                        slot += 1
    print(y_true_conf)
    return y_true_conf, y_true_loc, match_counter