Example #1
def infer_amodal_sup(model,
                     image,
                     inmodal,
                     category,
                     bboxes,
                     use_rgb=True,
                     th=0.5,
                     input_size=None,
                     min_input_size=16,
                     interp='nearest',
                     debug_info=False):
    num = inmodal.shape[0]
    inmodal_patches = []
    amodal_patches = []
    for i in range(num):
        image_patch = utils.crop_padding(image, bboxes[i], pad_value=(0, 0, 0))
        inmodal_patch = utils.crop_padding(inmodal[i],
                                           bboxes[i],
                                           pad_value=(0, ))
        if input_size is not None:
            newsize = input_size
        elif min_input_size > bboxes[i, 2]:
            newsize = min_input_size
        else:
            newsize = None
        if newsize is not None:
            inmodal_patch = resize_mask(inmodal_patch, newsize, interp)
        inmodal_patches.append(inmodal_patch)
        amodal_patches.append(
            net_forward(model, image_patch, inmodal_patch * category[i], None,
                        use_rgb, th))
    if debug_info:
        return inmodal_patches, amodal_patches
    else:
        return amodal_patches
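All the examples on this page revolve around utils.crop_padding(image, bbox, pad_value), and most also call resize_mask; neither helper is listed here. Below is a minimal sketch of compatible implementations, assuming bbox = [x, y, w, h] in pixel coordinates, out-of-range regions filled with pad_value (one value per channel), and the 'linear'/'nearest' convention used by recover_mask in Example #11. The real utils module may differ in details.

import cv2
import numpy as np

def crop_padding(image, bbox, pad_value=(0, )):
    # Crop bbox = [x, y, w, h] from an HW or HWC array; pixels falling
    # outside the image are filled with pad_value.
    x, y, w, h = [int(v) for v in bbox]
    if image.ndim == 2:
        out = np.full((h, w), pad_value[0], dtype=image.dtype)
    else:
        out = np.full((h, w, image.shape[2]), pad_value, dtype=image.dtype)
    sx, sy = max(x, 0), max(y, 0)  # overlap region in image coordinates
    ex, ey = min(x + w, image.shape[1]), min(y + h, image.shape[0])
    if ex > sx and ey > sy:
        out[sy - y:ey - y, sx - x:ex - x] = image[sy:ey, sx:ex]
    return out

def resize_mask(mask, size, interp):
    # Resize a binary mask to (size, size): 'linear' resizes as float and
    # re-binarizes at 0.5; anything else uses nearest neighbor.
    if interp == 'linear':
        return (cv2.resize(mask.astype(np.float32), (size, size),
                           interpolation=cv2.INTER_LINEAR) > 0.5).astype(np.uint8)
    return cv2.resize(mask, (size, size), interpolation=cv2.INTER_NEAREST)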
Example #2
def infer_amodal(model,
                 image,
                 inmodal,
                 category,
                 bboxes,
                 order_matrix,
                 use_rgb=True,
                 th=0.5,
                 dilate_kernel=0,
                 input_size=None,
                 min_input_size=16,
                 interp='nearest',
                 order_grounded=True,
                 debug_info=False):
    num = inmodal.shape[0]
    inmodal_patches = []
    eraser_patches = []
    amodal_patches = []
    for i in range(num):
        if order_grounded:
            ancestors = get_ancestors(order_matrix, i)
        else:
            ancestors = get_neighbors(order_matrix, i)
        image_patch = utils.crop_padding(image, bboxes[i], pad_value=(0, 0, 0))
        inmodal_patch = utils.crop_padding(inmodal[i],
                                           bboxes[i],
                                           pad_value=(0, ))
        if input_size is not None:  # in practice this branch is always taken
            newsize = input_size
        elif min_input_size > bboxes[i, 2]:
            newsize = min_input_size
        else:
            newsize = None
        if newsize is not None:
            inmodal_patch = resize_mask(inmodal_patch, newsize, interp)

        eraser = (inmodal[ancestors, ...].sum(axis=0) > 0).astype(
            np.uint8)  # union
        eraser = utils.crop_padding(eraser, bboxes[i], pad_value=(0, ))
        if newsize is not None:
            eraser = resize_mask(eraser, newsize, interp)
        if dilate_kernel > 0:
            eraser = cv2.dilate(eraser,
                                np.ones((dilate_kernel, dilate_kernel),
                                        np.uint8),
                                iterations=1)
        # erase inmodal
        inmodal_patch[eraser == 1] = 0
        # gather
        inmodal_patches.append(inmodal_patch)
        eraser_patches.append(eraser)
        amodal_patches.append(
            net_forward(model, image_patch, inmodal_patch * category[i],
                        eraser, use_rgb, th))
    if debug_info:
        return inmodal_patches, eraser_patches, amodal_patches
    else:
        return amodal_patches
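Example #2 builds the eraser from get_ancestors (when order_grounded) or get_neighbors, which are not shown either. A plausible sketch, assuming order_matrix[a, b] == 1 encodes "a occludes b" (the convention written by infer_order_sup in Example #4):

import numpy as np

def get_neighbors(order_matrix, idx):
    # every instance with any ordering relation to idx
    return np.where(order_matrix[idx] != 0)[0]

def get_ancestors(order_matrix, idx):
    # transitively collect every instance occluding idx
    ancestors, queue = set(), [idx]
    while queue:
        cur = queue.pop()
        for a in np.where(order_matrix[:, cur] == 1)[0]:
            if a not in ancestors:
                ancestors.add(a)
                queue.append(a)
    return sorted(ancestors)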
Example #3
def infer_instseg(model,
                  image,
                  category,
                  bboxes,
                  new_bboxes,
                  input_size,
                  th,
                  rgb=None):
    num = bboxes.shape[0]
    seg_patches = []
    for i in range(num):
        rel_bbox = [
            bboxes[i, 0] - new_bboxes[i, 0], bboxes[i, 1] - new_bboxes[i, 1],
            bboxes[i, 2], bboxes[i, 3]
        ]
        bbox_mask = np.zeros((new_bboxes[i, 3], new_bboxes[i, 2]),
                             dtype=np.uint8)
        bbox_mask[rel_bbox[1]:rel_bbox[1] + rel_bbox[3],
                  rel_bbox[0]:rel_bbox[0] + rel_bbox[2]] = 1
        bbox_mask = cv2.resize(bbox_mask, (input_size, input_size),
                               interpolation=cv2.INTER_NEAREST)
        bbox_mask_tensor = torch.from_numpy(
            bbox_mask.astype(np.float32) *
            category[i]).unsqueeze(0).unsqueeze(0).cuda()
        image_patch = cv2.resize(utils.crop_padding(image,
                                                    new_bboxes[i],
                                                    pad_value=(0, 0, 0)),
                                 (input_size, input_size),
                                 interpolation=cv2.INTER_CUBIC)
        image_tensor = torch.from_numpy(
            image_patch.transpose(
                (2, 0, 1)).astype(np.float32)).unsqueeze(0).cuda()  # 13HW
        with torch.no_grad():
            output = model.model(
                torch.cat([image_tensor, bbox_mask_tensor], dim=1)).detach()
        if output.shape[2] != image_tensor.shape[2]:
            output = nn.functional.interpolate(output,
                                               size=image_tensor.shape[2:4],
                                               mode="bilinear",
                                               align_corners=True)  # 12HW
        output = nn.functional.softmax(output, dim=1)  # 12HW
        if rgb is not None:
            prob = output[0, ...].cpu().numpy()  # 2HW
            rgb_patch = cv2.resize(utils.crop_padding(rgb,
                                                      new_bboxes[i],
                                                      pad_value=(0, 0, 0)),
                                   (input_size, input_size),
                                   interpolation=cv2.INTER_CUBIC)
            prob_crf = np.array(utils.densecrf(prob,
                                               rgb_patch)).reshape(*prob.shape)
            pred = (prob_crf[1, :, :] > th).astype(np.uint8)  # HW
        else:
            pred = (output[0, 1, :, :] > th).cpu().numpy().astype(
                np.uint8)  # HW
        seg_patches.append(pred)
    return seg_patches
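Example #3 optionally refines the softmax output with a dense CRF via utils.densecrf. The wrapper is not listed; a sketch of what it typically looks like on top of pydensecrf, with illustrative (assumed) pairwise parameters:

import numpy as np
import pydensecrf.densecrf as dcrf
from pydensecrf.utils import unary_from_softmax

def densecrf(prob, rgb):
    # prob: CHW softmax probabilities; rgb: HWC uint8 image
    c, h, w = prob.shape
    crf = dcrf.DenseCRF2D(w, h, c)
    crf.setUnaryEnergy(np.ascontiguousarray(unary_from_softmax(prob)))
    crf.addPairwiseGaussian(sxy=3, compat=3)
    crf.addPairwiseBilateral(sxy=80, srgb=13,
                             rgbim=np.ascontiguousarray(rgb), compat=10)
    return crf.inference(5)  # flat; the caller reshapes back to CHW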
Example #4
def infer_order_sup(model,
                    image,
                    inmodal,
                    bboxes,
                    input_size=256,
                    use_rgb=True):
    num = inmodal.shape[0]
    order_matrix = np.zeros((num, num), dtype=int)  # np.int is removed in NumPy >= 1.24
    for i in range(num):
        for j in range(i + 1, num):
            if bordering(inmodal[i], inmodal[j]):
                bbox = utils.combine_bbox(bboxes[(i, j), :])
                centerx = bbox[0] + bbox[2] / 2.
                centery = bbox[1] + bbox[3] / 2.
                size = max([
                    np.sqrt(bbox[2] * bbox[3] * 2.), bbox[2] * 1.1,
                    bbox[3] * 1.1
                ])
                new_bbox = [int(centerx - size / 2.), int(centery - size / 2.),
                            int(size), int(size)]
                image_patch = cv2.resize(
                    utils.crop_padding(image, new_bbox, pad_value=(0, 0, 0)),
                    (input_size, input_size),
                    interpolation=cv2.INTER_CUBIC)
                modal_i_patch = resize_mask(
                    utils.crop_padding(inmodal[i], new_bbox, pad_value=(0, )),
                    input_size, 'nearest')
                modal_j_patch = resize_mask(
                    utils.crop_padding(inmodal[j], new_bbox, pad_value=(0, )),
                    input_size, 'nearest')
                if np.random.rand() > 0.5:  # randomize the input order
                    j_over_i = net_forward_ordernet(model, image_patch,
                                                    modal_j_patch,
                                                    modal_i_patch, use_rgb)
                else:
                    j_over_i = not net_forward_ordernet(
                        model, image_patch, modal_i_patch, modal_j_patch,
                        use_rgb)
                if j_over_i:
                    order_matrix[i, j] = -1
                    order_matrix[j, i] = 1
                else:
                    order_matrix[i, j] = 1
                    order_matrix[j, i] = -1

    return order_matrix
Example #5
    def __getitem__(self, idx):
        img_fn = self.fns[idx]
        image = np.array(Image.open(img_fn).convert('RGB'))

        # resize w.r.t. short size or long size
        if self.short_size is not None or self.long_size is not None:
            image = utils.image_resize(image,
                                       short_size=self.short_size,
                                       long_size=self.long_size)

        # crop
        centerx = image.shape[1] // 2
        centery = image.shape[0] // 2
        sizex, sizey = self.crop_size
        bbox = [centerx - sizex // 2, centery - sizey // 2, sizex, sizey]  # integer bbox for crop_padding
        image = cv2.resize(utils.crop_padding(image, bbox,
                                              pad_value=(0, 0, 0)),
                           self.crop_size,
                           interpolation=cv2.INTER_CUBIC)

        # transform
        image = torch.from_numpy(image.astype(np.float32)).transpose(
            (2, 0, 1))  # 3HW
        image = self.transforms(image)
        return image
Example #6
    def _get_eraser(self, idx):
        modal, bbox, category, imgfn, _ = self.data_reader.get_instance(idx)
        centerx = bbox[0] + bbox[2] / 2.
        centery = bbox[1] + bbox[3] / 2.
        size = self.config['crop_size']

        # shift & scale aug
        centerx += np.random.uniform(-0.5, 0.5) * size
        centery += np.random.uniform(-0.5, 0.5) * size
        size /= np.random.uniform(0.8, 1.2)

        # crop
        new_bbox = [
            int(centerx - size / 2.),
            int(centery - size / 2.),
            int(size),
            int(size)
        ]
        modal = cv2.resize(
            utils.crop_padding(modal, new_bbox, pad_value=(0, )),
            (self.config['crop_size'], self.config['crop_size']),
            interpolation=cv2.INTER_NEAREST)

        # flip
        if np.random.rand() > 0.5:
            modal = modal[:, ::-1]
        return modal
Example #7
def get_eraser(inst_ind, idx, bbox, input_size):
    inst_ind = inst_ind.numpy()
    bbox = bbox.numpy().tolist()
    eraser = cv2.resize(utils.crop_padding(inst_ind, bbox, pad_value=(0, )),
                        (input_size, input_size),
                        interpolation=cv2.INTER_NEAREST)
    eraser = (eraser == idx + 1)
    return torch.from_numpy(eraser.astype(np.float32)).unsqueeze(0)
Example #8
def recover_image_patch(patch, bbox, h, w, pad_value, interp='cubic'):
    interp = {'cubic': cv2.INTER_CUBIC, 'linear': cv2.INTER_LINEAR,
              'nearest': cv2.INTER_NEAREST}[interp]
    size = bbox[2]
    patch = cv2.resize(patch, (size, size), interpolation=interp)
    woff, hoff = bbox[0], bbox[1]
    newbbox = [-woff, -hoff, w, h]
    return utils.crop_padding(patch, newbbox, pad_value=pad_value)
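recover_image_patch inverts the crop-and-resize pattern used throughout this page: it resizes a processed square patch back to the bbox size, then pastes it onto a blank h x w canvas by cropping with a negated offset. A usage sketch with illustrative values (relies on utils.crop_padding as sketched under Example #1):

import cv2
import numpy as np

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
bbox = [100, 50, 128, 128]  # x, y, w, h -- square, as elsewhere on this page
patch = cv2.resize(utils.crop_padding(image, bbox, pad_value=(0, 0, 0)),
                   (256, 256), interpolation=cv2.INTER_CUBIC)
# ... run a model on the 256x256 patch ...
restored = recover_image_patch(patch, bbox, h=480, w=640, pad_value=(0, 0, 0))
assert restored.shape == image.shape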
Example #9
    def _get_inst(self, idx, load_rgb=False, randshift=False):
        modal, bbox, category, imgfn, amodal = self.data_reader.get_instance(idx, with_gt=True)
        centerx = bbox[0] + bbox[2] / 2.
        centery = bbox[1] + bbox[3] / 2.
        size = max([np.sqrt(bbox[2] * bbox[3] * self.config['enlarge_box']), bbox[2] * 1.1, bbox[3] * 1.1])
        if size < 5 or np.all(modal == 0):
            return self._get_inst(
                np.random.choice(len(self)), load_rgb=load_rgb, randshift=randshift)

        # shift & scale aug
        if self.phase == 'train':
            if randshift:
                centerx += np.random.uniform(*self.config['base_aug']['shift']) * size
                centery += np.random.uniform(*self.config['base_aug']['shift']) * size
            size /= np.random.uniform(*self.config['base_aug']['scale'])

        # crop
        new_bbox = [int(centerx - size / 2.), int(centery - size / 2.), int(size), int(size)]
        modal = cv2.resize(utils.crop_padding(modal, new_bbox, pad_value=(0,)),
            (self.sz, self.sz), interpolation=cv2.INTER_NEAREST)
        amodal = cv2.resize(utils.crop_padding(amodal, new_bbox, pad_value=(0,)),
            (self.sz, self.sz), interpolation=cv2.INTER_NEAREST)

        # flip
        if self.config['base_aug']['flip'] and np.random.rand() > 0.5:
            flip = True
            modal = modal[:, ::-1]
            amodal = amodal[:, ::-1]
        else:
            flip = False

        if load_rgb:
            rgb = np.array(self._load_image(os.path.join(
                self.config['{}_image_root'.format(self.phase)], imgfn))) # uint8
            rgb = cv2.resize(utils.crop_padding(rgb, new_bbox, pad_value=(0,0,0)),
                (self.sz, self.sz), interpolation=cv2.INTER_CUBIC)
            if flip:
                rgb = rgb[:, ::-1, :]
            rgb = torch.from_numpy(rgb.astype(np.float32).transpose((2, 0, 1)) / 255.)
            rgb = self.img_transform(rgb) # CHW

        if load_rgb:
            return modal, amodal, rgb
        else:
            return modal, amodal, None
Example #10
    def _get_pair(self, modal, bboxes, idx1, idx2, imgfn, load_rgb=False, randshift=False):
        bbox = utils.combine_bbox(bboxes[(idx1, idx2), :])
        centerx = bbox[0] + bbox[2] / 2.
        centery = bbox[1] + bbox[3] / 2.
        size = max([np.sqrt(bbox[2] * bbox[3] * 2.), bbox[2] * 1.1, bbox[3] * 1.1])

        # shift & scale aug
        if self.phase == 'train':
            if randshift:
                centerx += np.random.uniform(*self.config['base_aug']['shift']) * size
                centery += np.random.uniform(*self.config['base_aug']['shift']) * size
            size /= np.random.uniform(*self.config['base_aug']['scale'])

        # crop
        new_bbox = [int(centerx - size / 2.), int(centery - size / 2.), int(size), int(size)]
        modal1 = cv2.resize(utils.crop_padding(modal[idx1], new_bbox, pad_value=(0,)),
            (self.sz, self.sz), interpolation=cv2.INTER_NEAREST)
        modal2 = cv2.resize(utils.crop_padding(modal[idx2], new_bbox, pad_value=(0,)),
            (self.sz, self.sz), interpolation=cv2.INTER_NEAREST)

        # flip
        if self.config['base_aug']['flip'] and np.random.rand() > 0.5:
            flip = True
            modal1 = modal1[:, ::-1]
            modal2 = modal2[:, ::-1]
        else:
            flip = False

        if load_rgb:
            rgb = np.array(self._load_image(os.path.join(
                self.config['{}_image_root'.format(self.phase)], imgfn))) # uint8
            rgb = cv2.resize(utils.crop_padding(rgb, new_bbox, pad_value=(0,0,0)),
                (self.sz, self.sz), interpolation=cv2.INTER_CUBIC)
            if flip:
                rgb = rgb[:, ::-1, :]
            rgb = torch.from_numpy(rgb.astype(np.float32).transpose((2, 0, 1)) / 255.)
            rgb = self.img_transform(rgb) # CHW

        if load_rgb:
            return modal1, modal2, rgb
        else:
            return modal1, modal2, None
Example #11
def recover_mask(mask, bbox, h, w, interp):
    size = bbox[2]
    if interp == 'linear':
        mask = (cv2.resize(mask.astype(np.float32), (size, size),
                           interpolation=cv2.INTER_LINEAR) > 0.5).astype(
                               np.uint8)
    else:
        mask = cv2.resize(mask, (size, size), interpolation=cv2.INTER_NEAREST)
    woff, hoff = bbox[0], bbox[1]
    newbbox = [-woff, -hoff, w, h]
    return utils.crop_padding(mask, newbbox, pad_value=(0, ))
Example #12
def to_eraser(inst, bbox, newbbox):
    inst = inst.squeeze(0).numpy()
    final_h, final_w = inst.shape[:2]
    w, h = bbox.numpy()[2:]
    inst = cv2.resize(inst, (w, h), interpolation=cv2.INTER_LINEAR)
    offbbox = [
        newbbox[0] - bbox[0], newbbox[1] - bbox[1], newbbox[2], newbbox[3]
    ]
    eraser = utils.crop_padding(inst, offbbox, pad_value=(0, ))
    eraser = cv2.resize(eraser, (final_w, final_h),
                        interpolation=cv2.INTER_NEAREST)
    #eraser = (eraser >= 0.5).astype(inst.dtype)
    return torch.from_numpy(eraser).unsqueeze(0)
Example #13
def infer_order(model,
                image,
                inmodal,
                category,
                bboxes,
                use_rgb=True,
                th=0.5,
                dilate_kernel=0,
                input_size=None,
                min_input_size=32,
                interp='nearest',
                debug_info=False):
    '''
    image: HW3, inmodal: NHW, category: N, bboxes: N4
    '''
    deal_with_fullcover = False
    num = inmodal.shape[0]
    order_matrix = np.zeros((num, num), dtype=int)  # np.int is removed in NumPy >= 1.24
    ind = []
    if deal_with_fullcover:
        fullcover_inds = []
    for i in range(num):
        for j in range(i + 1, num):
            if bordering(inmodal[i], inmodal[j]):
                ind.append([i, j])
                ind.append([j, i])
            if deal_with_fullcover:
                fullcover = fullcovering(inmodal[i], inmodal[j], bboxes[i],
                                         bboxes[j])
                if fullcover == 1:
                    fullcover_inds.append([i, j])
                elif fullcover == 2:
                    fullcover_inds.append([j, i])
    pairnum = len(ind)
    if pairnum == 0:
        return order_matrix
    ind = np.array(ind)
    eraser_patches = []
    inmodal_patches = []
    amodal_patches = []
    ratios = []
    for i in range(pairnum):
        tid = ind[i, 0]
        eid = ind[i, 1]
        image_patch = utils.crop_padding(image,
                                         bboxes[tid],
                                         pad_value=(0, 0, 0))
        inmodal_patch = utils.crop_padding(inmodal[tid],
                                           bboxes[tid],
                                           pad_value=(0, ))
        if input_size is not None:
            newsize = input_size
        elif min_input_size > bboxes[tid, 2]:
            newsize = min_input_size
        else:
            newsize = None
        if newsize is not None:
            inmodal_patch = resize_mask(inmodal_patch, newsize, interp)
        eraser = utils.crop_padding(inmodal[eid], bboxes[tid], pad_value=(0, ))
        if newsize is not None:
            eraser = resize_mask(eraser, newsize, interp)
        if dilate_kernel > 0:
            eraser = cv2.dilate(eraser,
                                np.ones((dilate_kernel, dilate_kernel),
                                        np.uint8),
                                iterations=1)
        # erase inmodal
        inmodal_patch[eraser == 1] = 0
        # gather
        inmodal_patches.append(inmodal_patch)
        eraser_patches.append(eraser)
        amodal_patches.append(
            net_forward(model, image_patch, inmodal_patch * category[tid],
                        eraser, use_rgb, th))
        ratios.append(1. if newsize is None else bboxes[tid, 2] /
                      float(newsize))

    occ_value_matrix = np.zeros((num, num), dtype=np.float32)
    for i, idx in enumerate(ind):
        occ_value_matrix[idx[0],
                         idx[1]] = (((amodal_patches[i] > inmodal_patches[i]) &
                                     (eraser_patches[i] == 1)).sum() *
                                    (ratios[i]**2))
    order_matrix[occ_value_matrix > occ_value_matrix.transpose()] = -1
    order_matrix[occ_value_matrix < occ_value_matrix.transpose()] = 1
    order_matrix[(occ_value_matrix == 0)
                 & (occ_value_matrix == 0).transpose()] = 0
    if deal_with_fullcover:
        for fc in fullcover_inds:
            assert order_matrix[fc[0], fc[1]] == 0
            order_matrix[fc[0], fc[1]] = -1
            order_matrix[fc[1], fc[0]] = 1
    if debug_info:
        return order_matrix, ind, inmodal_patches, eraser_patches, amodal_patches
    else:
        return order_matrix
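Both infer_order here and infer_order_sup in Example #4 only query the network for instance pairs that touch, as decided by bordering. It is not listed; a plausible adjacency test for binary HW masks, assuming a 3x3 dilation:

import cv2
import numpy as np

def bordering(a, b):
    # dilate one mask by a 3x3 kernel and test for overlap with the other
    dilated = cv2.dilate(a.astype(np.uint8), np.ones((3, 3), np.uint8),
                         iterations=1)
    return bool(((dilated == 1) & (b == 1)).any())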
Example #14
    def moveObject(self, image, move_x, move_y):
        bbox = [-move_x, -move_y, image.shape[1], image.shape[0]]
        return utils.crop_padding(image, bbox, pad_value=(0, 0, 0, 0))
Example #15
    def objectSave(self):
        obj = self.objects[self.this_obj - 1]
        crop_obj = utils.crop_padding(obj,
                                      utils.mask_to_bbox(obj[:, :, 3]),
                                      pad_value=(0, 0, 0, 0))
        self.window().objectSaveAs(crop_obj)
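Example #15 crops an RGBA object to the tight box around its alpha channel. utils.mask_to_bbox is not listed; a minimal sketch returning the [x, y, w, h] format that crop_padding expects:

import numpy as np

def mask_to_bbox(mask):
    # tight [x, y, w, h] box around the nonzero pixels of a binary mask
    ys, xs = np.nonzero(mask)
    if len(xs) == 0:
        return [0, 0, 0, 0]
    return [int(xs.min()), int(ys.min()),
            int(xs.max() - xs.min() + 1), int(ys.max() - ys.min() + 1)]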