def infer_amodal_sup(model, image, inmodal, category, bboxes,
                     use_rgb=True, th=0.5, input_size=None,
                     min_input_size=16, interp='nearest', debug_info=False):
    """Run ``net_forward`` once per instance, without an eraser.

    For every instance ``k`` the image and ``inmodal[k]`` are cropped to
    ``bboxes[k]`` with ``utils.crop_padding``. The mask crop is optionally
    resized and then passed to ``net_forward`` multiplied by ``category[k]``,
    with ``None`` in the eraser slot.

    Resizing rule: ``input_size`` is used whenever it is given; otherwise the
    mask crop is resized to ``min_input_size`` only when ``bboxes[k, 2]`` is
    smaller than it. Only the mask crop is resized in this function; the image
    crop is handed to ``net_forward`` as cropped.

    Returns:
        ``amodal_patches`` (one ``net_forward`` result per instance), or the
        tuple ``(inmodal_patches, amodal_patches)`` when ``debug_info`` is set.
    """
    inmodal_patches, amodal_patches = [], []

    for k in range(inmodal.shape[0]):
        box = bboxes[k]
        image_patch = utils.crop_padding(image, box, pad_value=(0, 0, 0))
        mask_patch = utils.crop_padding(inmodal[k], box, pad_value=(0, ))

        # Decide the working resolution of the mask patch.
        newsize = None
        if input_size is not None:
            newsize = input_size
        elif min_input_size > bboxes[k, 2]:
            newsize = min_input_size

        if newsize is not None:
            mask_patch = resize_mask(mask_patch, newsize, interp)

        inmodal_patches.append(mask_patch)
        prediction = net_forward(model, image_patch, mask_patch * category[k],
                                 None, use_rgb, th)
        amodal_patches.append(prediction)

    if not debug_info:
        return amodal_patches
    return inmodal_patches, amodal_patches
def infer_amodal(model, image, inmodal, category, bboxes, order_matrix, use_rgb=True, th=0.5, dilate_kernel=0, input_size=None, min_input_size=16, interp='nearest', order_grounded=True, debug_info=False): num = inmodal.shape[0] inmodal_patches = [] eraser_patches = [] amodal_patches = [] for i in range(num): if order_grounded: ancestors = get_ancestors(order_matrix, i) else: ancestors = get_neighbors(order_matrix, i) image_patch = utils.crop_padding(image, bboxes[i], pad_value=(0, 0, 0)) inmodal_patch = utils.crop_padding(inmodal[i], bboxes[i], pad_value=(0, )) if input_size is not None: # always newsize = input_size elif min_input_size > bboxes[i, 2]: newsize = min_input_size else: newsize = None if newsize is not None: inmodal_patch = resize_mask(inmodal_patch, newsize, interp) eraser = (inmodal[ancestors, ...].sum(axis=0) > 0).astype( np.uint8) # union eraser = utils.crop_padding(eraser, bboxes[i], pad_value=(0, )) if newsize is not None: eraser = resize_mask(eraser, newsize, interp) if dilate_kernel > 0: eraser = cv2.dilate(eraser, np.ones((dilate_kernel, dilate_kernel), np.uint8), iterations=1) # erase inmodal inmodal_patch[eraser == 1] = 0 # gather inmodal_patches.append(inmodal_patch) eraser_patches.append(eraser) amodal_patches.append( net_forward(model, image_patch, inmodal_patch * category[i], eraser, use_rgb, th)) if debug_info: return inmodal_patches, eraser_patches, amodal_patches else: return amodal_patches
def infer_instseg(model, image, category, bboxes, new_bboxes, input_size, th,
                  rgb=None):
    """Predict one binary segmentation patch per box.

    For each index ``k`` a rectangle mask is drawn where ``bboxes[k]`` lies
    inside ``new_bboxes[k]``, resized to ``input_size`` and scaled by
    ``category[k]``. It is concatenated with the resized image crop and fed to
    ``model.model``. The output is soft-maxed over dim 1 and channel 1 is
    thresholded with ``th``. When ``rgb`` is given, the probabilities are first
    passed through ``utils.densecrf`` together with the matching ``rgb`` crop.

    Tensors are moved to the GPU with ``.cuda()``.

    Returns:
        A list with one ``uint8`` array per box.
    """
    target = (input_size, input_size)
    seg_patches = []

    for k in range(bboxes.shape[0]):
        # Location of the tight box relative to the enlarged crop.
        off_x = bboxes[k, 0] - new_bboxes[k, 0]
        off_y = bboxes[k, 1] - new_bboxes[k, 1]
        box_w = bboxes[k, 2]
        box_h = bboxes[k, 3]

        box_mask = np.zeros((new_bboxes[k, 3], new_bboxes[k, 2]), dtype=np.uint8)
        box_mask[off_y:off_y + box_h, off_x:off_x + box_w] = 1
        box_mask = cv2.resize(box_mask, target, interpolation=cv2.INTER_NEAREST)
        mask_tensor = torch.from_numpy(box_mask.astype(np.float32) * category[k])
        mask_tensor = mask_tensor.unsqueeze(0).unsqueeze(0).cuda()

        image_crop = utils.crop_padding(image, new_bboxes[k], pad_value=(0, 0, 0))
        image_patch = cv2.resize(image_crop, target, interpolation=cv2.INTER_CUBIC)
        chw = image_patch.transpose((2, 0, 1)).astype(np.float32)
        image_tensor = torch.from_numpy(chw).unsqueeze(0).cuda()  # 13HW

        with torch.no_grad():
            net_input = torch.cat([image_tensor, mask_tensor], dim=1)
            output = model.model(net_input).detach()

        if output.shape[2] != image_tensor.shape[2]:
            output = nn.functional.interpolate(output,
                                               size=image_tensor.shape[2:4],
                                               mode="bilinear",
                                               align_corners=True)  # 12HW
        output = nn.functional.softmax(output, dim=1)  # 12HW

        if rgb is None:
            pred = (output[0, 1, :, :] > th).cpu().numpy().astype(np.uint8)  # HW
        else:
            prob = output[0, ...].cpu().numpy()  # 2HW
            rgb_crop = utils.crop_padding(rgb, new_bboxes[k], pad_value=(0, 0, 0))
            rgb_patch = cv2.resize(rgb_crop, target, interpolation=cv2.INTER_CUBIC)
            prob_crf = np.array(utils.densecrf(prob, rgb_patch)).reshape(*prob.shape)
            pred = (prob_crf[1, :, :] > th).astype(np.uint8)  # HW

        seg_patches.append(pred)

    return seg_patches
def infer_order_sup(model, image, inmodal, bboxes, input_size=256, use_rgb=True):
    """Build a pairwise order matrix by querying an order network.

    Every pair ``(i, j)`` with ``i < j`` for which
    ``bordering(inmodal[i], inmodal[j])`` is true is evaluated: a square crop
    around the combined box of both instances is taken from the image and
    from both masks, resized to ``input_size`` and passed to
    ``net_forward_ordernet``. The order in which the two masks are fed is
    randomised with ``np.random.rand()``, and the answer is negated when the
    masks are passed in ``(i, j)`` order.

    Args:
        model: passed through to ``net_forward_ordernet``.
        image: image to crop from.
        inmodal: per-instance masks; ``inmodal.shape[0]`` is the instance count.
        bboxes: per-instance boxes; rows are combined with
            ``utils.combine_bbox`` and read as ``(x, y, w, h)``.
        input_size: side length of the square network input.
        use_rgb: passed through to ``net_forward_ordernet``.

    Returns:
        An integer ``(num, num)`` array. For an evaluated pair,
        ``order_matrix[i, j] == -1`` and ``order_matrix[j, i] == 1`` when the
        network says ``j`` is over ``i``, and the opposite signs otherwise.
        Pairs that do not border stay ``0``.
    """
    num = inmodal.shape[0]
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is what the
    # alias pointed to, so the resulting dtype is unchanged.
    order_matrix = np.zeros((num, num), dtype=int)

    for i in range(num):
        for j in range(i + 1, num):
            if not bordering(inmodal[i], inmodal[j]):
                continue

            # Square crop centred on the union box, enlarged for context.
            bbox = utils.combine_bbox(bboxes[(i, j), :])
            centerx = bbox[0] + bbox[2] / 2.
            centery = bbox[1] + bbox[3] / 2.
            size = max([
                np.sqrt(bbox[2] * bbox[3] * 2.),
                bbox[2] * 1.1,
                bbox[3] * 1.1,
            ])
            new_bbox = [int(centerx - size / 2.), int(centery - size / 2.),
                        int(size), int(size)]

            image_patch = cv2.resize(
                utils.crop_padding(image, new_bbox, pad_value=(0, 0, 0)),
                (input_size, input_size),
                interpolation=cv2.INTER_CUBIC)
            modal_i_patch = resize_mask(
                utils.crop_padding(inmodal[i], new_bbox, pad_value=(0, )),
                input_size, 'nearest')
            modal_j_patch = resize_mask(
                utils.crop_padding(inmodal[j], new_bbox, pad_value=(0, )),
                input_size, 'nearest')

            if np.random.rand() > 0.5:  # randomize the input order
                j_over_i = net_forward_ordernet(model, image_patch,
                                                modal_j_patch, modal_i_patch,
                                                use_rgb)
            else:
                j_over_i = not net_forward_ordernet(model, image_patch,
                                                    modal_i_patch,
                                                    modal_j_patch, use_rgb)

            if j_over_i:
                order_matrix[i, j] = -1
                order_matrix[j, i] = 1
            else:
                order_matrix[i, j] = 1
                order_matrix[j, i] = -1

    return order_matrix
def __getitem__(self, idx):
    """Load image ``idx``, resize and centre-crop it, and return it as a tensor.

    Steps:
      1. Open ``self.fns[idx]`` and convert it to an RGB array.
      2. If ``self.short_size`` or ``self.long_size`` is set, resize with
         ``utils.image_resize``.
      3. Crop a ``self.crop_size`` window around the image centre with
         ``utils.crop_padding`` (zero padding), then ``cv2.resize`` it to
         ``self.crop_size``.
      4. Convert to a float32 tensor in channel-first layout and apply
         ``self.transforms``.

    Returns:
        Whatever ``self.transforms`` returns for the channel-first tensor.
    """
    img_fn = self.fns[idx]
    image = np.array(Image.open(img_fn).convert('RGB'))

    # resize w.r.t. short size or long size
    if self.short_size is not None or self.long_size is not None:
        image = utils.image_resize(image,
                                   short_size=self.short_size,
                                   long_size=self.long_size)

    # crop
    centerx = image.shape[1] // 2
    centery = image.shape[0] // 2
    sizex, sizey = self.crop_size
    # NOTE(review): the box origin can be fractional here; presumably
    # utils.crop_padding casts to int -- confirm.
    bbox = [centerx - sizex / 2, centery - sizey / 2, sizex, sizey]
    image = cv2.resize(utils.crop_padding(image, bbox, pad_value=(0, 0, 0)),
                       self.crop_size,
                       interpolation=cv2.INTER_CUBIC)

    # transform: permute HWC -> CHW on the ndarray. ``Tensor.transpose`` only
    # swaps two dims and rejects a 3-tuple, so the permutation cannot be done
    # after ``torch.from_numpy``.
    image = torch.from_numpy(image.astype(np.float32).transpose((2, 0, 1)))  # 3HW
    image = self.transforms(image)
    return image
def _get_eraser(self, idx):
    """Return a randomly shifted, scaled and flipped mask crop for ``idx``.

    The mask and box come from ``self.data_reader.get_instance(idx)``. A
    square window of side ``self.config['crop_size']`` is centred on the box,
    jittered by up to half its side in x and y, rescaled by a factor drawn
    from ``[0.8, 1.2]``, cropped with zero padding and resized back to
    ``crop_size`` with nearest-neighbour interpolation. The result is
    mirrored horizontally with probability 0.5.
    """
    modal, bbox, _category, _imgfn, _ = self.data_reader.get_instance(idx)
    out_side = self.config['crop_size']

    centerx = bbox[0] + bbox[2] / 2.
    centery = bbox[1] + bbox[3] / 2.
    size = out_side

    # shift & scale aug (draw order: x shift, y shift, scale)
    centerx += np.random.uniform(-0.5, 0.5) * size
    centery += np.random.uniform(-0.5, 0.5) * size
    size /= np.random.uniform(0.8, 1.2)

    # crop
    half = size / 2.
    new_bbox = [int(centerx - half), int(centery - half), int(size), int(size)]
    cropped = utils.crop_padding(modal, new_bbox, pad_value=(0, ))
    modal = cv2.resize(cropped, (out_side, out_side),
                       interpolation=cv2.INTER_NEAREST)

    # flip
    mirror = np.random.rand() > 0.5
    return modal[:, ::-1] if mirror else modal
def get_eraser(inst_ind, idx, bbox, input_size):
    """Extract the binary mask of label ``idx + 1`` inside ``bbox``.

    ``inst_ind`` and ``bbox`` are converted with ``.numpy()``. The label map
    is cropped to the box with zero padding and resized to
    ``(input_size, input_size)`` with nearest-neighbour interpolation. Pixels
    equal to ``idx + 1`` become 1.0 and all others 0.0.

    Returns:
        A float32 tensor with a leading singleton dimension added.
    """
    label_map = inst_ind.numpy()
    roi = bbox.numpy().tolist()

    cropped = utils.crop_padding(label_map, roi, pad_value=(0, ))
    resized = cv2.resize(cropped, (input_size, input_size),
                         interpolation=cv2.INTER_NEAREST)

    selected = (resized == idx + 1).astype(np.float32)
    return torch.from_numpy(selected).unsqueeze(0)
def recover_image_patch(patch, bbox, h, w, pad_value, interp='cubic'):
    """Paste a square patch back onto an ``h`` x ``w`` canvas.

    The patch is resized to ``bbox[2]`` x ``bbox[2]`` and then placed with
    ``utils.crop_padding`` using the box ``[-bbox[0], -bbox[1], w, h]``, with
    ``pad_value`` filling the rest.

    Args:
        interp: one of ``'cubic'``, ``'linear'`` or ``'nearest'``. Any other
            key raises ``KeyError``.
    """
    cv_modes = {
        'cubic': cv2.INTER_CUBIC,
        'linear': cv2.INTER_LINEAR,
        'nearest': cv2.INTER_NEAREST,
    }
    mode = cv_modes[interp]

    side = bbox[2]
    resized = cv2.resize(patch, (side, side), interpolation=mode)

    # Negative offsets turn the crop into a paste at (bbox[0], bbox[1]).
    canvas_box = [-bbox[0], -bbox[1], w, h]
    return utils.crop_padding(resized, canvas_box, pad_value=pad_value)
def _get_inst(self, idx, load_rgb=False, randshift=False):
    """Return square crops of one instance's modal and amodal masks.

    The instance is read with ``self.data_reader.get_instance(idx,
    with_gt=True)``. The crop window is centred on the box, with side
    ``max(sqrt(w * h * enlarge_box), 1.1 * w, 1.1 * h)``. If that side is
    below 5 or the modal mask is all zero, a random other index is drawn and
    the method calls itself again.

    During training the window is rescaled by a random factor from
    ``config['base_aug']['scale']``, and also shifted when ``randshift`` is
    set. Both masks are resized to ``(self.sz, self.sz)`` with
    nearest-neighbour interpolation and optionally mirrored.

    Returns:
        ``(modal, amodal, rgb)``. ``rgb`` is the matching image crop scaled
        by ``1 / 255``, in CHW order and passed through
        ``self.img_transform`` when ``load_rgb`` is set, else ``None``.
    """
    modal, bbox, category, imgfn, amodal = self.data_reader.get_instance(
        idx, with_gt=True)
    centerx = bbox[0] + bbox[2] / 2.
    centery = bbox[1] + bbox[3] / 2.
    size = max([np.sqrt(bbox[2] * bbox[3] * self.config['enlarge_box']),
                bbox[2] * 1.1,
                bbox[3] * 1.1])

    # Degenerate instance: retry with a randomly chosen one.
    if size < 5 or np.all(modal == 0):
        return self._get_inst(np.random.choice(len(self)),
                              load_rgb=load_rgb, randshift=randshift)

    # shift & scale aug
    if self.phase == 'train':
        aug = self.config['base_aug']
        if randshift:
            centerx += np.random.uniform(*aug['shift']) * size
            centery += np.random.uniform(*aug['shift']) * size
        size /= np.random.uniform(*aug['scale'])

    # crop
    new_bbox = [int(centerx - size / 2.), int(centery - size / 2.),
                int(size), int(size)]
    out_size = (self.sz, self.sz)
    modal = cv2.resize(utils.crop_padding(modal, new_bbox, pad_value=(0,)),
                       out_size, interpolation=cv2.INTER_NEAREST)
    amodal = cv2.resize(utils.crop_padding(amodal, new_bbox, pad_value=(0,)),
                        out_size, interpolation=cv2.INTER_NEAREST)

    # flip (the random draw only happens when flipping is enabled)
    flip = bool(self.config['base_aug']['flip'] and np.random.rand() > 0.5)
    if flip:
        modal = modal[:, ::-1]
        amodal = amodal[:, ::-1]

    rgb = None
    if load_rgb:
        image_root = self.config['{}_image_root'.format(self.phase)]
        rgb = np.array(self._load_image(os.path.join(image_root, imgfn)))  # uint8
        rgb = cv2.resize(utils.crop_padding(rgb, new_bbox, pad_value=(0,0,0)),
                         out_size, interpolation=cv2.INTER_CUBIC)
        if flip:
            rgb = rgb[:, ::-1, :]
        rgb = torch.from_numpy(rgb.astype(np.float32).transpose((2, 0, 1)) / 255.)
        rgb = self.img_transform(rgb)  # CHW

    return modal, amodal, rgb
def _get_pair(self, modal, bboxes, idx1, idx2, imgfn, load_rgb=False, randshift=False):
    """Return square crops of two instance masks sharing one crop window.

    The window is centred on ``utils.combine_bbox`` of the two boxes, with
    side ``max(sqrt(w * h * 2), 1.1 * w, 1.1 * h)``. During training it is
    rescaled by a random factor from ``config['base_aug']['scale']``, and
    also shifted when ``randshift`` is set. Both masks are resized to
    ``(self.sz, self.sz)`` with nearest-neighbour interpolation and
    optionally mirrored together.

    Returns:
        ``(modal1, modal2, rgb)``. ``rgb`` is the matching image crop scaled
        by ``1 / 255``, in CHW order and passed through
        ``self.img_transform`` when ``load_rgb`` is set, else ``None``.
    """
    bbox = utils.combine_bbox(bboxes[(idx1, idx2), :])
    centerx = bbox[0] + bbox[2] / 2.
    centery = bbox[1] + bbox[3] / 2.
    size = max([np.sqrt(bbox[2] * bbox[3] * 2.),
                bbox[2] * 1.1,
                bbox[3] * 1.1])

    # shift & scale aug
    if self.phase == 'train':
        aug = self.config['base_aug']
        if randshift:
            centerx += np.random.uniform(*aug['shift']) * size
            centery += np.random.uniform(*aug['shift']) * size
        size /= np.random.uniform(*aug['scale'])

    # crop
    new_bbox = [int(centerx - size / 2.), int(centery - size / 2.),
                int(size), int(size)]
    out_size = (self.sz, self.sz)
    modal1 = cv2.resize(utils.crop_padding(modal[idx1], new_bbox, pad_value=(0,)),
                        out_size, interpolation=cv2.INTER_NEAREST)
    modal2 = cv2.resize(utils.crop_padding(modal[idx2], new_bbox, pad_value=(0,)),
                        out_size, interpolation=cv2.INTER_NEAREST)

    # flip (the random draw only happens when flipping is enabled)
    flip = bool(self.config['base_aug']['flip'] and np.random.rand() > 0.5)
    if flip:
        modal1 = modal1[:, ::-1]
        modal2 = modal2[:, ::-1]

    rgb = None
    if load_rgb:
        image_root = self.config['{}_image_root'.format(self.phase)]
        rgb = np.array(self._load_image(os.path.join(image_root, imgfn)))  # uint8
        rgb = cv2.resize(utils.crop_padding(rgb, new_bbox, pad_value=(0,0,0)),
                         out_size, interpolation=cv2.INTER_CUBIC)
        if flip:
            rgb = rgb[:, ::-1, :]
        rgb = torch.from_numpy(rgb.astype(np.float32).transpose((2, 0, 1)) / 255.)
        rgb = self.img_transform(rgb)  # CHW

    return modal1, modal2, rgb
def recover_mask(mask, bbox, h, w, interp):
    """Paste a square mask patch back onto an ``h`` x ``w`` canvas.

    The patch is resized to ``bbox[2]`` x ``bbox[2]``. With
    ``interp == 'linear'`` it is resized as float32 with bilinear
    interpolation and re-binarised at 0.5 to ``uint8``; any other value uses
    nearest-neighbour on the mask as given. The result is placed with
    ``utils.crop_padding`` using the box ``[-bbox[0], -bbox[1], w, h]`` and
    zero padding.
    """
    side = bbox[2]
    target = (side, side)

    if interp != 'linear':
        restored = cv2.resize(mask, target, interpolation=cv2.INTER_NEAREST)
    else:
        soft = cv2.resize(mask.astype(np.float32), target,
                          interpolation=cv2.INTER_LINEAR)
        restored = (soft > 0.5).astype(np.uint8)

    # Negative offsets turn the crop into a paste at (bbox[0], bbox[1]).
    canvas_box = [-bbox[0], -bbox[1], w, h]
    return utils.crop_padding(restored, canvas_box, pad_value=(0, ))
def to_eraser(inst, bbox, newbbox):
    """Re-express a mask given in ``bbox`` coordinates inside ``newbbox``.

    ``inst`` has its leading dimension squeezed and is converted with
    ``.numpy()``. It is resized bilinearly to the width and height stored in
    ``bbox[2:]``, cropped (zero padded) to ``newbbox`` relative to ``bbox``'s
    origin, and resized back to its original spatial size with
    nearest-neighbour interpolation.

    Returns:
        A tensor with a leading singleton dimension added. Values are not
        re-binarised after the bilinear resize.
    """
    patch = inst.squeeze(0).numpy()
    final_h, final_w = patch.shape[:2]

    w, h = bbox.numpy()[2:]
    patch = cv2.resize(patch, (w, h), interpolation=cv2.INTER_LINEAR)

    # newbbox expressed relative to the top-left corner of bbox.
    rel_x = newbbox[0] - bbox[0]
    rel_y = newbbox[1] - bbox[1]
    offbbox = [rel_x, rel_y, newbbox[2], newbbox[3]]

    eraser = utils.crop_padding(patch, offbbox, pad_value=(0, ))
    eraser = cv2.resize(eraser, (final_w, final_h),
                        interpolation=cv2.INTER_NEAREST)
    return torch.from_numpy(eraser).unsqueeze(0)
def infer_order(model, image, inmodal, category, bboxes, use_rgb=True, th=0.5,
                dilate_kernel=0, input_size=None, min_input_size=32,
                interp='nearest', debug_info=False):
    '''
    Infer pairwise occlusion order by completing each mask against its neighbour.

    image: HW3, inmodal: NHW, category: N, bboxes: N4

    For every bordering pair, both directions ``(target, eraser)`` are run:
    the target's mask is cropped to its own box, the other instance's mask is
    used as eraser, and ``net_forward`` completes the target. The number of
    pixels the completion adds inside the eraser, rescaled by the squared
    resize ratio, is stored in an occlusion-value matrix. The order matrix is
    ``-1`` where ``occ[i, j] > occ[j, i]``, ``1`` where it is smaller, and
    ``0`` where both values are zero.

    Resizing rule: ``input_size`` is used whenever it is given; otherwise
    patches are resized to ``min_input_size`` only when the target box's
    ``bboxes[tid, 2]`` is smaller than it.

    Returns:
        The integer ``(N, N)`` order matrix, or
        ``(order_matrix, ind, inmodal_patches, eraser_patches, amodal_patches)``
        when ``debug_info`` is set. When no pair borders, the bare all-zero
        order matrix is returned even if ``debug_info`` is set; this is kept
        unchanged for backward compatibility.
    '''
    # Switch for the full-cover special case; disabled, kept for reference.
    deal_with_fullcover = False
    num = inmodal.shape[0]
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is what the
    # alias pointed to, so the resulting dtype is unchanged.
    order_matrix = np.zeros((num, num), dtype=int)

    ind = []
    if deal_with_fullcover:
        fullcover_inds = []
    for i in range(num):
        for j in range(i + 1, num):
            if bordering(inmodal[i], inmodal[j]):
                # Evaluate both directions of every bordering pair.
                ind.append([i, j])
                ind.append([j, i])
            if deal_with_fullcover:
                fullcover = fullcovering(inmodal[i], inmodal[j],
                                         bboxes[i], bboxes[j])
                if fullcover == 1:
                    fullcover_inds.append([i, j])
                elif fullcover == 2:
                    fullcover_inds.append([j, i])

    pairnum = len(ind)
    if pairnum == 0:
        return order_matrix
    ind = np.array(ind)

    eraser_patches = []
    inmodal_patches = []
    amodal_patches = []
    ratios = []
    for i in range(pairnum):
        tid = ind[i, 0]  # target: the instance being completed
        eid = ind[i, 1]  # eraser: the instance assumed to occlude it
        image_patch = utils.crop_padding(image, bboxes[tid],
                                         pad_value=(0, 0, 0))
        inmodal_patch = utils.crop_padding(inmodal[tid], bboxes[tid],
                                           pad_value=(0, ))

        if input_size is not None:
            newsize = input_size
        elif min_input_size > bboxes[tid, 2]:
            newsize = min_input_size
        else:
            newsize = None

        if newsize is not None:
            inmodal_patch = resize_mask(inmodal_patch, newsize, interp)

        eraser = utils.crop_padding(inmodal[eid], bboxes[tid], pad_value=(0, ))
        if newsize is not None:
            eraser = resize_mask(eraser, newsize, interp)
        if dilate_kernel > 0:
            eraser = cv2.dilate(eraser,
                                np.ones((dilate_kernel, dilate_kernel),
                                        np.uint8),
                                iterations=1)

        # erase inmodal
        inmodal_patch[eraser == 1] = 0

        # gather
        inmodal_patches.append(inmodal_patch)
        eraser_patches.append(eraser)
        amodal_patches.append(
            net_forward(model, image_patch, inmodal_patch * category[tid],
                        eraser, use_rgb, th))
        # Ratio between the box size and the working resolution, used to
        # rescale pixel counts measured on resized patches.
        ratios.append(1. if newsize is None
                      else bboxes[tid, 2] / float(newsize))

    occ_value_matrix = np.zeros((num, num), dtype=np.float32)
    for i, idx in enumerate(ind):
        # Pixels added by the completion that fall inside the eraser.
        grown_in_eraser = ((amodal_patches[i] > inmodal_patches[i]) &
                           (eraser_patches[i] == 1)).sum()
        occ_value_matrix[idx[0], idx[1]] = grown_in_eraser * (ratios[i]**2)

    occ_t = occ_value_matrix.transpose()
    order_matrix[occ_value_matrix > occ_t] = -1
    order_matrix[occ_value_matrix < occ_t] = 1
    order_matrix[(occ_value_matrix == 0) & (occ_value_matrix == 0).transpose()] = 0

    if deal_with_fullcover:
        for fc in fullcover_inds:
            assert order_matrix[fc[0], fc[1]] == 0
            order_matrix[fc[0], fc[1]] = -1
            order_matrix[fc[1], fc[0]] = 1

    if debug_info:
        return order_matrix, ind, inmodal_patches, eraser_patches, amodal_patches
    else:
        return order_matrix
def moveObject(self, image, move_x, move_y):
    """Translate ``image`` by ``(move_x, move_y)`` on a same-sized canvas.

    Implemented as a ``utils.crop_padding`` call with the box origin at
    ``(-move_x, -move_y)`` and the image's own width and height. Uncovered
    pixels are filled with the four-channel pad value ``(0, 0, 0, 0)``.
    """
    height, width = image.shape[0], image.shape[1]
    shifted_box = [-move_x, -move_y, width, height]
    return utils.crop_padding(image, shifted_box, pad_value=(0, 0, 0, 0))
def objectSave(self):
    """Crop the current object to the bounding box of its 4th channel and save it.

    The object is ``self.objects[self.this_obj - 1]``. Its crop box comes
    from ``utils.mask_to_bbox`` applied to channel index 3, and the cropped
    result is handed to ``self.window().objectSaveAs``.
    """
    current = self.objects[self.this_obj - 1]
    tight_box = utils.mask_to_bbox(current[:, :, 3])
    cropped = utils.crop_padding(current, tight_box, pad_value=(0, 0, 0, 0))
    self.window().objectSaveAs(cropped)