def __call__(self, sample):
    """Crop ``sample`` around one randomly chosen non-ignored instance.

    A single object is sampled from ``sample['instances_info']``, a bbox
    fitted to the crop aspect ratio is expanded by a random factor in
    ``[self.min_expand, self.max_expand]``, clamped to the image, and the
    image/mask are cropped and resized to ``self.crop_size``.
    Mutates and returns ``sample``.
    """
    valid_objects = {
        obj_id: info
        for obj_id, info in sample['instances_info'].items()
        if not info['ignore']
    }
    chosen_id, chosen_info = random.choice(list(valid_objects.items()))
    sample['instances_info'] = {chosen_id: chosen_info}

    target_mask = sample['instances_mask'] == chosen_id
    crop_h, crop_w = self.crop_size

    bbox = get_bbox_from_mask(target_mask)
    bbox = fit_bbox_ratio(bbox, crop_h / crop_w)
    expand_k = np.random.uniform(self.min_expand, self.max_expand)
    bbox = expand_bbox(bbox, expand_ratio=expand_k)
    img_h, img_w = sample['image'].shape[0], sample['image'].shape[1]
    bbox = clamp_bbox(bbox, 0, img_h - 1, 0, img_w - 1)

    # bbox layout is (row_min, row_max, col_min, col_max), inclusive.
    row_slice = slice(bbox[0], bbox[1] + 1)
    col_slice = slice(bbox[2], bbox[3] + 1)
    sample['image'] = sample['image'][row_slice, col_slice, :]
    sample['instances_mask'] = sample['instances_mask'][row_slice, col_slice]

    # cv2.resize takes (width, height); nearest-neighbor keeps mask labels intact.
    sample['image'] = cv2.resize(sample['image'], (crop_w, crop_h))
    sample['instances_mask'] = cv2.resize(sample['instances_mask'],
                                          (crop_w, crop_h),
                                          interpolation=cv2.INTER_NEAREST)
    return sample
def get_object_roi(pred_mask, clicks_list, expansion_ratio, min_crop_size):
    """Compute a region of interest covering the predicted mask and all positive clicks.

    Positive click locations are stamped into a copy of the mask so the ROI
    is guaranteed to contain them; the resulting bbox is then expanded and
    clamped to the mask bounds. Returns (row_min, row_max, col_min, col_max).
    """
    mask = pred_mask.copy()
    for click in clicks_list:
        if click.is_positive:
            row, col = int(click.coords[0]), int(click.coords[1])
            mask[row, col] = 1

    roi = get_bbox_from_mask(mask)
    roi = expand_bbox(roi, expansion_ratio, min_crop_size)
    height, width = mask.shape[0], mask.shape[1]
    return clamp_bbox(roi, 0, height - 1, 0, width - 1)
def get_params_dependent_on_targets(self, params):
    """Pick a random object with area above ``self.min_area`` and build its crop bbox.

    Supports both a flat label mask and a layered (H, W, L) mask, where a
    candidate is a (layer_index, object_id) pair. Returns a dict with the
    selected object (or None) and the expanded, jittered, clamped bbox
    (or None when there are no candidates).
    """
    instances = params['mask']
    layered = len(instances.shape) > 2

    # Collect every object large enough to be selected.
    if layered:
        candidates = []
        for layer in range(instances.shape[2]):
            labels, areas = get_labels_with_sizes(instances[:, :, layer])
            candidates += [(layer, obj_id)
                           for obj_id, area in zip(labels, areas)
                           if area > self.min_area]
    else:
        labels, areas = get_labels_with_sizes(instances)
        candidates = [obj_id for obj_id, area in zip(labels, areas)
                      if area > self.min_area]

    selected_object, bbox = None, None
    if candidates:
        selected_object = random.choice(candidates)
        if layered:
            layer, mask_id = selected_object
            obj_mask = instances[:, :, layer] == mask_id
        else:
            obj_mask = instances == selected_object

        # A tuple expansion_ratio means "sample uniformly from this range".
        if isinstance(self.expansion_ratio, tuple):
            ratio = random.uniform(*self.expansion_ratio)
        else:
            ratio = self.expansion_ratio

        bbox = get_bbox_from_mask(obj_mask)
        bbox = expand_bbox(bbox, ratio, self.min_crop_size)
        bbox = self._jitter_bbox(bbox)
        bbox = clamp_bbox(bbox, 0, obj_mask.shape[0] - 1,
                          0, obj_mask.shape[1] - 1)

    return {'selected_object': selected_object, 'bbox': bbox}
def remove_buggy_masks(self, index, instances_mask):
    """Zero out objects whose mask fills too little of its own bounding box.

    A very low mask-area / bbox-area ratio usually indicates a corrupted
    annotation. The per-image list of buggy object ids is computed once and
    cached in ``self._buggy_objects`` under ``index``. Disabled entirely
    when ``self._buggy_mask_thresh`` is 0. NOTE: mutates ``instances_mask``
    in place and also returns it.
    """
    if self._buggy_mask_thresh <= 0.0:
        return instances_mask

    buggy_ids = self._buggy_objects.get(index, None)
    if buggy_ids is None:
        buggy_ids = []
        obj_ids, _ = get_labels_with_sizes(instances_mask)
        for obj_id in obj_ids:
            obj_mask = instances_mask == obj_id
            bbox = get_bbox_from_mask(obj_mask)
            bbox_area = (bbox[1] - bbox[0] + 1) * (bbox[3] - bbox[2] + 1)
            if obj_mask.sum() / bbox_area < self._buggy_mask_thresh:
                buggy_ids.append(obj_id)
        self._buggy_objects[index] = buggy_ids

    for obj_id in buggy_ids:
        instances_mask[instances_mask == obj_id] = 0

    return instances_mask
def create_annotations(lvis_path: Path, coco_path: Path, dataset_split='train', min_object_area=80):
    """Merge LVIS and COCO annotations into per-image hierarchical mask pickles.

    For every image present in both datasets this collects LVIS instance
    masks and COCO instance masks (each at least ``min_object_area`` pixels),
    drops COCO masks that closely duplicate an LVIS mask, deduplicates LVIS
    masks against each other, builds a containment hierarchy over the kept
    instance masks, and appends large COCO semantic regions not covered by
    any instance mask. Encoded masks are written to
    ``<lvis_path>/<split>/masks/<image>.pickle`` and a single index to
    ``<lvis_path>/<split>/hannotation.pickle``.
    """
    lvis_dataset = LvisDataset(lvis_path, split=dataset_split)
    lvis_samples = lvis_dataset.dataset_samples
    lvis_annotations = lvis_dataset.annotations

    coco_dataset = CocoDataset(coco_path, split=dataset_split + '2017')

    # Match COCO samples to LVIS samples by the image's base file name.
    name_to_lvis_indx = {
        x['coco_url'].split('/')[-1].split('.')[0]: lvis_indx
        for lvis_indx, x in enumerate(lvis_samples)
    }
    coco_lvis_mapping = []
    for coco_indx, coco_sample in enumerate(coco_dataset.dataset_samples):
        lvis_indx = name_to_lvis_indx.get(coco_sample['file_name'].split('.')[0], None)
        if lvis_indx is not None:
            coco_lvis_mapping.append((coco_indx, lvis_indx))

    output_masks_path = lvis_path / dataset_split / 'masks'
    output_masks_path.mkdir(parents=True, exist_ok=True)

    hlvis_annotation = dict()
    for coco_indx, lvis_indx in tqdm(coco_lvis_mapping):
        coco_sample = get_coco_sample(coco_dataset, coco_indx)
        lvis_info = lvis_samples[lvis_indx]
        lvis_annotation = lvis_annotations[lvis_info['id']]
        empty_mask = np.zeros((lvis_info['height'], lvis_info['width']))
        image_name = lvis_info['coco_url'].split('/')[-1].split('.')[0]

        # LVIS instance masks above the minimum area, with their bboxes.
        lvis_masks, lvis_bboxes = [], []
        for obj_annotation in lvis_annotation:
            obj_mask = lvis_dataset.get_mask_from_polygon(obj_annotation, empty_mask)
            obj_mask = obj_mask == 1
            if obj_mask.sum() >= min_object_area:
                lvis_masks.append(obj_mask)
                lvis_bboxes.append(get_bbox_from_mask(obj_mask))

        # COCO instance masks above the minimum area, with their bboxes.
        coco_masks, coco_bboxes = [], []
        for inst_id in coco_sample['instances_info'].keys():
            obj_mask = coco_sample['instances_mask'] == inst_id
            if obj_mask.sum() >= min_object_area:
                coco_masks.append(obj_mask)
                coco_bboxes.append(get_bbox_from_mask(obj_mask))

        masks = []
        # Keep a COCO mask only if no LVIS mask closely duplicates it.
        for coco_j, coco_bbox in enumerate(coco_bboxes):
            for lvis_i, lvis_bbox in enumerate(lvis_bboxes):
                if get_bbox_iou(lvis_bbox, coco_bbox) > 0.70 and \
                        get_iou(lvis_masks[lvis_i], coco_masks[coco_j]) > 0.70:
                    break
            else:
                masks.append(coco_masks[coco_j])

        # Keep an LVIS mask only if no *later* LVIS mask closely duplicates it.
        for ti, (lvis_mask, lvis_bbox) in enumerate(zip(lvis_masks, lvis_bboxes)):
            for tj_mask, tj_bbox in zip(lvis_masks[ti + 1:], lvis_bboxes[ti + 1:]):
                bbox_iou = get_bbox_iou(lvis_bbox, tj_bbox)
                if bbox_iou > 0.7 and get_iou(lvis_mask, tj_mask) > 0.85:
                    break
            else:
                masks.append(lvis_mask)

        masks_meta = [(get_bbox_from_mask(x), x.sum()) for x in masks]
        if not masks:
            continue

        hierarchy = get_masks_hierarchy(masks, masks_meta)
        # Collapse isolated masks (no parent, no children) to a None entry.
        for obj_id, obj_info in list(hierarchy.items()):
            if obj_info['parent'] is None and len(obj_info['children']) == 0:
                hierarchy[obj_id] = None

        merged_mask = np.max(masks, axis=0)
        num_instance_masks = len(masks)
        # Append large COCO semantic regions not covered by any instance mask.
        for obj_id in coco_sample['semantic_info'].keys():
            obj_mask = coco_sample['semantic_map'] == obj_id
            obj_mask = np.logical_and(obj_mask, np.logical_not(merged_mask))
            if obj_mask.sum() > 500:
                masks.append(obj_mask)

        hlvis_annotation[image_name] = {
            'num_instance_masks': num_instance_masks,
            'hierarchy': hierarchy
        }

        with open(output_masks_path / f'{image_name}.pickle', 'wb') as f:
            pickle.dump(encode_masks(masks), f)

    with open(lvis_path / dataset_split / 'hannotation.pickle', 'wb') as f:
        pickle.dump(hlvis_annotation, f, protocol=pickle.HIGHEST_PROTOCOL)