def add_annotations_to_sequences(annotations_dir: str, temp_sequences_dir: str,
                                 sequences_dir: str):
    """
    Extract the bounding box annotations from the COCO JSONs for all datasets labeled in this round.

    Args:
        annotations_dir: Path to directory with the annotations in COCO JSONs at the root level.
        temp_sequences_dir: Path to a flat directory of JSONs ending in '_temp.json' which are
            MegaDB sequences without the bounding box annotations.
        sequences_dir: Path to a directory to output corresponding bounding box-included sequences
            in MegaDB format.

    Returns:
        None. JSON files will be written to sequences_dir.
    """
    assert os.path.exists(annotations_dir), \
        f'annotations_dir {annotations_dir} does not exist'
    assert os.path.isdir(annotations_dir), \
        f'annotations_dir {annotations_dir} is not a directory'
    assert os.path.exists(temp_sequences_dir), \
        f'temp_sequences_dir {temp_sequences_dir} does not exist'
    assert os.path.isdir(temp_sequences_dir), \
        f'temp_sequences_dir {temp_sequences_dir} is not a directory'
    os.makedirs(sequences_dir, exist_ok=True)

    temp_megadb_files = path_utils.recursive_file_list(temp_sequences_dir)
    temp_megadb_files = [i for i in temp_megadb_files if i.endswith('.json')]
    print(f'{len(temp_megadb_files)} temporary MegaDB dataset files found.')

    annotation_files = path_utils.recursive_file_list(annotations_dir)
    annotation_files = [i for i in annotation_files if i.endswith('.json')]
    print(f'{len(annotation_files)} annotation files found. Extracting annotations...')

    # dataset name : (seq_id, frame_num) : [bbox, bbox]
    # where bbox is a dict with str 'category' and list 'bbox'
    all_image_bbox: Dict[str, Dict[Tuple[str, int], list]] = defaultdict(dict)
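    # an illustrative (assumed) entry, using the example filename below:
    # all_image_bbox['alka_squirrels'][('seq2020_05_07_25C', 119221)] = \
    #     [{'category': 'animal', 'bbox': [0.1, 0.2, 0.3, 0.4]}]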

    for p in tqdm(annotation_files):
        incoming_coco = IndexedJsonDb(p)
        assert bbox_categories_str == json.dumps(incoming_coco.db['categories']), \
            f'Incoming COCO JSON has a different category mapping! {p}'

        # iterate over image_id_to_image rather than image_id_to_annotations so we include
        # the confirmed empty images
        for image_id, image_entry in incoming_coco.image_id_to_image.items():
            image_file_name = image_entry['file_name']
            # The file_name field in the incoming json looks like
            # alka_squirrels.seq2020_05_07_25C.frame119221.jpg
            dataset_name, seq_id, frame_num = file_name_to_parts(
                image_file_name)
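            # e.g. presumably ('alka_squirrels', 'seq2020_05_07_25C', 119221), assuming
            # file_name_to_parts splits on the '.' separators and parses frame_num as an int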
            bbox_field = []  # empty means this image is confirmed empty

            annotations = incoming_coco.image_id_to_annotations.get(
                image_id, [])
            for coco_anno in annotations:
                if coco_anno['category_id'] == 5:
                    assert len(coco_anno['bbox']) == 0, f'{coco_anno}'

                    # there seems to be a bug in the annotations where a non-empty label
                    # sometimes appears alongside a label of category_id 5 ("empty");
                    # ignore the "empty" label in that case (these images appear to be
                    # actually non-empty)
                    continue

                assert coco_anno['category_id'] is not None, f'{p} {coco_anno}'

                bbox_field.append({
                    'category': bbox_cat_map[coco_anno['category_id']],
                    'bbox': ct_utils.truncate_float_array(coco_anno['bbox'], precision=4)
                })
            all_image_bbox[dataset_name][(seq_id, frame_num)] = bbox_field

    print('\nAdding bounding boxes to the MegaDB dataset files...')
    for p in temp_megadb_files:
        basename = os.path.basename(p)
        dataset_name = basename.split('_temp.')[0] if basename.endswith('_temp.json') \
            else basename.split('.json')[0]
        print(f'Adding to dataset {dataset_name}')
        dataset_image_bbox = all_image_bbox.get(dataset_name, None)
        if dataset_image_bbox is None:
            print('Skipping, no annotations found for this dataset\n')
            continue

        with open(p) as f:
            sequences = json.load(f)

        num_images_updated = 0
        for seq in tqdm(sequences):
            assert seq['dataset'] == dataset_name
            seq_id = seq['seq_id']
            for im in seq['images']:
                frame_num = im.get('frame_num', 1)
                bbox_field = dataset_image_bbox.get((seq_id, frame_num), None)
                if bbox_field is not None:  # empty list also evaluates to False
                    im['bbox'] = bbox_field
                    num_images_updated += 1
        print(f'Dataset {dataset_name} had {num_images_updated} images updated\n')

        with open(os.path.join(sequences_dir, f'{dataset_name}.json'),
                  'w',
                  encoding='utf-8') as f:
            json.dump(sequences, f, indent=1, ensure_ascii=False)
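
A minimal usage sketch (all three paths are hypothetical placeholders):

# annotations_dir holds this round's COCO JSONs; temp_sequences_dir holds the
# '*_temp.json' MegaDB files; both directory names are assumptions.
add_annotations_to_sequences(annotations_dir='annotations/batch_12',
                             temp_sequences_dir='megadb/temp_sequences',
                             sequences_dir='megadb/sequences')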
Example #2
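This fragment presumably runs inside a loop over per-image detection entries; a minimal sketch of the assumed enclosing context (the loop header, `im_dict` initialization, and `js` structure are assumptions inferred from the names used below):

# Assumed context, not part of the original fragment:
# js = {'images': []}
# for im_key in detections:
#     im_dict = {'file': im_key}
#     # ...the indented body below runs once per image...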
    det_list = []
    det_boxes_old_format = detections[im_key]['detection_boxes']
    det_classes = detections[im_key]['detection_classes']
    det_conf = detections[im_key]['detection_scores']
    # Convert boxes from [ymin, xmin, ymax, xmax] format to
    # [x_min, y_min, width_of_box, height_of_box]
    tmp = det_boxes_old_format.T
    det_boxes = np.array([tmp[1], tmp[0], tmp[3] - tmp[1], tmp[2] - tmp[0]]).T
    del tmp
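    # e.g. a box [0.1, 0.2, 0.5, 0.8] (ymin, xmin, ymax, xmax) becomes
    # [0.2, 0.1, 0.6, 0.4] (x_min, y_min, width, height)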

    for det_id in range(len(det_boxes)):
        if det_conf[det_id] > DETECTION_CONF_THRESHOLD:
            det_list.append(
                dict(category=str(det_classes[det_id]),
                     conf=ct_utils.truncate_float(det_conf[det_id].item()),
                     bbox=ct_utils.truncate_float_array(
                         det_boxes[det_id].tolist())))
    im_dict['detections'] = det_list
    if len(im_dict['detections']) > 0:
        im_dict['max_detection_conf'] = ct_utils.truncate_float(
            max(det_conf).item())
    else:
        im_dict['max_detection_conf'] = 0.

    js['images'].append(im_dict)

# Write output json
with open(args.output_json, 'wt') as fi:
    json.dump(js, fi, indent=1)
def extract_annotations(annotation_path, dataset_name):
    """
    Extract the bounding box annotations from the pseudo-jsons iMerit sends us for a single dataset.

    Args:
        annotation_path: a list or a string; either the list of annotation entries, a path
            to a directory containing annotation pseudo-jsons (without sub-directories), or
            a path to a single pseudo-json file
        dataset_name: string used to identify this dataset when the images were sent for
            annotation. Note that this needs to match what's in the annotation files, which
            may differ from what's in the `dataset` table

    Returns:
        image_filename_to_bboxes: a dict of image filename to the bbox items ready to
        insert to MegaDB sequences' image objects.
    """
    content = []
    if isinstance(annotation_path, str):
        assert os.path.exists(annotation_path), \
            'annotation_path provided does not exist as a dir or file'

        if os.path.isdir(annotation_path):
            # annotation_path points to a directory containing annotation pseudo-jsons
            json_file_names = [
                f for f in os.listdir(annotation_path) if f.endswith('.json')
            ]
            for file_name in json_file_names:
                p = os.path.join(annotation_path, file_name)
                with open(p) as f:
                    content.extend(f.readlines())
            print('{} annotation files found in directory at annotation_path'.format(
                len(json_file_names)))
        else:
            # annotation_path points to a single annotation pseudo-json
            with open(annotation_path) as f:
                content = f.readlines()

    else:
        assert isinstance(annotation_path, list), \
            'annotation_path provided is not a string (path) or list'
        content = annotation_path

    print('Number of annotation entries found: {}'.format(len(content)))

    image_filename_to_bboxes = defaultdict(list)
    num_bboxes = 0
    num_bboxes_skipped = 0

    # each row in this pseudo-json is a COCO formatted entry for an image sequence
    for row in tqdm(content):
        entry = json.loads(row)

        entry_categories = entry.get('categories', [])
        assert json.dumps(bbox_categories, sort_keys=True) == \
            json.dumps(entry_categories, sort_keys=True)

        entry_annotations = entry.get('annotations', [])
        entry_images = entry.get('images', [])

        images_non_empty = set()
        for anno in entry_annotations:
            assert 'image_id' in anno
            assert 'bbox' in anno
            assert len(anno['bbox']) == 4
            assert 'category_id' in anno
            assert type(anno['category_id']) == int

            # iMerit calls this field image_id; some of these are URL encoded
            image_ref = urllib.parse.unquote(anno['image_id'])
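            # e.g. presumably 'alka_squirrels+some%2Fpath%2Fimage.jpg' unquotes to
            # 'alka_squirrels+some/path/image.jpg'; the 'dataset+filename' layout is
            # inferred from the '+' splits below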

            # dataset = image_ref.split('dataset')[1].split('.')[0]  # prior to batch 10
            dataset = image_ref.split('+')[0]
            if dataset != dataset_name:
                num_bboxes_skipped += 1
                continue

            # prior to batch 10, image filenames were lower-cased:
            # image_filename = image_ref.split('.img')[1].lower()
            image_filename = image_ref.split('+')[1]

            bbox_coords = anno['bbox']  # [x_rel, y_rel, w_rel, h_rel]
            bbox_coords = ct_utils.truncate_float_array(bbox_coords,
                                                        precision=4)

            bbox_entry = {
                'category': bbox_cat_map[anno['category_id']],
                'bbox': bbox_coords
            }

            image_filename_to_bboxes[image_filename].append(bbox_entry)
            num_bboxes += 1
            # remember that this image has at least one bbox
            images_non_empty.add(image_ref)

        for im in entry_images:
            image_ref = urllib.parse.unquote(im['file_name'])

            #dataset = image_ref.split('dataset')[1].split('.')[0]  # prior to batch 10
            dataset = image_ref.split('+')[0]
            if dataset != dataset_name:
                continue

            #image_filename = image_ref.split('.img')[1].lower()  # prior to batch 10
            image_filename = image_ref.split('+')[1]
            if image_ref not in images_non_empty:
                # empty list indicates "confirmed emptiness"
                image_filename_to_bboxes[image_filename] = []

    print('{} boxes on {} images were in the annotation file(s). '
          '{} boxes were skipped because they are not for the requested dataset.'.format(
              num_bboxes, len(image_filename_to_bboxes), num_bboxes_skipped))

    # how many boxes of each category?
    print('\nCategory counts for the bboxes:')
    category_count = defaultdict(int)
    for filename, bboxes in image_filename_to_bboxes.items():
        for b in bboxes:
            category_count[b['category']] += 1
    for category, count in sorted(category_count.items()):
        print('{}: {}'.format(category, count))

    return image_filename_to_bboxes
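
A minimal usage sketch (the directory path and dataset name are hypothetical placeholders):

# Maps each annotated image filename to its bbox entries; an empty list
# means the image was confirmed empty.
filename_to_bboxes = extract_annotations('annotations/batch_12', 'alka_squirrels')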
Example #4
    def generate_detections_one_image(self, img_original, image_id, detection_threshold):
        """Apply the detector to an image.

        Args:
            img_original: the PIL Image object with EXIF rotation taken into account
            image_id: a path to identify the image; will be in the "file" field of the output object
            detection_threshold: confidence above which to include the detection proposal

        Returns:
            A dict with the following fields (see the 'images' key in
            https://github.com/microsoft/CameraTraps/tree/master/api/batch_processing#batch-processing-api-output-format):
            - 'file' (always present)
            - 'max_detection_conf'
            - 'detections', a list of detection objects with keys 'category', 'conf' and 'bbox'
            - 'failure' (present only if the image failed during inference)
        """

        result = {
            'file': image_id
        }
        detections = []
        max_conf = 0.0

        try:
            img_original = np.asarray(img_original)

            # padded resize; auto=True is used here, but JIT-compiled models require auto=False
            img = letterbox(img_original, new_shape=PTDetector.IMAGE_SIZE,
                            stride=PTDetector.STRIDE, auto=True)[0]
            img = img.transpose((2, 0, 1))  # HWC to CHW; PIL Image is RGB already
            img = np.ascontiguousarray(img)
            img = torch.from_numpy(img)
            img = img.to(self.device)
            img = img.float()
            img /= 255

            if len(img.shape) == 3:  # always true for now, TODO add inference using larger batch size
                img = torch.unsqueeze(img, 0)

            pred: list = self.model(img)[0]

            # NMS
            pred = non_max_suppression(prediction=pred, conf_thres=detection_threshold)

            # format detections/bounding boxes
            gn = torch.tensor(img_original.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            for det in pred:
                if len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img_original.shape).round()

                    for *xyxy, conf, cls in reversed(det):
                        # normalized center-x, center-y, width and height
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()

                        api_box = ct_utils.convert_yolo_to_xywh(xywh)
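                        # e.g. normalized center format [0.5, 0.5, 0.2, 0.4] becomes
                        # top-left format [0.4, 0.3, 0.2, 0.4], assuming
                        # convert_yolo_to_xywh maps [x_center, y_center, w, h]
                        # to [x_min, y_min, w, h]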

                        conf = ct_utils.truncate_float(conf.tolist(), precision=CONF_DIGITS)

                        # MegaDetector output format's categories start at 1, but this model's start at 0
                        cls = int(cls.tolist()) + 1
                        if cls not in (1, 2, 3):
                            raise KeyError(f'{cls} is not a valid class.')

                        detections.append({
                            'category': str(cls),
                            'conf': conf,
                            'bbox': ct_utils.truncate_float_array(api_box, precision=COORD_DIGITS)
                        })
                        max_conf = max(max_conf, conf)

        except Exception as e:
            result['failure'] = FAILURE_INFER
            print('PTDetector: image {} failed during inference: {}'.format(image_id, str(e)))

        result['max_detection_conf'] = max_conf
        result['detections'] = detections

        return result
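
A minimal usage sketch (the constructor call and model file name are assumptions; the method itself is as defined above):

# from PIL import Image
# detector = PTDetector('md_v5a.0.0.pt')  # hypothetical constructor signature
# with Image.open('camera_trap.jpg') as img:
#     result = detector.generate_detections_one_image(
#         img, 'camera_trap.jpg', detection_threshold=0.2)
# result['detections'] holds dicts with 'category', 'conf' and 'bbox'.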