Example #1
    def generate_detections_one_image(
            self,
            image,
            image_id,
            detection_threshold=DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD):
        """Apply the detector to an image.

        Args:
            image: the PIL Image object
            image_id: a path to identify the image; will be in the `file` field of the output object
            detection_threshold: confidence above which to include the detection proposal

        Returns:
        A dict with the following fields, see https://github.com/microsoft/CameraTraps/tree/siyu/inference_refactor/api/batch_processing#batch-processing-api-output-format
            - 'file' (always present)
            - 'max_detection_conf'
            - 'detections', which is a list of detection objects containing `category`, `conf` and `bbox`
            - 'failure'
        """
        result = {'file': image_id}
        try:
            b_box, b_score, b_class = self._generate_detections_one_image(
                image)

            # our batch size is 1; need to loop the batch dim if supporting batch size > 1
            boxes, scores, classes = b_box[0], b_score[0], b_class[0]

            detections_cur_image = []  # will be empty for an image with no confident detections
            max_detection_conf = 0.0
            for b, s, c in zip(boxes, scores, classes):
                if s > detection_threshold:
                    detection_entry = {
                        # use string type for the numerical class label, not int
                        'category': str(int(c)),
                        # cast to float for json serialization
                        'conf': truncate_float(float(s),
                                               precision=TFDetector.CONF_DIGITS),
                        'bbox': TFDetector.__convert_coords(b)
                    }
                    detections_cur_image.append(detection_entry)
                    if s > max_detection_conf:
                        max_detection_conf = s

            result['max_detection_conf'] = truncate_float(
                float(max_detection_conf), precision=TFDetector.CONF_DIGITS)
            result['detections'] = detections_cur_image

        except Exception as e:
            result['failure'] = TFDetector.FAILURE_TF_INFER
            print('TFDetector: image {} failed during inference: {}'.format(
                image_id, str(e)))

        return result
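A minimal usage sketch of the function above. The detector constructor, model file name and image path are invented for illustration; only the call signature and the shape of the returned dict follow from the example itself.

from PIL import Image

detector = TFDetector('md_v4.1.0.pb')        # hypothetical: frozen model path assumed by the constructor
image = Image.open('camera_trap_frame.jpg')  # hypothetical image path

result = detector.generate_detections_one_image(
    image, image_id='camera_trap_frame.jpg', detection_threshold=0.1)

# On success the dict looks roughly like:
# {'file': 'camera_trap_frame.jpg',
#  'max_detection_conf': 0.97,
#  'detections': [{'category': '1', 'conf': 0.97, 'bbox': [0.1, 0.2, 0.3, 0.4]}]}
# On failure it contains a 'failure' field instead of the detection fields.
print(result)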
Example #2
def row_to_classification_list(row: Mapping[str, Any],
                               label_names: Sequence[str],
                               contains_preds: bool,
                               label_pos: Optional[str],
                               threshold: float,
                               relative_conf: bool = False
                               ) -> List[Tuple[str, float]]:
    """Given a mapping from label name to output probability, returns a list of
    tuples, (str(label_id), prob), which can be serialized into the Batch API
    output format.

    The list of tuples is returned in sorted order by the predicted probability
    for each label.

    If 'label' is in row and label_pos is not None, then we add
    (str(label_id + 1_000_000), 1.) to the list. If label_pos='first', we put
    this at the front of the list. Otherwise, we put it at the end.
    """
    contains_label = ('label' in row)
    assert contains_label or contains_preds
    if relative_conf:
        assert contains_label and contains_preds

    result = []
    if contains_preds:
        result = [(str(i), row[label]) for i, label in enumerate(label_names)]
        if relative_conf:
            label_conf = row[row['label']]
            result = [(k, max(v - label_conf, 0)) for k, v in result]

        # filter out confidences below the threshold, and set precision to 4
        result = [
            (k, truncate_float(conf, precision=4))
            for k, conf in result if conf >= threshold
        ]

        # sort from highest to lowest probability
        result = sorted(result, key=lambda x: x[1], reverse=True)

    if contains_label and label_pos is not None:
        label = row['label']
        label_id = label_names.index(label)
        item = (str(label_id + 1_000_000), 1.)
        if label_pos == 'first':
            result = [item] + result
        else:
            result.append(item)
    return result
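A hedged worked example of row_to_classification_list: the row, label names and probabilities below are invented, and the truncation to 4 digits is assumed to leave these short values unchanged.

row = {'label': 'deer', 'deer': 0.80, 'fox': 0.15, 'empty': 0.05}  # hypothetical classifier output row
label_names = ['deer', 'fox', 'empty']

result = row_to_classification_list(
    row, label_names, contains_preds=True, label_pos='first',
    threshold=0.1, relative_conf=False)

# Ground-truth label first (its id offset by 1_000_000, confidence 1.0),
# then predictions above the threshold in descending order of probability:
# [('1000000', 1.0), ('0', 0.8), ('1', 0.15)]
print(result)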
Example #3
def round_and_make_float(d, precision=4):
    return truncate_float(float(d), precision=precision)
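All of these snippets lean on ct_utils.truncate_float to keep JSON output compact. A minimal sketch of what such a helper might do, assuming truncation to a fixed number of significant digits without rounding up; the real ct_utils implementation may differ in detail.

import math

def truncate_float_sketch(x, precision=3):
    # Illustrative stand-in, not the library function: keep roughly
    # `precision` significant digits, flooring instead of rounding.
    if x == 0:
        return 0.0
    factor = math.pow(10, precision - 1 - math.floor(math.log10(abs(x))))
    return math.floor(x * factor) / factor

print(truncate_float_sketch(0.0003214884))  # 0.000321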
Example #4
    # Excerpt from a per-image loop: `detections`, `im_key`, `im_dict`, `js` and `args`
    # are defined by the surrounding script.
    det_list = list()
    det_boxes_old_format = detections[im_key]['detection_boxes']
    det_classes = detections[im_key]['detection_classes']
    det_conf = detections[im_key]['detection_scores']
    # Convert boxes from [ymin, xmin, ymax, xmax] format to
    # [x_min, y_min, width_of_box, height_of_box]
    tmp = det_boxes_old_format.T
    det_boxes = np.array([tmp[1], tmp[0], tmp[3] - tmp[1], tmp[2] - tmp[0]]).T
    del tmp

    for det_id in range(len(det_boxes)):

        if det_conf[det_id] > DETECTION_CONF_THRESHOLD:
            det_list.append(
                dict(category=str(det_classes[det_id]),
                     conf=ct_utils.truncate_float(det_conf[det_id].item()),
                     bbox=ct_utils.truncate_float_array(
                         det_boxes[det_id].tolist())))
    im_dict['detections'] = det_list
    if len(im_dict['detections']) > 0:
        im_dict['max_detection_conf'] = ct_utils.truncate_float(
            max(det_conf).item())
    else:
        im_dict['max_detection_conf'] = 0.

    js['images'].append(im_dict)

# Write output json
with open(args.output_json, 'wt') as fi:
    json.dump(js, fi, indent=1)
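The excerpt above converts boxes from the TFODAPI-style [ymin, xmin, ymax, xmax] layout to the batch API's [x_min, y_min, width, height]. A small standalone check of that transpose trick, with an invented box:

import numpy as np

# one normalized box in [ymin, xmin, ymax, xmax] (values invented)
det_boxes_old_format = np.array([[0.2, 0.1, 0.6, 0.5]])

tmp = det_boxes_old_format.T
det_boxes = np.array([tmp[1], tmp[0], tmp[3] - tmp[1], tmp[2] - tmp[0]]).T

print(det_boxes)  # [[0.1 0.2 0.4 0.4]] -> [x_min, y_min, width, height]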
Example #5
def make_cct_embedded(image_db=None, bbox_db=None):
    """
    Takes in path to the COCO Camera Trap format jsons for images (species labels) and/or
    bboxes (animal/human/vehicle) labels and embed the class names and annotations into the image entries.

    Since IndexedJsonDb() can take either a path or a loaded json object as a dict, both
    arguments can be paths or loaded json objects

    Returns:
        an embedded version of the COCO Camera Trap format json database
    """


    # at first a dict of image_id: image_obj with annotations embedded, then it becomes
    # an array of image objects
    docs = {}

    # %% integrate the image DB
    if image_db:
        print('Loading image DB...')
        cct_json_db = IndexedJsonDb(image_db)
        docs = cct_json_db.image_id_to_image  # each image entry is first assigned the image object

        # takes in image entries and species and other annotations in the image DB
        num_images_with_more_than_1_species = 0
        for image_id, annotations in cct_json_db.image_id_to_annotations.items():
            docs[image_id]['annotations'] = {
                'species': []
            }
            if len(annotations) > 1:
                num_images_with_more_than_1_species += 1
            for anno in annotations:
                # convert the species category to explicit string name
                cat_name = cct_json_db.cat_id_to_name[anno['category_id']]
                docs[image_id]['annotations']['species'].append(cat_name)

                # there may be other fields in the annotation object
                for anno_field_name, anno_field_val in anno.items():
                    # these fields should already be gotten from the image object
                    if anno_field_name not in ['category_id', 'id', 'image_id', 'datetime', 'location', 'sequence_level_annotation', 'seq_id', 'seq_num_frames', 'frame_num']:
                        docs[image_id]['annotations'][anno_field_name] = anno_field_val

        print('Number of items from the image DB:', len(docs))
        print('Number of images with more than 1 species: {} ({}% of image DB)'.format(
            num_images_with_more_than_1_species, round(100 * num_images_with_more_than_1_species / len(docs), 2)))

    #%% integrate the bbox DB
    if bbox_db:
        print('Loading bbox DB...')
        cct_bbox_json_db = IndexedJsonDb(bbox_db)

        # add any images that are not in the image DB
        # also add any fields in the image object that are not present already
        num_added = 0
        num_amended = 0
        for image_id, image_obj in cct_bbox_json_db.image_id_to_image.items():
            if image_id not in docs:
                docs[image_id] = image_obj
                num_added += 1

            amended = False
            for field_name, val in image_obj.items():
                if field_name not in docs[image_id]:
                    docs[image_id][field_name] = val
                    amended = True
            if amended:
                num_amended += 1

        print('Number of images added from bbox DB entries: ', num_added)
        print('Number of images amended: ', num_amended)
        print('Number of items in total: ', len(docs))

        # add bbox to the annotations field
        num_more_than_1_bbox = 0

        for image_id, bbox_annotations in cct_bbox_json_db.image_id_to_annotations.items():

            # for any newly added images
            if 'annotations' not in docs[image_id]:
                docs[image_id]['annotations'] = {}

            docs[image_id]['annotations']['bbox'] = []

            if len(bbox_annotations) > 1:
                num_more_than_1_bbox += 1

            for bbox_anno in bbox_annotations:
                item_bbox = {
                    'category': cct_bbox_json_db.cat_id_to_name[bbox_anno['category_id']],
                    # 'bbox_abs': bbox_anno['bbox'],
                }

                if 'width' in docs[image_id]:
                    image_w = docs[image_id]['width']
                    image_h = docs[image_id]['height']
                    x, y, w, h = bbox_anno['bbox']
                    item_bbox['bbox_rel'] = [
                        truncate_float(x / image_w),
                        truncate_float(y / image_h),
                        truncate_float(w / image_w),
                        truncate_float(h / image_h)
                    ]

                docs[image_id]['annotations']['bbox'].append(item_bbox)

            # not keeping height and width
            del docs[image_id]['width']
            del docs[image_id]['height']

        print('Number of images with more than one bounding box: {} ({}% of all entries)'.format(
            num_more_than_1_bbox, round(100 * num_more_than_1_bbox / len(docs), 2)))
    else:
        print('No bbox DB provided.')

    assert len(docs) > 0, 'No image entries found in the image or bbox DB jsons provided.'

    docs = list(docs.values())
    return docs
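A hedged usage sketch for make_cct_embedded; the file names are invented, and per the docstring either argument may also be a pre-loaded dict.

embedded = make_cct_embedded(image_db='caltech_images.json',  # hypothetical CCT image DB
                             bbox_db='caltech_bboxes.json')   # hypothetical CCT bbox DB
print(len(embedded), 'image entries')
print(embedded[0].get('annotations', {}))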
Example #6
    def generate_detections_one_image(self, img_original, image_id, detection_threshold):
        """Apply the detector to an image.

        Args:
            img_original: the PIL Image object with EXIF rotation taken into account
            image_id: a path to identify the image; will be in the "file" field of the output object
            detection_threshold: confidence above which to include the detection proposal

        Returns:
        A dict with the following fields, see the 'images' key in https://github.com/microsoft/CameraTraps/tree/master/api/batch_processing#batch-processing-api-output-format
            - 'file' (always present)
            - 'max_detection_conf'
            - 'detections', which is a list of detection objects containing keys 'category', 'conf' and 'bbox'
            - 'failure'
        """

        result = {
            'file': image_id
        }
        detections = []
        max_conf = 0.0

        try:
            img_original = np.asarray(img_original)

            # padded resize
            img = letterbox(img_original, new_shape=PTDetector.IMAGE_SIZE,
                            stride=PTDetector.STRIDE, auto=True)[0]  # JIT requires auto=False
            img = img.transpose((2, 0, 1))  # HWC to CHW; PIL Image is RGB already
            img = np.ascontiguousarray(img)
            img = torch.from_numpy(img)
            img = img.to(self.device)
            img = img.float()
            img /= 255

            if len(img.shape) == 3:  # always true for now, TODO add inference using larger batch size
                img = torch.unsqueeze(img, 0)

            pred: list = self.model(img)[0]

            # NMS
            pred = non_max_suppression(prediction=pred, conf_thres=detection_threshold)

            # format detections/bounding boxes
            gn = torch.tensor(img_original.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            for det in pred:
                if len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img_original.shape).round()

                    for *xyxy, conf, cls in reversed(det):
                        # normalized center-x, center-y, width and height
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()

                        api_box = ct_utils.convert_yolo_to_xywh(xywh)

                        conf = ct_utils.truncate_float(conf.tolist(), precision=CONF_DIGITS)

                        # MegaDetector output format's categories start at 1, but this model's start at 0
                        cls = int(cls.tolist()) + 1
                        if cls not in (1, 2, 3):
                            raise KeyError(f'{cls} is not a valid class.')

                        detections.append({
                            'category': str(cls),
                            'conf': conf,
                            'bbox': ct_utils.truncate_float_array(api_box, precision=COORD_DIGITS)
                        })
                        max_conf = max(max_conf, conf)

        except Exception as e:
            result['failure'] = FAILURE_INFER
            print('PTDetector: image {} failed during inference: {}'.format(image_id, str(e)))

        result['max_detection_conf'] = max_conf
        result['detections'] = detections

        return result
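The PyTorch path above converts YOLO-style normalized [x_center, y_center, width, height] boxes into the API's [x_min, y_min, width, height] via ct_utils.convert_yolo_to_xywh. A minimal sketch of that conversion (the real helper may differ in detail):

def convert_yolo_to_xywh_sketch(yolo_box):
    # [x_center, y_center, width, height] -> [x_min, y_min, width, height], all normalized
    x_center, y_center, width, height = yolo_box
    return [x_center - width / 2.0, y_center - height / 2.0, width, height]

print(convert_yolo_to_xywh_sketch([0.5, 0.5, 0.2, 0.4]))  # [0.4, 0.3, 0.2, 0.4]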
Example #7
def classify_boxes(classification_graph,
                   json_with_classes,
                   image_dir,
                   confidence_threshold=DEFAULT_CONFIDENCE_THRESHOLD,
                   detection_category_whitelist=DETECTION_CATEGORY_WHITELIST,
                   padding_factor=PADDING_FACTOR,
                   num_annotated_classes=NUM_ANNOTATED_CLASSES):
    """
    Takes a classification model and applies it to all detected boxes with a detection confidence
    larger than confidence_threshold.

    Args:
        classification_graph: frozen graph model that includes the TF-slim preprocessing. i.e. it will be given a cropped
                              images with values in [0,1]
        json_with_classes:    Object created from the json file that is generated by the detection API. However, the
                              field 'classification_categories' is already added. The script assumes 0-based indexing.
        image_dir:            Base directory of the images. All paths in the JSON are relative to this folder
        confidence_threshold: Only classify boxes with a threshold larger than this
        detection_category_whitelist: Only boxes with this detection category will be classified
        padding_factor:       The function will enlarge the bounding boxes by this factor before passing them to the
                              classifier.
        num_annotated_classes: Number of top-scoring class predictions to store in the json

    Returns the updated json object. Classification results are added as field 'classifications' to all elements images/detections
    assuming a 0-based indexing of the classifier output, i.e. output with index 0 has the class key '0'
    """

    # Make sure we have the right json object
    assert 'classification_categories' in json_with_classes.keys()
    assert isinstance(detection_category_whitelist, list)
    assert all([isinstance(x, str) for x in detection_category_whitelist])

    with classification_graph.as_default():

        with tf.Session(graph=classification_graph) as sess:

            # Get input and output tensors of classification model
            image_tensor = classification_graph.get_tensor_by_name('input:0')
            predictions_tensor = classification_graph.get_tensor_by_name(
                'output:0')
            predictions_tensor = tf.squeeze(predictions_tensor, [0])

            # For each image
            nImages = len(json_with_classes['images'])
            for iImage in tqdm.tqdm(list(range(0, nImages))):

                image_description = json_with_classes['images'][iImage]

                # Read image
                try:
                    image_path = image_description['file']
                    if image_dir:
                        image_path = os.path.join(image_dir, image_path)
                    image_data = np.array(
                        PIL.Image.open(image_path).convert("RGB"))
                    # Scale pixel values to [0,1]
                    image_data = image_data / 255
                    image_height, image_width, _ = image_data.shape
                except KeyboardInterrupt as e:
                    raise e
                except:
                    print('Couldn\'t load image {}'.format(image_path))
                    continue

                # For each box
                nDetections = len(image_description['detections'])
                for iBox in range(nDetections):

                    cur_detection = image_description['detections'][iBox]

                    # Skip detections with low confidence
                    if cur_detection['conf'] < confidence_threshold:
                        continue

                    # Skip if detection category is not in whitelist
                    if cur_detection['category'] not in detection_category_whitelist:
                        continue

                    # Skip if already classified
                    if 'classifications' in cur_detection.keys() and len(
                            cur_detection['classifications']) > 0:
                        continue

                    # Get current box in relative coordinates and format [x_min, y_min, width_of_box, height_of_box]
                    box_orig = cur_detection['bbox']

                    # Convert to [ymin, xmin, ymax, xmax] and store it as 1x4 numpy array so we can
                    # re-use the generic multi-box padding code
                    box_coords = np.array([[
                        box_orig[1], box_orig[0], box_orig[1] + box_orig[3],
                        box_orig[0] + box_orig[2]
                    ]])
                    # Convert normalized coordinates to pixel coordinates
                    box_coords_abs = (box_coords *
                                      np.tile([image_height, image_width],
                                              (1, 2)))

                    # Pad the detected animal to a square box and additionally by PADDING_FACTOR; the result will be in crop_boxes.
                    #
                    # However, we need to make sure that the box coordinates are still within the image.
                    bbox_sizes = np.vstack([
                        box_coords_abs[:, 2] - box_coords_abs[:, 0],
                        box_coords_abs[:, 3] - box_coords_abs[:, 1]
                    ]).T
                    offsets = (padding_factor * np.max(
                        bbox_sizes, axis=1, keepdims=True) - bbox_sizes) / 2
                    crop_boxes = box_coords_abs + np.hstack(
                        [-offsets, offsets])
                    crop_boxes = np.maximum(0, crop_boxes).astype(int)

                    # Get the first (and only) row as our bbox to classify
                    crop_box = crop_boxes[0]

                    # Get the image data for that box
                    cropped_img = image_data[crop_box[0]:crop_box[2],
                                             crop_box[1]:crop_box[3]]

                    # Run inference
                    predictions = sess.run(
                        predictions_tensor,
                        feed_dict={image_tensor: cropped_img})

                    # Add an empty list to the json for our predictions
                    cur_detection['classifications'] = list()

                    # Add the *num_annotated_classes* top scoring classes
                    for class_idx in np.argsort(
                            -predictions)[:num_annotated_classes]:
                        class_conf = ct_utils.truncate_float(
                            predictions[class_idx].item())
                        cur_detection['classifications'].append(
                            ['%i' % class_idx, class_conf])

                # ...for each box

            # ...for each image

        # ...with tf.Session

    # with classification_graph

    return json_with_classes
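A standalone check of the square-padding arithmetic used in the box cropping above, with an invented box and padding factor:

import numpy as np

padding_factor = 1.6  # invented for illustration
box_coords_abs = np.array([[100., 200., 300., 500.]])  # one box as [ymin, xmin, ymax, xmax] in pixels

bbox_sizes = np.vstack([box_coords_abs[:, 2] - box_coords_abs[:, 0],
                        box_coords_abs[:, 3] - box_coords_abs[:, 1]]).T  # [[200., 300.]]
offsets = (padding_factor * np.max(bbox_sizes, axis=1, keepdims=True) - bbox_sizes) / 2
crop_boxes = box_coords_abs + np.hstack([-offsets, offsets])
crop_boxes = np.maximum(0, crop_boxes).astype(int)

print(crop_boxes)  # [[  0 110 440 590]] -> the padded crop, clipped at the image border (ymin clamped to 0)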
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'dataset_name',
        type=str,
        help='a short string representing the dataset to be used as a partition key in the DB')
    parser.add_argument(
        '--image_db',
        type=str,
        help='path to the json containing the image DB in CCT format')
    parser.add_argument(
        '--bbox_db',
        type=str,
        help='path to the json containing the bbox DB in CCT format')
    parser.add_argument('--embedded_db',
                        type=str,
                        required=True,
                        help='path to store the resulting json')
    args = parser.parse_args()

    assert len(args.dataset_name) > 0, 'dataset name cannot be an empty string'

    if args.image_db:
        assert os.path.exists(args.image_db), \
            'image_db file path provided does not point to a file'
    if args.bbox_db:
        assert os.path.exists(args.bbox_db), \
            'bbox_db file path provided does not point to a file'

    #%% integrate the image DB

    # at first a dict of image_id: image_obj with annotations embedded,
    # then its values become the array of documents that will get uploaded to Cosmos DB
    docs = {}

    if args.image_db:
        print('Loading image DB...')
        cct_json_db = IndexedJsonDb(args.image_db)
        docs = cct_json_db.image_id_to_image  # each image entry is first assigned the image object

        # takes in image entries and species and other annotations in the image DB
        num_images_with_more_than_1_species = 0
        for image_id, annotations in cct_json_db.image_id_to_annotations.items():
            docs[image_id]['annotations'] = {'species': []}
            if len(annotations) > 1:
                num_images_with_more_than_1_species += 1
            for anno in annotations:
                # convert the species category to explicit string name
                cat_name = cct_json_db.cat_id_to_name[anno['category_id']]
                docs[image_id]['annotations']['species'].append(cat_name)

                # there may be other fields in the annotation object
                for anno_field_name, anno_field_val in anno.items():
                    # these fields should already be gotten from the image object
                    if anno_field_name not in [
                            'category_id', 'id', 'image_id', 'datetime',
                            'location', 'sequence_level_annotation', 'seq_id',
                            'seq_num_frames', 'frame_num'
                    ]:
                        docs[image_id]['annotations'][
                            anno_field_name] = anno_field_val

        print('Number of items from the image DB:', len(docs))
        print(
            'Number of images with more than 1 species: {} ({}% of image DB)'.
            format(
                num_images_with_more_than_1_species,
                round(100 * num_images_with_more_than_1_species / len(docs),
                      2)))

    #%% integrate the bbox DB
    if args.bbox_db:
        print('Loading bbox DB...')
        cct_bbox_json_db = IndexedJsonDb(args.bbox_db)

        # add any images that are not in the image DB
        # also add any fields in the image object that are not present already
        num_added = 0
        num_amended = 0
        for image_id, image_obj in cct_bbox_json_db.image_id_to_image.items():
            if image_id not in docs:
                docs[image_id] = image_obj
                num_added += 1

            amended = False
            for field_name, val in image_obj.items():
                if field_name not in docs[image_id]:
                    docs[image_id][field_name] = val
                    amended = True
            if amended:
                num_amended += 1

        print('Number of images added from bbox DB entries: ', num_added)
        print('Number of images amended: ', num_amended)
        print('Number of items in total: ', len(docs))

        # add bbox to the annotations field
        num_more_than_1_bbox = 0

        for image_id, bbox_annotations in cct_bbox_json_db.image_id_to_annotations.items():

            # for any newly added images
            if 'annotations' not in docs[image_id]:
                docs[image_id]['annotations'] = {}

            docs[image_id]['annotations']['bbox'] = []

            if len(bbox_annotations) > 1:
                num_more_than_1_bbox += 1

            for bbox_anno in bbox_annotations:
                item_bbox = {
                    'category': cct_bbox_json_db.cat_id_to_name[bbox_anno['category_id']],
                    'bbox_abs': bbox_anno['bbox'],
                }

                if 'width' in docs[image_id]:
                    image_w = docs[image_id]['width']
                    image_h = docs[image_id]['height']
                    x, y, w, h = bbox_anno['bbox']
                    item_bbox['bbox_rel'] = [
                        truncate_float(x / image_w),
                        truncate_float(y / image_h),
                        truncate_float(w / image_w),
                        truncate_float(h / image_h)
                    ]

                docs[image_id]['annotations']['bbox'].append(item_bbox)

        print(
            'Number of images with more than one bounding box: {} ({}% of all entries)'.format(
                num_more_than_1_bbox,
                round(100 * num_more_than_1_bbox / len(docs), 2)))
    else:
        print('No bbox DB provided.')

    assert len(docs) > 0, \
        'No image entries found in the image or bbox DB jsons provided.'

    docs = list(docs.values())

    #%% processing
    # get rid of any trailing '.JPG' for the id field
    # insert the 'dataset' attribute used as the partition key
    # replace illegal chars (for Cosmos DB) in the id field of the image
    # replace directory separator with tilde ~
    # rename the id field (reserved word) to image_id
    illegal_char_map = {'/': '~', '\\': '~', '?': '__qm__', '#': '__pound__'}

    for i in docs:
        i['id'] = i['id'].split('.JPG')[0].split('.jpg')[0]

        for illegal, replacement in illegal_char_map.items():
            i['id'] = i['id'].replace(illegal, replacement)

        i['dataset'] = args.dataset_name

        i['image_id'] = i['id']
        del i['id']

    #%% some validation
    print('Example items:')
    print()
    print(docs[0])
    print()
    print(docs[-1])
    print()

    num_both_species_bbox = 0
    for item in docs:
        if 'annotations' in item:
            if 'species' in item['annotations'] and 'bbox' in item['annotations']:
                num_both_species_bbox += 1
    print(
        'Number of images with both species and bbox annotations: {} ({}% of all entries)'
        .format(num_both_species_bbox,
                round(100 * num_both_species_bbox / len(docs), 2)))

    #%% save the embedded json database
    write_json(args.embedded_db, docs)
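A quick standalone check of the id sanitization applied in the processing step above, with an invented id:

illegal_char_map = {'/': '~', '\\': '~', '?': '__qm__', '#': '__pound__'}

image_id = 'site_A/camera#3/IMG_0001.JPG'  # invented example id
image_id = image_id.split('.JPG')[0].split('.jpg')[0]
for illegal, replacement in illegal_char_map.items():
    image_id = image_id.replace(illegal, replacement)

print(image_id)  # site_A~camera__pound__3~IMG_0001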