def generate_x(self, cfg: SampleCfg):
        """Load the frame referenced by ``cfg`` and return its augmented crop.

        The image path comes from ``dataset.image_crop_fn`` for the sample's
        video id and frame. The image is cropped with ``utils.get_image_crop``
        using the rect, scale, shift and angle settings stored on ``cfg``, and
        the augmentations enabled on ``cfg`` are then applied in a fixed order:
        saturation, contrast, brightness, horizontal flip, vertical flip and
        blur-by-downscaling.

        :param cfg: Sample configuration holding the source frame reference
            and the augmentation parameters.
        :return: The augmented crop multiplied by 255.0.
        """
        sample = cfg.fish_classification
        frame_path = dataset.image_crop_fn(sample.video_id, sample.frame)
        frame = scipy.misc.imread(frame_path)

        patch = utils.get_image_crop(
            full_rgb=frame,
            rect=cfg.rect,
            scale_rect_x=cfg.scale_rect_x,
            scale_rect_y=cfg.scale_rect_y,
            shift_x_ratio=cfg.shift_x_ratio,
            shift_y_ratio=cfg.shift_y_ratio,
            angle=cfg.angle,
            out_size=INPUT_ROWS,
        ).astype('float32')

        # Colour jitter steps, applied in this order. Each step is skipped
        # when its ratio on cfg is exactly 0.5.
        colour_jitter = (('saturation', 0.2), ('contrast', 0.25), ('brightness', 0.3))
        for step, variance in colour_jitter:
            ratio = getattr(cfg, step)
            if ratio != 0.5:
                patch = getattr(img_augmentation, step)(patch, variance=variance, r=ratio)

        if cfg.hflip:
            patch = img_augmentation.horizontal_flip(patch)
        if cfg.vflip:
            patch = img_augmentation.vertical_flip(patch)

        # A factor of exactly 1 disables the blur.
        downscale = cfg.blurred_by_downscaling
        if downscale != 1:
            patch = img_augmentation.blurred_by_downscaling(patch, 1.0 / downscale)

        # Presumably the crop is in [0, 1] at this point and is rescaled to
        # [0, 255] -- TODO confirm against utils.get_image_crop.
        return patch * 255.0
Ejemplo n.º 2
0
def create_image_lmdb(image_data, split="train", shape=(3, 224, 224), data_root=None, crop_size=224):
    """
    Create the LMDB for the images.

    For every annotation the object is cropped out of its image, resized to
    ``crop_size`` x ``crop_size``, converted from RGB to BGR and to
    channel-first order, and stored as a serialized Caffe datum under a
    zero-padded index key.

    :param image_data: List of JSON objects with details about the images.
        Each entry must provide ``"path"`` and ``"bbox"`` (x, y, width, height).
    :param split: Specify which data split this LMDB is for.
    :param shape: The (channels, width, height) of a stored image. It is only
        used to estimate the LMDB map size.
    :param data_root: The root folder where to store the LMDB file. ``None``
        stores it relative to the current working directory.
    :param crop_size: The final size of the cropped image.
    :return: None
    """
    print("Creating {0} Image LMDB".format(split))
    channels, width, height = shape
    # os.path.join() rejects None, so fall back to a relative path.
    filename = os.path.join(data_root or "", "image-{0}-lmdb".format(split))

    # We need to prepare the database for the size. We'll set it 100 times
    # greater than what we theoretically need.
    map_size = len(image_data) * channels * width * height * 100

    in_db = lmdb.open(filename, map_size=map_size)
    try:
        with in_db.begin(write=True) as in_txn:
            for idx, ann in tqdm(enumerate(image_data), total=len(image_data)):
                img = scp.ndimage.imread(ann["path"])

                # If not triple channel image, make it triple channeled
                if len(img.shape) <= 2:
                    img = np.dstack((img, img, img))

                # Crop out the object with context padding. The bbox values
                # get their own names so they do not clobber the width and
                # height taken from `shape`.
                x, y, bbox_width, bbox_height = ann["bbox"]
                crop_im = utils.get_image_crop(img, x, y, bbox_width, bbox_height, crop_size)

                # Resize it to the desired shape.
                im = scp.misc.imresize(crop_im, (crop_size, crop_size, 3))

                # Convert image from RGB to BGR
                im = im[:, :, ::-1]
                # Convert from H x W x C to C x H x W
                im = im.transpose((2, 0, 1))

                im_data = caffe.io.array_to_datum(im)

                key = '{:0>7d}'.format(idx)
                # if using python3, we need to convert the UTF-8 string to a byte array
                if sys.version_info[0] == 3:
                    key = key.encode('utf-8')

                in_txn.put(key, im_data.SerializeToString())
    finally:
        # Release the environment even when an image fails to load or convert.
        in_db.close()
    def process_sample(cfg: SampleCfg):
        """Crop one classified frame and optionally save the crop as a JPEG.

        The source frame is read from ``crops_dir/<video_id>/<frame + 1>.jpg``
        and cropped to ``cfg.rect``. The per-video output directory under
        ``classification_crops_dir`` is always created; the crop itself is
        written there only when ``save_jpegs`` is truthy.

        :param cfg: Sample configuration with the frame reference and rect.
        """
        video_id = cfg.fish_classification.video_id
        # File names use the frame number plus one, zero-padded to four digits.
        frame_name = '{:04}.jpg'.format(int(cfg.fish_classification.frame) + 1)

        frame = scipy.misc.imread(os.path.join(crops_dir, video_id, frame_name))
        crop = utils.get_image_crop(full_rgb=frame, rect=cfg.rect, out_size=INPUT_ROWS)

        out_dir = os.path.join(classification_crops_dir, video_id)
        os.makedirs(out_dir, exist_ok=True)
        if save_jpegs:
            scipy.misc.imsave(os.path.join(out_dir, frame_name), crop)
Ejemplo n.º 4
0
    def prepare_y(self, cfg: SampleCfg):
        """Build the target mask for a sample with the geometric augmentations applied.

        The mask at ``cfg.img_idx`` is scaled by 1/255, warped through
        ``utils.get_image_crop`` over the full image rect with the scale,
        shift and angle settings on ``cfg``, then flipped if requested.

        :param cfg: Sample configuration with the mask index and augmentation
            parameters.
        :return: The augmented mask with an extra axis inserted at position 3.
        """
        mask = self.masks[cfg.img_idx].astype(np.float32) / 255.0
        whole_image = Rect(0, 0, IMG_WITH, IMG_HEIGHT)

        # order=1 is forwarded to the crop helper -- presumably the
        # interpolation order; verify against utils.get_image_crop.
        warped = utils.get_image_crop(
            full_rgb=mask,
            rect=whole_image,
            scale_rect_x=cfg.scale_rect_x,
            scale_rect_y=cfg.scale_rect_y,
            shift_x_ratio=cfg.shift_x_ratio,
            shift_y_ratio=cfg.shift_y_ratio,
            angle=cfg.angle,
            out_size=IMG_WITH,
            order=1,
        ).astype('float32')

        if cfg.hflip:
            warped = img_augmentation.horizontal_flip(warped)
        if cfg.vflip:
            warped = img_augmentation.vertical_flip(warped)

        # NOTE(review): axis=3 is only valid if the crop has at least three
        # dimensions -- confirm the mask shape.
        return np.expand_dims(warped, axis=3)
Ejemplo n.º 5
0
    def prepare_x(self, cfg: SampleCfg):
        """Build the network input for a sample with all augmentations applied.

        The image at ``cfg.img_idx`` is warped through
        ``utils.get_image_crop`` over the full image rect, colour-jittered,
        flipped and blurred as configured on ``cfg``, multiplied by 255.0 and
        passed to ``preprocess_input``.

        :param cfg: Sample configuration with the image index and augmentation
            parameters.
        :return: The result of ``preprocess_input`` on the augmented image.
        """
        source = self.images[cfg.img_idx]
        whole_image = Rect(0, 0, IMG_WITH, IMG_HEIGHT)
        geometry = dict(
            scale_rect_x=cfg.scale_rect_x,
            scale_rect_y=cfg.scale_rect_y,
            shift_x_ratio=cfg.shift_x_ratio,
            shift_y_ratio=cfg.shift_y_ratio,
            angle=cfg.angle,
        )
        sample = utils.get_image_crop(full_rgb=source, rect=whole_image,
                                      out_size=IMG_WITH, **geometry)
        sample = sample.astype('float32')

        # Colour jitter: each step is skipped when its ratio is exactly 0.5.
        if cfg.saturation != 0.5:
            sample = img_augmentation.saturation(sample, variance=0.2, r=cfg.saturation)
        if cfg.contrast != 0.5:
            sample = img_augmentation.contrast(sample, variance=0.25, r=cfg.contrast)
        if cfg.brightness != 0.5:
            sample = img_augmentation.brightness(sample, variance=0.3, r=cfg.brightness)

        # Mirroring.
        if cfg.hflip:
            sample = img_augmentation.horizontal_flip(sample)
        if cfg.vflip:
            sample = img_augmentation.vertical_flip(sample)

        # A factor of exactly 1 disables the blur.
        if cfg.blurred_by_downscaling != 1:
            inverse_factor = 1.0 / cfg.blurred_by_downscaling
            sample = img_augmentation.blurred_by_downscaling(sample, inverse_factor)

        rescaled = sample * 255.0
        return preprocess_input(rescaled)
Ejemplo n.º 6
0
def create_dataset(image_data,
                   filename="train.hdf5",
                   shape=(3, 224, 224),
                   data_root="",
                   crop_size=224):
    """
    Create a HDF5 file to store the data set, so we can use it with Keras or Blocks.
    Each image is the segment of the object for which the attribute applies.
    We store the images with the ordering in the first element of the shape tuple i.e. (idx, 3, x, y)
    Ref. https://www.getdatajoy.com/learn/Read_and_Write_HDF5_from_Python

    Three datasets are written: ``data`` (the cropped images), ``attributes``
    (a 204-wide 0/1 vector per image) and ``metadata`` (the ``id`` field of
    each annotation).

    :param image_data: List of JSON objects with details about the images. Each
        entry must provide ``"id"``, ``"attrs_vector"``, ``"bbox"`` and ``"path"``.
    :param filename: The filename for the HDF5 file. You should specify the split here.
        ``.hdf5`` is appended when the name does not already end with it.
    :param shape: The shape of the image to save into the HDF5 file. The stored
        crops are (3, crop_size, crop_size), so ``shape`` has to match that.
    :param data_root: The folder the HDF5 file is written to.
    :param crop_size: The final size of the cropped image.
    :return: True when the file was written, False when any error occurred
        (the traceback is printed).
    """
    # Ensure the extension of the filename is `hdf5`
    if filename.split('.')[-1].strip() != 'hdf5':
        filename += ".hdf5"

    filename = os.path.join(data_root, filename)

    try:
        # Get the shape of the HDF5 file based on the number of images in the split
        n_images = len(image_data)
        # tuple() lets callers pass the shape as a list as well.
        dataset_shape = (n_images, ) + tuple(shape)

        with h5py.File(filename, 'w') as f:
            dset = f.create_dataset("data", dataset_shape)
            # 204 is the hard-coded length of each attribute vector.
            labels = f.create_dataset("attributes", (n_images, 204))
            metadata = f.create_dataset("metadata", (n_images, 1))

            dset.attrs["desc"] = "This is the image data."
            labels.attrs[
                "desc"] = "These are the attribute labels of the image data having the same index."
            metadata.attrs[
                "desc"] = "Metadata. Contains the COCO image ID of the image at the same index."

            # Now continue creating the HDF5 file with the list of attributes
            for i, ann in tqdm(enumerate(image_data), total=n_images):
                ann_id = ann["id"]
                ann_attrs = ann["attrs_vector"]

                # Get the object attributes as an array of 1s and 0s.
                # The built-in float is used because the `np.float` alias was
                # removed from NumPy.
                img_attrs = np.array([float(x > 0) for x in ann_attrs])

                # Get the bounding box for the object
                bbox = ann['bbox']

                # Read the image data using the given path.
                try:
                    img = scp.ndimage.imread(ann["path"])
                except Exception:
                    # Not a bare except, so KeyboardInterrupt and SystemExit
                    # still propagate.
                    raise Exception("Invalid image path")

                # If single channel image, make it triple channeled
                if len(img.shape) <= 2:
                    img = np.dstack((img, img, img))

                # Crop out the object whose attributes we have.
                x, y, width, height = bbox
                crop_img = utils.get_image_crop(img, x, y, width, height,
                                                crop_size)

                # Resize it to the desired shape.
                img = scp.misc.imresize(crop_img, (crop_size, crop_size, 3))

                # Reorder from H x W x C to channel-first (C x H x W).
                img = np.transpose(img, (2, 0, 1))

                # We have all the required data so we now save it.
                dset[i, :, :, :] = img
                labels[i, :] = img_attrs
                metadata[i, :] = ann_id

        return True

    except Exception:
        # Callers only get a boolean, so print the traceback for diagnosis.
        traceback.print_exc()
        return False