コード例 #1
0
ファイル: image_feature.py プロジェクト: ludwig-ai/ludwig
    def forward(self, v: TorchscriptPreprocessingInput) -> torch.Tensor:
        """Takes a list of images and adjusts the size and number of channels as specified in the metadata.

        If `v` is already a torch.Tensor, we assume that the images are already preprocessed to be the same size.

        Args:
            v: Either a list of image tensors (each one is resized individually and the results are
                stacked) or a single tensor that is unpacked below as (batch, channels, height, width).

        Returns:
            The stacked image tensor after the size and channel adjustments below have been applied.

        Raises:
            ValueError: If `v` is neither a list of tensors nor a tensor, or if the images have more
                channels than `self.num_channels` and `self.num_channels` is not 1.
        """
        # Nested conditional is a workaround to short-circuit boolean evaluation.
        # NOTE(review): presumably needed because this method is compiled with TorchScript - confirm
        # before flattening it into a single `and` expression.
        if not torch.jit.isinstance(v, List[torch.Tensor]):
            if not torch.jit.isinstance(v, torch.Tensor):
                raise ValueError(f"Unsupported input: {v}")

        if torch.jit.isinstance(v, List[torch.Tensor]):
            # Resize every image on its own first so that torch.stack receives
            # same-sized tensors (assumes resize_image returns the requested size - TODO confirm).
            imgs = [
                resize_image(img, (self.height, self.width),
                             self.resize_method) for img in v
            ]
            imgs_stacked = torch.stack(imgs)
        else:
            # A plain tensor is used as-is; it must already be 4-dimensional for the unpacking below.
            imgs_stacked = v

        # The batch dimension is not needed for any of the checks that follow.
        _, num_channels, height, width = imgs_stacked.shape

        # Ensure images are the size expected by the model
        if height != self.height or width != self.width:
            imgs_stacked = resize_image(imgs_stacked,
                                        (self.height, self.width),
                                        self.resize_method)

        # Ensures images have the number of channels expected by the model
        if num_channels != self.num_channels:
            if self.num_channels == 1:
                # Any multi-channel input is reduced with grayscale() when one channel is expected.
                imgs_stacked = grayscale(imgs_stacked)
            elif num_channels < self.num_channels:
                # The pad list is ordered from the last dimension backwards as (left, right) pairs:
                # width (0, 0), height (0, 0), channels (0, extra_channels). Only the channel
                # dimension grows, and the new channels are appended at the end.
                extra_channels = self.num_channels - num_channels
                imgs_stacked = torch.nn.functional.pad(
                    imgs_stacked, [0, 0, 0, 0, 0, extra_channels])
            else:
                # More channels than expected and the target is not 1: no conversion is attempted.
                raise ValueError(
                    f"Number of channels cannot be reconciled. metadata.num_channels = "
                    f"{self.num_channels}, but imgs.shape[1] = {num_channels}")

        return imgs_stacked
コード例 #2
0
    def _read_image_and_resize(filepath, img_width, img_height, should_resize,
                               num_channels, resize_method,
                               user_specified_num_channels):
        """
        Read an image from disk and conform it to the expected size and
        number of channels.

        :param filepath: path to the image
        :param img_width: expected width of the image
        :param img_height: expected height of the image
        :param should_resize: Should the image be resized?
        :param resize_method: type of resizing method
        :param num_channels: expected number of channels in the first image
        :param user_specified_num_channels: did the user specify num channels?
        :return: image object
        :raises ValueError: if the user did not specify a number of channels
                and the image does not have ``num_channels`` channels

        Helper method to read and resize an image according to model defn.
        If the user doesn't specify a number of channels, we use the first image
        in the dataset as the source of truth. If any image in the dataset
        doesn't have the same number of channels as the first image,
        raise an exception.

        If the user specifies a number of channels, we try to convert all the
        images to the specifications by dropping channels/padding 0 channels
        """

        img = imread(filepath)
        img_num_channels = num_channels_in_image(img)
        if img_num_channels == 1:
            # Give single-channel images an explicit trailing channel axis so
            # the three-index slicing below works for them too.
            img = img.reshape((img.shape[0], img.shape[1], 1))

        # Without a user-specified channel count the image must match the
        # first image of the dataset; fail before doing any further work.
        if (user_specified_num_channels is not True
                and img_num_channels != num_channels):
            raise ValueError(
                'Image {0} has {1} channels, unlike the first image, which'
                ' has {2} channels'.format(filepath, img_num_channels,
                                           num_channels))

        # Resize BEFORE the channel padding: the padded buffer is allocated
        # at (img_height, img_width), so copying an image of any other size
        # into it fails with a shape mismatch.
        if should_resize:
            img = resize_image(img, (img_height, img_width), resize_method)

        if user_specified_num_channels is True:
            # Number of channels is specified by the user: copy the channels
            # both sides have in common into a zero-filled buffer, which
            # drops surplus channels and leaves missing ones as zeros.
            img_padded = np.zeros((img_height, img_width, num_channels))
            min_num_channels = min(num_channels, img_num_channels)
            img_padded[:, :, :min_num_channels] = img[:, :, :min_num_channels]
            img = img_padded

            if img_num_channels != num_channels:
                logging.warning(
                    "Image {0} has {1} channels, where as {2}"
                    " channels are expected. Dropping/adding channels"
                    " with 0s as appropriate".format(filepath,
                                                     img_num_channels,
                                                     num_channels))

        return img
コード例 #3
0
    def _read_image_and_resize(filepath, img_width, img_height, should_resize,
                               num_channels, resize_method,
                               user_specified_num_channels):
        """
        Read an image from disk and conform it to the expected size and
        number of channels.

        :param filepath: path to the image
        :param img_width: expected width of the image
        :param img_height: expected height of the image
        :param should_resize: Should the image be resized?
        :param resize_method: type of resizing method
        :param num_channels: expected number of channels in the first image
        :param user_specified_num_channels: did the user specify num channels?
        :return: image object
        :raises ValueError: if the channel count differs from ``num_channels``
                and the user did not specify one, or if the final image is
                not ``img_height`` x ``img_width``

        Helper method to read and resize an image according to model defn.
        If the user doesn't specify a number of channels, we use the first image
        in the dataset as the source of truth. If any image in the dataset
        doesn't have the same number of channels as the first image,
        raise an exception.

        If the user specifies a number of channels, we try to convert all the
        images to the specifications by dropping channels/padding 0 channels
        """
        # scikit-image is imported lazily; when it is missing the process is
        # terminated after logging how to install it.
        try:
            from skimage.io import imread
        except ImportError:
            logger.error(
                ' scikit-image is not installed. '
                'In order to install all image feature dependencies run '
                'pip install ludwig[image]')
            sys.exit(-1)

        img = imread(filepath)
        img_num_channels = num_channels_in_image(img)
        if img_num_channels == 1:
            # Add an explicit trailing channel axis for the slicing below.
            img = img.reshape((img.shape[0], img.shape[1], 1))

        # Resizing happens first so the padded buffer below, which is
        # allocated at the target size, can take the image as-is.
        if should_resize:
            img = resize_image(img, (img_height, img_width), resize_method)

        if user_specified_num_channels is True:

            # convert to greyscale if needed
            if num_channels == 1 and img_num_channels in (3, 4):
                img = greyscale(img)
                img_num_channels = 1

            # Number of channels is specified by the user: copy the shared
            # channels into a zero-filled buffer, which drops surplus
            # channels and leaves missing ones as zeros.
            img_padded = np.zeros((img_height, img_width, num_channels),
                                  dtype=np.uint8)
            min_num_channels = min(num_channels, img_num_channels)
            img_padded[:, :, :min_num_channels] = img[:, :, :min_num_channels]
            img = img_padded

            if img_num_channels != num_channels:
                logger.warning(
                    "Image {0} has {1} channels, where as {2} "
                    "channels are expected. Dropping/adding channels "
                    "with 0s as appropriate".format(filepath, img_num_channels,
                                                    num_channels))
        else:
            # If the image isn't like the first image, raise exception
            if img_num_channels != num_channels:
                raise ValueError(
                    'Image {0} has {1} channels, unlike the first image, which '
                    'has {2} channels. Make sure all the images have the same '
                    'number of channels or use the num_channels property in '
                    'image preprocessing'.format(filepath, img_num_channels,
                                                 num_channels))

        # Final guard: reached with a wrong size only when no resize was
        # requested and the image differs from the expected dimensions.
        if img.shape[0] != img_height or img.shape[1] != img_width:
            raise ValueError(
                "Images are not of the same size. "
                "Expected size is {0}, "
                "current image size is {1}. "
                "Images are expected to be all of the same size "
                "or explicit image width and height are expected "
                "to be provided. "
                "Additional information: "
                "https://ludwig-ai.github.io/ludwig-docs/user_guide/#image-features-preprocessing"
                .format([img_height, img_width, num_channels], img.shape))

        return img
コード例 #4
0
    def add_feature_data(feature, dataset_df, data, metadata,
                         preprocessing_parameters):
        """
        Load every image referenced by the feature's column and store the
        pixel data either in memory or in an HDF5 file next to the CSV.

        :param feature: feature definition; reads 'name', 'should_resize',
                'in_memory' and, when resizing, HEIGHT, WIDTH and
                'resize_method'
        :param dataset_df: dataframe holding image paths relative to the
                directory of ``dataset_df.csv``
        :param data: output dict; ``data[feature['name']]`` receives either
                the image array (in memory) or an index array (HDF5)
        :param metadata: output dict; receives height, width, num_channels
                and in_memory for this feature
        :param preprocessing_parameters: not used by this implementation

        The first image of the column defines the number of channels and,
        unless resizing is requested, the height and width of all images.
        """
        name = feature['name']
        csv_path = os.path.dirname(os.path.abspath(dataset_df.csv))

        # The first image is the source of truth for the stored shape.
        an_image = imread(os.path.join(csv_path, dataset_df[name][0]))
        im_height = an_image.shape[0]
        im_width = an_image.shape[1]

        if an_image.ndim == 2:
            num_channels = 1
        else:
            num_channels = an_image.shape[2]

        num_images = len(dataset_df)

        if feature['should_resize']:
            im_height = feature[HEIGHT]
            im_width = feature[WIDTH]

        metadata[name] = {
            'height': im_height,
            'width': im_width,
            'num_channels': num_channels,
            'in_memory': feature['in_memory']
        }

        def load_image(row):
            # Read one image, give 2-D images a channel axis and resize it
            # when the feature asks for it.
            img = imread(os.path.join(csv_path, dataset_df[name][row]))
            if img.ndim == 2:
                img = img.reshape((img.shape[0], img.shape[1], 1))
            if feature['should_resize']:
                img = resize_image(img, (im_height, im_width),
                                   feature['resize_method'])
            return img

        if feature['in_memory']:
            # uint8, matching the HDF5 branch: a signed int8 buffer would
            # wrap every pixel value above 127 to a negative number.
            data[name] = np.empty(
                (num_images, im_height, im_width, num_channels),
                dtype=np.uint8)
            for i in range(num_images):
                data[name][i, :, :, :] = load_image(i)
        else:
            data_fp = os.path.splitext(dataset_df.csv)[0] + '.hdf5'
            # Append to an existing file instead of truncating it.
            mode = 'w'
            if os.path.isfile(data_fp):
                mode = 'r+'
            with h5py.File(data_fp, mode) as h5_file:
                image_dataset = h5_file.create_dataset(
                    name + '_data',
                    (num_images, im_height, im_width, num_channels),
                    dtype=np.uint8)
                for i in range(num_images):
                    image_dataset[i, :im_height, :im_width, :] = load_image(i)

            # With the pixels on disk, the column only carries row indices
            # into the HDF5 dataset.
            data[name] = np.arange(num_images)
コード例 #5
0
    def _read_image_and_resize(img_entry: Union[str,
                                                'numpy.array'], img_width: int,
                               img_height: int, should_resize: bool,
                               num_channels: int, resize_method: str,
                               user_specified_num_channels: int):
        """
        Load a single image and bring it to the expected size and channels.

        :param img_entry Union[str, 'numpy.array']: if str file path to the
                image else numpy.array of the image itself
        :param img_width: expected width of the image
        :param img_height: expected height of the image
        :param should_resize: Should the image be resized?
        :param resize_method: type of resizing method
        :param num_channels: expected number of channels in the first image
        :param user_specified_num_channels: did the user specify num channels?
        :return: image object, or None when the entry cannot be read

        Without a user-specified number of channels, the first image of the
        dataset is the reference: an image with a different channel count
        raises an exception.

        With a user-specified number of channels, the image is converted to
        it by dropping surplus channels or padding with zero channels.
        """
        img = read_image(img_entry)
        if img is None:
            logger.info(f"{img_entry} cannot be read")
            return None

        source_channels = num_channels_in_image(img)
        if source_channels == 1:
            # Make the channel axis explicit for single-channel images.
            rows, cols = img.shape[0], img.shape[1]
            img = img.reshape((rows, cols, 1))

        if should_resize:
            target_size = (img_height, img_width)
            img = resize_image(img, target_size, resize_method)

        if user_specified_num_channels is not True:
            # The first image defines the channel count; anything else fails.
            if source_channels != num_channels:
                mismatch_message = (
                    'Image has {0} channels, unlike the first image, which '
                    'has {1} channels. Make sure all the images have the same '
                    'number of channels or use the num_channels property in '
                    'image preprocessing'
                ).format(source_channels, num_channels)
                raise ValueError(mismatch_message)
        else:
            # Colour images collapse to greyscale when one channel is wanted.
            if num_channels == 1 and source_channels in (3, 4):
                img = greyscale(img)
                source_channels = 1

            # Copy the channels both sides share into a zeroed canvas of the
            # requested depth.
            canvas = np.zeros((img_height, img_width, num_channels),
                              dtype=np.uint8)
            shared = min(num_channels, source_channels)
            canvas[:, :, :shared] = img[:, :, :shared]
            img = canvas

            if source_channels != num_channels:
                channel_warning = (
                    "Image has {0} channels, where as {1} "
                    "channels are expected. Dropping/adding channels "
                    "with 0s as appropriate"
                ).format(source_channels, num_channels)
                logger.warning(channel_warning)

        if (img.shape[0], img.shape[1]) != (img_height, img_width):
            expected_shape = [img_height, img_width, num_channels]
            raise ValueError(
                "Images are not of the same size. "
                "Expected size is {0}, "
                "current image size is {1}."
                "Images are expected to be all of the same size "
                "or explicit image width and height are expected "
                "to be provided. "
                "Additional information: "
                "https://ludwig-ai.github.io/ludwig-docs/user_guide/#image-features-preprocessing"
                .format(expected_shape, img.shape))

        return img
コード例 #6
0
ファイル: image_feature.py プロジェクト: teochenglim/ludwig
    def add_feature_data(
            feature,
            dataset_df,
            data,
            metadata,
            preprocessing_parameters
    ):
        """
        Load every image referenced by the feature's column and store the
        pixel data either in memory or in an HDF5 file next to the CSV.

        :param feature: feature definition; reads 'name' and 'preprocessing'
        :param dataset_df: dataframe holding the image paths; when it has a
                ``csv`` attribute, paths are resolved against its directory
        :param data: output dict; ``data[feature['name']]`` receives either
                the image array (in memory) or an index array (HDF5)
        :param metadata: output dict; height, width and num_channels are
                written under ``metadata[name]['preprocessing']``
        :param preprocessing_parameters: reads 'in_memory', optionally
                height / width, and 'resize_method' when resizing
        :raises ValueError: if height/width are incomplete, not integers or
                not positive, or if a relative image path is given without
                a CSV location to resolve it against

        Images are resized when a height or width is provided; otherwise the
        first image defines the height and width. The first image always
        defines the number of channels.
        """
        name = feature['name']

        set_default_value(
            feature['preprocessing'],
            'in_memory',
            preprocessing_parameters['in_memory']
        )

        if ('height' in preprocessing_parameters or
                'width' in preprocessing_parameters):
            should_resize = True
            try:
                provided_height = int(preprocessing_parameters[HEIGHT])
                provided_width = int(preprocessing_parameters[WIDTH])
            except (KeyError, ValueError) as e:
                # KeyError covers the case where only one of the two
                # dimensions was supplied; report it like a bad value.
                raise ValueError(
                    'Image height and width must be set and have '
                    'positive integer values: ' + str(e)
                )
            if (provided_height <= 0 or provided_width <= 0):
                raise ValueError(
                    'Image height and width must be positive integers'
                )
        else:
            should_resize = False

        csv_path = None
        if hasattr(dataset_df, 'csv'):
            csv_path = os.path.dirname(os.path.abspath(dataset_df.csv))

        num_images = len(dataset_df)

        height = 0
        width = 0
        num_channels = 1

        if num_images > 0:
            # here if a width and height have not been specified
            # we assume that all images have the same width and height
            # thus the width and height of the first one are the same
            # of all the other ones
            if (csv_path is None and
                    not os.path.isabs(dataset_df[name][0])):
                raise ValueError(
                    'Image file paths must be absolute'
                )

            first_image = imread(
                get_abs_path(
                    csv_path,
                    dataset_df[name][0]
                )
            )

            height = first_image.shape[0]
            width = first_image.shape[1]

            if first_image.ndim == 2:
                num_channels = 1
            else:
                num_channels = first_image.shape[2]

        if should_resize:
            height = provided_height
            width = provided_width

        metadata[name]['preprocessing']['height'] = height
        metadata[name]['preprocessing']['width'] = width
        metadata[name]['preprocessing']['num_channels'] = num_channels

        def load_image(row):
            # Read one image, give 2-D images a channel axis, resize it when
            # requested and cut surplus channels.
            img = imread(get_abs_path(csv_path, dataset_df[name][row]))
            if img.ndim == 2:
                img = img.reshape((img.shape[0], img.shape[1], 1))
            if should_resize:
                img = resize_image(
                    img,
                    (height, width),
                    preprocessing_parameters['resize_method']
                )
            # todo: temporary workaround for images with alpha channel, replace
            # Applied on both storage paths so they accept the same inputs.
            if img.ndim == 3 and img.shape[2] != num_channels:
                img = img[:, :, :num_channels]
            return img

        if feature['preprocessing']['in_memory']:
            # uint8, matching the HDF5 branch: a signed int8 buffer would
            # wrap every pixel value above 127 to a negative number.
            data[name] = np.empty(
                (num_images, height, width, num_channels),
                dtype=np.uint8
            )
            for i in range(num_images):
                data[name][i, :, :, :] = load_image(i)
        else:
            # NOTE(review): this branch reads dataset_df.csv unconditionally,
            # unlike the hasattr-guarded lookup above.
            data_fp = os.path.splitext(dataset_df.csv)[0] + '.hdf5'
            # Append to an existing file instead of truncating it.
            mode = 'w'
            if os.path.isfile(data_fp):
                mode = 'r+'
            with h5py.File(data_fp, mode) as h5_file:
                image_dataset = h5_file.create_dataset(
                    name + '_data',
                    (num_images, height, width, num_channels),
                    dtype=np.uint8
                )
                for i in range(num_images):
                    image_dataset[i, :height, :width, :] = load_image(i)

            # With the pixels on disk, the column only carries row indices
            # into the HDF5 dataset.
            data[name] = np.arange(num_images)
コード例 #7
0
ファイル: test_image_utils.py プロジェクト: yarenty/ludwig
def test_resize_image(img: torch.Tensor, new_size: int, resize_method: str,
                      expected_img: torch.Tensor):
    """Check that resize_image returns exactly the expected tensor."""
    assert torch.equal(resize_image(img, new_size, resize_method),
                       expected_img)
コード例 #8
0
ファイル: image_feature.py プロジェクト: ludwig-ai/ludwig
    def _read_image_if_bytes_obj_and_resize(
        img_entry: Union[bytes, torch.Tensor, np.ndarray],
        img_width: int,
        img_height: int,
        should_resize: bool,
        num_channels: int,
        resize_method: str,
        user_specified_num_channels: bool,
    ) -> Optional[np.ndarray]:
        """
        :param img_entry Union[bytes, torch.Tensor, np.ndarray]: raw bytes are
            decoded with read_image_from_bytes_obj, a numpy array is converted
            to a tensor with its last axis moved to the front, and any other
            value is used as the image itself
        :param img_width: expected width of the image
        :param img_height: expected height of the image
        :param should_resize: Should the image be resized?
        :param resize_method: type of resizing method
        :param num_channels: expected number of channels in the first image
        :param user_specified_num_channels: did the user specify num channels?
        :return: image object as a numpy array, or None (after a warning)
            when the entry does not yield a torch.Tensor
        :raises ValueError: if the channel count differs from num_channels
            and the user did not specify one, or if the final image is not
            img_height x img_width

        Helper method to read and resize an image according to model definition.
        If the user doesn't specify a number of channels, we use the first image
        in the dataset as the source of truth. If any image in the dataset
        doesn't have the same number of channels as the first image,
        raise an exception.

        If the user specifies a number of channels, we try to convert all the
        images to the specifications by dropping channels/padding 0 channels
        """

        if isinstance(img_entry, bytes):
            img = read_image_from_bytes_obj(img_entry, num_channels)
        elif isinstance(img_entry, np.ndarray):
            # Move the last axis to the front so the channel axis comes
            # first, as the shape checks below expect.
            img = torch.from_numpy(img_entry).permute(2, 0, 1)
        else:
            img = img_entry

        if not isinstance(img, torch.Tensor):
            warnings.warn(f"Image with value {img} cannot be read")
            return None

        img_num_channels = num_channels_in_image(img)
        # Convert to grayscale if needed.
        if num_channels == 1 and img_num_channels != 1:
            img = grayscale(img)
            img_num_channels = 1

        if should_resize:
            img = resize_image(img, (img_height, img_width), resize_method)

        if user_specified_num_channels:
            # Number of channels is specified by the user
            if img_num_channels != num_channels:
                logging.warning(
                    "Image has {} channels, where as {} "
                    "channels are expected. Dropping/adding channels "
                    "with 0s as appropriate".format(img_num_channels,
                                                    num_channels))
                if num_channels > img_num_channels:
                    # The pad list runs from the last dimension backwards as
                    # (left, right) pairs, so only the leading (channel)
                    # dimension grows, with zeros appended at the end.
                    extra_channels = num_channels - img_num_channels
                    img = torch.nn.functional.pad(
                        img, [0, 0, 0, 0, 0, extra_channels])
                else:
                    # Drop the surplus channels, as the warning announces;
                    # without this the image kept all of its channels.
                    img = img[:num_channels]
        else:
            # If the image isn't like the first image, raise exception
            if img_num_channels != num_channels:
                raise ValueError(
                    "Image has {} channels, unlike the first image, which "
                    "has {} channels. Make sure all the images have the same "
                    "number of channels or use the num_channels property in "
                    "image preprocessing".format(img_num_channels,
                                                 num_channels))

        # Height and width are read from dimensions 1 and 2 of the tensor.
        if img.shape[1] != img_height or img.shape[2] != img_width:
            raise ValueError(
                "Images are not of the same size. "
                "Expected size is {}, "
                "current image size is {}."
                "Images are expected to be all of the same size "
                "or explicit image width and height are expected "
                "to be provided. "
                "Additional information: "
                "https://ludwig-ai.github.io/ludwig-docs/latest/configuration/features/image_features"
                "#image-features-preprocessing".format(
                    [img_height, img_width, num_channels], img.shape))

        return img.numpy()