Exemple #1
0
    def _finalize_preprocessing_parameters(preprocessing_parameters,
                                           first_image_path):
        """
        Helper method to determine the height, width and number of channels for
        preprocessing the image data. Values supplied by the user in
        ``preprocessing_parameters`` take precedence; anything missing is taken
        from the first image in the dataset, the assumption being that all the
        images are of the same size with the same number of channels.

        :param preprocessing_parameters: mapping that may contain the HEIGHT,
               WIDTH and NUM_CHANNELS keys
        :param first_image_path: path of the first image of the dataset
        :return: tuple ``(should_resize, width, height, num_channels,
                 user_specified_num_channels, first_image)``
        :raises ValueError: if only one of height/width is provided, or if
                either cannot be converted to a positive integer
        """
        try:
            from skimage.io import imread
        except ImportError:
            logger.error(
                ' scikit-image is not installed. '
                'In order to install all image feature dependencies run '
                'pip install ludwig[image]')
            sys.exit(-1)

        # Read the first image in the dataset
        first_image = imread(first_image_path)
        first_img_height = first_image.shape[0]
        first_img_width = first_image.shape[1]
        first_img_num_channels = num_channels_in_image(first_image)

        should_resize = False
        if (HEIGHT in preprocessing_parameters
                or WIDTH in preprocessing_parameters):
            should_resize = True
            try:
                height = int(preprocessing_parameters[HEIGHT])
                width = int(preprocessing_parameters[WIDTH])
            except (KeyError, TypeError, ValueError) as e:
                # KeyError: only one of the two keys was provided.
                # TypeError: a value (e.g. None) that int() cannot convert.
                # Both are reported the same way as a malformed number.
                raise ValueError('Image height and width must be set and have '
                                 'positive integer values: ' + str(e)) from e
            if height <= 0 or width <= 0:
                raise ValueError(
                    'Image height and width must be positive integers')
        else:
            # User hasn't specified height and width, so the size of the
            # first image is assumed to be the size of all the other ones.
            height = first_img_height
            width = first_img_width

        if NUM_CHANNELS in preprocessing_parameters:
            # User specified num_channels in the model/feature config
            user_specified_num_channels = True
            num_channels = preprocessing_parameters[NUM_CHANNELS]
        else:
            user_specified_num_channels = False
            num_channels = first_img_num_channels

        # NOTE(review): this assert is skipped under `python -O` and surfaces
        # as AssertionError; kept unchanged so callers see the same exception.
        assert isinstance(
            num_channels,
            int), ValueError('Number of image channels needs to be an integer')

        return (should_resize, width, height, num_channels,
                user_specified_num_channels, first_image)
def test_num_channels_in_image():
    """Check channel counts for valid images and rejection of invalid input."""
    assert num_channels_in_image(image_2d) == 1
    assert num_channels_in_image(image_3d) == 3

    # Each invalid input gets its own context manager: the first exception
    # ends a `with pytest.raises` block, so a second call placed in the same
    # block would never run.
    with pytest.raises(ValueError):
        num_channels_in_image(np.arange(5))
    with pytest.raises(ValueError):
        num_channels_in_image(None)
Exemple #3
0
    def _read_image_and_resize(filepath, img_width, img_height, should_resize,
                               num_channels, resize_method,
                               user_specified_num_channels):
        """
        :param filepath: path to the image
        :param img_width: expected width of the image
        :param img_height: expected height of the image
        :param should_resize: Should the image be resized?
        :param num_channels: expected number of channels of the image
        :param resize_method: type of resizing method
        :param user_specified_num_channels: did the user specify num channels?
        :return: image object
        :raises ValueError: if the user did not specify num channels and the
                image has a different number of channels than expected

        Helper method to read and resize an image according to model defn.
        If the user doesn't specify a number of channels, we use the first image
        in the dataset as the source of truth. If any image in the dataset
        doesn't have the same number of channels as the first image,
        raise an exception.

        If the user specifies a number of channels, we try to convert all the
        images to the specifications by dropping channels/padding 0 channels
        """

        img = imread(filepath)
        img_num_channels = num_channels_in_image(img)
        if img_num_channels == 1:
            # Give single-channel images an explicit trailing channel axis
            img = img.reshape((img.shape[0], img.shape[1], 1))

        if (user_specified_num_channels is not True
                and img_num_channels != num_channels):
            # If the image isn't like the first image, raise exception
            raise ValueError(
                'Image {0} has {1} channels, unlike the first image, which'
                ' has {2} channels'.format(filepath, img_num_channels,
                                           num_channels))

        # Resize BEFORE padding channels: the padding buffer below is
        # allocated at the target size, so copying an image that has not been
        # resized yet fails on any image whose size differs from the target.
        if should_resize:
            img = resize_image(img, (img_height, img_width), resize_method)

        if user_specified_num_channels is True:
            # Number of channels is specified by the user
            img_padded = np.zeros((img_height, img_width, num_channels))
            min_num_channels = min(num_channels, img_num_channels)
            img_padded[:, :, :min_num_channels] = img[:, :, :min_num_channels]
            img = img_padded

            if img_num_channels != num_channels:
                logging.warning(
                    "Image {0} has {1} channels, where as {2}"
                    " channels are expected. Dropping/adding channels"
                    " with 0s as appropriate".format(filepath,
                                                     img_num_channels,
                                                     num_channels))

        return img
Exemple #4
0
def test_num_channels_in_image():
    """Check channel counts for valid tensors and rejection of invalid input."""
    image_2d = torch.randint(0, 1, (10, 10))
    image_3d = torch.randint(0, 1, (3, 10, 10))
    assert num_channels_in_image(image_2d) == 1
    assert num_channels_in_image(image_3d) == 3

    # Each invalid input gets its own context manager: the first exception
    # ends a `with pytest.raises` block, so a second call placed in the same
    # block would never run.
    with pytest.raises(ValueError):
        num_channels_in_image(torch.rand(5))
    with pytest.raises(ValueError):
        num_channels_in_image(None)
Exemple #5
0
    def _infer_number_of_channels(image_sample: List[torch.Tensor]):
        """Infers the channel depth to use from a group of images.

        We make the assumption that the majority of datasets scraped from the web will be RGB, so if we get a mixed bag
        of images we should default to that. However, if the majority of the sample images have a specific channel depth
        (other than 3) this is probably intentional so we keep it, but log an info message.
        """
        n_images = len(image_sample)
        channel_frequency = Counter(
            [num_channels_in_image(x) for x in image_sample])
        if channel_frequency[1] > n_images / 2:
            # If the majority of images in sample are 1 channel, use 1.
            num_channels = 1
        elif channel_frequency[2] > n_images / 2:
            # If the majority of images in sample are 2 channel, use 2.
            num_channels = 2
        elif channel_frequency[4] > n_images / 2:
            # If the majority of images in sample are 4 channel, use 4.
            num_channels = 4
        else:
            # Default case: use 3 channels.
            num_channels = 3
        logging.info(
            f"Inferring num_channels from the first {n_images} images.")
        logging.info("\n".join([
            f"  images with {k} channels: {v}"
            for k, v in sorted(channel_frequency.items())
        ]))
        if num_channels == max(channel_frequency, key=channel_frequency.get):
            logging.info(
                f"Using {num_channels} channels because it is the majority in sample. If an image with"
                f" a different depth is read, will attempt to convert to {num_channels} channels."
            )
        else:
            logging.info(f"Defaulting to {num_channels} channels.")
        logging.info(
            "To explicitly set the number of channels, define num_channels in the preprocessing dictionary of "
            "the image input feature config.")
        return num_channels
Exemple #6
0
    def _read_image_and_resize(filepath, img_width, img_height, should_resize,
                               num_channels, resize_method,
                               user_specified_num_channels):
        """
        :param filepath: path to the image
        :param img_width: expected width of the image
        :param img_height: expected height of the image
        :param should_resize: Should the image be resized?
        :param num_channels: expected number of channels of the image
        :param resize_method: type of resizing method
        :param user_specified_num_channels: did the user specify num channels?
        :return: image object
        :raises ValueError: if the user did not specify num channels and the
                image has a different number of channels than expected, or if
                the final image is not img_height x img_width

        Helper method to read and resize an image according to model defn.
        If the user doesn't specify a number of channels, we use the first image
        in the dataset as the source of truth. If any image in the dataset
        doesn't have the same number of channels as the first image,
        raise an exception.

        If the user specifies a number of channels, we try to convert all the
        images to the specifications by dropping channels/padding 0 channels
        """
        try:
            from skimage.io import imread
        except ImportError:
            logger.error(
                ' scikit-image is not installed. '
                'In order to install all image feature dependencies run '
                'pip install ludwig[image]')
            sys.exit(-1)

        img = imread(filepath)
        img_num_channels = num_channels_in_image(img)
        if img_num_channels == 1:
            # Give single-channel images an explicit trailing channel axis
            img = img.reshape((img.shape[0], img.shape[1], 1))

        if should_resize:
            img = resize_image(img, (img_height, img_width), resize_method)

        if user_specified_num_channels is True:

            # convert to greyscale if needed
            if num_channels == 1 and (img_num_channels == 3
                                      or img_num_channels == 4):
                img = greyscale(img)
                img_num_channels = 1

            # Number of channels is specified by the user
            img_padded = np.zeros((img_height, img_width, num_channels),
                                  dtype=np.uint8)
            min_num_channels = min(num_channels, img_num_channels)
            img_padded[:, :, :min_num_channels] = img[:, :, :min_num_channels]
            img = img_padded

            if img_num_channels != num_channels:
                logger.warning(
                    "Image {0} has {1} channels, where as {2} "
                    "channels are expected. Dropping/adding channels "
                    "with 0s as appropriate".format(filepath, img_num_channels,
                                                    num_channels))
        else:
            # If the image isn't like the first image, raise exception
            if img_num_channels != num_channels:
                raise ValueError(
                    'Image {0} has {1} channels, unlike the first image, which '
                    'has {2} channels. Make sure all the images have the same '
                    'number of channels or use the num_channels property in '
                    'image preprocessing'.format(filepath, img_num_channels,
                                                 num_channels))

        if img.shape[0] != img_height or img.shape[1] != img_width:
            raise ValueError(
                "Images are not of the same size. "
                "Expected size is {0}, "
                "current image size is {1}. "
                "Images are expected to be all of the same size "
                "or explicit image width and height are expected "
                "to be provided. "
                "Additional information: "
                "https://ludwig-ai.github.io/ludwig-docs/user_guide/#image-features-preprocessing"
                .format([img_height, img_width, num_channels], img.shape))

        return img
Exemple #7
0
    def add_feature_data(feature, dataset_df, data, metadata,
                         preprocessing_parameters):
        """
        Read every image listed in the feature's column and store the pixels.

        When ``feature['preprocessing']['in_memory']`` is true the images are
        stacked into ``data[feature['name']]``; otherwise they are written to
        an HDF5 file named after ``dataset_df.csv`` and
        ``data[feature['name']]`` holds the row indices. The resolved height,
        width and num_channels are recorded in ``metadata``.

        :param feature: feature definition; its 'name' and 'preprocessing'
               entries are read
        :param dataset_df: dataset holding the image paths in the column
               named after the feature
        :param data: mapping that receives the processed data
        :param metadata: mapping that receives the resolved image dimensions
        :param preprocessing_parameters: mapping with 'in_memory',
               'resize_method' and optionally height/width/num_channels
        :raises ValueError: on an empty dataset, on missing or non-positive
                height/width, or on relative paths without a CSV location
        """
        set_default_value(feature['preprocessing'], 'in_memory',
                          preprocessing_parameters['in_memory'])

        name = feature['name']

        csv_path = None
        if hasattr(dataset_df, 'csv'):
            csv_path = os.path.dirname(os.path.abspath(dataset_df.csv))

        num_images = len(dataset_df)
        if num_images == 0:
            raise ValueError('There are no images in the dataset provided.')

        height = 0
        width = 0
        should_resize = False
        if ('height' in preprocessing_parameters
                or 'width' in preprocessing_parameters):
            should_resize = True
            try:
                height = int(preprocessing_parameters[HEIGHT])
                width = int(preprocessing_parameters[WIDTH])
            except (KeyError, TypeError, ValueError) as e:
                # KeyError: only one of the two keys was provided.
                # TypeError: a value (e.g. None) that int() cannot convert.
                raise ValueError('Image height and width must be set and have '
                                 'positive integer values: ' + str(e)) from e
            if height <= 0 or width <= 0:
                raise ValueError(
                    'Image height and width must be positive integers')

        # here if a width and height have not been specified
        # we assume that all images have the same width and height
        # thus the width and height of the first one are the same
        # of all the other ones
        if (csv_path is None
                and not os.path.isabs(dataset_df[name][0])):
            raise ValueError('Image file paths must be absolute')

        first_image = imread(
            get_abs_path(csv_path, dataset_df[name][0]))

        first_img_height = first_image.shape[0]
        first_img_width = first_image.shape[1]
        first_img_num_channels = num_channels_in_image(first_image)

        if height == 0 or width == 0:
            # User hasn't specified height and width
            height = first_img_height
            width = first_img_width

        # User specified num_channels in the model/feature definition
        user_specified_num_channels = False
        num_channels = first_img_num_channels
        if NUM_CHANNELS in preprocessing_parameters:
            user_specified_num_channels = True
            num_channels = preprocessing_parameters[NUM_CHANNELS]

        # NOTE(review): this assert is skipped under `python -O` and surfaces
        # as AssertionError; kept unchanged so callers see the same exception.
        assert isinstance(
            num_channels,
            int), ValueError('Number of image channels needs to be an integer')

        metadata[name]['preprocessing']['height'] = height
        metadata[name]['preprocessing']['width'] = width
        metadata[name]['preprocessing']['num_channels'] = num_channels

        if feature['preprocessing']['in_memory']:
            # uint8, like the HDF5 branch below: a signed 8-bit buffer would
            # wrap every pixel value above 127 to a negative number.
            data[name] = np.empty(
                (num_images, height, width, num_channels), dtype=np.uint8)
            for i in range(num_images):
                filepath = get_abs_path(csv_path, dataset_df[name][i])

                img = ImageBaseFeature._read_image_and_resize(
                    filepath, width, height, should_resize, num_channels,
                    preprocessing_parameters['resize_method'],
                    user_specified_num_channels)
                data[name][i, :, :, :] = img
        else:
            data_fp = os.path.splitext(dataset_df.csv)[0] + '.hdf5'
            # Open an existing file for update instead of truncating it
            mode = 'w'
            if os.path.isfile(data_fp):
                mode = 'r+'
            with h5py.File(data_fp, mode) as h5_file:
                image_dataset = h5_file.create_dataset(
                    name + '_data',
                    (num_images, height, width, num_channels),
                    dtype=np.uint8)
                for i in range(num_images):
                    filepath = get_abs_path(csv_path, dataset_df[name][i])

                    img = ImageBaseFeature._read_image_and_resize(
                        filepath, width, height, should_resize, num_channels,
                        preprocessing_parameters['resize_method'],
                        user_specified_num_channels)

                    image_dataset[i, :height, :width, :] = img

            data[name] = np.arange(num_images)
Exemple #8
0
    def _finalize_preprocessing_parameters(
            preprocessing_parameters: dict,
            first_img_entry: Union[str, 'numpy.array'], src_path: str,
            input_feature_col: np.array):
        """
        Helper method to determine the height, width and number of channels for
        preprocessing the image data. This is achieved by looking at the
        parameters provided by the user. When there are some missing parameters,
        we either infer them from a sample of the column (when
        INFER_IMAGE_DIMENSIONS is set) or fall back on to the first image in
        the dataset.

        :param preprocessing_parameters: mapping with INFER_IMAGE_DIMENSIONS
               and, optionally, HEIGHT, WIDTH, NUM_CHANNELS and the
               INFER_IMAGE_* limits
        :param first_img_entry: first image entry, passed to ``read_image``
        :param src_path: base path handed to ``get_image_from_path``
        :param input_feature_col: column of image entries.
               NOTE(review): annotated as np.array but ``.head`` is called on
               it, so presumably a pandas Series - confirm against callers
        :return: tuple ``(should_resize, width, height, num_channels,
                 user_specified_num_channels, first_image)``
        :raises ValueError: if height/width are incomplete or not positive
                integers, or if the dimensions cannot be determined
        """
        first_image = read_image(first_img_entry)

        explicit_height_width = HEIGHT in preprocessing_parameters or WIDTH in preprocessing_parameters
        explicit_num_channels = NUM_CHANNELS in preprocessing_parameters

        inferred_sample = None
        if preprocessing_parameters[INFER_IMAGE_DIMENSIONS] and not (
                explicit_height_width and explicit_num_channels):
            sample_size = min(
                len(input_feature_col),
                preprocessing_parameters[INFER_IMAGE_SAMPLE_SIZE])
            sample = [
                read_image(get_image_from_path(src_path, img))
                for img in input_feature_col.head(sample_size)
            ]
            # Unreadable images come back as None and are left out
            inferred_sample = [img for img in sample if img is not None]
            if len(inferred_sample) == 0:
                raise ValueError(
                    "No readable images in sample, image dimensions cannot be inferred"
                )

        should_resize = False
        if explicit_height_width:
            should_resize = True
            try:
                height = int(preprocessing_parameters[HEIGHT])
                width = int(preprocessing_parameters[WIDTH])
            except (KeyError, TypeError, ValueError) as e:
                # KeyError: only one of the two keys was provided.
                # TypeError: a value (e.g. None) that int() cannot convert.
                raise ValueError('Image height and width must be set and have '
                                 'positive integer values: ' + str(e)) from e
            if height <= 0 or width <= 0:
                raise ValueError(
                    'Image height and width must be positive integers')
        else:
            # User hasn't specified height and width.
            # Default to inferring from sample or first image.
            if preprocessing_parameters[INFER_IMAGE_DIMENSIONS]:
                should_resize = True

                # Average size over the sample, capped at the configured max
                height_avg = min(
                    sum(x.shape[0]
                        for x in inferred_sample) / len(inferred_sample),
                    preprocessing_parameters[INFER_IMAGE_MAX_HEIGHT])
                width_avg = min(
                    sum(x.shape[1]
                        for x in inferred_sample) / len(inferred_sample),
                    preprocessing_parameters[INFER_IMAGE_MAX_WIDTH])

                height, width = round(height_avg), round(width_avg)
                logger.debug("Inferring height: {0} and width: {1}".format(
                    height, width))
            elif first_image is not None:
                height, width = first_image.shape[0], first_image.shape[1]
            else:
                raise ValueError(
                    "Explicit image width/height are not set, infer_image_dimensions is false, "
                    "and first image cannot be read, so image dimensions are unknown"
                )

        if explicit_num_channels:
            # User specified num_channels in the model/feature config
            user_specified_num_channels = True
            num_channels = preprocessing_parameters[NUM_CHANNELS]
        else:
            user_specified_num_channels = False
            if preprocessing_parameters[INFER_IMAGE_DIMENSIONS]:
                # An inferred channel count is flagged like a user-specified
                # one in the returned tuple.
                user_specified_num_channels = True
                num_channels = round(
                    sum(num_channels_in_image(x)
                        for x in inferred_sample) / len(inferred_sample))
            elif first_image is not None:
                num_channels = num_channels_in_image(first_image)
            else:
                raise ValueError(
                    "Explicit image num channels is not set, infer_image_dimensions is false, "
                    "and first image cannot be read, so image num channels is unknown"
                )

        # NOTE(review): this assert is skipped under `python -O` and surfaces
        # as AssertionError; kept unchanged so callers see the same exception.
        assert isinstance(
            num_channels,
            int), ValueError('Number of image channels needs to be an integer')

        return (should_resize, width, height, num_channels,
                user_specified_num_channels, first_image)
Exemple #9
0
    def _read_image_and_resize(img_entry: Union[str,
                                                'numpy.array'], img_width: int,
                               img_height: int, should_resize: bool,
                               num_channels: int, resize_method: str,
                               user_specified_num_channels: bool):
        """
        :param img_entry Union[str, 'numpy.array']: if str file path to the
                image else numpy.array of the image itself
        :param img_width: expected width of the image
        :param img_height: expected height of the image
        :param should_resize: Should the image be resized?
        :param num_channels: expected number of channels of the image
        :param resize_method: type of resizing method
        :param user_specified_num_channels: did the user specify num channels?
                Compared with ``is True``, so it must be the bool ``True``.
        :return: image object, or None when the image cannot be read
        :raises ValueError: if the user did not specify num channels and the
                image has a different number of channels than expected, or if
                the final image is not img_height x img_width

        Helper method to read and resize an image according to model defn.
        If the user doesn't specify a number of channels, we use the first image
        in the dataset as the source of truth. If any image in the dataset
        doesn't have the same number of channels as the first image,
        raise an exception.

        If the user specifies a number of channels, we try to convert all the
        images to the specifications by dropping channels/padding 0 channels
        """
        img = read_image(img_entry)
        if img is None:
            logger.info(f"{img_entry} cannot be read")
            return None
        img_num_channels = num_channels_in_image(img)
        if img_num_channels == 1:
            # Give single-channel images an explicit trailing channel axis
            img = img.reshape((img.shape[0], img.shape[1], 1))

        if should_resize:
            img = resize_image(img, (img_height, img_width), resize_method)

        if user_specified_num_channels is True:

            # convert to greyscale if needed
            if num_channels == 1 and (img_num_channels == 3
                                      or img_num_channels == 4):
                img = greyscale(img)
                img_num_channels = 1

            # Number of channels is specified by the user
            img_padded = np.zeros((img_height, img_width, num_channels),
                                  dtype=np.uint8)
            min_num_channels = min(num_channels, img_num_channels)
            img_padded[:, :, :min_num_channels] = img[:, :, :min_num_channels]
            img = img_padded

            if img_num_channels != num_channels:
                logger.warning(
                    "Image has {0} channels, where as {1} "
                    "channels are expected. Dropping/adding channels "
                    "with 0s as appropriate".format(img_num_channels,
                                                    num_channels))
        else:
            # If the image isn't like the first image, raise exception
            if img_num_channels != num_channels:
                raise ValueError(
                    'Image has {0} channels, unlike the first image, which '
                    'has {1} channels. Make sure all the images have the same '
                    'number of channels or use the num_channels property in '
                    'image preprocessing'.format(img_num_channels,
                                                 num_channels))

        if img.shape[0] != img_height or img.shape[1] != img_width:
            raise ValueError(
                "Images are not of the same size. "
                "Expected size is {0}, "
                "current image size is {1}. "
                "Images are expected to be all of the same size "
                "or explicit image width and height are expected "
                "to be provided. "
                "Additional information: "
                "https://ludwig-ai.github.io/ludwig-docs/user_guide/#image-features-preprocessing"
                .format([img_height, img_width, num_channels], img.shape))

        return img
Exemple #10
0
    def _finalize_preprocessing_parameters(
            preprocessing_parameters: dict,
            first_img_entry: Union[str, 'numpy.array'], src_path: str,
            input_feature_col: np.array):
        """
        Helper method to determine the height, width and number of channels for
        preprocessing the image data. This is achieved by looking at the
        parameters provided by the user. When height and width are missing we
        use the first image in the dataset, or the average size of a sample
        of images when INFER_IMAGE_DIMENSIONS is set. A missing number of
        channels is always taken from the first image.

        :param preprocessing_parameters: mapping with INFER_IMAGE_DIMENSIONS
               and, optionally, HEIGHT, WIDTH, NUM_CHANNELS and the
               INFER_IMAGE_* limits
        :param first_img_entry: first image entry, passed to ``read_image``
        :param src_path: base path handed to ``get_image_from_path``
        :param input_feature_col: sliceable column of image entries
        :return: tuple ``(should_resize, width, height, num_channels,
                 user_specified_num_channels, first_image)``
        :raises ValueError: if height/width are incomplete or not positive
                integers
        """
        first_image = read_image(first_img_entry)
        first_img_height = first_image.shape[0]
        first_img_width = first_image.shape[1]
        first_img_num_channels = num_channels_in_image(first_image)

        should_resize = False
        if (HEIGHT in preprocessing_parameters
                or WIDTH in preprocessing_parameters):
            should_resize = True
            try:
                height = int(preprocessing_parameters[HEIGHT])
                width = int(preprocessing_parameters[WIDTH])
            except (KeyError, TypeError, ValueError) as e:
                # KeyError: only one of the two keys was provided.
                # TypeError: a value (e.g. None) that int() cannot convert.
                raise ValueError('Image height and width must be set and have '
                                 'positive integer values: ' + str(e)) from e
            if height <= 0 or width <= 0:
                raise ValueError(
                    'Image height and width must be positive integers')
        else:
            # User hasn't specified height and width.
            # Default to first image, or infer from sample.
            height, width = first_img_height, first_img_width

            if preprocessing_parameters[INFER_IMAGE_DIMENSIONS]:
                should_resize = True
                sample_size = min(
                    len(input_feature_col),
                    preprocessing_parameters[INFER_IMAGE_SAMPLE_SIZE])
                # Leave out None results so the averages below never read
                # `.shape` on None.
                # NOTE(review): assumes read_image signals an unreadable
                # image by returning None - confirm.
                sample_images = [
                    img for img in (
                        read_image(get_image_from_path(src_path, entry))
                        for entry in input_feature_col[:sample_size])
                    if img is not None
                ]

                if sample_images:
                    # Average size over the sample, capped at configured max
                    height_avg = min(
                        sum(x.shape[0]
                            for x in sample_images) / len(sample_images),
                        preprocessing_parameters[INFER_IMAGE_MAX_HEIGHT])
                    width_avg = min(
                        sum(x.shape[1]
                            for x in sample_images) / len(sample_images),
                        preprocessing_parameters[INFER_IMAGE_MAX_WIDTH])

                    height, width = round(height_avg), round(width_avg)

                    logger.debug("Inferring height: {0} and width: {1}".format(
                        height, width))
                else:
                    logger.warning(
                        "Sample set for inference is empty, default to height and width of first image"
                    )

        if NUM_CHANNELS in preprocessing_parameters:
            # User specified num_channels in the model/feature config
            user_specified_num_channels = True
            num_channels = preprocessing_parameters[NUM_CHANNELS]
        else:
            user_specified_num_channels = False
            num_channels = first_img_num_channels

        # NOTE(review): this assert is skipped under `python -O` and surfaces
        # as AssertionError; kept unchanged so callers see the same exception.
        assert isinstance(
            num_channels,
            int), ValueError('Number of image channels needs to be an integer')

        return (should_resize, width, height, num_channels,
                user_specified_num_channels, first_image)
Exemple #11
0
    def _finalize_preprocessing_parameters(
        preprocessing_parameters: dict,
        column: Series,
    ) -> Tuple:
        """Helper method to determine the height, width and number of channels for preprocessing the image data.

        This is achieved by looking at the parameters provided by the user. Missing parameters are inferred from a
        sample of the column when INFER_IMAGE_DIMENSIONS is set; otherwise only the number of channels can be taken
        from the first image, and missing height/width raise an error.

        :param preprocessing_parameters: mapping with INFER_IMAGE_DIMENSIONS and, optionally, HEIGHT, WIDTH,
            NUM_CHANNELS and the INFER_IMAGE_* limits
        :param column: image entries; strings are read with ``read_image_from_path``, other values are used as-is
        :return: tuple ``(should_resize, width, height, num_channels, user_specified_num_channels)``
        :raises ValueError: if no sampled image can be read, if height/width are incomplete or not positive
            integers, or if height/width are neither given nor inferred
        """

        explicit_height_width = HEIGHT in preprocessing_parameters or WIDTH in preprocessing_parameters
        explicit_num_channels = NUM_CHANNELS in preprocessing_parameters and preprocessing_parameters[
            NUM_CHANNELS]

        sample = []
        if preprocessing_parameters[INFER_IMAGE_DIMENSIONS] and not (
                explicit_height_width and explicit_num_channels):
            sample_size = min(
                len(column), preprocessing_parameters[INFER_IMAGE_SAMPLE_SIZE])
        else:
            sample_size = 1  # Take first image

        failed_entries = []
        for image_entry in column.head(sample_size):
            if isinstance(image_entry, str):
                # Tries to read image as PNG or numpy file from the path.
                image = read_image_from_path(image_entry)
            else:
                image = image_entry

            if isinstance(image, torch.Tensor):
                sample.append(image)
            elif isinstance(image, np.ndarray):
                # Move the last axis to the front.
                # NOTE(review): presumably HWC -> CHW; needs a 3-D array.
                sample.append(torch.from_numpy(image).permute(2, 0, 1))
            else:
                failed_entries.append(image_entry)
        if len(sample) == 0:
            # Entries are not guaranteed to be strings (anything that is not
            # a str/tensor/ndarray lands here), so stringify before joining;
            # otherwise join() raises TypeError and hides the real error.
            failed_entries_repr = "\n\t- ".join(
                str(entry) for entry in failed_entries)
            raise ValueError(
                f"Images dimensions cannot be inferred. Failed to read {sample_size} images as samples:\n\t- "
                f"{failed_entries_repr}.")

        should_resize = False
        if explicit_height_width:
            should_resize = True
            try:
                height = int(preprocessing_parameters[HEIGHT])
                width = int(preprocessing_parameters[WIDTH])
            except (KeyError, TypeError, ValueError) as e:
                # KeyError: only one of the two keys was provided.
                # TypeError: a value (e.g. None) that int() cannot convert.
                raise ValueError("Image height and width must be set and have "
                                 "positive integer values: " + str(e)) from e
            if height <= 0 or width <= 0:
                raise ValueError(
                    "Image height and width must be positive integers")
        else:
            # User hasn't specified height and width.
            # Default to inferring from sample.
            if preprocessing_parameters[INFER_IMAGE_DIMENSIONS]:
                should_resize = True
                height, width = ImageFeatureMixin._infer_image_size(
                    sample,
                    max_height=preprocessing_parameters[
                        INFER_IMAGE_MAX_HEIGHT],
                    max_width=preprocessing_parameters[INFER_IMAGE_MAX_WIDTH],
                )
            else:
                # NOTE(review): the sample is non-empty here, so the first
                # image WAS read; this branch does not fall back to its size
                # even though the message says it could not be read.
                raise ValueError(
                    "Explicit image width/height are not set, infer_image_dimensions is false, "
                    "and first image cannot be read, so image dimensions are unknown"
                )

        if explicit_num_channels:
            # User specified num_channels in the model/feature config
            user_specified_num_channels = True
            num_channels = preprocessing_parameters[NUM_CHANNELS]
        else:
            user_specified_num_channels = False
            if preprocessing_parameters[INFER_IMAGE_DIMENSIONS]:
                # An inferred channel count is flagged like a user-specified
                # one in the returned tuple.
                user_specified_num_channels = True
                num_channels = ImageFeatureMixin._infer_number_of_channels(
                    sample)
            elif len(sample) > 0:
                num_channels = num_channels_in_image(sample[0])
            else:
                raise ValueError(
                    "Explicit image num channels is not set, infer_image_dimensions is false, "
                    "and first image cannot be read, so image num channels is unknown"
                )

        # NOTE(review): this assert is skipped under `python -O` and surfaces
        # as AssertionError; kept unchanged so callers see the same exception.
        assert isinstance(
            num_channels,
            int), ValueError("Number of image channels needs to be an integer")
        return (should_resize, width, height, num_channels,
                user_specified_num_channels)
Exemple #12
0
    def _read_image_if_bytes_obj_and_resize(
        img_entry: Union[bytes, torch.Tensor, np.ndarray],
        img_width: int,
        img_height: int,
        should_resize: bool,
        num_channels: int,
        resize_method: str,
        user_specified_num_channels: bool,
    ) -> Optional[np.ndarray]:
        """
        Helper method to read and resize an image according to model definition.

        If the user doesn't specify a number of channels, ``num_channels`` is
        treated as the source of truth (it is expected to come from the first
        image in the dataset) and any image with a different number of
        channels raises an exception.

        If the user specifies a number of channels, every image is converted
        to that specification by dropping surplus channels or by padding
        missing channels with zeros.

        :param img_entry: the image, either as raw encoded bytes (decoded with
            ``read_image_from_bytes_obj``), as a numpy array (permuted with
            ``(2, 0, 1)``, i.e. treated as channels-last), or as a
            torch.Tensor that is used as is (channels-first)
        :param img_width: expected width of the image
        :param img_height: expected height of the image
        :param should_resize: Should the image be resized?
        :param num_channels: expected number of channels in the image
        :param resize_method: type of resizing method
        :param user_specified_num_channels: did the user specify num channels?
        :return: the image as a channels-first numpy array, or None (after
            emitting a warning) when the entry cannot be turned into a tensor
        :raises ValueError: if the number of channels differs from
            ``num_channels`` and the user did not specify it, or if the final
            height/width differ from ``img_height``/``img_width``
        """

        if isinstance(img_entry, bytes):
            img = read_image_from_bytes_obj(img_entry, num_channels)
        elif isinstance(img_entry, np.ndarray):
            # Move the trailing channel axis to the front.
            img = torch.from_numpy(img_entry).permute(2, 0, 1)
        else:
            img = img_entry

        if not isinstance(img, torch.Tensor):
            warnings.warn(f"Image with value {img} cannot be read")
            return None

        img_num_channels = num_channels_in_image(img)
        # Convert to grayscale if needed.
        if num_channels == 1 and img_num_channels != 1:
            img = grayscale(img)
            img_num_channels = 1

        if should_resize:
            img = resize_image(img, (img_height, img_width), resize_method)

        if user_specified_num_channels:
            # Number of channels is specified by the user: force the image to
            # have exactly that many channels.
            if num_channels > img_num_channels:
                # Pad only the channel axis (third from last) with zeros.
                extra_channels = num_channels - img_num_channels
                img = torch.nn.functional.pad(img,
                                              [0, 0, 0, 0, 0, extra_channels])
            elif num_channels < img_num_channels:
                # Drop the surplus trailing channels; previously they were
                # kept even though the warning below announces the drop.
                img = img[:num_channels]

            if img_num_channels != num_channels:
                logging.warning(
                    "Image has {} channels, where as {} "
                    "channels are expected. Dropping/adding channels "
                    "with 0s as appropriate".format(img_num_channels,
                                                    num_channels))
        else:
            # If the image isn't like the first image, raise exception
            if img_num_channels != num_channels:
                raise ValueError(
                    "Image has {} channels, unlike the first image, which "
                    "has {} channels. Make sure all the images have the same "
                    "number of channels or use the num_channels property in "
                    "image preprocessing".format(img_num_channels,
                                                 num_channels))

        # The tensor is channels-first here: axis 1 is height, axis 2 is width.
        if img.shape[1] != img_height or img.shape[2] != img_width:
            raise ValueError(
                "Images are not of the same size. "
                "Expected size is {}, "
                "current image size is {}."
                "Images are expected to be all of the same size "
                "or explicit image width and height are expected "
                "to be provided. "
                "Additional information: "
                "https://ludwig-ai.github.io/ludwig-docs/latest/configuration/features/image_features"
                "#image-features-preprocessing".format(
                    [img_height, img_width, num_channels], img.shape))

        return img.numpy()