def image_grid(images=None,
               data=None,
               sort_by=None,
               image_dir='',
               image_size=None,
               padding=None,
               n_jobs=1,
               **kwargs):
    """Create a grid of images.

    Parameters
    ----------
    images : str or array-like of shape [n_samples, width, height, channels], optional
        Image array or name of the variable containing the image file paths
        within `data`.

    data : pandas.DataFrame, optional
        Tidy ("long-form") dataframe where each column is a variable and each
        row is an observation. If `images` is a variable name, then it should
        be contained in `data`.

    sort_by : str or array-like of shape [n_samples,], optional
        Data or name of the variable to sort images by. A scalar value that
        is contained in ``features.HSVFeatures.all_features()`` sorts the
        images by that HSV statistic instead of by a variable in `data`.

    image_dir : str, optional
        The location of the image files on disk. Images will
        be loaded from files matching the pattern
        'image_dir + os.path.sep + image_path'.

    image_size : int, optional
        The size of each image displayed in the grid. Images will be sampled
        to `image_size` if the size of the images do not match `image_size`.

    padding : int, optional
        The padding between images in the grid.

    n_jobs : int, optional
        The number of parallel workers to use for loading
        the image files when reading from disk. The default
        uses a single core.

    kwargs : key, value pairings
        Additional keyword arguments are passed to
        ``plots.pillow_to_matplotlib``, which draws the grid.

    Returns
    -------
    ax : matplotlib Axes
        Returns the Axes object with the plot for further tweaking
        (the value returned by ``plots.pillow_to_matplotlib``).

    See Also
    --------
    scatter_grid : Combines an image grid with a :func:`scatter_plot`.

    Examples
    --------
    Create a image grid.

    .. plot:: ../examples/image_grid.py

    Create a image grid with custom ordering on MNIST.

    .. plot:: ../examples/image_grid_mnist.py
    """
    # `image_dir` must be forwarded, otherwise relative image paths in `data`
    # are resolved without the directory the caller asked for.
    images = data_utils.get_images(
        data, images,
        image_dir=image_dir,
        as_image=True,
        image_size=image_size,
        n_jobs=n_jobs)

    if sort_by is not None:
        # Only a scalar (a name or an enum member) can designate an HSV
        # feature. Running the membership test on an array-like would compare
        # element-wise and fail with an ambiguous truth value.
        is_hsv_feature = (
            np.ndim(sort_by) == 0 and
            sort_by in features.HSVFeatures.all_features())

        if is_hsv_feature:
            hsv = features.extract_hsv_stats(images, n_jobs=n_jobs)
            sort_values = hsv[:, features.HSVFeatures.feature_index(sort_by)]
        else:
            sort_values = data_utils.get_variable(data, sort_by)

        # `images` is indexed one element at a time, so it may be a plain
        # sequence of image objects rather than an array.
        images = [images[i] for i in np.argsort(sort_values)]

    grid = images_to_grid(images, padding=padding)
    return plots.pillow_to_matplotlib(grid, **kwargs)
def image_barplot(y,
                  images=None,
                  data=None,
                  sort_by=None,
                  bar_height=50,
                  image_dir='',
                  image_size=(40, 40),
                  n_jobs=1,
                  **kwargs):
    """Create a barplot where the bars are created from images in the dataset.

    This visualization is useful for analyzing the relationship between
    images and categorical variables.

    Parameters
    ----------
    y : str or array-like
        Data or the name of a variable in `data`. This variable is assumed
        to be categorical, e.g. discrete, and is used to split the bars on
        the vertical axis.

    images : str or array-like of shape [n_samples, width, height, channels], optional
        Image array or name of the variable containing the image file paths
        within `data`.

    data : pandas.DataFrame, optional
        Tidy ("long-form") dataframe where each column is a variable and each
        row is an observation. If `images` is a variable name, then it should
        be contained in `data`.

    sort_by : str or array-like of shape [n_samples,], optional
        Data or name of the variable to sort images by in the horizontal
        direction. A scalar value that is contained in
        ``features.HSVFeatures.all_features()`` sorts the images by that HSV
        statistic instead of by a variable in `data`.

    bar_height : int
        The number of images placed in a single horizontal bar before
        creating a new bar.

    image_dir : str, optional
        The location of the image files on disk. Images will
        be loaded from files matching the pattern
        'image_dir + os.path.sep + image_path'.

    image_size : int or tuple, optional
        The size of each image displayed in the barplot. Images will be
        sampled to `image_size` if the size of the images do not match
        `image_size`. Defaults to ``(40, 40)``.

    n_jobs : int, optional
        The number of parallel workers to use for loading
        the image files when reading from disk. The default
        uses a single core.

    kwargs : key, value pairings
        Additional keyword arguments are passed to ``images_to_barplot``.

    Returns
    -------
    ax : matplotlib Axes
        Returns the Axes object with the plot for further tweaking
        (the value returned by ``images_to_barplot``).

    Examples
    --------
    Create a image barplot.

    .. plot:: ../examples/image_barplot.py
    """
    y = data_utils.get_variable(data, y)

    # `image_dir` must be forwarded, otherwise relative image paths in `data`
    # are resolved without the directory the caller asked for.
    images = data_utils.get_images(
        data, images,
        image_dir=image_dir,
        as_image=False,
        image_size=image_size,
        n_jobs=n_jobs)

    if sort_by is not None:
        # Only a scalar (a name or an enum member) can designate an HSV
        # feature. Running the membership test on an array-like would compare
        # element-wise and fail with an ambiguous truth value.
        is_hsv_feature = (
            np.ndim(sort_by) == 0 and
            sort_by in features.HSVFeatures.all_features())

        if is_hsv_feature:
            hsv = features.extract_hsv_stats(images, n_jobs=n_jobs)
            sort_values = hsv[:, features.HSVFeatures.feature_index(sort_by)]
        else:
            sort_values = data_utils.get_variable(data, sort_by)

        order = np.argsort(sort_values)
        images = images[order]
        # Reorder the labels with the same permutation; otherwise each image
        # would be paired with the category of a different observation.
        y = np.asarray(y)[order]

    return images_to_barplot(images, y, bar_height=bar_height, **kwargs)
def scatter_plot(x,
                 y,
                 images=None,
                 data=None,
                 hue=None,
                 image_dir='',
                 image_size=None,
                 threshold=None,
                 alpha=0.9,
                 color=None,
                 n_jobs=1,
                 **kwargs):
    """Create an image scatter plot based on columns `x` vs. `y`.

    Parameters
    ----------
    x, y : str or array-like
        Data or names of variables in `data`. These variables are
        used for the x and y axes respectively.

    images : str or array-like, optional
        Image arrays or names of the column pointing to the
        image paths within `data`.

    data : pandas.DataFrame, optional
        Tidy ("long-form") dataframe where each column is a variable and each
        row is an observation. If `images` is a variable name, then it should
        be contained in `data`.

    hue : str or array-like, optional
        Data or name of the variable used to color the images. Each distinct
        value is assigned one color of a seaborn HUSL palette, and every
        image is colored through ``features.color_image``.

    image_dir : str, optional
        The location of the image files on disk. Images will
        be loaded from files matching the pattern
        'image_dir + os.path.sep + image_path'.

    image_size : int, optional
        The size of each image displayed in the scatter plot. Images will be
        sampled to `image_size` if the size of the images do not match
        `image_size`.

    threshold : float, optional
        In order to avoid clutter only one point in a ball of
        radius `threshold` is displayed. Note that features
        are re-scaled to lie on the unit square [0, 1] x [0, 1].
        The default of None means all points are displayed.

    alpha : float, optional
        Alpha level used when displaying images.

    color : optional
        A single color applied to every image through
        ``features.color_image``. Only used when `hue` is None.

    n_jobs : int
        The number of parallel jobs used to load the images from disk.

    kwargs : key, value pairings
        Additional keyword arguments are passed to ``images_to_scatter``.

    Returns
    -------
    The value returned by ``images_to_scatter``.

    Examples
    --------
    Create a scatter plot with hue labels.

    .. plot:: ../examples/scatter_plot.py
    """
    # Resolve the coordinates of every observation.
    x_values = data_utils.get_variable(data, x)
    y_values = data_utils.get_variable(data, y)

    # Read the images as arrays so they can be re-colored below.
    loaded = data_utils.get_images(
        data, images,
        image_dir=image_dir,
        as_image=False,
        image_size=image_size,
        n_jobs=n_jobs)

    # TODO (seaborn is only required for a color palette. Remove this)
    if hue is not None:
        hue_values = data_utils.get_variable(data, hue)
        levels, level_codes = np.unique(hue_values, return_inverse=True)
        palette = sns.husl_palette(len(levels))

        tinted = []
        for img, code in zip(loaded, level_codes):
            tinted.append(features.color_image(img, hue=palette[code]))
        loaded = tinted
    elif color is not None:
        loaded = [features.color_image(img, hue=color) for img in loaded]

    return images_to_scatter(
        loaded, x_values, y_values,
        threshold=threshold,
        alpha=alpha,
        **kwargs)
def image_histogram(x,
                    images=None,
                    data=None,
                    n_bins=None,
                    sort_by=features.HSVFeatures.SATURATION,
                    image_dir='',
                    image_size=None,
                    n_jobs=1,
                    **kwargs):
    """Create an univariate image histogram binned by the `x` variable.

    Parameters
    ----------
    x : str or array-like of shape [n_samples,]
        Data or names of variables in `data`.

    images : str or array-like of shape [n_samples, width, height, channels], optional
        Image array or name of the variable containing the image file paths
        within `data`.

    data : pandas.DataFrame, optional
        Tidy ("long-form") dataframe where each column is a variable and each
        row is an observation. If `images`, `x`, or `sort_by` is a variable
        name, then it should be contained in `data`.

    n_bins : int or None
        Specification of the number of bins. If None, then the
        Freedman-Diaconis estimator is used to determine the number of bins.

    sort_by : str, HSVFeatures enum or array-like of shape [n_samples,], optional
        Data or name of the variable to sort images by on the y-axis. A
        scalar value that is contained in
        ``features.HSVFeatures.all_features()`` sorts by that HSV statistic.
        Defaults to ``features.HSVFeatures.SATURATION``; pass None to skip
        resolving any sort values.

    image_dir : str (default='')
        The location of the image files on disk.

    image_size : int
        The size of each image in the histogram.

    n_jobs : int (default=1)
        The number of parallel workers to use for loading the image files.

    kwargs : key, value pairings
        Additional keyword arguments are passed to ``histogram_matplotlib``.

    Returns
    -------
    ax : matplotlib Axes
        Returns the Axes object with the plot for further tweaking
        (the value returned by ``histogram_matplotlib``).

    Examples
    --------
    Create an image histogram.

    .. plot:: ../examples/image_histogram.py
    """
    images = data_utils.get_images(
        data, images,
        image_dir=image_dir,
        image_size=image_size,
        index=None,
        as_image=False,
        n_jobs=n_jobs)

    x = data_utils.get_variable(data, x)

    sort_values = None
    if sort_by is not None:
        # Only a scalar (a name or an enum member) can designate an HSV
        # feature. Running the membership test on an array-like would compare
        # element-wise and fail with an ambiguous truth value.
        is_hsv_feature = (
            np.ndim(sort_by) == 0 and
            sort_by in features.HSVFeatures.all_features())

        if is_hsv_feature:
            hsv = features.extract_hsv_stats(images, n_jobs=n_jobs)
            sort_values = hsv[:, features.HSVFeatures.feature_index(sort_by)]
        else:
            sort_values = data_utils.get_variable(data, sort_by)

    return histogram_matplotlib(
        images, x, n_bins=n_bins, sort_by=sort_values, **kwargs)
def scatter_grid(x,
                 y,
                 images=None,
                 data=None,
                 hue=None,
                 image_dir='',
                 image_size=None,
                 padding=None,
                 n_jobs=1,
                 **kwargs):
    """Draw a plot ordering images in a regularly spaced 2-d grid
    based on their distance in the x-y plane. The distance between
    points is assumed to be euclidean.

    Parameters
    ----------
    x, y : str or array-like
        Data or names of variables in `data`. These variables correspond to
        the x-y coordinates in the euclidean space.

    images : str or array-like
        Image arrays or names of the column pointing to the
        image paths within `data`.

    data : pd.DataFrame
        Pandas dataframe holding the dataset.

    hue : str or array-like
        Data or the name of the variable to use to color the individual
        images on the grid. Each distinct value is assigned one color of a
        seaborn HUSL palette.

    image_dir : str (default='')
        The location of the image files on disk.

    image_size : int
        The size of each image in the grid.

    padding : int, optional
        The padding between images in the grid.

    n_jobs : int (default=1)
        The number of parallel workers to use for loading the image files.

    kwargs : key, value pairings
        Additional keyword arguments are passed to
        ``images_to_scatter_grid``.

    Returns
    -------
    The value returned by ``images_to_scatter_grid``.

    Examples
    --------
    Create a grid plot with hue labels.

    .. plot:: ../examples/scatter_grid.py
    """
    x_var = data_utils.get_variable(data, x)
    y_var = data_utils.get_variable(data, y)

    # Coloring works on the array form of the images, so they are only
    # loaded directly as image objects when no hue is requested.
    colorize = hue is not None
    loaded = data_utils.get_images(
        data, images,
        image_dir=image_dir,
        as_image=not colorize,
        image_size=image_size,
        n_jobs=n_jobs)

    # TODO (seaborn is only required for a color palette. Remove this)
    if colorize:
        hue_values = data_utils.get_variable(data, hue)
        levels, level_codes = np.unique(hue_values, return_inverse=True)
        palette = sns.husl_palette(len(levels))

        # Color each array, then convert it back to an image object.
        colored = []
        for img, code in zip(loaded, level_codes):
            colored.append(features.color_image(img, hue=palette[code]))
        loaded = [image_io.to_pillow_image(img) for img in colored]

    return images_to_scatter_grid(
        loaded, x_var, y_var, padding=padding, **kwargs)