Beispiel #1
0
def image_grid(images=None,
               data=None,
               sort_by=None,
               image_dir='',
               image_size=None,
               padding=None,
               n_jobs=1,
               **kwargs):
    """Create a grid of images.

    Parameters
    ----------
    images : str or array-like of shape [n_samples, width, height, channels], optional
        Image array or name of the variable containing the image file
        paths within `data`.

    data : pandas.DataFrame, optional
        Tidy ("long-form") dataframe where each column is a variable
        and each row is an observation. If `images` is a variable name,
        then it should be contained in `data`.

    sort_by : str or array-like of shape [n_samples,], optional
        Data or name of the variable to sort images by. If the value is
        one of ``features.HSVFeatures.all_features()``, the images are
        ordered by that HSV statistic computed from the images themselves;
        otherwise it is resolved through `data`. Images are placed in
        ascending order of the sort values.

    image_dir : str, optional
        The location of the image files on disk. Images will
        be loaded from files matching the pattern
        'image_dir + os.path.sep + image_path'.

    image_size : int, optional
        The size of each image displayed in the grid. Images
        will be sampled to `image_size` if the size of the images
        do not match `image_size`.

    padding : int, optional
        The padding between images in the grid.

    n_jobs : int, optional
        The number of parallel workers to use for loading
        the image files when reading from disk. The default
        uses a single core.

    kwargs : key, value pairings
        Additional keyword arguments are passed to
        ``plots.pillow_to_matplotlib``, which draws the grid.

    Returns
    -------
    ax : matplotlib Axes
        Returns the Axes object with the plot for further tweaking.

    See Also
    --------
    scatter_grid : Combines an image grid with a :func:`scatter_plot`.

    Examples
    --------

    Create an image grid.

    .. plot:: ../examples/image_grid.py

    Create an image grid with custom ordering on MNIST.

    .. plot:: ../examples/image_grid_mnist.py
    """
    # `image_dir` must be forwarded, otherwise a user-supplied directory is
    # silently ignored when `images` refers to file paths.
    images = data_utils.get_images(data,
                                   images,
                                   image_dir=image_dir,
                                   as_image=True,
                                   image_size=image_size,
                                   n_jobs=n_jobs)

    if sort_by is not None:
        # NOTE(review): this membership test may be ambiguous when `sort_by`
        # is array-like rather than a name -- confirm against
        # `HSVFeatures.all_features()`.
        if sort_by in features.HSVFeatures.all_features():
            hsv = features.extract_hsv_stats(images, n_jobs=n_jobs)
            sort_values = hsv[:, features.HSVFeatures.feature_index(sort_by)]
        else:
            sort_values = data_utils.get_variable(data, sort_by)

        # Index one by one: `images` is rebuilt as a plain list here rather
        # than fancy-indexed, so it need not be a numpy array.
        images = [images[i] for i in np.argsort(sort_values)]

    grid = images_to_grid(images, padding=padding)

    return plots.pillow_to_matplotlib(grid, **kwargs)
Beispiel #2
0
def image_barplot(y,
                  images=None,
                  data=None,
                  sort_by=None,
                  bar_height=50,
                  image_dir='',
                  image_size=(40, 40),
                  n_jobs=1,
                  **kwargs):
    """Create a barplot where the bars are created from images in the dataset.

    This visualization is useful for analyzing the relationship between images
    and categorical variables.

    Parameters
    ----------
    y : str or array-like
        Data or the name of a variable in `data`. This variable is assumed
        to be categorical, i.e. discrete, and is used to split the bars
        on the vertical axis.

    images : str or array-like of shape [n_samples, width, height, channels], optional
        Image array or name of the variable containing the image file
        paths within `data`.

    data : pandas.DataFrame, optional
        Tidy ("long-form") dataframe where each column is a variable
        and each row is an observation. If `images` is a variable name,
        then it should be contained in `data`.

    sort_by : str or array-like of shape [n_samples,], optional
        Data or name of the variable to sort images by in the horizontal
        direction. If the value is one of
        ``features.HSVFeatures.all_features()``, the images are ordered by
        that HSV statistic computed from the images themselves.

    bar_height : int
        The number of images placed in a single horizontal bar before
        creating a new bar.

    image_dir : str, optional
        The location of the image files on disk. Images will
        be loaded from files matching the pattern
        'image_dir + os.path.sep + image_path'.

    image_size : int or tuple of int, optional
        The size of each image displayed in the barplot. Images
        will be sampled to `image_size` if the size of the images
        do not match `image_size`. Defaults to ``(40, 40)``.

    n_jobs : int, optional
        The number of parallel workers to use for loading
        the image files when reading from disk. The default
        uses a single core.

    kwargs : key, value pairings
        Additional keyword arguments are passed to ``images_to_barplot``,
        which draws the plot.

    Returns
    -------
    ax : matplotlib Axes
        Returns the Axes object with the plot for further tweaking.


    Examples
    --------
    Create an image barplot.

    .. plot:: ../examples/image_barplot.py
    """
    y = data_utils.get_variable(data, y)

    # `image_dir` must be forwarded, otherwise a user-supplied directory is
    # silently ignored when `images` refers to file paths.
    images = data_utils.get_images(data,
                                   images,
                                   image_dir=image_dir,
                                   as_image=False,
                                   image_size=image_size,
                                   n_jobs=n_jobs)

    if sort_by is not None:
        # NOTE(review): this membership test may be ambiguous when `sort_by`
        # is array-like rather than a name -- confirm against
        # `HSVFeatures.all_features()`.
        if sort_by in features.HSVFeatures.all_features():
            hsv = features.extract_hsv_stats(images, n_jobs=n_jobs)
            sort_values = hsv[:, features.HSVFeatures.feature_index(sort_by)]
        else:
            sort_values = data_utils.get_variable(data, sort_by)

        order = np.argsort(sort_values)
        images = images[order]

        # Apply the same permutation to the labels; reordering only the
        # images would pair every image with another image's category.
        # Positional indexing is used so a pandas object is not re-indexed
        # by label.
        if hasattr(y, 'iloc'):
            y = y.iloc[order]
        else:
            y = np.asarray(y)[order]

    return images_to_barplot(images, y, bar_height=bar_height, **kwargs)
Beispiel #3
0
def scatter_plot(x,
                 y,
                 images=None,
                 data=None,
                 hue=None,
                 image_dir='',
                 image_size=None,
                 threshold=None,
                 alpha=0.9,
                 color=None,
                 n_jobs=1,
                 **kwargs):
    """Draw an image scatter plot of `x` against `y`.

    Parameters
    ----------
    x, y : str or array-like
        Data or names of variables in `data`, used for the x and y
        axes respectively.

    images : str or array-like, optional
        Image arrays or the name of the column holding the image
        paths within `data`.

    data : pandas.DataFrame, optional
        Tidy ("long-form") dataframe where each column is a variable
        and each row is an observation. Any of `x`, `y`, `images` or
        `hue` given as a variable name is looked up in `data`.

    hue : str or array-like, optional
        Data or name of a variable in `data`. Each distinct value is
        assigned one color from a HUSL palette and every image is passed
        through ``features.color_image`` with the color of its value.

    image_dir : str, optional
        The location of the image files on disk. Images will
        be loaded from files matching the pattern
        'image_dir + os.path.sep + image_path'.

    image_size : int, optional
        The size of each image displayed in the scatter plot. Images
        will be sampled to `image_size` if the size of the images
        do not match `image_size`.

    threshold : float, optional
        In order to avoid clutter only one point in a ball of
        radius `threshold` is displayed. Note that features
        are re-scaled to lie on the unit square [0, 1] x [0, 1].
        The default of None means all points are displayed.

    alpha : float, optional
        Alpha level used when displaying images.

    color : optional
        A single color handed to ``features.color_image`` for every
        image. Only consulted when `hue` is None.

    n_jobs : int
        The number of parallel jobs used to load the
        images from disk.

    kwargs : key, value pairings
        Additional keyword arguments forwarded to ``images_to_scatter``.

    Returns
    -------
    The value returned by ``images_to_scatter``.

    Examples
    --------

    Create a scatter plot with hue labels.

    .. plot:: ../examples/scatter_plot.py
    """
    # Resolve the plot coordinates.
    x_values = data_utils.get_variable(data, x)
    y_values = data_utils.get_variable(data, y)

    # Load the images as raw arrays (not image objects).
    loaded = data_utils.get_images(data,
                                   images,
                                   image_dir=image_dir,
                                   as_image=False,
                                   image_size=image_size,
                                   n_jobs=n_jobs)

    # TODO (seaborn is only required for a color palette. Remove this)
    if hue is not None:
        hue_values = data_utils.get_variable(data, hue)
        # `level_codes[i]` is the index into `levels` of observation i.
        levels, level_codes = np.unique(hue_values, return_inverse=True)
        level_colors = sns.husl_palette(len(levels))

        tinted = []
        for img, code in zip(loaded, level_codes):
            tinted.append(features.color_image(img, hue=level_colors[code]))
        loaded = tinted
    elif color is not None:
        loaded = [features.color_image(img, hue=color) for img in loaded]

    return images_to_scatter(loaded,
                             x_values,
                             y_values,
                             threshold=threshold,
                             alpha=alpha,
                             **kwargs)
Beispiel #4
0
def image_histogram(x,
                    images=None,
                    data=None,
                    n_bins=None,
                    sort_by=features.HSVFeatures.SATURATION,
                    image_dir='',
                    image_size=None,
                    n_jobs=1,
                    **kwargs):
    """Draw a univariate image histogram binned by the `x` variable.

    Parameters
    ----------
    x : str or array-like of shape [n_samples,]
        Data or name of the variable in `data` to bin on.

    images : str or array-like of shape [n_samples, width, height, channels], optional
        Image array or name of the variable containing the image file
        paths within `data`.

    data : pandas.DataFrame, optional
        Tidy ("long-form") dataframe where each column is a variable
        and each row is an observation. If `images`, `x`, or `sort_by`
        is a variable name, then it should be contained in `data`.

    n_bins : int or None
        Number of bins, forwarded to ``histogram_matplotlib``. If None,
        the Freedman-Diaconis estimator is expected to pick the number
        of bins.

    sort_by : str, HSVFeatures enum or array-like of shape [n_samples,], optional
        Data or name of the variable to sort images by on the y-axis.
        A value found in ``features.HSVFeatures.all_features()`` is
        replaced by that HSV statistic computed from the images; any
        other non-None value is resolved through `data`. None is passed
        on unchanged.

    image_dir : str (default='')
        The location of the image files on disk.

    image_size : int
        The size of each image in the histogram.

    n_jobs : int (default=1)
        The number of parallel workers to use for loading
        the image files.

    kwargs : key, value pairings
        Additional keyword arguments forwarded to
        ``histogram_matplotlib``.

    Returns
    -------
    ax : matplotlib Axes
        Returns the Axes object with the plot for further tweaking.


    Examples
    --------

    Create an image histogram.

    .. plot:: ../examples/image_histogram.py
    """
    loaded = data_utils.get_images(data,
                                   images,
                                   image_dir=image_dir,
                                   image_size=image_size,
                                   index=None,
                                   as_image=False,
                                   n_jobs=n_jobs)

    x_values = data_utils.get_variable(data, x)

    # Turn `sort_by` into the per-image values used for ordering.
    if sort_by is None:
        ordering = None
    elif sort_by in features.HSVFeatures.all_features():
        hsv_stats = features.extract_hsv_stats(loaded, n_jobs=n_jobs)
        column = features.HSVFeatures.feature_index(sort_by)
        ordering = hsv_stats[:, column]
    else:
        ordering = data_utils.get_variable(data, sort_by)

    return histogram_matplotlib(loaded,
                                x_values,
                                n_bins=n_bins,
                                sort_by=ordering,
                                **kwargs)
Beispiel #5
0
def scatter_grid(x, y,
                 images=None,
                 data=None,
                 hue=None,
                 image_dir='',
                 image_size=None,
                 padding=None,
                 n_jobs=1,
                 **kwargs):
    """Arrange images on a regularly spaced 2-d grid according to
    their position in the x-y plane. Distances between points are
    assumed to be euclidean.

    Parameters
    ----------
    x, y : str or array-like
        Data or names of variables in `data`, giving the x-y
        coordinates in the euclidean space.

    images : str or array-like
        Image arrays or the name of the column holding the
        image paths within `data`.

    data : pd.DataFrame
        Pandas dataframe holding the dataset.

    hue : str or array-like
        Data or the name of the variable used to color the individual
        images on the grid. Each distinct value is assigned one color
        from a HUSL palette.

    image_dir : str (default='')
        The location of the image files on disk.

    image_size : int
        The size of each image in the grid.

    padding : int, optional
        The padding between images in the grid.

    n_jobs : int (default=1)
        The number of parallel workers to use for loading
        the image files.

    kwargs : key, value pairings
        Additional keyword arguments forwarded to
        ``images_to_scatter_grid``.

    Returns
    -------
    The value returned by ``images_to_scatter_grid``.

    Examples
    --------

    Create a grid plot with hue labels.

    .. plot:: ../examples/scatter_grid.py
    """
    x_coords = data_utils.get_variable(data, x)
    y_coords = data_utils.get_variable(data, y)

    # Coloring works on raw arrays, so image objects are requested directly
    # only when no `hue` is given.
    recolor = hue is not None
    tiles = data_utils.get_images(data,
                                  images,
                                  image_dir=image_dir,
                                  as_image=not recolor,
                                  image_size=image_size,
                                  n_jobs=n_jobs)

    # TODO (seaborn is only required for a color palette. Remove this)
    if recolor:
        hue_values = data_utils.get_variable(data, hue)
        # `level_codes[i]` is the index into `levels` of observation i.
        levels, level_codes = np.unique(hue_values, return_inverse=True)
        level_colors = sns.husl_palette(len(levels))

        tinted = []
        for img, code in zip(tiles, level_codes):
            tinted.append(features.color_image(img, hue=level_colors[code]))

        # Convert the colored arrays to Pillow images for the grid.
        tiles = [image_io.to_pillow_image(img) for img in tinted]

    return images_to_scatter_grid(tiles,
                                  x_coords,
                                  y_coords,
                                  padding=padding,
                                  **kwargs)