def test_multiple_context():
    """Setting both image_col and image_dir is visible only inside the context.

    Fix: compare to None with ``is None`` (PEP 8 E711), not ``== None``.
    """
    # Defaults hold before the context is entered.
    assert contexts.get_image_dir() == ''
    assert contexts.get_image_col() is None
    with ivs.plotting_context(image_col='test_col', image_dir='test_dir'):
        # Both values are active inside the context.
        assert contexts.get_image_dir() == 'test_dir'
        assert contexts.get_image_col() == 'test_col'
    # Exiting the context restores the defaults.
    assert contexts.get_image_dir() == ''
    assert contexts.get_image_col() is None
def test_image_dir():
    """Setting image_dir alone changes only image_dir, and only inside the context.

    Fix: compare to None with ``is None`` (PEP 8 E711), not ``== None``.
    """
    # Defaults hold before the context is entered.
    assert contexts.get_image_dir() == ''
    assert contexts.get_image_col() is None
    with ivs.plotting_context(image_dir='test'):
        # Only image_dir changes; image_col keeps its default.
        assert contexts.get_image_dir() == 'test'
        assert contexts.get_image_col() is None
    # Exiting the context restores the defaults.
    assert contexts.get_image_dir() == ''
    assert contexts.get_image_col() is None
def directory_to_dataframe(image_dir='', features=None, n_jobs=-1):
    """Create a pandas.DataFrame containing the path to all images in a directory.

    The pandas.DataFrame has a single column `image_path`, which contains
    the paths to the various images. The paths are relative to the given
    `image_dir`. For example, a directory named `image_dir` with two images
    `image1.jpg` and `image2.jpg` would result in a single column dataframe.
    The column is named 'image_path' and it contains the paths to the two
    images.

    Parameters
    ----------
    image_dir : str
        The directory to search of images and place their paths in a
        dataframe. Falls back to the active plotting context's image_dir
        when empty.
    features : list or None
        A list of features to include in the dataframe. The default (None)
        includes no additional features.
    n_jobs : int
        The number of parallel jobs used to load the images from disk.

    Returns
    -------
    pandas.DataFrame
        A dataframe containing the image paths.

    Raises
    ------
    ValueError
        If `features` contains no known HSV feature names.
    """
    if not image_dir:
        image_dir = contexts.get_image_dir()

    image_files = list(
        itertools.chain.from_iterable(
            [image_glob(image_dir, ext) for ext in image_extensions]))

    # Fix: the previous `f.split(image_dir + os.path.sep)[1]` raised
    # IndexError when the separator was absent, and when image_dir was ''
    # it split on os.path.sep alone and dropped sub-directory components
    # ('sub/img.jpg' -> 'img.jpg'). os.path.relpath handles nesting
    # correctly and is equivalent for a non-empty image_dir prefix.
    image_files = [
        os.path.relpath(f, image_dir) if image_dir else f
        for f in image_files]

    data = pd.DataFrame({'image_path': image_files})

    if features:
        if set(features) & set(feature_lib.HSVFeatures.all_features()):
            # HSV statistics require loading the pixel data from disk.
            images = load_images(data['image_path'],
                                 image_dir=image_dir,
                                 as_image=True,
                                 n_jobs=n_jobs)
            hsv = feature_lib.extract_hsv_stats(images, n_jobs=n_jobs)
            for feature in features:
                feature_idx = feature_lib.HSVFeatures.feature_index(feature)
                data[feature] = hsv[:, feature_idx]
        else:
            raise ValueError('Unknown features.')

    return data
def get_images(data, images, image_dir='', image_size=None, as_image=False,
               index=None, n_jobs=1):
    """Helper function to load images from disk or properly format an
    already existing image array.

    Parameters
    ----------
    data : pandas.DataFrame
        Tidy ("long-form") dataframe where each column is a variable and
        each row is an observation.
    images : str or array-like of shape [n_samples, width, height, channels], optional
        Image array or name of the variable containing the image file paths
        within `data`. When falsy, the paths are taken from the active
        plotting context (its image data, or the image column of `data`).
    image_dir : str, optional
        The location of the image files on disk. Images will be loaded from
        files matching the pattern 'image_dir + os.path.sep + image_path'.
        Falls back to the active plotting context's image_dir when empty.
    image_size : int, optional
        The size of each image displayed in the scatter plot. Images will be
        sampled to `image_size` if the size of the images do not match
        `image_size`.
    as_image : bool
        If True, the returned images are converted to PIL.Image objects.
    index : array-like or None, optional
        Positional indices selecting a subset of the images before loading
        (applied with `.iloc`, so `images` is presumably a pandas.Series at
        that point -- TODO confirm against callers).
    n_jobs : int, optional
        The number of parallel jobs used to load the images from disk.

    Returns
    -------
    images : array-like
        Either a list of PIL.Images or a np.array of shape
        [n_samples, width, height, channels].
    """
    # Fast path: pixel data is already in memory; only convert / resize.
    if isinstance(images, np.ndarray):
        if as_image:
            return [image_io.to_pillow_image(img, image_size=image_size)
                    for img in images]
        elif image_size:
            # NOTE(review): scipy.misc.imresize was deprecated and removed
            # in SciPy >= 1.3 -- confirm the pinned SciPy still provides it.
            return np.asarray(
                [scipy.misc.imresize(img, image_size, interp='lanczos')
                 for img in images])
        return images

    # Fall back to the active plotting context for unspecified inputs.
    if not image_dir:
        image_dir = contexts.get_image_dir()

    if not images:
        images = contexts.get_image_data()
        if images is None:
            # No raw image data in the context: look the file paths up in
            # `data` via the context's configured image column.
            images = data[contexts.get_image_col()]

    if index is not None:
        # Positional subset (see `index` in the docstring above).
        images = images.iloc[index]

    images = image_io.load_images(
        images, image_dir=image_dir, as_image=as_image,
        image_size=image_size, n_jobs=n_jobs)

    return images