Ejemplo n.º 1
0
def test_show_remote_ref():
    """A remote-URI Image renders the same HTML as IPython's own Image."""
    from IPython.display import Image as IPyImage

    remote_uri = "https://octodex.github.com/images/original.png"
    remote_image = Image(remote_uri)
    # TODO check the actual content
    own_html = remote_image._repr_html_()
    assert own_html == remote_image.display()._repr_html_()

    # The displayable object must match what IPython builds from the same URI.
    shown_html = remote_image.display()._repr_html_()
    assert shown_html == IPyImage(remote_uri)._repr_html_()
Ejemplo n.º 2
0
def test_embeded_image_from_bytesio():
    """An Image built from a PNG held in a BytesIO decodes to the source."""
    noise = np.random.random((100, 100))
    # Shift by the minimum and scale by 255 / max, then cast to uint8.
    pixels = (255.0 / noise.max() * (noise - noise.min())).astype(np.uint8)

    png_buffer = BytesIO()
    PILImage.fromarray(pixels).save(png_buffer, format="PNG")
    png_buffer.seek(0)  # rewind so the buffer is read from its first byte

    decoded = Image(png_buffer).to_numpy()
    assert np.array_equal(decoded, pixels)
Ejemplo n.º 3
0
def test_image_use_https_uri():
    """Pixels read via Image(WIKIPEDIA) equal those of a direct download."""
    img = Image(WIKIPEDIA)

    # Fetch the same resource by hand, sending an explicit User-Agent header.
    request_headers = {"User-Agent": "curl/7.72.0"}
    response = requests.get(WIKIPEDIA, headers=request_headers)
    downloaded = PIL.Image.open(BytesIO(response.content))

    assert np.array_equal(img.to_numpy(), np.array(downloaded))
Ejemplo n.º 4
0
def test_crop_in_batch():
    """Cropping with a list of boxes yields one patch per box, in order."""
    uri = "http://farm2.staticflickr.com/1129/4726871278_4dd241a03a_z.jpg"
    img = Image(uri)
    pixels = img.to_numpy()

    # Each box is square: (lo, lo) to (hi, hi).
    corners = [(10, 30), (15, 35), (20, 40)]
    boxes = [Box2d(lo, lo, hi, hi) for lo, hi in corners]
    patches = img.crop(boxes)

    assert len(patches) == 3
    for idx, (lo, hi) in enumerate(corners):
        expected = pixels[lo:hi, lo:hi, :]
        assert np.array_equal(patches[idx].to_numpy(), expected)
Ejemplo n.º 5
0
def test_show_embedded_jpeg(tmp_path):
    """_repr_jpeg_ gives the base64 file content for path and file inputs."""
    noise = np.random.random((100, 100))
    pixels = (255.0 / noise.max() * (noise - noise.min())).astype(np.uint8)

    jpeg_path = tmp_path / "test.jpg"
    PILImage.fromarray(pixels).save(jpeg_path)
    from_path = Image(jpeg_path)._repr_jpeg_()

    with open(jpeg_path, "rb") as jpeg_file:
        raw_bytes = jpeg_file.read()
        assert from_path == b2a_base64(raw_bytes).decode("ascii")

        # Rewind and hand the open file object itself to Image.
        jpeg_file.seek(0)
        from_handle = Image(jpeg_file)
        assert from_path == from_handle._repr_jpeg_()
Ejemplo n.º 6
0
def numpy_to_image(array: ndarray, uri: str) -> Image:
    """Turn a numpy array into an image stored at the given URI.

    Thin wrapper that delegates to
    :py:meth:`rikai.types.vision.Image.from_array`.

    Parameters
    ----------
    array : :py:class:`numpy.ndarray`
        The image data.
    uri : str
        The base directory to copy the image to.

    Return
    ------
    Image
        A new image pointing to the new URI.

    Example
    -------

    >>> spark.createDataFrame(..).registerTempTable("df")
    >>>
    >>> spark.sql(\"\"\"SELECT numpy_to_image(
    ...        resize(grayscale(image)),
    ...        lit('s3://asset')
    ...    ) AS new_image FROM df\"\"\")

    See Also
    --------
    :py:meth:`rikai.types.vision.Image.from_array`
    """
    stored_image = Image.from_array(array, uri)
    return stored_image
Ejemplo n.º 7
0
def video_to_images(
    video: Union[VideoStream, YouTubeVideo],
    output_uri: str,
    segment: Segment = Segment(0, -1),
    sample_rate: int = 1,
    max_samples: int = 15000,
    quality: str = "worst",
) -> list:
    """Extract video frames into a list of images.

    Parameters
    ----------
    video : VideoStream or YouTubeVideo
        A video object, either YouTubeVideo or VideoStream.
    output_uri: str
        Frames will be written as <output_uri>/<fno>.jpg
    segment: Segment, default Segment(0, -1)
        A Segment object, localizing video in time to (start_fno, end_fno).
        The end bound is only applied when ``end_fno`` is positive.
    sample_rate : int, default 1
        Keep 1 out of every sample_rate frames.
    max_samples : int, default 15000
        Return at most this many frames (-1 means no max). When the segment
        has a positive ``end_fno``, the limit is additionally capped at
        ``end_fno - start_fno``, also when ``max_samples`` is negative.
    quality: str, default 'worst'
        Either 'worst' (lowest bitrate) or 'best' (highest bitrate)
        See: https://pythonhosted.org/Pafy/index.html#Pafy.Pafy.getbest
        Only used for YouTubeVideo inputs.

    Return
    ------
    List
        Return a list of images from video indexed by frame number.

    Raises
    ------
    AssertionError
        If ``video`` or ``segment`` has an unexpected type.
    """
    assert isinstance(
        video, (YouTubeVideo,
                VideoStream)), "Input type must be YouTubeVideo or VideoStream"
    assert isinstance(segment, Segment), "Second input type must be Segment"

    start_frame = segment.start_fno
    if segment.end_fno > 0:
        segment_length = segment.end_fno - start_frame
        # A negative max_samples means "no max". A plain min() would select
        # that negative value and silently discard the segment's end bound.
        if max_samples < 0:
            max_samples = segment_length
        else:
            max_samples = min(segment_length, max_samples)

    # A YouTubeVideo is resolved to a stream of the requested quality first;
    # a VideoStream is passed to the sampler as-is.
    if isinstance(video, YouTubeVideo):
        stream = video.get_stream(quality=quality)
    else:
        stream = video
    video_iterator = SingleFrameSampler(stream, sample_rate, start_frame,
                                        max_samples)

    # NOTE(review): the file name uses (start_frame + idx) * sample_rate as
    # the frame number -- confirm this matches the sampler's frame numbering.
    return [
        Image.from_array(
            img,
            os.path.join(output_uri, "{}.jpg".format(
                (start_frame + idx) * sample_rate)),
        ) for idx, img in enumerate(video_iterator)
    ]
Ejemplo n.º 8
0
def video_to_images(
    video,
    segment: Segment = Segment(0, -1),
    sample_rate: int = 1,
    max_samples: int = 15000,
    quality: str = "worst",
) -> list:
    """Extract video frames into a list of images.

    Frames are written as ``<base>_<fno>.jpg`` where ``<base>`` is
    ``video.vid`` for a YouTubeVideo and ``video.uri`` for a VideoStream.

    Parameters
    ----------
    video : YouTubeVideo or VideoStream
        A video object, either YouTubeVideo or VideoStream.
    segment: Segment, default Segment(0, -1)
        A Segment object, localizing video in time to (start_fno, end_fno).
        The end bound is only applied when ``end_fno`` is positive.
    sample_rate : int, default 1
        The sampling rate in number of frames
    max_samples : int, default 15000
        Yield at most this many frames (-1 means no max). When the segment
        has a positive ``end_fno``, the limit is additionally capped at
        ``end_fno - start_fno``, also when ``max_samples`` is negative.
    quality: str, default 'worst'
        Either 'worst' (lowest bitrate) or 'best' (highest bitrate)
        See: https://pythonhosted.org/Pafy/index.html#Pafy.Pafy.getbest
        Only used for YouTubeVideo inputs.

    Return
    ------
    List
        Return a list of images from video indexed by frame number.

    Raises
    ------
    AssertionError
        If ``video`` or ``segment`` has an unexpected type.
    """
    assert isinstance(
        video, (YouTubeVideo,
                VideoStream)), "Input type must be YouTubeVideo or VideoStream"
    assert isinstance(segment, Segment), "Second input type must be Segment"

    start_frame = segment.start_fno
    if segment.end_fno > 0:
        segment_length = segment.end_fno - start_frame
        # A negative max_samples means "no max". A plain min() would select
        # that negative value and silently discard the segment's end bound.
        if max_samples < 0:
            max_samples = segment_length
        else:
            max_samples = min(segment_length, max_samples)

    # Pick the file-name prefix and the stream to sample in one place, so
    # each input type only touches the attributes it is known to use.
    if isinstance(video, YouTubeVideo):
        base_path = video.vid
        stream = video.get_stream(quality=quality)
    else:
        base_path = video.uri
        stream = video
    video_iterator = SingleFrameSampler(stream, sample_rate, start_frame,
                                        max_samples)

    # NOTE(review): the file name uses (start_frame + idx) * sample_rate as
    # the frame number -- confirm this matches the sampler's frame numbering.
    return [
        Image.from_array(
            img,
            "{}_{}.jpg".format(base_path, (start_frame + idx) * sample_rate),
        ) for idx, img in enumerate(video_iterator)
    ]
Ejemplo n.º 9
0
def spectrogram_image(
    video: Union[VideoStream, YouTubeVideo],
    output_uri: str,
    segment: Segment = Segment(0, -1),
    size: int = 224,
    max_samples: int = 15000,
) -> Image:
    """Applies ffmpeg filter to generate spectrogram image.

    Parameters
    ----------
    video : VideoStream or YouTubeVideo
        A video object whose audio track will be converted to spectrogram
    output_uri: str
        The uri to which the spectrogram image will be written to
    segment: Segment, default Segment(0, -1)
        A Segment object, localizing video in time to (start_fno, end_fno).
        The end bound is only applied when ``end_fno`` is positive.
    size : int, default 224
        Sets resolution of frequency, time spectrogram image. The result is
        reshaped to ``(size, size, 3)``.
    max_samples : int, default 15000
        Yield at most this many frames (-1 means no max). When the segment
        has a positive ``end_fno``, the limit is additionally capped at
        ``end_fno - start_fno``, also when ``max_samples`` is negative.

    Return
    ------
    Image
        Return an Image of the audio spectrogram.

    Raises
    ------
    ValueError
        If the ``ffmpeg`` module cannot be imported.
    AssertionError
        If ``video`` or ``segment`` has an unexpected type.
    """
    try:
        import ffmpeg
    except ImportError as err:
        # Chain the original error so the underlying import failure stays
        # visible in the traceback.
        raise ValueError("Couldn't import ffmpeg. Please make sure to "
                         "`pip install ffmpeg-python` explicitly or install "
                         "the correct extras like `pip install rikai[all]`"
                         ) from err
    assert isinstance(
        video, (YouTubeVideo,
                VideoStream)), "Input type must be YouTubeVideo or VideoStream"
    assert isinstance(segment, Segment), "Second input type must be Segment"

    start_frame = segment.start_fno
    if segment.end_fno > 0:
        segment_length = segment.end_fno - start_frame
        # A negative max_samples means "no max". A plain min() would select
        # that negative value and silently discard the segment's end bound.
        if max_samples < 0:
            max_samples = segment_length
        else:
            max_samples = min(segment_length, max_samples)

    video_uri = (video.get_stream().uri
                 if isinstance(video, YouTubeVideo) else video.uri)

    # Render a size x size spectrum picture without legend, and read it back
    # from ffmpeg's stdout as raw 24-bit RGB bytes.
    spectrum = ffmpeg.input(video_uri).filter(
        "showspectrumpic",
        "{}x{}".format(size, size),
        legend=0,
    )
    output, _ = spectrum.output(
        "pipe:",
        format="rawvideo",
        pix_fmt="rgb24",
        start_number=start_frame,
        vframes=max_samples,
    ).run(capture_stdout=True)

    return Image.from_array(
        np.frombuffer(output, np.uint8).reshape([size, size, 3]), output_uri)
Ejemplo n.º 10
0
def test_format_kwargs(tmp_path):
    """from_array with format/kwargs writes files that match PIL's save.

    For each format, the file written by ``Image.from_array`` must compare
    equal (via ``filecmp.cmp``) to one saved directly through PIL with the
    same format and options.
    """
    noise = np.random.random((100, 100))
    pixels = (255.0 / noise.max() * (noise - noise.min())).astype(np.uint8)

    # (result file, expected file, format, extra save options)
    cases = (
        ("result.jpg", "expected.jpg", "jpeg", {"optimize": True}),
        ("result.png", "expected.png", "png", {"compress_level": 1}),
    )
    for result_name, expected_name, fmt, save_options in cases:
        result_uri = tmp_path / result_name
        Image.from_array(pixels, result_uri, format=fmt, **save_options)

        expected_uri = tmp_path / expected_name
        reference = PILImage.fromarray(pixels)
        reference.save(expected_uri, format=fmt, **save_options)

        assert filecmp.cmp(result_uri, expected_uri)
Ejemplo n.º 11
0
def spectrogram_image(
    video,
    segment: Segment = Segment(0, -1),
    size: int = 224,
    max_samples: int = 15000,
) -> Image:
    """Applies ffmpeg filter to generate spectrogram image.

    The image is written to ``<base>_spectrogram.jpg`` where ``<base>`` is
    ``video.vid`` for a YouTubeVideo and ``video.uri`` for a VideoStream.

    Parameters
    ----------
    video : YouTubeVideo or VideoStream
        A video object, either YouTubeVideo or VideoStream.
    segment: Segment, default Segment(0, -1)
        A Segment object, localizing video in time to (start_fno, end_fno).
        The end bound is only applied when ``end_fno`` is positive.
    size : int, default 224
        Sets resolution of frequency, time spectrogram image. The result is
        reshaped to ``(size, size, 3)``.
    max_samples : int, default 15000
        Yield at most this many frames (-1 means no max). When the segment
        has a positive ``end_fno``, the limit is additionally capped at
        ``end_fno - start_fno``, also when ``max_samples`` is negative.

    Return
    ------
    Image
        Return an Image of the audio spectrogram.

    Raises
    ------
    AssertionError
        If ``video`` or ``segment`` has an unexpected type.
    """
    import ffmpeg

    assert isinstance(
        video, (YouTubeVideo,
                VideoStream)), "Input type must be YouTubeVideo or VideoStream"
    assert isinstance(segment, Segment), "Second input type must be Segment"

    is_youtube = isinstance(video, YouTubeVideo)
    base_path = video.vid if is_youtube else video.uri

    start_frame = segment.start_fno
    if segment.end_fno > 0:
        segment_length = segment.end_fno - start_frame
        # A negative max_samples means "no max". A plain min() would select
        # that negative value and silently discard the segment's end bound.
        if max_samples < 0:
            max_samples = segment_length
        else:
            max_samples = min(segment_length, max_samples)

    video_uri = video.get_stream().uri if is_youtube else video.uri

    # Render a size x size spectrum picture without legend, and read it back
    # from ffmpeg's stdout as raw 24-bit RGB bytes.
    spectrum = ffmpeg.input(video_uri).filter(
        "showspectrumpic",
        "{}x{}".format(size, size),
        legend=0,
    )
    output, _ = spectrum.output(
        "pipe:",
        format="rawvideo",
        pix_fmt="rgb24",
        start_number=start_frame,
        vframes=max_samples,
    ).run(capture_stdout=True)

    return Image.from_array(
        np.frombuffer(output, np.uint8).reshape([size, size, 3]),
        "{}_spectrogram.jpg".format(base_path),
    )
Ejemplo n.º 12
0
def image_copy(img: Image, uri: str) -> Image:
    """Copy an image to a new destination and return an Image for the copy.

    Parameters
    ----------
    img : Image
        The image to copy; its ``uri`` is used as the source.
    uri : str
        The base directory to copy the image to.

    Return
    ------
    Image
        A new image built from the value returned by ``_copy``
        (presumably the new URI -- confirm against ``_copy``).
    """
    logger.info("Copying image src=%s dest=%s", img.uri, uri)
    copied_uri = _copy(img.uri, uri)
    return Image(copied_uri)
Ejemplo n.º 13
0
def to_image(image_data: Union[bytes, bytearray, str, Path]) -> Image:
    """Wrap raw image bytes or a resource identifier in an :py:class:`Image`.

    Parameters
    ----------
    image_data : bytes, bytearray, str, Path
        Either the bytes of the source image or its resource identifier.
        The value is handed to the :py:class:`Image` constructor unchanged.

    Return
    ------
    img: Image
        An Image from the given embedded data or URI

    Example
    -------

    >>> df = spark.read.format("image").load("<path-to-data>")
    >>>
    >>> df.withColumn("new_image", to_image("image.data"))
    """
    wrapped = Image(image_data)
    return wrapped
Ejemplo n.º 14
0
def numpy_to_image(array: ndarray,
                   uri: str,
                   format: str = None,
                   **kwargs) -> Image:
    """Turn a numpy array into an image stored at the given URI.

    Thin wrapper that forwards every argument to
    :py:meth:`rikai.types.vision.Image.from_array`.

    Parameters
    ----------
    array : :py:class:`numpy.ndarray`
        The image data.
    uri : str
        The base directory to copy the image to.
    format : str, optional
        The image format to save as. See
        `supported formats <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save>`_ for details.
    kwargs : dict, optional
        Optional arguments to pass to `PIL.Image.save <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save>`_.

    Return
    ------
    Image
        A new image pointing to the new URI.

    Example
    -------

    >>> spark.createDataFrame(..).registerTempTable("df")
    >>>
    >>> spark.sql(\"\"\"SELECT numpy_to_image(
    ...        resize(grayscale(image)),
    ...        lit('s3://asset')
    ...    ) AS new_image FROM df\"\"\")

    See Also
    --------
    :py:meth:`rikai.types.vision.Image.from_array`
    """  # noqa: E501
    stored_image = Image.from_array(array, uri, format=format, **kwargs)
    return stored_image
Ejemplo n.º 15
0
    def deserialize(self, datum) -> "Image":
        """Rebuild an Image from a serialized datum.

        The first field is used when it is truthy; otherwise the second
        field is used.
        """
        # Imported inside the method rather than at module level
        # (presumably to avoid a circular import -- TODO confirm).
        from rikai.types.vision import Image

        # `or` keeps the lookup of datum[1] lazy, exactly as a fallback.
        payload = datum[0] or datum[1]
        return Image(payload)
Ejemplo n.º 16
0
def video_to_images(
    video: Union[VideoStream, YouTubeVideo],
    output_uri: str,
    segment: Segment = Segment(0, -1),
    sample_rate: int = 1,
    max_samples: int = 15000,
    quality: str = "worst",
    image_format: str = "png",
    **image_kwargs,
) -> list:
    """Extract video frames into a list of images.

    Parameters
    ----------
    video : VideoStream or YouTubeVideo
        A video object, either YouTubeVideo or VideoStream.
    output_uri: str
        Frames will be written as <output_uri>/<fno>.<image_format>
    segment: Segment, default Segment(0, -1)
        A Segment object, localizing video in time to (start_fno, end_fno).
        The end bound is only applied when ``end_fno`` is positive.
    sample_rate : int, default 1
        Keep 1 out of every sample_rate frames.
    max_samples : int, default 15000
        Return at most this many frames (-1 means no max). When the segment
        has a positive ``end_fno``, the limit is additionally capped at
        ``end_fno - start_fno``, also when ``max_samples`` is negative.
    quality: str, default 'worst'
        Either 'worst' (lowest bitrate) or 'best' (highest bitrate)
        See: https://pythonhosted.org/Pafy/index.html#Pafy.Pafy.getbest
        Only used for YouTubeVideo inputs.
    image_format : str, default 'png'
        The image format to save as; also used as the file extension. See
        `supported formats <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save>`_ for details.
    image_kwargs : dict, optional
        Optional arguments to pass to `PIL.Image.save <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save>`_.

    Return
    ------
    List
        Return a list of images from video indexed by frame number.

    Raises
    ------
    AssertionError
        If ``video`` or ``segment`` has an unexpected type.
    """  # noqa: E501
    assert isinstance(
        video, (YouTubeVideo,
                VideoStream)), "Input type must be YouTubeVideo or VideoStream"
    assert isinstance(segment, Segment), "Second input type must be Segment"

    start_frame = segment.start_fno
    if segment.end_fno > 0:
        segment_length = segment.end_fno - start_frame
        # A negative max_samples means "no max". A plain min() would select
        # that negative value and silently discard the segment's end bound.
        if max_samples < 0:
            max_samples = segment_length
        else:
            max_samples = min(segment_length, max_samples)

    # A YouTubeVideo is resolved to a stream of the requested quality first;
    # a VideoStream is passed to the sampler as-is.
    if isinstance(video, YouTubeVideo):
        stream = video.get_stream(quality=quality)
    else:
        stream = video
    video_iterator = SingleFrameSampler(stream, sample_rate, start_frame,
                                        max_samples)

    # NOTE(review): the file name uses (start_frame + idx) * sample_rate as
    # the frame number -- confirm this matches the sampler's frame numbering.
    return [
        Image.from_array(
            img,
            os.path.join(
                output_uri,
                "{}.{}".format((start_frame + idx) * sample_rate,
                               image_format),
            ),
            format=image_format,
            **image_kwargs,
        ) for idx, img in enumerate(video_iterator)
    ]
Ejemplo n.º 17
0
def test_crop_image():
    """Cropping an array-backed image returns the matching array slice."""
    pixels = np.random.randint(0, 255, size=(100, 100), dtype=np.uint8)
    source = Image.from_array(pixels)

    region = Box2d(10, 10, 30, 30)
    cropped = source.crop(region).to_numpy()

    assert np.array_equal(cropped, pixels[10:30, 10:30])
Ejemplo n.º 18
0
def image(uri: str) -> Image:
    """Construct an :py:class:`Image` that refers to the given URI."""
    built = Image(uri)
    return built
Ejemplo n.º 19
0
def uri_to_pil(uri):
    """Load the image at ``uri`` and return the result of its ``to_pil()``."""
    # TODO: We can remove this after UDT is supported in Spark
    loaded = Image(uri)
    return loaded.to_pil()
Ejemplo n.º 20
0
def test_crop_real_image():
    """Cropping a remote JPEG returns the matching slice of its pixels."""
    uri = "http://farm2.staticflickr.com/1129/4726871278_4dd241a03a_z.jpg"
    img = Image(uri)
    pixels = img.to_numpy()

    region = Box2d(10, 10, 30, 30)
    cropped = img.crop(region)

    assert np.array_equal(cropped.to_numpy(), pixels[10:30, 10:30, :])
Ejemplo n.º 21
0
    def deserialize(self, datum) -> "Image":
        """Rebuild an Image from the first field of a serialized datum."""
        # Imported inside the method rather than at module level
        # (presumably to avoid a circular import -- TODO confirm).
        from rikai.types.vision import Image  # pylint: disable=import-outside-toplevel

        first_field = datum[0]
        return Image(first_field)
Ejemplo n.º 22
0
def test_image_use_https_uri():
    """Pixels read via Image(WIKIPEDIA) equal those of a direct download."""
    img = Image(WIKIPEDIA)

    # Fetch the same resource by hand and decode it with PIL for comparison.
    response = requests.get(WIKIPEDIA)
    reference = PIL.Image.open(BytesIO(response.content))

    assert np.array_equal(img.to_numpy(), np.array(reference))