def test_video_to_images(
    spark: SparkSession, tmp_path: Path, asset_path: Path
):
    """Test extract video frames from YouTubeVideo/VideoStream types
    into list of Image assets.

    For every video source a single-row DataFrame of ``(video, segment)``
    is built, ``video_to_images`` is applied with a per-source output
    directory created under ``tmp_path``, and the resulting ``images``
    value is checked to be a list of exactly ``max_samples`` items whose
    first item is an ``Image``.

    Parameters
    ----------
    spark : SparkSession
        Session used to create the DataFrames.
    tmp_path : Path
        Directory under which one output directory per source is created.
    asset_path : Path
        Directory containing ``big_buck_bunny_short.mp4``.
    """
    sample_rate = 2
    max_samples = 10

    # (output directory name, video asset) for each source under test.
    # NOTE(review): the YouTubeVideo case presumably needs network access.
    sources = [
        (
            "videostream_test",
            VideoStream(str(asset_path / "big_buck_bunny_short.mp4")),
        ),
        ("youtube_test", YouTubeVideo(vid="rUWxSEwctFU")),
    ]

    for dir_name, video in sources:
        output_dir = tmp_path / dir_name
        output_dir.mkdir(parents=True)

        df = spark.createDataFrame(
            [(video, Segment(0, 20))], ["video", "segment"]
        ).withColumn(
            "images",
            video_to_images(
                col("video"),
                lit(str(output_dir)),
                col("segment"),
                lit(sample_rate),
                lit(max_samples),
            ),
        )
        samples = df.first()["images"]

        # Separate asserts so a failure names the exact broken expectation;
        # the length is checked before indexing so an empty result fails
        # with a readable message instead of an IndexError.
        assert isinstance(
            samples, list
        ), f"{dir_name}: expected a list, got {type(samples).__name__}"
        assert (
            len(samples) == max_samples
        ), f"{dir_name}: expected {max_samples} images, got {len(samples)}"
        assert isinstance(
            samples[0], Image
        ), f"{dir_name}: expected Image, got {type(samples[0]).__name__}"
def test_video_to_images(spark: SparkSession):
    """Test extract video frames from YouTubeVideo/VideoStream types
    into list of Image assets.

    For every video source a single-row DataFrame of ``(video, segment)``
    is built, ``video_to_images`` is applied to it, and the resulting
    ``images`` value is checked to be a list of exactly ``max_samples``
    items whose first item is an ``Image``.

    Parameters
    ----------
    spark : SparkSession
        Session used to create the DataFrames.
    """
    sample_rate = 2
    max_samples = 10

    # The local asset is resolved relative to this test file:
    # <this file's directory>/../assets/big_buck_bunny_short.mp4
    local_video_uri = os.path.abspath(
        os.path.join(
            os.path.dirname(__file__),
            "..",
            "assets",
            "big_buck_bunny_short.mp4",
        )
    )

    # (label used in failure messages, video asset) for each source.
    # NOTE(review): the YouTubeVideo case presumably needs network access.
    sources = [
        ("videostream", VideoStream(uri=local_video_uri)),
        ("youtube", YouTubeVideo(vid="rUWxSEwctFU")),
    ]

    for label, video in sources:
        df = spark.createDataFrame(
            [(video, Segment(0, 20))], ["video", "segment"]
        ).withColumn(
            "images",
            video_to_images(
                col("video"),
                col("segment"),
                lit(sample_rate),
                lit(max_samples),
            ),
        )
        samples = df.first()["images"]

        # Separate asserts so a failure names the exact broken expectation;
        # the length is checked before indexing so an empty result fails
        # with a readable message instead of an IndexError.
        assert isinstance(
            samples, list
        ), f"{label}: expected a list, got {type(samples).__name__}"
        assert (
            len(samples) == max_samples
        ), f"{label}: expected {max_samples} images, got {len(samples)}"
        assert isinstance(
            samples[0], Image
        ), f"{label}: expected Image, got {type(samples[0]).__name__}"
def test_segment(spark, tmpdir):
    """Run the round-trip check on a DataFrame holding two ``Segment`` rows.

    The rows wrap ``Segment(0, 10)`` and ``Segment(15, -1)``; verification is
    delegated to ``_check_roundtrip`` together with ``spark`` and ``tmpdir``.
    """
    segments = (Segment(0, 10), Segment(15, -1))
    rows = [Row(segment) for segment in segments]
    segment_df = spark.createDataFrame(rows)
    _check_roundtrip(spark, segment_df, tmpdir)
def deserialize(self, datum) -> "Segment":
    """Build a ``Segment`` from the first two items of ``datum``.

    ``datum[0]`` and ``datum[1]`` are passed, in that order, as the two
    positional arguments of ``Segment``.
    """
    # Imported at call time rather than module level
    # (presumably to avoid a circular import -- TODO confirm).
    from rikai.types import Segment  # pylint: disable=import-outside-toplevel

    first, second = datum[0], datum[1]
    return Segment(first, second)
def test_segment(self):
    """Run the round-trip check on a DataFrame holding two ``Segment`` rows.

    The rows wrap ``Segment(0, 10)`` and ``Segment(15, -1)``; the DataFrame
    is created from ``self.spark`` and handed to ``self._check_roundtrip``.
    """
    rows = [
        Row(Segment(0, 10)),
        Row(Segment(15, -1)),
    ]
    segment_df = self.spark.createDataFrame(rows)
    self._check_roundtrip(segment_df)