Example #1
0
    def test_read_images(self):
        """Check the "image" data source and the ImageSchema helpers.

        Loads the kitten sample images through ``spark.read.format("image")``,
        then verifies the row count, the resulting schema, the
        Row -> ndarray -> Row round trip, the constants published by
        ``ImageSchema`` and the errors raised for invalid arguments.
        """
        kittens_dir = "data/mllib/images/origin/kittens"
        reader = self.spark.read.format("image")
        reader = reader.option("dropInvalid", True)
        reader = reader.option("recursiveFileLookup", True)
        images = reader.load(kittens_dir)

        self.assertEqual(images.count(), 4)
        sample = images.take(1)[0][0]

        # Schemas are compared through `simpleString()` rather than directly:
        # a DataFrame loaded from the data source may report different column
        # nullability than the reference schema.
        self.assertEqual(
            images.schema.simpleString(),
            ImageSchema.imageSchema.simpleString(),
        )
        self.assertEqual(
            images.schema["image"].dataType.simpleString(),
            ImageSchema.columnSchema.simpleString(),
        )

        # Round trip: image Row -> ndarray -> image Row.
        pixels = ImageSchema.toNDArray(sample)
        self.assertEqual(len(pixels), sample[1])
        rebuilt = ImageSchema.toImage(pixels, origin=sample[0])
        self.assertEqual(rebuilt, sample)

        # Constants exposed by ImageSchema.
        ocv_types = {
            "CV_8UC3": 16,
            "Undefined": -1,
            "CV_8U": 0,
            "CV_8UC1": 0,
            "CV_8UC4": 24
        }
        self.assertEqual(ImageSchema.ocvTypes, ocv_types)
        field_names = ["origin", "height", "width", "nChannels", "mode", "data"]
        self.assertEqual(ImageSchema.imageFields, field_names)
        self.assertEqual(ImageSchema.undefinedImageType, "Undefined")

        # Invalid arguments must be rejected with a descriptive message.
        with QuietTest(self.sc):
            with self.assertRaisesRegex(
                    TypeError,
                    "image argument should be pyspark.sql.types.Row; however"):
                ImageSchema.toNDArray("a")

        with QuietTest(self.sc):
            with self.assertRaisesRegex(
                    ValueError,
                    "image argument should have attributes specified in"):
                ImageSchema.toNDArray(Row(a=1))

        with QuietTest(self.sc):
            with self.assertRaisesRegex(
                    TypeError,
                    "array argument should be numpy.ndarray; however, it got"):
                ImageSchema.toImage("a")
Example #2
0
    def test_read_images(self):
        """Check ``ImageSchema.readImages`` and the ImageSchema helpers.

        Reads the kitten sample images recursively (dropping files that fail
        to decode), then verifies the row count, the Row -> ndarray -> Row
        round trip, the resulting schema, the constants published by
        ``ImageSchema`` and the errors raised for invalid arguments.

        NOTE(review): ``ImageSchema.readImages`` is deprecated in newer Spark
        releases in favour of the "image" data source -- confirm which Spark
        version this test targets.
        """
        data_path = 'data/mllib/images/origin/kittens'
        df = ImageSchema.readImages(data_path,
                                    recursive=True,
                                    dropImageFailures=True)
        self.assertEqual(df.count(), 4)
        first_row = df.take(1)[0][0]

        # Round trip: image Row -> ndarray -> image Row.
        array = ImageSchema.toNDArray(first_row)
        self.assertEqual(len(array), first_row[1])
        self.assertEqual(ImageSchema.toImage(array, origin=first_row[0]),
                         first_row)

        self.assertEqual(df.schema, ImageSchema.imageSchema)
        self.assertEqual(df.schema["image"].dataType, ImageSchema.columnSchema)

        # Constants exposed by ImageSchema.
        expected = {
            'CV_8UC3': 16,
            'Undefined': -1,
            'CV_8U': 0,
            'CV_8UC1': 0,
            'CV_8UC4': 24
        }
        self.assertEqual(ImageSchema.ocvTypes, expected)
        expected = ['origin', 'height', 'width', 'nChannels', 'mode', 'data']
        self.assertEqual(ImageSchema.imageFields, expected)
        self.assertEqual(ImageSchema.undefinedImageType, "Undefined")

        # `assertRaisesRegex` replaces the deprecated `assertRaisesRegexp`
        # alias, which no longer exists in recent Python releases.
        with QuietTest(self.sc):
            self.assertRaisesRegex(
                TypeError,
                "image argument should be pyspark.sql.types.Row; however",
                lambda: ImageSchema.toNDArray("a"))

        with QuietTest(self.sc):
            self.assertRaisesRegex(
                ValueError,
                "image argument should have attributes specified in",
                lambda: ImageSchema.toNDArray(Row(a=1)))

        with QuietTest(self.sc):
            self.assertRaisesRegex(
                TypeError,
                "array argument should be numpy.ndarray; however, it got",
                lambda: ImageSchema.toImage("a"))
Example #3
0
# Load the images under the mounted directory into a DataFrame, requesting
# 16 partitions.
images_df = ImageSchema.readImages(
    '/mnt/roy/object-detection/images/', numPartitions=16)

# COMMAND ----------

# MAGIC %md
# MAGIC # Prediction on `test.jpg`
# MAGIC Use it later to assert prediction using `UDF`

# COMMAND ----------

# Pick the image struct of the row whose origin is `test.jpg` and convert it
# to an ndarray for local prediction.
test_image_rows = images_df.filter(
    "image.origin='dbfs:/mnt/roy/object-detection/images/test.jpg'")
test_row = test_image_rows.take(1)[0][0]
array = ImageSchema.toNDArray(test_row)

# COMMAND ----------

# Run the prediction helper on the single image and show its outputs.
out_scores, out_boxes, out_classes = predict_util(array)
print("Classes: {!s}".format(out_classes))
print("Scores: {!s}".format(out_scores))
print("Boxes: {!s}".format(out_boxes))

# COMMAND ----------

reveal("test.jpg")

# COMMAND ----------

# MAGIC %md