def test_DeepImagePredictorNoReshape(self):
    """
    Feed manually-resized images to the sparkDL predictor and check the output against keras.

    Each array in ``self.imageArray`` is converted to an image-schema row, the rows are
    turned into a DataFrame and run through ``DeepImagePredictor``. The collected
    predictions must match ``self.kerasPredict`` in shape and, almost, in value.
    """
    def toSchemaRow(pixels):
        struct = imageIO.imageArrayToStruct(pixels.astype('uint8'))
        # Emit the values in schema field order to avoid a pyspark bug.
        imageFields = ImageSchema.imageSchema['image'].dataType
        return [[getattr(struct, f.name) for f in imageFields]]

    # Predictor vs keras on resized images.
    schemaRows = list(map(toSchemaRow, self.imageArray))
    resizedDf = self.sc.parallelize(schemaRows).toDF(ImageSchema.imageSchema)

    # Optionally squeeze the data into the configured number of partitions.
    partitions = self.numPartitionsOverride
    if partitions:
        resizedDf = resizedDf.coalesce(partitions)

    predictor = DeepImagePredictor(inputCol='image',
                                   modelName=self.name,
                                   outputCol="prediction")
    collected = predictor.transform(resizedDf).collect()
    sparkPredict = np.array([r.prediction for r in collected])

    expected = self.kerasPredict
    self.assertEqual(expected.shape, sparkPredict.shape)
    np.testing.assert_array_almost_equal(expected, sparkPredict)
def test_DeepImagePredictorNoReshape(self):
    """
    Feed manually-resized images to the sparkDL predictor and check the output against keras.

    Each array in ``self.imageArray`` is converted to an RGB image struct, wrapped in a
    single-column ("image") DataFrame and run through ``DeepImagePredictor``. The
    collected predictions must match ``self.kerasPredict`` in shape and, almost, in value.
    """
    def toSchemaRow(pixels):
        struct = imageIO.imageArrayToStruct(pixels.astype('uint8'), imageIO.SparkMode.RGB)
        # Emit the values in schema field order to avoid a pyspark bug.
        return [[getattr(struct, f.name) for f in imageIO.imageSchema]]

    # Predictor vs keras on resized images.
    schemaRows = list(map(toSchemaRow, self.imageArray))
    frameSchema = StructType([StructField("image", imageIO.imageSchema)])
    resizedDf = self.sc.parallelize(schemaRows).toDF(frameSchema)

    predictor = DeepImagePredictor(inputCol='image',
                                   modelName=self.name,
                                   outputCol="prediction")
    collected = predictor.transform(resizedDf).collect()
    sparkPredict = np.array([r.prediction for r in collected])

    expected = self.kerasPredict
    self.assertEqual(expected.shape, sparkPredict.shape)
    np.testing.assert_array_almost_equal(expected, sparkPredict)
def test_DeepImagePredictor(self):
    """
    Check that the predictor output on ``self.imageDF`` (almost) equals the Keras output.
    """
    predictor = DeepImagePredictor(
        inputCol='image',
        modelName=self.name,
        outputCol="prediction",
    )

    # NOTE(review): assumes collect() yields rows in the same order as
    # self.kerasPredict -- confirm against how the fixture is built.
    collected = predictor.transform(self.imageDF).collect()
    sparkPredict = np.array([r.prediction for r in collected])

    expected = self.kerasPredict
    self.assertEqual(expected.shape, sparkPredict.shape)
    np.testing.assert_array_almost_equal(expected, sparkPredict, decimal=6)
def test_DeepImagePredictor(self):
    """
    Check that the predictor output on ``self.imageDF`` (almost) equals the Keras output.

    The collected rows are passed through ``self._sortByFileOrder`` before comparison,
    and the comparison precision comes from ``self.featurizerCompareDigitsExact``.
    """
    predictor = DeepImagePredictor(
        inputCol='image',
        modelName=self.name,
        outputCol="prediction",
    )

    collected = predictor.transform(self.imageDF).collect()
    orderedRows = self._sortByFileOrder(collected)
    sparkPredict = np.array([r.prediction for r in orderedRows])

    expected = self.kerasPredict
    self.assertEqual(expected.shape, sparkPredict.shape)
    np.testing.assert_array_almost_equal(
        expected, sparkPredict, decimal=self.featurizerCompareDigitsExact)
def test_inceptionV3_prediction_decoded(self):
    """
    Check that decoded InceptionV3 predictions contain exactly ``topK`` entries per row.

    Only the first five rows of the sample image DataFrame are transformed.
    """
    predictionCol = "prediction"
    expectedCount = 10

    predictor = DeepImagePredictor(
        inputCol="image",
        outputCol=predictionCol,
        modelName="InceptionV3",
        decodePredictions=True,
        topK=expectedCount,
    )
    sampleDf = getSampleImageDF()
    resultRows = predictor.transform(sampleDf.limit(5)).collect()

    for resultRow in resultRows:
        self.assertEqual(len(resultRow[predictionCol]), expectedCount)
def test_prediction_decoded(self):
    """
    Check that decoded predictions on ``self.imageDF`` contain exactly ``topK`` entries per row.
    """
    predictionCol = "prediction"
    expectedCount = 10

    predictor = DeepImagePredictor(
        inputCol="image",
        outputCol=predictionCol,
        modelName=self.name,
        decodePredictions=True,
        topK=expectedCount,
    )
    resultRows = predictor.transform(self.imageDF).collect()

    for resultRow in resultRows:
        self.assertEqual(len(resultRow[predictionCol]), expectedCount)
def test_DeepImagePredictor(self):
    """
    Run the sparkDL InceptionV3 predictor on the sample image DataFrame and compare
    the output with ``self.kerasPredict``.

    Per the original description, the sample images are raw (original size) while the
    keras reference was produced with keras-side resizing.
    """
    predictor = DeepImagePredictor(
        inputCol='image',
        modelName="InceptionV3",
        outputCol="prediction",
    )

    rawImageDf = getSampleImageDF()
    collected = predictor.transform(rawImageDf).collect()
    sparkPredict = np.array([r.prediction for r in collected])

    expected = self.kerasPredict
    self.assertEqual(expected.shape, sparkPredict.shape)

    # The original note attributes remaining differences to the resize step.
    # NOTE(review): decimal=6 equals numpy's default precision, so this is not
    # actually a loosened tolerance -- confirm the intended value.
    # TODO: match keras resize step to get closer prediction
    np.testing.assert_array_almost_equal(expected, sparkPredict, decimal=6)
def test_DeepImagePredictor(self):
    """
    Run the sparkDL predictor for ``self.name`` on ``self.imageDF`` and compare the
    output with ``self.kerasPredict``.

    Per the original description, the DataFrame holds raw (original size) images while
    the keras reference was produced with keras-side resizing.
    """
    predictor = DeepImagePredictor(inputCol='image',
                                   modelName=self.name,
                                   outputCol="prediction")

    collected = predictor.transform(self.imageDF).collect()
    sparkPredict = np.array([r.prediction for r in collected])

    expected = self.kerasPredict
    self.assertEqual(expected.shape, sparkPredict.shape)

    # The original note attributes remaining differences to the resize step.
    # NOTE(review): decimal=6 equals numpy's default precision, so this is not
    # actually a loosened tolerance -- confirm the intended value.
    # TODO: match keras resize step to get closer prediction
    np.testing.assert_array_almost_equal(expected, sparkPredict, decimal=6)
def test_DeepImagePredictorNoReshape(self):
    """
    Feed manually-resized images to the sparkDL predictor and check the output against keras.

    Rows are built from ``self.imageArray`` with ``self._rowWithImage``, turned into a
    DataFrame and run through ``DeepImagePredictor``. The collected predictions must
    match ``self.kerasPredict`` in shape and, almost, in value.
    """
    # Predictor vs keras on resized images.
    schemaRows = list(map(self._rowWithImage, self.imageArray))
    resizedDf = self.sc.parallelize(schemaRows).toDF(ImageSchema.imageSchema)

    # Optionally squeeze the data into the configured number of partitions.
    partitions = self.numPartitionsOverride
    if partitions:
        resizedDf = resizedDf.coalesce(partitions)

    predictor = DeepImagePredictor(inputCol='image',
                                   modelName=self.name,
                                   outputCol="prediction")
    collected = predictor.transform(resizedDf).collect()
    sparkPredict = np.array([r.prediction for r in collected])

    expected = self.kerasPredict
    self.assertEqual(expected.shape, sparkPredict.shape)
    np.testing.assert_array_almost_equal(expected, sparkPredict)
def test_inceptionV3_prediction(self):
    """
    Test inceptionV3 using keras, tensorflow and sparkDL.

    The keras prediction on the sample images (resized to 299x299) is the reference.
    It is compared against:

    1. the TensorFlow graph produced by ``_buildTFGraphForName``,
    2. ``DeepImagePredictor`` applied to the already-resized images,
    3. ``DeepImagePredictor`` applied to the sample image DataFrame
       (raw, original-size images per the original description).
    """
    # The file names returned alongside the images are not needed here.
    _, images = getSampleImageList()
    imageArray = np.empty((len(images), 299, 299, 3), 'uint8')
    for i, img in enumerate(images):
        # unittest assertions are used instead of bare ``assert`` so the checks
        # survive ``python -O`` and give readable failure messages.
        self.assertIsNotNone(img)
        self.assertEqual(img.mode, "RGB")
        imageArray[i] = np.array(img.resize((299, 299)))

    # Basic keras flow
    # We predict the class probabilities for the images in our test library using keras API.
    prepedImaged = inception_v3.preprocess_input(imageArray.astype('float32'))
    model = inception_v3.InceptionV3()
    kerasPredict = model.predict(prepedImaged)

    # test: _buildTfGraphForName
    # Run the graph produced by _buildTfGraphForName and compare the result to above keras
    # result.
    modelGraphInfo = _buildTFGraphForName("InceptionV3", False)
    graph = modelGraphInfo["graph"]
    # Using the session itself as the context manager closes it on exit;
    # ``sess.as_default()`` alone would leave the session (and its resources) open.
    with tf.Session(graph=graph) as sess:
        inputTensor = graph.get_tensor_by_name(modelGraphInfo["inputTensorName"])
        outputTensor = graph.get_tensor_by_name(modelGraphInfo["outputTensorName"])
        tfPredict = sess.run(outputTensor, {inputTensor: imageArray})

    self.assertEqual(kerasPredict.shape, tfPredict.shape)
    np.testing.assert_array_almost_equal(kerasPredict, tfPredict)

    imageType = imageIO.pilModeLookup["RGB"]

    def rowWithImage(img):
        row = imageIO.imageArrayToStruct(img.astype('uint8'), imageType.sparkMode)
        # re-order row to avoid pyspark bug
        return [[getattr(row, field.name) for field in imageIO.imageSchema]]

    # test: predictor vs keras on resized images
    # Run sparkDL inceptionV3 transformer on resized images and compare result to above keras
    # result.
    rdd = self.sc.parallelize([rowWithImage(img) for img in imageArray])
    dfType = StructType([StructField("image", imageIO.imageSchema)])
    imageDf = rdd.toDF(dfType)

    transformer = DeepImagePredictor(
        inputCol='image',
        modelName="InceptionV3",
        outputCol="prediction",
    )
    dfPredict = transformer.transform(imageDf).collect()
    dfPredict = np.array([i.prediction for i in dfPredict])

    self.assertEqual(kerasPredict.shape, dfPredict.shape)
    np.testing.assert_array_almost_equal(kerasPredict, dfPredict)

    # test: predictor vs keras on raw images
    # Run sparkDL inceptionV3 transformer on raw (original size) images and compare result to
    # above keras (using keras resizing) result.
    origImgDf = getSampleImageDF()
    fullPredict = transformer.transform(origImgDf).collect()
    fullPredict = np.array([i.prediction for i in fullPredict])

    self.assertEqual(kerasPredict.shape, fullPredict.shape)
    # The original note attributes remaining differences to the resize step.
    # NOTE(review): decimal=6 equals numpy's default precision, so this is not
    # actually a loosened tolerance -- confirm the intended value.
    # TODO: match keras resize step to get closer prediction
    np.testing.assert_array_almost_equal(kerasPredict, fullPredict, decimal=6)