def test_featurization(self):
    """
    Tests that the featurizer output has the expected number of features.
    """
    output_col = "prediction"
    transformer = DeepImageFeaturizer(inputCol="image", outputCol=output_col,
                                      modelName=self.name)
    transformed_df = transformer.transform(self.imageDF)
    collected = transformed_df.collect()
    for row in collected:
        predictions = row[output_col]
        self.assertEqual(len(predictions), self.appModel.numOutputFeatures())
def test_inceptionV3_featurization(self):
    """
    Tests that InceptionV3 featurization produces vectors of the expected length.
    """
    output_col = "prediction"
    transformer = DeepImageFeaturizer(inputCol="image", outputCol=output_col,
                                      modelName="InceptionV3")
    image_df = getSampleImageDF()
    transformed_df = transformer.transform(image_df.limit(5))
    collected = transformed_df.collect()
    for row in collected:
        predictions = row[output_col]
        self.assertEqual(len(predictions), InceptionV3Constants.NUM_OUTPUT_FEATURES)
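# --- Illustrative sketch (not part of the original test suite) ---
# The tests above exercise DeepImageFeaturizer in isolation; in practice the
# featurizer is typically used as a Pipeline stage feeding a downstream estimator.
# A minimal sketch of that pattern is below. The DataFrame `train_images_df`
# (assumed to have "image" and "label" columns) and the LogisticRegression
# hyperparameters are illustrative assumptions, not values from the tests.
def _example_featurizer_pipeline(train_images_df):
    from pyspark.ml import Pipeline
    from pyspark.ml.classification import LogisticRegression
    from sparkdl import DeepImageFeaturizer

    featurizer = DeepImageFeaturizer(inputCol="image", outputCol="features",
                                     modelName="InceptionV3")
    lr = LogisticRegression(maxIter=20, regParam=0.05, elasticNetParam=0.3,
                            labelCol="label")
    pipeline = Pipeline(stages=[featurizer, lr])
    # Fit the full pipeline; the featurizer itself has no parameters to learn.
    return pipeline.fit(train_images_df)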
def test_featurization(self):
    """
    Tests that featurizer returns (almost) the same values as Keras.
    """
    output_col = "prediction"
    transformer = DeepImageFeaturizer(inputCol="image", outputCol=output_col,
                                      modelName=self.name)
    transformed_df = transformer.transform(self.imageDF)
    collected = transformed_df.collect()
    features = np.array([i.prediction for i in collected])
    # Note: Keras features may be multi-dimensional np arrays, but transformer features
    # will be 1-d vectors. Regardless, the dimensions should add up to the same.
    self.assertEqual(np.prod(self.kerasFeatures.shape), np.prod(features.shape))
    kerasReshaped = self.kerasFeatures.reshape(self.kerasFeatures.shape[0], -1)
    np.testing.assert_array_almost_equal(kerasReshaped, features, decimal=6)
def test_featurization(self):
    """
    Tests that featurizer returns (almost) the same values as Keras.
    """
    # Since we use different libraries for image resizing (PIL in Python vs.
    # java.awt.Image in Scala), the result will not match Keras exactly; the best
    # we can do is a "somewhat similar" result. So instead of element-wise equality
    # we only require the cosine distance to be small (< 1e-2).
    featurizer_sc = DeepImageFeaturizer(modelName=self.name, inputCol="image",
                                        outputCol="features", scaleHint="SCALE_FAST")
    features_sc = np.array([i.features for i in
                            featurizer_sc.transform(self.imageDF).select("features").collect()])
    kerasReshaped = self.kerasFeatures.reshape(self.kerasFeatures.shape[0], -1)
    diffs = [spatial.distance.cosine(kerasReshaped[i], features_sc[i])
             for i in range(len(features_sc))]
    np.testing.assert_array_almost_equal(0, diffs, decimal=self.featurizerCompareDigitsCosine)
def test_featurization_no_reshape(self):
    """
    Run the sparkdl featurizer on manually-resized images and compare the result
    to the Keras result.
    """
    imageArray = self.imageArray
    # test: featurizer vs. Keras on pre-resized images
    rdd = self.sc.parallelize([self._rowWithImage(img) for img in imageArray])
    dfType = ImageSchema.imageSchema
    imageDf = rdd.toDF(dfType)
    if self.numPartitionsOverride:
        imageDf = imageDf.coalesce(self.numPartitionsOverride)

    transformer = DeepImageFeaturizer(inputCol='image', modelName=self.name,
                                      outputCol="features")
    dfFeatures = transformer.transform(imageDf).collect()
    dfFeatures = np.array([i.features for i in dfFeatures])
    kerasReshaped = self.kerasFeatures.reshape(self.kerasFeatures.shape[0], -1)
    np.testing.assert_array_almost_equal(kerasReshaped, dfFeatures,
                                         decimal=self.featurizerCompareDigitsExact)
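# --- Illustrative sketch (not part of the original test suite) ---
# test_featurization_no_reshape relies on a `_rowWithImage` helper defined elsewhere
# on the test class. A minimal sketch of what such a helper could look like is given
# below, assuming 8-bit, 3-channel images stored with the pyspark ImageSchema field
# order (origin, height, width, nChannels, mode, data); the actual helper may differ.
def _rowWithImage_sketch(img):
    import numpy as np
    height, width, nChannels = img.shape
    data = bytearray(img.astype(np.uint8).tobytes())
    # Build the struct positionally so it lines up with ImageSchema.imageSchema;
    # mode 16 is OpenCV's CV_8UC3 (8-bit unsigned, 3 channels) -- an assumption here,
    # as is BGR channel ordering of `img`.
    return [["", height, width, nChannels, 16, data]]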