コード例 #1
0
    def test_featurization(self):
        """
        Check that each featurized row has the expected number of features.

        A DeepImageFeaturizer configured with ``self.name`` is applied to
        ``self.imageDF``; the length of every row's output column is compared
        against ``self.appModel.numOutputFeatures()``.
        """
        feature_col = "prediction"
        featurizer = DeepImageFeaturizer(modelName=self.name,
                                         inputCol="image",
                                         outputCol=feature_col)
        rows = featurizer.transform(self.imageDF).collect()

        for row in rows:
            # The expected size is queried from the model for every row.
            self.assertEqual(len(row[feature_col]), self.appModel.numOutputFeatures())
コード例 #2
0
    def test_inceptionV3_featurization(self):
        """
        Check the InceptionV3 featurizer's output length on sample images.

        Featurizes the first 5 rows of the sample image DataFrame and asserts
        that every row's output has
        ``InceptionV3Constants.NUM_OUTPUT_FEATURES`` entries.
        """
        feature_col = "prediction"
        featurizer = DeepImageFeaturizer(modelName="InceptionV3",
                                         inputCol="image",
                                         outputCol=feature_col)

        # The sample data is fetched after the transformer has been built.
        sample_rows = featurizer.transform(getSampleImageDF().limit(5)).collect()

        for row in sample_rows:
            self.assertEqual(len(row[feature_col]),
                             InceptionV3Constants.NUM_OUTPUT_FEATURES)
コード例 #3
0
    def test_inceptionV3_featurization(self):
        """
        Verify the per-row feature count produced with the InceptionV3 model.

        Only 5 rows of the sample image DataFrame are transformed; each
        resulting "prediction" value must have a length equal to
        ``InceptionV3Constants.NUM_OUTPUT_FEATURES``.
        """
        result_col = "prediction"
        featurizer = DeepImageFeaturizer(outputCol=result_col, inputCol="image",
                                         modelName="InceptionV3")

        limited_df = getSampleImageDF().limit(5)
        result_rows = featurizer.transform(limited_df).collect()

        # Walk the feature vectors directly instead of the full rows.
        for feature_vec in (row[result_col] for row in result_rows):
            self.assertEqual(len(feature_vec), InceptionV3Constants.NUM_OUTPUT_FEATURES)
コード例 #4
0
    def test_featurization(self):
        """
        Verify that the featurizer output agrees with the Keras features.

        The transformer's "prediction" column for ``self.imageDF`` is compared
        against ``self.kerasFeatures``: first by total element count, then
        element-wise to 6 decimal places.
        """
        featurizer = DeepImageFeaturizer(modelName=self.name, inputCol="image",
                                         outputCol="prediction")
        rows = featurizer.transform(self.imageDF).collect()
        sparkFeatures = np.array([row.prediction for row in rows])

        expected = self.kerasFeatures
        # Keras features may be multi-dimensional arrays while the transformer
        # emits flat vectors, so only the total number of elements is
        # comparable before reshaping.
        self.assertEqual(np.prod(expected.shape), np.prod(sparkFeatures.shape))

        # Collapse everything after the leading axis so the rows line up.
        flattened = expected.reshape(expected.shape[0], -1)
        np.testing.assert_array_almost_equal(flattened, sparkFeatures, decimal=6)
コード例 #5
0
 def test_featurization(self):
     """
     Check that featurizer output is close to the Keras features in cosine distance.

     The per-image cosine distance between the flattened Keras features and
     the transformer's "features" column must be almost zero, to
     ``self.featurizerCompareDigitsCosine`` decimals.
     """
     # Per the original note: image resizing goes through different libraries
     # (PIL in python vs. java.awt.Image in scala), so an exact match with
     # keras is not expected; only a "somewhat similar" result is checked.
     featurizer = DeepImageFeaturizer(modelName=self.name, inputCol="image",
                                      outputCol="features", scaleHint="SCALE_FAST")
     feature_rows = featurizer.transform(self.imageDF).select("features").collect()
     sparkFeatures = np.array([row.features for row in feature_rows])

     kerasFlat = self.kerasFeatures.reshape(self.kerasFeatures.shape[0], -1)
     # Index into the Keras array by position so a shorter Keras array still
     # raises instead of being silently truncated.
     cosineDistances = [spatial.distance.cosine(kerasFlat[idx], vec)
                        for idx, vec in enumerate(sparkFeatures)]
     np.testing.assert_array_almost_equal(0, cosineDistances,
                                          decimal=self.featurizerCompareDigitsCosine)
コード例 #6
0
 def test_featurization_no_reshape(self):
     """
     Featurize manually-resized images with sparkDL and compare to the keras result.

     Builds a DataFrame from ``self.imageArray`` using the image schema,
     optionally coalesces it to ``self.numPartitionsOverride`` partitions, and
     checks the "features" column against ``self.kerasFeatures`` to
     ``self.featurizerCompareDigitsExact`` decimals.
     """
     # One row per image in the array.
     image_rows = [self._rowWithImage(img) for img in self.imageArray]
     imageDf = self.sc.parallelize(image_rows).toDF(ImageSchema.imageSchema)
     if self.numPartitionsOverride:
         imageDf = imageDf.coalesce(self.numPartitionsOverride)

     featurizer = DeepImageFeaturizer(modelName=self.name, inputCol='image',
                                      outputCol="features")
     collected = featurizer.transform(imageDf).collect()
     sparkFeatures = np.array([row.features for row in collected])

     # Flatten the keras features past the leading axis before comparing.
     kerasFlat = self.kerasFeatures.reshape(self.kerasFeatures.shape[0], -1)
     np.testing.assert_array_almost_equal(
         kerasFlat, sparkFeatures, decimal=self.featurizerCompareDigitsExact)
コード例 #7
0
    def test_featurization(self):
        """
        Compare featurizer output with the Keras features.

        The transformer is run over ``self.imageDF`` and its "prediction"
        column is checked against ``self.kerasFeatures``, first by total
        element count and then value-by-value to 6 decimals.
        """
        output_col = "prediction"
        featurizer = DeepImageFeaturizer(modelName=self.name,
                                         outputCol=output_col,
                                         inputCol="image")
        rows = featurizer.transform(self.imageDF).collect()
        features = np.array([row.prediction for row in rows])

        keras_shape = self.kerasFeatures.shape
        # The keras array may have more dimensions than the transformer's
        # 1-d vectors, but the overall element counts should still match.
        self.assertEqual(np.prod(keras_shape), np.prod(features.shape))

        # Keep the leading axis and flatten the rest.
        keras_2d = self.kerasFeatures.reshape(keras_shape[0], -1)
        np.testing.assert_array_almost_equal(keras_2d, features, decimal=6)