Esempio n. 1
0
 def test_cross_feature_stats_generator(self):
     """Checks cross-feature stats computed over univalent numeric batches.

     Feeds the generator two batches of single-value float features
     ('a', 'b', 'c') plus one batch whose columns are all null-typed, and
     expects one CrossFeatureStatistics proto per feature pair. Every
     expected proto has count 5, which equals the number of rows in the
     first two batches (3 + 2).
     """
     feature_names = ['a', 'b', 'c']

     def _as_cross_stats(text_proto):
         # Parse a text-format proto into a CrossFeatureStatistics message.
         return text_format.Parse(text_proto,
                                  statistics_pb2.CrossFeatureStatistics())

     stats_generator = cross_feature_stats_generator.CrossFeatureStatsGenerator(
         sample_rate=1.0)

     # Two batches carrying one float value per row for each feature.
     first_batch = pa.RecordBatch.from_arrays(
         [
             pa.array([[1.0], [3.0], [5.0]]),
             pa.array([[2.0], [4.0], [6.0]]),
             pa.array([[5.0], [3.0], [7.0]]),
         ],
         feature_names)
     second_batch = pa.RecordBatch.from_arrays(
         [
             pa.array([[6.0], [10.0]]),
             pa.array([[14.0], [16.0]]),
             pa.array([[-1.0], [0]]),
         ],
         feature_names)
     # A batch in which every feature column is a null-typed array.
     all_null_batch = pa.RecordBatch.from_arrays(
         [pa.array([None, None], type=pa.null()) for _ in feature_names],
         feature_names)

     stats_a_b = _as_cross_stats("""
         path_x { step: "a" }
         path_y { step: "b" }
         count: 5
         num_cross_stats {
           correlation: 0.923145
           covariance: 15.6
         }
         """)
     stats_a_c = _as_cross_stats("""
         path_x { step: "a" }
         path_y { step: "c" }
         count: 5
         num_cross_stats {
           correlation: -0.59476602
           covariance: -5.4000001
         }
         """)
     stats_b_c = _as_cross_stats("""
         path_x { step: "b" }
         path_y { step: "c" }
         count: 5
         num_cross_stats {
           correlation: -0.81070298
           covariance: -13.52
         }
         """)
     expected_by_pair = {
         ('a', 'b'): stats_a_b,
         ('a', 'c'): stats_a_c,
         ('b', 'c'): stats_b_c,
     }

     self.assertCombinerOutputEqual(
         [first_batch, second_batch, all_null_batch], stats_generator, {},
         expected_by_pair)
Esempio n. 2
0
 def test_cross_feature_stats_generator_multivalent_feature(self):
     """Checks cross-feature stats when one row holds a multivalent feature.

     The last row of the second batch carries two values for feature 'a'.
     The expected proto has count 5 although the batches hold six rows in
     total, i.e. the multivalent row is expected to be left out of the
     ('a', 'b') statistics.
     """
     feature_names = ['a', 'b']
     stats_generator = cross_feature_stats_generator.CrossFeatureStatsGenerator(
         sample_rate=1.0)

     # Batch with exactly one value per row for both features.
     univalent_batch = pa.RecordBatch.from_arrays(
         [
             pa.array([[1.0], [3.0], [5.0]]),
             pa.array([[2.0], [4.0], [6.0]]),
         ],
         feature_names)
     # Batch whose final row has two values for 'a' and one for 'b'.
     mixed_batch = pa.RecordBatch.from_arrays(
         [
             pa.array([[6.0], [10.0], [1.0, 2.0]]),
             pa.array([[14.0], [16.0], [3.9]]),
         ],
         feature_names)

     stats_a_b = text_format.Parse(
         """
         path_x { step: "a" }
         path_y { step: "b" }
         count: 5
         num_cross_stats {
           correlation: 0.923145
           covariance: 15.6
         }
         """, statistics_pb2.CrossFeatureStatistics())
     expected_by_pair = {('a', 'b'): stats_a_b}

     self.assertCombinerOutputEqual(
         [univalent_batch, mixed_batch], stats_generator, {},
         expected_by_pair)