Beispiel #1
0
 def testCombineOnBatchSimple(self):
   """Combines two all-ones batches with np.sum and expects six 3s.

   The batches hold arrays of shape (2, 6) and (1, 6); the combiner spec is
   built with reduce_instance_dims=False.
   """
   first_batch = [np.ones((2, 6))]
   second_batch = [np.ones((1, 6))]
   expected = [3] * 6
   combiner_spec = analyzers._NumPyCombinerSpec(
       np.sum, reduce_instance_dims=False)
   analyzer = impl._CombineFnWrapper(combiner_spec)
   self.assertCombine(analyzer, [first_batch, second_batch], expected)
Beispiel #2
0
 def testCombineOnBatchLotsOfData(self):
   """Combines twice the default batch size worth of shards with np.min.

   Every shard is a one-element list holding a length-3 array of ones, and the
   expected combined output is three 1s.
   """
   shard_count = beam_impl._DEFAULT_DESIRED_BATCH_SIZE * 2
   # Build a fresh list and array per shard so no objects are shared.
   shards = [[np.ones(3)] for _ in range(shard_count)]
   expected = [1] * 3
   combiner_spec = analyzers._NumPyCombinerSpec(
       np.min, reduce_instance_dims=False)
   analyzer = impl._CombineFnWrapper(combiner_spec)
   self.assertCombine(analyzer, shards, expected)
Beispiel #3
0
 def testCombineOnBatchLotsOfData(self):
     """Combines 2000 single-row shards with np.min and expects three 1s.

     Every shard is a one-element list holding a (1, 3) array of ones; the
     combiner spec requests np.int64 as its only output dtype.
     """
     shard_count = 2000
     # Build a fresh list and array per shard so no objects are shared.
     shards = [[np.ones((1, 3))] for _ in range(shard_count)]
     expected = [1] * 3
     combiner_spec = analyzers._NumPyCombinerSpec(
         np.min, reduce_instance_dims=False, output_dtypes=[np.int64])
     analyzer = impl._CombineFnWrapper(combiner_spec)
     self.assertCombine(analyzer, shards, expected)
Beispiel #4
0
  def testCombineOnBatchWithBeamPipeline(self):
    """Runs the np.sum combiner through a real Beam pipeline.

    Two all-ones integer batches with shapes (2, 6) and (1, 6) are combined
    globally, and the single analyzer output is expected to equal an array of
    six 3s.
    """
    # Test with a real Beam pipeline instead of calling the Combiner methods
    # directly.  This surfaces bugs that only occur within a Beam pipeline, e.g.
    # due to Beam passing iterators to merge_accumulators instead of lists.
    with beam.Pipeline() as p:
      # Use the builtin `int` as dtype: `np.int` was merely an alias for it,
      # deprecated in NumPy 1.20 and removed in NumPy 1.24.
      batch_1 = [np.ones((2, 6), dtype=int)]
      batch_2 = [np.ones((1, 6), dtype=int)]
      expected_output = np.ones(6) * 3

      def assert_equals_expected(outputs):
        """Returns True iff the sole analyzer output equals expected_output."""
        output, = outputs  # Expect exactly one analyzer output
        return np.array_equal(output, expected_output)

      analyzer = impl._CombineFnWrapper(
          analyzers._NumPyCombinerSpec(np.sum, reduce_instance_dims=False))
      assert_that(p
                  | beam.Create([batch_1, batch_2])
                  | beam.CombineGlobally(analyzer)
                  | beam.Map(assert_equals_expected),
                  equal_to([True]))
Beispiel #5
0
 def testCombineOnBatchAllEmptyRow(self):
   """Combines three batches whose only row is empty; expects an empty output."""
   # Each batch is a separate `[[[]]]` object, i.e. one value with one empty row.
   empty_batches = [[[[]]] for _ in range(3)]
   combiner_spec = analyzers._NumPyCombinerSpec(
       np.sum, reduce_instance_dims=False)
   analyzer = impl._CombineFnWrapper(combiner_spec)
   self.assertCombine(analyzer, empty_batches, [])