def test_create_source_dynamic_splitting(self):
    """Exhaustively verify dynamic splitting on small and larger sources."""
    # Source with just two values.
    small_source = Create._create_source_from_iterable(range(2), self.coder)
    source_test_utils.assert_split_at_fraction_exhaustive(small_source)
    # Source with multiple values; also exercise the multi-threaded path.
    larger_source = Create._create_source_from_iterable(range(11), self.coder)
    source_test_utils.assert_split_at_fraction_exhaustive(
        larger_source, perform_multi_threaded_test=True)
def check_read_with_initial_splits(self, values, coder, num_splits):
    """Split the source into `num_splits` bundles and verify that the data
    read from the original source equals the union of the data read from
    the split sources.
    """
    original = Create._create_source_from_iterable(values, coder)
    bundle_size = original._total_size / num_splits
    split_info = [
        (bundle.source, bundle.start_position, bundle.stop_position)
        for bundle in original.split(bundle_size)
    ]
    source_test_utils.assert_sources_equal_reference_source(
        (original, None, None), split_info)
def test_create_source_progress(self):
    """Verify fraction_consumed and split_points reporting while reading.

    Reads every element of a single-split source and checks that the range
    tracker reports a linearly increasing consumed fraction and the expected
    (consumed, remaining) split-point counts at each step.
    """
    num_values = 10
    source = Create._create_source_from_iterable(range(num_values), self.coder)
    # list(...) instead of an identity comprehension (ruff C416).
    splits = list(source.split(desired_bundle_size=100))
    # Use the unittest assertion instead of a bare `assert` so a failure
    # produces a readable message and is not stripped under `python -O`.
    self.assertEqual(1, len(splits))
    fraction_consumed_report = []
    split_points_report = []
    range_tracker = splits[0].source.get_range_tracker(
        splits[0].start_position, splits[0].stop_position)
    for _ in splits[0].source.read(range_tracker):
        fraction_consumed_report.append(range_tracker.fraction_consumed())
        split_points_report.append(range_tracker.split_points())
    self.assertEqual(
        [float(i) / num_values for i in range(num_values)],
        fraction_consumed_report)
    expected_split_points_report = [
        ((i - 1), num_values - (i - 1)) for i in range(1, num_values + 1)]
    self.assertEqual(expected_split_points_report, split_points_report)
def test_assert_missing(self):
    """assert_that reports which expected elements are absent."""
    expected_error = r"missing elements \['c'\]"
    with self.assertRaisesRegex(BeamAssertException, expected_error):
        with TestPipeline() as p:
            actual = p | Create(['a', 'b'])
            assert_that(actual, equal_to(['a', 'b', 'c']))
def test_assert_that_passes_empty_is_empty(self):
    """An empty PCollection satisfies the is_empty() matcher."""
    with TestPipeline() as p:
        empty = p | Create([])
        assert_that(empty, is_empty())
def test_assert_that_fails(self):
    """assert_that raises when actual and expected elements differ."""
    with self.assertRaises(Exception):
        with TestPipeline() as p:
            pcoll = p | Create([1, 10, 100])
            assert_that(pcoll, equal_to([1, 2, 3]))
def test_assert_that_passes_order_does_not_matter_with_negatives(self):
    """equal_to ignores element order, including for negative values."""
    with TestPipeline() as p:
        pcoll = p | Create([1, -2, 3])
        assert_that(pcoll, equal_to([-2, 1, 3]))
def test_assert_that_passes_empty_equal_to(self):
    """An empty PCollection matches equal_to([])."""
    with TestPipeline() as p:
        empty = p | Create([])
        assert_that(empty, equal_to([]))
def test_reified_value_passes(self):
    """With reify_windows, elements compare as full windowed values."""
    expected = [
        TestWindowedValue(v, MIN_TIMESTAMP, [GlobalWindow()])
        for v in [1, 2, 3]
    ]
    with TestPipeline() as p:
        pcoll = p | Create([2, 3, 1])
        assert_that(pcoll, equal_to(expected), reify_windows=True)
def test_assert_that_passes(self):
    """assert_that succeeds when actual elements equal the expected set."""
    with TestPipeline() as p:
        pcoll = p | Create([1, 2, 3])
        assert_that(pcoll, equal_to([1, 2, 3]))
def test_create_source_read_reentrant(self):
    """Reentrant reads over the whole source succeed."""
    source = Create._create_source_from_iterable(range(9), self.coder)
    source_test_utils.assert_reentrant_reads_succeed((source, None, None))
def test_create_transform(self):
    """Create handles empty, single-None, and multi-element inputs."""
    with TestPipeline() as p:
        assert_that(p | 'Empty' >> Create([]), equal_to([]), label='empty')
        assert_that(
            p | 'One' >> Create([None]), equal_to([None]), label='one')
        assert_that(p | Create(list(range(10))), equal_to(list(range(10))))
def test_create_source_read_reentrant_with_initial_splits(self):
    """Reentrant reads succeed on each initial split of the source."""
    source = Create._create_source_from_iterable(range(24), self.coder)
    for bundle in source.split(desired_bundle_size=5):
        source_test_utils.assert_reentrant_reads_succeed(
            (bundle.source, bundle.start_position, bundle.stop_position))
def expand(self, pcoll):
    """Fan the stored consumer arguments out to the Kafka-consuming DoFn."""
    args_pcoll = pcoll | Create([self._consumer_args])
    return args_pcoll | ParDo(_ConsumeKafkaTopic())
def check_read(self, values, coder):
    """Read everything from a created source and compare against `values`."""
    source = Create._create_source_from_iterable(values, coder)
    actual = source_test_utils.read_from_source(source)
    self.assertEqual(sorted(values), sorted(actual))
def test_create_transform(self):
    """Create over a range yields exactly the range's elements."""
    with TestPipeline() as p:
        pcoll = p | Create(range(10))
        assert_that(pcoll, equal_to(range(10)))
def test_read_all_from_avro_file_pattern(self):
    """ReadAllFromAvro expands a glob pattern and reads every matched file."""
    file_pattern = self._write_pattern(5)
    with TestPipeline() as p:
        readback = p | Create([file_pattern]) | avroio.ReadAllFromAvro()
        assert_that(readback, equal_to(self.RECORDS * 5))
def test_assert_that_fails_on_empty_input(self):
    """An empty actual PCollection fails a non-empty equal_to expectation."""
    with self.assertRaises(Exception):
        with TestPipeline() as p:
            empty = p | Create([])
            assert_that(empty, equal_to([1, 2, 3]))
def expand(self, pcoll):
    """Fan the stored read arguments out to the relational-DB reading DoFn."""
    args_pcoll = pcoll | Create([self._read_args])
    return args_pcoll | ParDo(_ReadFromRelationalDBFn())
def test_create_source_read_reentrant_with_initial_splits(self):
    """Each initial split of the source supports reentrant reads."""
    source = Create._create_source_from_iterable(range(24), self.coder)
    for part in source.split(desired_bundle_size=5):
        split_spec = (part.source, part.start_position, part.stop_position)
        source_test_utils.assert_reentrant_reads_succeed(split_spec)
def test_assert_that_fails_on_empty_expected(self):
    """A non-empty PCollection fails the is_empty() matcher."""
    with self.assertRaises(Exception):
        with TestPipeline() as p:
            pcoll = p | Create([1, 2, 3])
            assert_that(pcoll, is_empty())
def test_assert_that_fails_on_is_not_empty_expected(self):
    """An empty PCollection fails the is_not_empty() matcher."""
    with self.assertRaises(BeamAssertException):
        with TestPipeline() as p:
            empty = p | Create([])
            assert_that(empty, is_not_empty())
def test_read_all_from_avro_single_file(self):
    """ReadAllFromAvro reads back all records from a single written file."""
    path = self._write_data()
    with TestPipeline() as p:
        readback = p | Create([path]) | avroio.ReadAllFromAvro()
        assert_that(readback, equal_to(self.RECORDS))