def test_produces_splits_desiredsize_large_than_size(self):
    """Check that an oversized desired bundle size yields a single split.

    A ``SingleFileSource`` is built with the arguments ``0`` and ``60``
    (presumably the byte range of the ten records produced by
    ``write_data(10)`` -- TODO confirm) and split with
    ``desired_bundle_size=100``. The test expects exactly one split with
    weight 60 spanning positions 0..60, and expects reading that split to
    return every expected record, in any order.

    NOTE(review): a method with this same name is defined again later in
    this file; if both live in the same class body, the later definition
    replaces this one.
    """
    fbs = LineSource('dummy_pattern', validate=False)
    file_name, expected_data = write_data(10)
    # Sanity-check the fixture; a unittest assertion survives ``python -O``.
    self.assertEqual(10, len(expected_data))
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)

    splits = list(source.split(desired_bundle_size=100))
    self.assertEqual(1, len(splits))

    only_split = splits[0]
    self.assertEqual(60, only_split.weight)
    self.assertEqual(0, only_split.start_position)
    self.assertEqual(60, only_split.stop_position)

    range_tracker = only_split.source.get_range_tracker(None, None)
    read_data = list(only_split.source.read(range_tracker))
    # assertCountEqual compares elements and multiplicities, ignoring order.
    self.assertCountEqual(expected_data, read_data)
def test_produces_splits_desiredsize_large_than_size(self):
    """Check that an oversized desired bundle size yields a single split.

    A ``SingleFileSource`` is built with the arguments ``0`` and ``60``
    (presumably the byte range of the ten records produced by
    ``write_data(10)`` -- TODO confirm) and split with
    ``desired_bundle_size=100``. The test expects exactly one split with
    weight 60 spanning positions 0..60, and expects reading that split to
    return every expected record, in any order.
    """
    fbs = LineSource('dummy_pattern', validate=False)
    file_name, expected_data = write_data(10)
    # Sanity-check the fixture; a unittest assertion survives ``python -O``.
    self.assertEqual(10, len(expected_data))
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)

    splits = list(source.split(desired_bundle_size=100))
    self.assertEqual(1, len(splits))

    only_split = splits[0]
    self.assertEqual(60, only_split.weight)
    self.assertEqual(0, only_split.start_position)
    self.assertEqual(60, only_split.stop_position)

    range_tracker = only_split.source.get_range_tracker(None, None)
    read_data = list(only_split.source.read(range_tracker))
    # assertCountEqual compares elements and multiplicities, ignoring order.
    self.assertCountEqual(expected_data, read_data)
def test_produces_splits_desiredsize_smaller_than_size(self):
    """Check that a small desired bundle size yields several splits.

    A ``SingleFileSource`` is built with the arguments ``0`` and ``60``
    (presumably the byte range of the ten records produced by
    ``write_data(10)`` -- TODO confirm) and split with
    ``desired_bundle_size=25``. The test expects three splits and expects
    the records read across all of them to equal the expected records,
    in any order.

    NOTE(review): a method with this same name is defined again later in
    this file; if both live in the same class body, the later definition
    replaces this one.
    """
    fbs = LineSource('dummy_pattern', validate=False)
    file_name, expected_data = write_data(10)
    # Sanity-check the fixture; a unittest assertion survives ``python -O``.
    self.assertEqual(10, len(expected_data))
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)

    splits = list(source.split(desired_bundle_size=25))
    self.assertEqual(3, len(splits))

    read_data = []
    for split in splits:
        # Read each split through a tracker limited to its own range.
        split_source = split.source
        range_tracker = split_source.get_range_tracker(
            split.start_position, split.stop_position)
        read_data.extend(split_source.read(range_tracker))

    # assertCountEqual compares elements and multiplicities, ignoring order.
    self.assertCountEqual(expected_data, read_data)
def test_produces_splits_desiredsize_smaller_than_size(self):
    """Check that a small desired bundle size yields several splits.

    A ``SingleFileSource`` is built with the arguments ``0`` and ``60``
    (presumably the byte range of the ten records produced by
    ``write_data(10)`` -- TODO confirm) and split with
    ``desired_bundle_size=25``. The test expects three splits and expects
    the records read across all of them to equal the expected records,
    in any order.
    """
    fbs = LineSource('dummy_pattern', validate=False)
    file_name, expected_data = write_data(10)
    # Sanity-check the fixture; a unittest assertion survives ``python -O``.
    self.assertEqual(10, len(expected_data))
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)

    splits = list(source.split(desired_bundle_size=25))
    self.assertEqual(3, len(splits))

    read_data = []
    for split in splits:
        # Read each split through a tracker limited to its own range.
        split_source = split.source
        range_tracker = split_source.get_range_tracker(
            split.start_position, split.stop_position)
        read_data.extend(split_source.read(range_tracker))

    # assertCountEqual compares elements and multiplicities, ignoring order.
    self.assertCountEqual(expected_data, read_data)
def test_produce_split_with_start_and_end_positions(self):
    """Check splitting restricted to an explicit start/stop offset window.

    A ``SingleFileSource`` is built with the arguments ``0`` and ``60``
    and split with ``desired_bundle_size=15``, ``start_offset=10`` and
    ``stop_offset=50``. The test expects three splits and expects the
    records read across all of them to equal ``expected_data[2:9]``, in
    any order.

    NOTE(review): a method with this same name is defined again later in
    this file; if both live in the same class body, the later definition
    replaces this one.
    """
    fbs = LineSource('dummy_pattern', validate=False)
    file_name, expected_data = write_data(10)
    # Sanity-check the fixture; a unittest assertion survives ``python -O``.
    self.assertEqual(10, len(expected_data))
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)

    splits = list(source.split(
        desired_bundle_size=15, start_offset=10, stop_offset=50))
    self.assertEqual(3, len(splits))

    read_data = []
    for split in splits:
        # Read each split through a tracker limited to its own range.
        split_source = split.source
        range_tracker = split_source.get_range_tracker(
            split.start_position, split.stop_position)
        read_data.extend(split_source.read(range_tracker))

    # Only the records at indices 2..8 are expected from the 10..50 window.
    self.assertCountEqual(expected_data[2:9], read_data)
def test_produce_split_with_start_and_end_positions(self):
    """Check splitting restricted to an explicit start/stop offset window.

    A ``SingleFileSource`` is built with the arguments ``0`` and ``60``
    and split with ``desired_bundle_size=15``, ``start_offset=10`` and
    ``stop_offset=50``. The test expects three splits and expects the
    records read across all of them to equal ``expected_data[2:9]``, in
    any order.
    """
    fbs = LineSource('dummy_pattern', validate=False)
    file_name, expected_data = write_data(10)
    # Sanity-check the fixture; a unittest assertion survives ``python -O``.
    self.assertEqual(10, len(expected_data))
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)

    splits = list(source.split(
        desired_bundle_size=15, start_offset=10, stop_offset=50))
    self.assertEqual(3, len(splits))

    read_data = []
    for split in splits:
        # Read each split through a tracker limited to its own range.
        split_source = split.source
        range_tracker = split_source.get_range_tracker(
            split.start_position, split.stop_position)
        read_data.extend(split_source.read(range_tracker))

    # Only the records at indices 2..8 are expected from the 10..50 window.
    self.assertCountEqual(expected_data[2:9], read_data)