def test_read_range_at_beginning(self):
    """Read through a (0, 20) range tracker and expect ``expected_data[:4]``.

    The comparison is order-insensitive (``assertCountEqual``).
    """
    line_source = LineSource('dummy_pattern', validate=False)
    path, expected_data = write_data(10)
    assert len(expected_data) == 10

    # NOTE(review): 10 * 6 is presumably the file size in bytes (10 records
    # of 6 bytes each) -- confirm against write_data.
    single_file = SingleFileSource(line_source, path, 0, 10 * 6)
    tracker = single_file.get_range_tracker(0, 20)
    records = list(single_file.read(tracker))

    self.assertCountEqual(expected_data[:4], records)
def test_read_range_at_middle(self):
    """Read through a (20, 40) range tracker and expect ``expected_data[4:7]``.

    The comparison is order-insensitive.
    """
    fbs = LineSource('dummy_pattern', validate=False)
    file_name, expected_data = write_data(10)
    assert len(expected_data) == 10

    # NOTE(review): 10 * 6 is presumably the file size in bytes (10 records
    # of 6 bytes each) -- confirm against write_data.
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)
    range_tracker = source.get_range_tracker(20, 40)
    read_data = list(source.read(range_tracker))

    # assertItemsEqual only exists in Python 2's unittest; assertCountEqual
    # is its Python 3 name and performs the same unordered comparison.
    self.assertCountEqual(expected_data[4:7], read_data)
def test_read_range_at_middle(self):
    """Read through a (20, 40) range tracker and expect ``expected_data[4:7]``.

    The comparison is order-insensitive (``assertCountEqual``).
    """
    line_source = LineSource('dummy_pattern', validate=False)
    path, expected_data = write_data(10)
    assert len(expected_data) == 10

    # NOTE(review): 10 * 6 is presumably the file size in bytes (10 records
    # of 6 bytes each) -- confirm against write_data.
    single_file = SingleFileSource(line_source, path, 0, 10 * 6)
    tracker = single_file.get_range_tracker(20, 40)
    records = list(single_file.read(tracker))

    self.assertCountEqual(expected_data[4:7], records)
def test_read_range_at_beginning(self):
    """Read through a (0, 20) range tracker and expect ``expected_data[:4]``.

    The comparison is order-insensitive.
    """
    fbs = LineSource('dummy_pattern', validate=False)
    file_name, expected_data = write_data(10)
    assert len(expected_data) == 10

    # NOTE(review): 10 * 6 is presumably the file size in bytes (10 records
    # of 6 bytes each) -- confirm against write_data.
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)
    range_tracker = source.get_range_tracker(0, 20)
    read_data = list(source.read(range_tracker))

    # assertItemsEqual only exists in Python 2's unittest; assertCountEqual
    # is its Python 3 name and performs the same unordered comparison.
    self.assertCountEqual(expected_data[:4], read_data)
def test_produces_splits_desiredsize_smaller_than_size(self):
    """Split with ``desired_bundle_size=25`` and read every split back.

    Expects exactly three splits whose combined records equal
    ``expected_data`` (compared without regard to order).
    """
    fbs = LineSource('dummy_pattern', validate=False)
    file_name, expected_data = write_data(10)
    assert len(expected_data) == 10

    # NOTE(review): 10 * 6 is presumably the file size in bytes (10 records
    # of 6 bytes each) -- confirm against write_data.
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)
    splits = list(source.split(desired_bundle_size=25))
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(3, len(splits))

    read_data = []
    for split in splits:
        # Each split carries its own source and position bounds; read it
        # through a tracker built from those bounds.
        range_tracker = split.source.get_range_tracker(
            split.start_position, split.stop_position)
        read_data.extend(split.source.read(range_tracker))

    # assertItemsEqual only exists in Python 2's unittest; assertCountEqual
    # is its Python 3 name and performs the same unordered comparison.
    self.assertCountEqual(expected_data, read_data)
def test_produces_splits_desiredsize_smaller_than_size(self):
    """Split with ``desired_bundle_size=25`` and read every split back.

    Expects exactly three splits whose combined records equal
    ``expected_data`` (compared without regard to order).
    """
    fbs = LineSource('dummy_pattern', validate=False)
    file_name, expected_data = write_data(10)
    assert len(expected_data) == 10

    # NOTE(review): 10 * 6 is presumably the file size in bytes (10 records
    # of 6 bytes each) -- confirm against write_data.
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)
    splits = list(source.split(desired_bundle_size=25))
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(3, len(splits))

    read_data = []
    for split in splits:
        # Each split carries its own source and position bounds; read it
        # through a tracker built from those bounds.
        range_tracker = split.source.get_range_tracker(
            split.start_position, split.stop_position)
        read_data.extend(split.source.read(range_tracker))

    self.assertCountEqual(expected_data, read_data)
def test_produce_split_with_start_and_end_positions(self):
    """Split the offset window 10..50 with ``desired_bundle_size=15``.

    Expects exactly three splits whose combined records equal
    ``expected_data[2:9]`` (compared without regard to order).
    """
    fbs = LineSource('dummy_pattern', validate=False)
    file_name, expected_data = write_data(10)
    assert len(expected_data) == 10

    # NOTE(review): 10 * 6 is presumably the file size in bytes (10 records
    # of 6 bytes each) -- confirm against write_data.
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)
    splits = list(
        source.split(desired_bundle_size=15, start_offset=10, stop_offset=50))
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(3, len(splits))

    read_data = []
    for split in splits:
        # Each split carries its own source and position bounds; read it
        # through a tracker built from those bounds.
        range_tracker = split.source.get_range_tracker(
            split.start_position, split.stop_position)
        read_data.extend(split.source.read(range_tracker))

    # assertItemsEqual only exists in Python 2's unittest; assertCountEqual
    # is its Python 3 name and performs the same unordered comparison.
    self.assertCountEqual(expected_data[2:9], read_data)
def test_produce_split_with_start_and_end_positions(self):
    """Split the offset window 10..50 with ``desired_bundle_size=15``.

    Expects exactly three splits whose combined records equal
    ``expected_data[2:9]`` (compared without regard to order).
    """
    fbs = LineSource('dummy_pattern', validate=False)
    file_name, expected_data = write_data(10)
    assert len(expected_data) == 10

    # NOTE(review): 10 * 6 is presumably the file size in bytes (10 records
    # of 6 bytes each) -- confirm against write_data.
    source = SingleFileSource(fbs, file_name, 0, 10 * 6)
    splits = list(
        source.split(desired_bundle_size=15, start_offset=10, stop_offset=50))
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(3, len(splits))

    read_data = []
    for split in splits:
        # Each split carries its own source and position bounds; read it
        # through a tracker built from those bounds.
        range_tracker = split.source.get_range_tracker(
            split.start_position, split.stop_position)
        read_data.extend(split.source.read(range_tracker))

    self.assertCountEqual(expected_data[2:9], read_data)