class BaseXlSeriesScraper(object):
    """Common base for the highest level algorithms of `xlseries`.

    The public methods only delegate to the `_accepts` and
    `_get_data_frames` hooks, which are not defined in the visible part of
    this class (presumably supplied by the concrete strategies).

    Attributes:
        wb (Workbook): An openpyxl workbook loaded with the
            "data_only=True" parameter (this avoids reading formulae).
        ws_name (str): Name of the worksheet to parse, as received by the
            constructor (may be None).
        ws: Worksheet to parse: `wb[ws_name]` when a truthy name was given,
            otherwise the active sheet of the workbook.
        params (Parameters): Parameters ready to be used in parsing wb. If
            they are not passed, the strategy will have to discover them or
            adopt a different approach to parse wb.
    """

    def __init__(self, wb, params_path_or_obj=None, ws_name=None,
                 headers_validation=False):
        """Store the workbook, pick the worksheet and set up parameters.

        Args:
            wb (Workbook): Workbook to be scraped.
            params_path_or_obj: Either a `Parameters` instance, used as is,
                or any other value, which is handed to the `Parameters`
                constructor.
            ws_name (str): Optional name of the worksheet to use; the
                active worksheet is used when it is falsy.
            headers_validation (bool): When truthy, header coordinates
                pointing to blank cells are removed from the parameters.
        """
        self.wb = wb
        self.ws_name = ws_name
        self.ws = self.wb[self.ws_name] if self.ws_name else self.wb.active

        # Normalize whatever was received into a Parameters object.
        parameters = params_path_or_obj
        if not isinstance(parameters, Parameters):
            parameters = Parameters(parameters)
        self.params = parameters

        if headers_validation:
            # Drop the header coordinates whose cells hold no value.
            self.params.remove_blank_headers(self.ws)

    # PUBLIC INTERFACE
    @classmethod
    def accepts(cls, wb):
        """Return whatever the `_accepts` hook of the class says about wb."""
        return cls._accepts(wb)

    def get_data_frames(self, safe_mode):
        """Return the result of `_get_data_frames` for the stored state.

        The hook receives the selected worksheet, the parameters and the
        `safe_mode` flag, in that order.
        """
        return self._get_data_frames(self.ws, self.params, safe_mode)
def test_remove_blank_headers(self):
    """Check `Parameters.remove_blank_headers` against one worksheet.

    Three scenarios are exercised, each expecting one `data_starts` and
    one `data_ends` entry per header coordinate that is left:

    1. "C1" has no value, so only "A1" and "B1" are expected to remain.
    2. With composed coordinates ("A1_A2", "C1_C2") and every first-row
       cell filled, the expected result is ["A2", "B1", "C2"].
    3. "E1" to "E3" are blank and expected to be dropped, while "E4",
       which holds a value, must be kept.
    """

    def make_params(headers_coord):
        # Only the header coordinates change between scenarios.
        return Parameters({
            "headers_coord": headers_coord,
            "data_starts": 2,
            "data_ends": 256,
            "frequency": "m",
            "time_header_coord": "A1",
        })

    def fill(sheet, cells):
        # Write each (coordinate, text) pair into the worksheet, in order.
        for coord, text in cells:
            sheet[coord].value = text

    def check(parameters, expected_headers):
        # Every surviving header must keep its own start/end row.
        count = len(expected_headers)
        self.assertEqual(parameters["headers_coord"], expected_headers)
        self.assertEqual(parameters["data_starts"], [2] * count)
        self.assertEqual(parameters["data_ends"], [256] * count)

    workbook = Workbook()
    sheet = workbook.active

    # Scenario 1: the last header cell is blank.
    params = make_params(["A1", "B1", "C1"])
    fill(sheet, (("A1", "Importaciones"), ("B1", "Exportaciones")))
    params.remove_blank_headers(sheet)
    check(params, ["A1", "B1"])

    # Scenario 2: composed header coordinates, first row fully filled.
    params = make_params(["A1_A2", "B1", "C1_C2"])
    fill(sheet, (("A1", "Importaciones"),
                 ("B1", "Exportaciones"),
                 ("C1", "Saldo")))
    params.remove_blank_headers(sheet)
    check(params, ["A2", "B1", "C2"])

    # Scenario 3: several blank headers in column E, only "E4" has a value.
    fill(sheet, (("E4", "dont remove!"),))
    params = make_params(["A1", "E1", "E2", "E3", "E4"])
    fill(sheet, (("A1", "Importaciones"),
                 ("B1", "Exportaciones"),
                 ("C1", "Saldo")))
    params.remove_blank_headers(sheet)
    check(params, ["A1", "E4"])