def test_right_of_locator_regex(wb: Workbook):
    """Check that RightOfRegexLocator yields a good result pointing at C4.

    The locator is run against the ``wb`` workbook with an anchor at
    ``Sheet!A1`` and is expected to resolve to ``Sheet!C4``.
    """
    anchor = CellLocation(sheet_name="Sheet", coordinate="A1")
    locator = RightOfRegexLocator(regex='the key\\s\\d')

    actual = locator.locate(anchor_cell_location=anchor, workbook=wb)

    expected_location = CellLocation(sheet_name="Sheet", coordinate="C4")
    expected = LocatingResult.good(expected_location)
    assert actual == expected
def shift_column_direction(self, cl: CellLocation,
                           offset: int) -> CellLocation:
    """Shift a cell location along the table's column direction.

    Args:
        cl (CellLocation): The location to shift.
        offset (int): Amount handed to ``shift_col`` / ``shift_row``.

    Returns:
        CellLocation: ``cl.shift_col(offset)`` when ``self.item_direction``
        is ``TableItemDirection.DOWNWARD``; ``cl.shift_row(offset)`` for
        any other direction.
    """
    items_go_downward = self.item_direction == TableItemDirection.DOWNWARD
    return cl.shift_col(offset) if items_go_downward else cl.shift_row(offset)
def test_right_of_locator_fail(wb: Workbook):
    """Check that BelowOfLocator reports a bad result for 'the key'.

    Note: despite the test name, the locator exercised here is
    ``BelowOfLocator``; the expected message is its "below of" failure text.
    """
    anchor = CellLocation(sheet_name="Sheet", coordinate="A3")
    locator = BelowOfLocator(label='the key')

    outcome = locator.locate(anchor_cell_location=anchor, workbook=wb)

    expected = LocatingResult.bad(msg='Unable to find cell below of the key')
    assert outcome == expected
def test_location_abstract():
    """Using a bare ``Locator`` is expected to raise NotImplementedError."""
    with pytest.raises(NotImplementedError):
        # Everything stays inside the context manager so that the expected
        # error is accepted from construction as well as from locate().
        base_locator = Locator()
        anchor = CellLocation(sheet_name="Sheet", coordinate="A1")
        base_locator.locate(anchor_cell_location=anchor, workbook=Workbook())
def test_right_of_locator_regex_failed(wb: Workbook):
    """Check that RightOfRegexLocator reports a bad result for this regex.

    The expected failure message embeds the regex text verbatim.
    """
    anchor = CellLocation(sheet_name="Sheet", coordinate="A1")
    locator = RightOfRegexLocator(regex='the key\\s\\ds')

    outcome = locator.locate(anchor_cell_location=anchor, workbook=wb)

    expected = LocatingResult.bad(
        msg='Unable to find cell to the right of the key\\s\\ds')
    assert outcome == expected
def test_deref_recursive():
    """Check that ``deref`` resolves ``<<A2>>`` when nested inside a list."""
    wb = openpyxl.Workbook()
    sheet = wb.active
    # Fill A1..A10 with the numbers 1..10.
    for row_number in range(1, 11):
        sheet.cell(row=row_number, column=1).value = row_number

    anchor = CellLocation(sheet_name=sheet.title, coordinate='A1')
    dereferator = Dereferator.template_to_spec(workbook=wb, anchor=anchor)

    assert dereferator.deref(['a', '<<A2>>']) == ['a', 2]
def test_assumption_spec_deref():
    """Check that an AssumptionSpec name of ``<<A1>>`` derefs to A1's value."""
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet['A1'] = 'hello'

    anchor = CellLocation(sheet.title, 'A2')
    dereferator = Dereferator.template_to_spec(workbook=workbook,
                                               anchor=anchor)

    raw_assumption = {"name": "<<A1>>", "label": "something"}
    resolved = AssumptionSpec.from_dict(raw_assumption).deref(dereferator)

    assert resolved.name == 'hello'
def locate(self, anchor_cell_location: CellLocation,
           workbook: Workbook) -> LocatingResult:
    """Locate the cell offset by ``(1, 0)`` from the cell holding the label.

    The sheet named by ``anchor_cell_location`` is scanned in
    ``iter_rows()`` order and the first cell whose value equals
    ``self.label`` is used. Only the anchor's sheet name is consulted; its
    coordinate is not.

    Args:
        anchor_cell_location (CellLocation): Supplies the sheet to search.
        workbook (Workbook): Workbook containing that sheet.

    Returns:
        LocatingResult: A good result wrapping the shifted location, or a
        bad result with an explanatory message when no cell matches.
    """
    sheet_name = anchor_cell_location.sheet_name
    sheet: Worksheet = workbook[sheet_name]

    label_cells = (cell
                   for row in sheet.iter_rows()
                   for cell in row
                   if cell.value == self.label)
    label_cell = next(label_cells, None)

    if label_cell is None:
        return LocatingResult.bad(
            msg=f"Unable to find cell below of {self.label}")

    # (1, 0) is handed to util.shift_coord as-is; presumably one row down,
    # matching the "below of" wording -- confirm against util.shift_coord.
    target = CellLocation(
        sheet_name=sheet_name,
        coordinate=util.shift_coord(label_cell.coordinate, (1, 0)))
    return LocatingResult.good(target)
def test_shift_cell_rightward(cell_loc):
    """Check both shift helpers on a table whose items run RIGHTWARD.

    From ``A1``, a column-direction shift of 1 is expected to give ``A2``
    and an item-direction shift of 1 is expected to give ``B1``.
    """
    column_tasks = {
        CellOffset(0, 0): CellExtractionTask.simple(key='some_key',
                                                    parser=StringParser()),
    }
    table_task = TableExtractionTask(
        key="some_table",
        locator=AtCommentCellLocator(),
        columns=column_tasks,
        end_condition=EndConditionCollection.default(),
        item_direction=TableItemDirection.RIGHTWARD)

    origin = CellLocation(sheet_name='Sheet', coordinate='A1')

    assert table_task.shift_column_direction(origin, 1).coordinate == 'A2'
    assert table_task.shift_item_direction(origin, 1).coordinate == 'B1'
def locate(self, anchor_cell_location: CellLocation,
           workbook: Workbook) -> LocatingResult:
    """Locate the cell offset by ``(0, 1)`` from the first regex match.

    The sheet named by ``anchor_cell_location`` is scanned in
    ``iter_rows()`` order. Each cell value is converted with ``str`` and
    must match ``self.regex`` in full (``fullmatch``). Only the anchor's
    sheet name is consulted; its coordinate is not.

    Args:
        anchor_cell_location (CellLocation): Supplies the sheet to search.
        workbook (Workbook): Workbook containing that sheet.

    Returns:
        LocatingResult: A good result wrapping the shifted location, or a
        bad result with an explanatory message when no cell matches.
    """
    sheet_name = anchor_cell_location.sheet_name
    sheet: Worksheet = workbook[sheet_name]
    pattern = re.compile(self.regex)

    # NOTE(review): str(cell.value) turns an empty cell (None) into the
    # text "None", so such cells can match some patterns -- confirm this
    # is intended.
    matching_cells = (cell
                      for row in sheet.iter_rows()
                      for cell in row
                      if pattern.fullmatch(str(cell.value)) is not None)
    matched_cell = next(matching_cells, None)

    if matched_cell is None:
        return LocatingResult.bad(
            msg=f"Unable to find cell to the right of {self.regex}")

    target = CellLocation(
        sheet_name=sheet_name,
        coordinate=util.shift_coord(matched_cell.coordinate, (0, 1)))
    return LocatingResult.good(target)
def test_extraction_task_locating_failed(wb: Workbook):
    """Check that the task result is not ok and carries the fallback value.

    The task uses a ``RightOfLocator`` with the label ``"hi"``.
    """
    task = CellExtractionTask(
        key="h1",
        locator=RightOfLocator(label="hi"),
        parser=IntParser(),
        validators={},
        assumptions={},
        fallback='fallback',
        metadata={})
    anchor = CellLocation(sheet_name="Sheet", coordinate="A1")

    outcome = task.process(anchor_cell_location=anchor, workbook=wb)

    assert not outcome.is_ok
    assert outcome.get_value() == 'fallback'
def test_extraction_task_assumption(wb: Workbook):
    """Check that the task result is not ok and carries the fallback value.

    The task is configured with a ``LeftCellMatchAssumption`` labelled
    ``"the koi"`` and is processed with an anchor at ``Sheet!B2``.
    """
    assumptions = {"B2": LeftCellMatchAssumption(label="the koi")}
    task = CellExtractionTask(
        key="h1",
        locator=AtCommentCellLocator(),
        parser=IntParser(),
        validators={},
        assumptions=assumptions,
        fallback='fallback',
        metadata={})
    anchor = CellLocation(sheet_name="Sheet", coordinate="B2")

    outcome = task.process(anchor_cell_location=anchor, workbook=wb)

    assert not outcome.is_ok
    assert outcome.get_value() == 'fallback'
def _build_row_cell_locations(
        self, key_cell: CellLocation) -> Dict[str, CellLocation]:
    """Map each column's key to that column's cell location.

    For every ``(offset, task)`` entry in ``self.columns``, the offset is
    applied to ``key_cell`` and the result is stored under ``task.key``.

    Args:
        key_cell (CellLocation): The location the offsets are applied to.

    Returns:
        Dict[str, CellLocation]: column key -> shifted CellLocation.
    """
    locations = {}
    for offset, column_task in self.columns.items():
        column_key = column_task.key
        locations[column_key] = key_cell.shift(offset)
    return locations
def test_fail_extraction_creation(simple_path: str):
    """Check that building/running the processor raises a creation error.

    A cell spec with a ``right_of`` locator is registered at
    ``TestSheet!Z1`` on the spec loaded from ``simple_path``, after which
    ``ExtractionTaskCreationException`` is expected.
    """
    template = ExcoTemplate.from_excel(simple_path)
    spec = template.to_raw_excel_processor_spec()

    extraction_spec = CellExtractionSpec(
        locator=LocatorSpec(name="right_of"),
        apv=APVSpec(key="something",
                    parser=ParserSpec(name='int'),
                    source=UnknownSource(),
                    validations={},
                    fallback=None))
    target = CellLocation(sheet_name='TestSheet', coordinate='Z1')
    spec.cell_specs[target] = [extraction_spec]

    with pytest.raises(ExtractionTaskCreationException):
        factory = ExcelProcessorFactory.default()
        processor = factory.create_from_spec(spec=spec)
        processor.process_workbook(None)
def test_fail_table_creation():
    """Check that a table spec with locator name 'badname' fails creation.

    ``TableExtractionTaskCreationException`` is expected while creating or
    running the processor built from the spec.
    """
    location = CellLocation('S', 'A1')
    table_spec = TableExtractionSpec(
        key='table',
        locator=LocatorSpec(name='badname'),
        end_conditions=[],
        columns={},
        item_direction=TableItemDirection.DOWNWARD,
        source=UnknownSource())
    bad_spec = ExcelProcessorSpec(cell_specs={},
                                  table_specs={location: [table_spec]})

    with pytest.raises(TableExtractionTaskCreationException):
        factory = ExcelProcessorFactory.default()
        processor = factory.create_from_spec(spec=bad_spec)
        processor.process_workbook(None)
def cell_loc() -> CellLocation:
    """Return a CellLocation for coordinate ``A1`` on the sheet ``Sheet``."""
    location = CellLocation(sheet_name='Sheet', coordinate='A1')
    return location
def test_processor_key_hash():
    """Check that ``ProcessorKey.__hash__()`` returns a non-None value."""
    location = CellLocation(sheet_name="Sheet", coordinate="A1")
    processor_key = ProcessorKey(cell_location=location, key="something")

    assert processor_key.__hash__() is not None
def test_deref_post_spec(workbook):
    """Check ``deref_text`` on ``==REF==`` markers via ``spec_to_extractor``.

    Covers a text that is only a marker, a marker followed by text, and a
    marker preceded by text.
    """
    anchor = CellLocation('SHEET1', 'A5')
    dereferator = Dereferator.spec_to_extractor(workbook, anchor)

    # (input text, expected dereferenced value), checked in this order.
    cases = (
        ('==A1==', 1),
        ('==A2== world', 'hello world'),
        ('the value is ==A1==', 'the value is 1'),
    )
    for text, expected in cases:
        assert dereferator.deref_text(text) == expected
def test_exco_block_with_location():
    """Check that ``ExcoBlockWithLocation.describe()`` returns a string."""
    location = CellLocation(sheet_name='S1', coordinate='A1')
    block = ExcoBlock.simple("key: hello\nparser: int")
    located_block = ExcoBlockWithLocation(cell_location=location,
                                          exco_block=block)

    description = located_block.describe()

    assert isinstance(description, str)
def test_deref_pre_spec(workbook):
    """Check ``deref_text`` on ``<<REF>>`` markers via ``template_to_spec``.

    Covers a text that is only a marker, a marker followed by text, and a
    marker preceded by text.
    """
    anchor = CellLocation('SHEET1', 'A5')
    dereferator = Dereferator.template_to_spec(workbook, anchor)

    # (input text, expected dereferenced value), checked in this order.
    cases = (
        ('<<A1>>', 1),
        ('<<A2>> world', 'hello world'),
        ('the value is <<A1>>', 'the value is 1'),
    )
    for text, expected in cases:
        assert dereferator.deref_text(text) == expected