Example #1
0
    def load_array_formulas(self):
        """Expand the array formulas of every worksheet in ``self.workbook``.

        For each ``(address, ref_addr)`` entry of a worksheet's
        ``array_formulae`` mapping, every cell of a multi-cell reference is
        overwritten with a string built from ``ARRAY_FORMULA_FORMAT``; for
        any other reference the cell value is replaced by its ``.text``.

        If a worksheet has no ``array_formulae`` attribute, the work is
        delegated to ``self.old_load_array_formulas()`` and this method
        returns immediately, without looking at the remaining worksheets.
        """
        # expand array formulas
        for ws in self.workbook:
            if not hasattr(ws, 'array_formulae'):  # pragma: no cover
                # array_formulae was introduced in openpyxl 3.0.8 & removed in 3.0.9
                # https://foss.heptapod.net/openpyxl/openpyxl/-/
                #   commit/b71b6ba667e9fcf8de3f899382d446626e55970c
                # ... evidently will be coming back in 3.1
                # https://openpyxl.readthedocs.io/en/stable/changes.html
                self.old_load_array_formulas()
                return

            for address, ref_addr in ws.array_formulae.items(
            ):  # pragma: no cover
                # get the reference address for the array formula
                ref_addr = AddressRange(ref_addr)
                # NOTE(review): ref_addr was already passed through
                # AddressRange() on the line above, so this second conversion
                # looks redundant -- confirm before removing.
                if not isinstance(ref_addr, AddressRange):
                    ref_addr = AddressRange(ref_addr)

                if isinstance(ref_addr, AddressRange):
                    # Write one formula per covered cell; i and j are the
                    # 1-based row/column position of the cell in the range.
                    formula = ws[address].value
                    for i, row in enumerate(ref_addr.rows, start=1):
                        for j, addr in enumerate(row, start=1):
                            # formula.text[1:] drops the first character of
                            # the formula text (presumably the leading '=').
                            ws[addr.coordinate] = ARRAY_FORMULA_FORMAT % (
                                formula.text[1:], i, j, *ref_addr.size)
                else:
                    # ::TODO:: At some point consider dropping support for openpyxl < 3.0.8
                    # This has the effect of replacing the ArrayFormula object with just the
                    # formula text. This matches the openpyxl < 3.0.8 behavior, at some point
                    # consider using the new behavior.
                    ws[ref_addr.coordinate] = ws[
                        ref_addr.coordinate].value.text
Example #2
0
    def _gen_graph(self, seed, recursed=False):
        """Add the cells reachable from ``seed`` to the compiler's graph.

        :param seed: an ``AddressRange``/``AddressCell``, an address string
            (e.g., ``A6`` or ``A3:B7``), or an iterable of any of these.
        :param recursed: True when called for one element of an iterable
            seed; the pending graph work is then left for the outer call.
        :raises ValueError: if ``seed`` is none of the supported kinds.
        """
        # ``collections.Iterable`` was removed in Python 3.10; the ABC lives
        # in ``collections.abc``.  Imported locally so this block does not
        # depend on the module's import section.
        from collections.abc import Iterable

        if not isinstance(seed, (AddressRange, AddressCell)):
            if isinstance(seed, str):
                seed = AddressRange(seed)
            elif isinstance(seed, Iterable):
                # queue every seed first, then process the graph work once
                for s in seed:
                    self._gen_graph(s, recursed=True)
                self._process_gen_graph()
                return
            else:
                raise ValueError('Unknown seed: {}'.format(seed))

        # get/set the current sheet
        if not seed.has_sheet:
            seed = AddressRange(seed, sheet=self.excel.get_active_sheet_name())

        if seed.address in self.cell_map:
            # already did this cell/range
            return

        # process the seed
        self._make_cells(seed)

        if not recursed:
            # if not entered to process one cell / cellrange process other work
            self._process_gen_graph()
Example #3
0
    def get_range(self, address):
        """Return a wrapper for the cell or range found at ``address``.

        :param address: an ``AddressRange``/``AddressCell`` or anything
            ``AddressRange`` accepts.
        :return: ``_OpxCell`` when the lookup yields a single cell,
            otherwise ``_OpxRange``.
        """
        if not isinstance(address, (AddressRange, AddressCell)):
            address = AddressRange(address)

        # an address without a sheet refers to the active sheet
        if not address.has_sheet:
            sheet = self.workbook.active
            sheet_dataonly = self.workbook_dataonly.active
        else:
            sheet = self.workbook[address.sheet]
            sheet_dataonly = self.workbook_dataonly[address.sheet]

        # work around type coercion to datetime that causes some issues
        reader_patch = mock.patch('openpyxl.worksheet._reader.from_excel',
                                  self.from_excel)
        with reader_patch:
            if address.is_unbounded_range:
                # bound the address range to the data in the spreadsheet
                data_extent = AddressRange(
                    (1, 1, *self.max_col_row(sheet.title)), sheet=sheet.title)
                address = address & data_extent

            coordinate = address.coordinate
            cells = sheet[coordinate]
            cells_dataonly = sheet_dataonly[coordinate]

            if isinstance(cells, (Cell, MergedCell)):
                wrapper = _OpxCell
            else:
                wrapper = _OpxRange
            return wrapper(cells, cells_dataonly, address)
Example #4
0
def test_address_range_errors():
    """Malformed range strings are rejected with ``ValueError``."""
    for malformed in ('B32:B', 'B32:B33:B'):
        with pytest.raises(ValueError):
            AddressRange(malformed)
Example #5
0
def test_address_pickle(tmpdir):
    """Addresses compare equal after a pickle round trip through a file."""
    range_specs = (
        'B1',
        'B1:C1',
        'B1:B2',
        'B1:C2',
        'sh!B1',
        'sh!B1:C1',
        'sh!B1:B2',
        'sh!B1:C2',
        'B:C',
        '2:4',
    )
    cell_specs = (
        'sh!XFC1048575',
        'sh!XFD1048576',
        'sh!A1',
        'sh!E5',
        'sh!F6',
    )
    addrs = [AddressRange(spec) for spec in range_specs]
    addrs.extend(AddressCell(spec) for spec in cell_specs)

    filename = os.path.join(str(tmpdir), 'test_addrs.pkl')

    with open(filename, 'wb') as pickle_out:
        pickle.dump(addrs, pickle_out)

    with open(filename, 'rb') as pickle_in:
        new_addrs = pickle.load(pickle_in)

    assert addrs == new_addrs
Example #6
0
def test_address_range():
    """Check construction, equality and row/column-only ranges."""
    lower = AddressRange('a1:b2')
    upper = AddressRange('A1:B2')
    copied = AddressRange(lower)

    # case-insensitive parsing and copy construction
    assert lower == upper
    assert upper == copied

    assert upper == AddressRange(upper)
    assert upper == AddressRange.create(upper)

    assert AddressRange('sh!a1:b2') == AddressRange(lower, sheet='sh')
    assert AddressCell('C13') == AddressCell('R13C3')

    # a sheet that conflicts with the address's own sheet is an error
    with pytest.raises(ValueError):
        AddressRange(AddressRange('sh!a1:b2'), sheet='sheet')

    # column-only range: rows are reported as 0
    whole_column = AddressRange('A:A')
    assert 'A' == whole_column.start.column
    assert 'A' == whole_column.end.column
    assert 0 == whole_column.start.row
    assert 0 == whole_column.end.row

    # row-only range: columns are reported as ''
    whole_row = AddressRange('1:1')
    assert '' == whole_row.start.column
    assert '' == whole_row.end.column
    assert 1 == whole_row.start.row
    assert 1 == whole_row.end.row
Example #7
0
def test_address_sort_keys():
    """A range sorts like its first cell, and A1 sorts before B2."""
    whole_range, first_cell, last_cell = (
        AddressRange(spec) for spec in ('sh!A1:B2', 'sh!A1', 'sh!B2'))

    first_key = first_cell.sort_key
    assert first_key == whole_range.sort_key
    assert first_key < last_cell.sort_key
Example #8
0
    def get_range(self, address):
        """Return the cell, or the range data, stored for ``address``.

        A non-range address, or a range whose cell carries a formula, gives
        back the cell itself.  An unbounded range is followed through the
        reference stored in its formula.  Any other range is assembled from
        the values of its individual cells.
        """
        cell = self._get_cell(address)

        if not address.is_range:
            return cell

        if address.is_unbounded_range:
            # this is a unbounded range to range mapping, disassemble
            formula = cell.formula
            assert formula.startswith(REF_START)
            assert formula.endswith(REF_END)
            target = formula[len(REF_START):-len(REF_END)]
            return self.get_range(AddressRange(target))

        if cell.formula:
            return cell

        # need to map col or row ranges to a specific range
        cell_rows = []
        for address_row in address.resolve_range:
            cell_rows.append([self._get_cell(addr) for addr in address_row])

        values = [[member.values for member in cell_row]
                  for cell_row in cell_rows]
        return ExcelOpxWrapper.RangeData(address, None, values)
Example #9
0
 def conditional_format(self, address):
     """Return the conditional format rules that apply to one cell.

     Each rule formula is translated from the origin of its format range to
     ``address``; the resulting ``CfRule`` records are ordered by priority.
     """
     address = AddressCell(address)
     sheet_formats = self.workbook[address.sheet].conditional_formatting

     rules = []
     for cond_format in sheet_formats:
         if address.coordinate not in cond_format:
             continue

         # offsets of the cell from the first cell of the format's range
         origin = AddressRange(cond_format.cells.ranges[0].coord).start
         row_offset = address.row - origin.row
         col_offset = address.col_idx - origin.col_idx

         for rule in cond_format.rules:
             if not rule.formula:
                 continue
             translator = Translator('={}'.format(rule.formula[0]),
                                     origin.coordinate)
             translated = translator.translate_formula(
                 row_delta=row_offset, col_delta=col_offset)
             cf_rule = self.CfRule(
                 formula=translated,
                 priority=rule.priority,
                 dxf_id=rule.dxfId,
                 dxf=rule.dxf,
                 stop_if_true=rule.stopIfTrue,
             )
             rules.append(cf_rule)

     rules.sort(key=lambda cf_rule: cf_rule.priority)
     return rules
Example #10
0
    def evaluate(self, address):
        """Evaluate a cell or cells in the spreadsheet.

        :param address: str, AddressRange, AddressCell or a tuple or list
            or iterable of these three
        :return: evaluated value/values
        """
        if str(address) in self.cell_map:
            return self._evaluate(str(address))

        if list_like(address):
            # process a tuple or list of addresses; any other iterable is
            # materialized into a tuple first
            if isinstance(address, (tuple, list)):
                addresses = address
            else:
                addresses = tuple(address)
            return type(addresses)(self.evaluate(item) for item in addresses)

        address = AddressRange.create(address)

        # get the sheet if not specified
        if not address.has_sheet:
            active_sheet = self.excel.get_active_sheet_name()
            address = AddressRange(address, sheet=active_sheet)

        # build the graph for this address on first use
        if address.address not in self.cell_map:
            self._gen_graph(address.address)

        return self._evaluate(str(address))
Example #11
0
def test_address_range_columns():
    """``cols`` yields one sequence of addresses per column of the range."""
    columns = [list(col) for col in AddressRange('sh!A1:C3').cols]
    first_column, last_column = columns[0], columns[-1]

    assert 3 == len(columns)
    assert 3 == len(first_column)

    assert all('A' == addr.column for addr in first_column)
    assert all('C' == addr.column for addr in last_column)
Example #12
0
def offset(reference, row_inc, col_inc, height=None, width=None):
    # Excel reference: https://support.microsoft.com/en-us/office/
    #   offset-function-c8de19ae-dd79-4b9b-a14e-b4d906d11b66
    """
    Return a reference shifted ``row_inc`` rows and ``col_inc`` columns from
    ``reference``.

    ``height`` and ``width`` default to the size of ``reference``.  The
    result is ``REF_ERROR`` when the shifted area leaves the sheet, a single
    cell address when it is 1x1, and an address range otherwise.
    """
    base_addr = AddressRange.create(reference)

    height = base_addr.size.height if height is None else height
    width = base_addr.size.width if width is None else width

    # corners of the shifted area
    first_row = base_addr.row + row_inc
    last_row = first_row + height - 1
    first_col = base_addr.col_idx + col_inc
    last_col = first_col + width - 1

    off_sheet = (first_row <= 0 or last_row > MAX_ROW
                 or first_col <= 0 or last_col > MAX_COL)
    if off_sheet:
        return REF_ERROR

    top_left = AddressCell((first_col, first_row, first_col, first_row),
                           sheet=base_addr.sheet)
    if height == width == 1:
        return top_left

    bottom_right = AddressCell((last_col, last_row, last_col, last_row),
                               sheet=base_addr.sheet)
    return AddressRange(f'{top_left.coordinate}:{bottom_right.coordinate}',
                        sheet=top_left.sheet)
Example #13
0
    def get_range(self, address):
        """Return a wrapper for the cell or range found at ``address``.

        :return: ``_OpxCell`` when the lookup yields a single ``Cell``,
            otherwise ``_OpxRange``.
        """
        if not isinstance(address, (AddressRange, AddressCell)):
            address = AddressRange(address)

        # an address without a sheet refers to the active sheet
        if not address.has_sheet:
            sheet = self.workbook.active
            sheet_dataonly = self.workbook_dataonly.active
        else:
            sheet = self.workbook[address.sheet]
            sheet_dataonly = self.workbook_dataonly[address.sheet]

        cells = sheet[address.coordinate]
        cells_dataonly = sheet_dataonly[address.coordinate]

        if isinstance(cells, Cell):
            return _OpxCell(cells, cells_dataonly)

        missing_rows = len(cells) - len(cells_dataonly)
        if missing_rows:
            # The read_only version of an openpyxl worksheet has the
            # somewhat annoying property of not giving empty rows at the
            # end.  Which is not the same behavior as the non-readonly
            # version.  So we need to align the data here by adding
            # empty rows.
            blank_row = (EMPTY_CELL, ) * len(cells[0])
            cells_dataonly += (blank_row, ) * missing_rows

        return _OpxRange(cells, cells_dataonly)
Example #14
0
    def cell_to_formula(cls, cell):
        """Return the formula text to use for ``cell``, or '' if it has none.

        A value that does not start with '=' is not a formula.  A '={...}'
        literal and an encoded CSE array formula are both rewritten as an
        ``index(...)`` formula; any other formula is returned unchanged.
        """
        if cell.value is None:
            return ''

        formula = str(cell.value)
        if not formula.startswith('='):
            return ''

        if formula.startswith('={') and formula.endswith('}'):
            # This is not in a CSE Array Context
            return '=index({},1,1)'.format(formula[1:])

        if not formula.startswith(ARRAY_FORMULA_NAME):
            return formula

        # These are CSE Array formulas as encoded from sheet
        arguments = formula[len(ARRAY_FORMULA_NAME) + 1:-1]
        params = arguments.rsplit(',', 4)

        # params[1:3] is this cell's position in the array and params[3:5]
        # is the array size, as used below
        start_row = cell.row - int(params[1]) + 1
        start_col_idx = cell.col_idx - int(params[2]) + 1
        end_row = start_row + int(params[3]) - 1
        end_col_idx = start_col_idx + int(params[4]) - 1

        cse_range = AddressRange(
            (start_col_idx, start_row, end_col_idx, end_row),
            sheet=cell.parent.title)
        return '=index({},{},{})'.format(
            cse_range.quoted_address, params[1], params[2])
Example #15
0
    def __init__(self, data):
        """Build the range from ``data``, which must carry a sheet address.

        :raises ValueError: if the address of ``data`` has no sheet.
        """
        source_address = data.address
        self.address = AddressRange(source_address)
        if not self.address.sheet:
            raise ValueError("Must pass in a sheet: {}".format(self.address))

        # the cell addresses and size come from the address as given
        self.addresses = source_address.resolve_range
        self.size = source_address.size
        self.value = None
Example #16
0
 def resolve_range(self):
     """Resolve the range that starts at ``self.address.start`` and has the
     same number of rows and columns as ``self.values``."""
     first_col = self.address.start.col_idx
     first_row = self.address.start.row
     last_col = self.address.start.col_idx + len(self.values[0]) - 1
     last_row = self.address.start.row + len(self.values) - 1

     extent = AddressRange((first_col, first_row, last_col, last_row),
                           sheet=self.address.sheet)
     return extent.resolve_range
Example #17
0
def test_evaluate_conditional_formatting(cond_format_ws):
    """Conditional format ids match whether evaluated from the workbook
    or from its yaml/pickle serializations, and map to expected colors."""
    cells_addrs = [
        AddressCell('B2'),
        AddressCell('Sheet1!B3'),
        AddressRange('Sheet1!B4:B6'),
    ]
    formats = cond_format_ws.eval_conditional_formats(cells_addrs)
    formats2 = cond_format_ws.eval_conditional_formats(
        (a for a in cells_addrs))
    assert formats == list(formats2)  # should match cells_addrs's type
    assert formats2 == tuple(
        formats2)  # tuple since cells_addrs is a generator
    assert isinstance(formats[0], tuple)
    assert len(formats) == 3
    assert len(formats[2]) == 3

    # read the spreadsheet back from yaml, then from pickle
    for extension in ('yml', 'pkl'):
        cond_format_ws.to_file(file_types=(extension, ))
        reloaded = ExcelCompiler.from_file(
            cond_format_ws.filename + '.' + extension)
        cells_addrs[0] = AddressCell('Sheet1!B2')
        reloaded_formats = reloaded.eval_conditional_formats(
            tuple(cells_addrs))
        assert formats2 == reloaded_formats

    # flatten the three cells of the range result into top-level entries
    formats.extend(range_row[0] for range_row in formats[2])
    del formats[2]

    color_key = {
        ('FF006100', 'FFC6EFCE'): 'grn',
        ('FF9C5700', 'FFFFEB9C'): 'yel',
        ('FF9C0006', 'FFFFC7CE'): 'red',
        (None, 'FFFFC7CE'): 'nofont',
    }

    # map each format id to a color name via its font and fill colors
    color_map = {
        idx: color_key[dxf.font and dxf.font.color.value,
                       dxf.fill.bgColor.value]
        for idx, dxf in cond_format_ws.conditional_formats.items()
    }

    expected = [
        ['red'],
        ['grn', 'yel', 'red'],
        ['yel', 'red'],
        ['nofont'],
        ['yel', 'red'],
    ]
    results = [[color_map[fmt_id] for fmt_id in cell_formats]
               for cell_formats in formats]
    assert results == expected
Example #18
0
def test_has_sheet():
    """``has_sheet`` reflects whether the address names a sheet."""
    for coordinate in ('a1', 'a1:b2'):
        assert AddressRange('Sheet1!' + coordinate).has_sheet
        assert not AddressRange(coordinate).has_sheet

    # a sheet can be added to an address that has none
    sheetless = AddressRange('A2')
    assert AddressCell('sh!A2') == AddressRange(sheetless, sheet='sh')

    # a conflicting sheet is rejected
    with pytest.raises(ValueError, match='Mismatched sheets'):
        AddressRange(AddressRange('shx!a1'), sheet='sh')
Example #19
0
 def get_formula_from_range(self, address):
     """Return the formula at ``address``, or a tuple of tuples of formulas
     for a range.  A cell whose formula does not start with "=" gives None.
     """
     if not isinstance(address, (AddressRange, AddressCell)):
         address = AddressRange(address)
     result = self.get_range(address)

     if not isinstance(address, AddressCell):
         # recurse cell by cell, keeping the row structure of the range
         return tuple(
             tuple(self.get_formula_from_range(cell_addr)
                   for cell_addr in address_row)
             for address_row in result.resolve_range)

     formula = result.formula
     return formula if formula.startswith("=") else None
Example #20
0
    def __init__(self, address, excel):
        """Build the range for ``address``, which must include a sheet.

        :raises ValueError: if the parsed address has no sheet.
        """
        parsed = AddressRange(address)
        self.address = parsed
        self.excel = excel
        if not parsed.sheet:
            raise ValueError("Must pass in a sheet: {}".format(address))

        self.addresses = resolve_range(parsed)
        self._cells = None
        self.size = parsed.size
        self.value = None
Example #21
0
    def table_name_containing(self, address):
        """Return the lower-cased name of the table containing ``address``,
        or None when no table of its sheet contains it."""
        address = AddressCell(address)
        if address in self._table_refs:
            return self._table_refs.get(address)

        # first table on the sheet whose reference covers the address
        sheet_tables = self.workbook[address.sheet]._tables
        containing = next(
            (table for table in sheet_tables
             if address in AddressRange(table.ref)),
            None)
        if containing is not None:
            self._table_refs[address] = containing.name.lower()

        return self._table_refs.get(address)
Example #22
0
 def get_formula_or_value(self, address):
     """Return the formula at ``address``, falling back to its values when
     the formula is falsy; for a range, a tuple of tuples of these."""
     if not isinstance(address, (AddressRange, AddressCell)):
         address = AddressRange(address)
     result = self.get_range(address)

     if not isinstance(address, AddressCell):
         # recurse cell by cell, keeping the row structure of the range
         return tuple(
             tuple(self.get_formula_or_value(cell_addr)
                   for cell_addr in address_row)
             for address_row in result.resolve_range)

     return result.formula or result.values
Example #23
0
    def get_range(self, address):
        """Return a wrapper for the cell or range found at ``address``.

        A range that is not bounded is first intersected with the extent
        reported by the data-only sheet.

        :return: ``_OpxCell`` when the lookup yields a single cell,
            otherwise ``_OpxRange``.
        """
        if not isinstance(address, (AddressRange, AddressCell)):
            address = AddressRange(address)

        # an address without a sheet refers to the active sheet
        if not address.has_sheet:
            sheet = self.workbook.active
            sheet_dataonly = self.workbook_dataonly.active
        else:
            sheet = self.workbook[address.sheet]
            sheet_dataonly = self.workbook_dataonly[address.sheet]

        # work around type coercion to datetime that causes some issues
        reader_patch = mock.patch('openpyxl.worksheet._reader.from_excel',
                                  self.from_excel)
        with reader_patch:
            if address.is_range and not address.is_bounded_range:
                # bound the address range to the data in the spreadsheet
                data_extent = AddressRange(
                    (1, 1, sheet_dataonly.max_column, sheet_dataonly.max_row),
                    sheet=address.sheet)
                address = address & data_extent

            cells = sheet[address.coordinate]
            cells_dataonly = sheet_dataonly[address.coordinate]

            if isinstance(cells, (Cell, MergedCell)):
                return _OpxCell(cells, cells_dataonly, address)

            missing_rows = len(cells) - len(cells_dataonly)
            if missing_rows:
                # The read_only version of openpyxl worksheet has the
                # somewhat annoying property of not giving empty rows at the
                # end.  Which is not the same behavior as the non-readonly
                # version.  So we need to align the data here by adding
                # empty rows.
                blank_row = (EMPTY_CELL, ) * len(cells[0])
                cells_dataonly += (blank_row, ) * missing_rows

            return _OpxRange(cells, cells_dataonly, address)
Example #24
0
    def __init__(self, address=None, formula='', excel=None):
        """Create a cell with an optional formula.

        The formula is treated as python code when ``excel`` is None or a
        ``_CompiledImporter``; a ``_CompiledImporter`` is not kept as
        ``self.excel``.
        """
        from_compiled = isinstance(excel, _CompiledImporter)
        formula_is_python_code = excel is None or from_compiled

        if formula:
            self.formula = ExcelFormula(
                formula, cell=self,
                formula_is_python_code=formula_is_python_code) or None
        else:
            self.formula = None

        self.excel = None if from_compiled else excel
        self.address = AddressRange(address)
Example #25
0
    def needed_addresses(self):
        """Return the addresses and address ranges this formula needs.

        The result is computed from ``self.python_code`` on first use and
        cached in ``self._needed_addresses``.
        """
        if self._needed_addresses is not None:
            return self._needed_addresses

        if not self.python_code:
            self._needed_addresses = ()
        else:
            # get all the cells/ranges this formula refers to, and remove dupes
            eval_calls = EVAL_REGEX.findall(self.python_code)
            self._needed_addresses = uniqueify(
                AddressRange(call[1][2:-2]) for call in eval_calls)

        return self._needed_addresses
Example #26
0
def test_multi_area_ranges(excel, ATestCell):
    """A defined name with several areas resolves to a multi-area range."""
    cell = ATestCell('A', 1, excel=excel)
    from unittest import mock

    patched_names = {'dname': (('$A$1', 's1'), ('$A$3:$A$4', 's2'))}
    with mock.patch.object(excel, '_defined_names', patched_names):

        areas = (tuple(AddressRange(area_addr, sheet=area_sheet))
                 for area_addr, area_sheet in excel._defined_names['dname'])
        multi_area_range = AddressMultiAreaRange(areas)

        assert (multi_area_range, None) == range_boundaries('dname', cell)
        assert multi_area_range == AddressRange.create('dname', cell=cell)
Example #27
0
    def eval_conditional_formats(self, address):
        """Evaluate the conditional format (formulas) for a cell or cells

        returns the conditional format id which is the key for the dict:
          ExcelCompiler.conditional_formats

        NOTE: conditional_formats are not saved in the persistent formats.
              If needed they can be hand serialized into "extra_data"

        :param address: str, AddressRange, AddressCell or a tuple or list
            or iterable of these three
        :return: evaluated objects ids
        """
        if list_like(address):
            # process a tuple or list of addresses; any other iterable is
            # materialized into a tuple first
            if isinstance(address, (tuple, list)):
                addresses = address
            else:
                addresses = tuple(address)
            return type(addresses)(self.eval_conditional_formats(item)
                                   for item in addresses)

        address = AddressRange.create(address)

        # get the sheet if not specified
        if not address.has_sheet:
            active_sheet = self.excel.get_active_sheet_name()
            address = AddressRange(address, sheet=active_sheet)

        if address.is_range:
            # evaluate cell by cell, keeping the row structure of the range
            return tuple(
                tuple(self.eval_conditional_formats(cell_addr)
                      for cell_addr in address_row)
                for address_row in address.rows)

        cf_addr = str(address).replace('!', '.cf!')

        if cf_addr in self.cell_map:
            return self.eval(self.cell_map[cf_addr])

        # first use: compile the cell's rules into one formula cell that is
        # stored in the cell map under the '.cf' key
        phony_cell = _Cell(address)
        format_strs = []
        for cond_format in self.excel.conditional_format(address):
            excel_formula = ExcelFormula(cond_format.formula, cell=phony_cell)
            python_code = excel_formula.python_code
            format_strs.append(
                f'({python_code}, {cond_format.dxf_id}, '
                f'{int(bool(cond_format.stop_if_true))})'
            )
            self.conditional_formats[cond_format.dxf_id] = cond_format.dxf

        python_code = f"=conditional_format_ids({', '.join(format_strs)})"
        a_cell = _Cell(address, formula=python_code)
        self.cell_map[cf_addr] = a_cell
        self._gen_graph(a_cell.formula.needed_addresses)

        return self.eval(self.cell_map[cf_addr])
Example #28
0
    def _from_text(cls, filename, is_json=False):
        """Deserialize a compiler from a json/yaml file.

        :param filename: path of the file; the matching extension is
            appended when it is missing.
        :param is_json: True when the file is json rather than yaml.
        :return: the populated compiler instance.
        """
        if is_json:
            if not filename.endswith('.json'):  # pragma: no branch
                filename += '.json'
        elif not filename.split('.')[-1].startswith('y'):
            filename += '.yml'

        with open(filename, 'r') as text_file:
            data = YAML().load(text_file)

        excel = _CompiledImporter(filename, data)
        excel_compiler = cls(excel=excel, cycles=data.get('cycles', False))
        excel.compiler = excel_compiler
        if 'cycles' in data:
            del data['cycles']

        def add_line_numbers(cell_addr, line_number):
            # record where in the file the cell's formula came from
            cell_formula = excel_compiler.cell_map[cell_addr].formula
            if cell_formula is None:
                return
            cell_formula.lineno = line_number
            cell_formula.filename = filename

        # populate the cells, deferring the ranges until all cells exist
        cell_map_data = data['cell_map']
        deferred_ranges = []
        for raw_address in cell_map_data:
            lineno = cell_map_data.lc.data[raw_address][0] + 1
            address = AddressRange(raw_address)
            if address.is_range:
                deferred_ranges.append((address, lineno))
                continue
            excel_compiler._make_cells(address)
            add_line_numbers(address.address, lineno)

        # populate the ranges and dependant graph
        for address, lineno in deferred_ranges:
            excel_compiler._make_cells(address)
            add_line_numbers(address.address, lineno)

        excel_compiler._process_gen_graph()
        del data['cell_map']

        # process the rest of the data from the file
        excel_compiler._excel_file_md5_digest = data['excel_hash']
        del data['excel_hash']
        excel_compiler.extra_data = data

        # remove "excel" file references for GC
        excel_compiler.excel = None
        return excel_compiler
Example #29
0
def test_address_range_size():
    """``size`` is (rows, columns), including for unbounded ranges."""
    expected_sizes = (
        ((1, 1), 'B1'),
        ((1, 2), 'B1:C1'),
        ((2, 1), 'B1:B2'),
        ((2, 2), 'B1:C2'),
        ((MAX_ROW, 2), 'B:C'),
        ((3, MAX_COL), '2:4'),
    )
    for expected, spec in expected_sizes:
        assert expected == AddressRange(spec).size
Example #30
0
    def connect(self):
        """Load the workbook twice (formulas and data-only) and copy each
        array formula, wrapped in braces, into every cell of its ref."""
        self.workbook = load_workbook(self.filename)
        self.workbook_dataonly = load_workbook(
            self.filename, data_only=True, read_only=True)

        for ws in self.workbook:  # pragma: no cover
            # ::TODO:: this is simple hack so that we won't try to eval
            # array formulas since they are not implemented
            for address, props in ws.formula_attributes.items():
                if props.get('t') != 'array':
                    continue
                formula = '{%s}' % ws[address].value
                for address_row in AddressRange(props.get('ref')).rows:
                    for addr in address_row:
                        ws[addr.coordinate] = formula