def sumproduct(*args): # Excel reference: https://support.office.com/en-us/article/ # SUMPRODUCT-function-16753E75-9F68-4874-94AC-4D2145A2FD2E # find any errors error = next((i for i in flatten(args) if i in ERROR_CODES), None) if error: return error # verify array sizes match sizes = set() for arg in args: assert isinstance(arg, tuple), isinstance(arg[0], tuple) sizes.add((len(arg), len(arg[0]))) if len(sizes) != 1: return VALUE_ERROR # put the values into numpy vectors values = np.array( tuple( tuple(x if isinstance(x, (float, int)) and not isinstance(x, bool) else 0 for x in flatten(arg)) for arg in args)) # return the sum product return np.sum(np.prod(values, axis=0))
def cells_to_build(self, data): assert isinstance(data.formula, tuple) return zip( # pragma: no branch self, # address flatten(v for row in data.values for v in row), # value flatten(f for row in data.formula for f in row) # formula )
def _dec2base(value, places=None, base=16): value = list(flatten(value)) if len(value) != 1 or isinstance(value[0], bool): return VALUE_ERROR value = value[0] if value in ERROR_CODES: return value if value in (None, EMPTY): if base == 8: return NUM_ERROR value = 0 try: value = int(value) except ValueError: return VALUE_ERROR mask = _SIZE_MASK[base] if not (-mask <= value < mask): return NUM_ERROR if value < 0: value += mask << 1 value = _BASE_TO_FUNC[base](value)[2:].upper() if places is None: places = 0 else: places = int(places) if places < len(value): return NUM_ERROR return value.zfill(int(places))
def npv(rate, *args): # Excel reference: https://support.office.com/en-us/article/ # NPV-function-8672CB67-2576-4D07-B67B-AC28ACF2A568 if rate in ERROR_CODES: return rate # Check if rate is a valid number try: float(rate) except: return VALUE_ERROR _rate = rate + 1 cashflows = [x for x in flatten(args[0])] # Return the correct error code if one of the cash flows is invalid for cashflow in cashflows: if cashflow in ERROR_CODES: return cashflow # For entries that are both non-numeric and non-error, Excel removes them # and does not treat as zero or raise an error fil = [get_numeric(c) for c in cashflows] cashflows = np.array([i for (i, v) in zip(cashflows, fil) if v]) return (cashflows/np.power(_rate, np.arange(1, len(cashflows) + 1))).sum()
def xl_max(*args): """ Returns the largest value in a set of values. :param args: items to find the largest number in. :return: largest number. .. remarks:: * Arguments can either be numbers or names, arrays, or references that contain numbers. * Logical values and text representations of numbers that you type directly into the list of arguments are counted. * If an argument is an array or reference, only numbers in that array or reference are used. Empty cells, logical values, or text in the array or reference are ignored. * If the arguments contain no numbers, MAX returns 0 (zero). * Arguments that are error values or text that cannot be translated into numbers cause errors. * If you want to include logical values and text representations of numbers in a reference as part of the calculation, use the MAXA function. """ # ignore non-numeric cells data = [x for x in flatten(args) if isinstance(x, number_types)] # however, if no non numeric cells, return zero (is what excel does) if len(data) < 1: return 0 else: return max(data)
def npv(rate, *args): # Excel reference: https://support.office.com/en-us/article/ # NPV-function-8672CB67-2576-4D07-B67B-AC28ACF2A568 rate += 1 cashflow = [x for x in flatten(args, coerce=coerce_to_number) if is_number(x) and not isinstance(x, bool)] return sum(x * rate ** -i for i, x in enumerate(cashflow, start=1))
def xsum(*args): # ignore non numeric cells data = [x for x in flatten(args) if isinstance(x,(int,float,long))] # however, if no non numeric cells, return zero (is what excel does) if len(data) < 1: return 0 else: return sum(data)
def set_value(self, address, value, set_as_range=False): """ Set the value of one or more cells or ranges :param address: `str`, `AddressRange`, `AddressCell` or a tuple, list or an iterable of these three :param value: value to set. This can be a value or a tuple/list which matches the shapes needed for the given address/addresses :param set_as_range: With a single range address and a list like value, set to true to set the entire rnage to the inserted list. """ if list_like(value) and not set_as_range: value = tuple(flatten(value)) if list_like(address): address = (AddressCell(addr) for addr in flatten(address)) else: address = flatten(AddressRange(address).resolve_range) address = tuple(address) assert len(address) == len(value) for addr, val in zip(address, value): self.set_value(addr, val) return elif address not in self.cell_map: address = AddressRange.create(address).address assert address in self.cell_map, ( f'Address "{address}" not found in the cell map. Evaluate the ' 'address, or an address that references it, to place it in the cell map.' ) if set_as_range and list_like(value) and not (value and list_like(value[0])): value = (value, ) cell_or_range = self.cell_map[address] if cell_or_range.value != value: # pragma: no branch # need to be able to 'set' an empty cell, set to not None cell_or_range.value = value # reset the node + its dependencies if not self.cycles: self._reset(cell_or_range) # set the value cell_or_range.value = value
def test_evaluate_from_non_cells(excel_compiler): input_addrs = ['Sheet1!A11'] output_addrs = ['Sheet1!A11:A13', 'Sheet1!D1', 'Sheet1!B11', ] old_values = excel_compiler.evaluate(output_addrs) excel_compiler.trim_graph(input_addrs, output_addrs) excel_compiler.to_file(file_types='yml') excel_compiler = ExcelCompiler.from_file(excel_compiler.filename) for expected, result in zip( old_values, excel_compiler.evaluate(output_addrs)): assert tuple(flatten(expected)) == pytest.approx(tuple(flatten(result))) range_cell = excel_compiler.cell_map[output_addrs[0]] excel_compiler._reset(range_cell) range_value = excel_compiler.evaluate(range_cell.address) assert old_values[0] == range_value
def xl_sum(*args): # ignore non numeric cells data = [x for x in flatten(args) if isinstance(x, number_types)] # however, if no non numeric cells, return zero (is what excel does) if len(data) < 1: return 0 else: return sum(data)
def set_value(self, address, value, set_as_range=False): """ Set the value of one or more cells or ranges :param address: `str`, `AddressRange`, `AddressCell` or a tuple, list or an iterable of these three :param value: value to set. This can be a value or a tuple/list which matches the shapes needed for the given address/addresses :param set_as_range: With a single range address and a list like value, set to true to set the entire rnage to the inserted list. """ if list_like(value) and not set_as_range: value = tuple(flatten(value)) if list_like(address): address = (AddressCell(addr) for addr in flatten(address)) else: address = flatten(AddressRange(address).resolve_range) address = tuple(address) assert len(address) == len(value) for addr, val in zip(address, value): self.set_value(addr, val) return elif address not in self.cell_map: address = AddressRange.create(address).address assert address in self.cell_map if set_as_range and list_like(value) and not ( value and list_like(value[0])): value = (value, ) cell_or_range = self.cell_map[address] if cell_or_range.value != value: # pragma: no branch # need to be able to 'set' an empty cell if cell_or_range.value is None: cell_or_range.value = value # reset the node + its dependencies self._reset(cell_or_range) # set the value cell_or_range.value = value
def build_range(excel_range): a_range = _CellRange(excel_range) excel_cells = zip(a_range, flatten(excel_range.formulas), flatten(excel_range.values)) added_nodes = [a_range] for cell_address, f, value in excel_cells: assert isinstance(cell_address, AddressCell) if str(cell_address) not in self.cell_map: if (f, value) != ('', None): a_cell = _Cell(cell_address, value=value, formula=f, excel=self.excel) self.cell_map[str(cell_address)] = a_cell added_nodes.append(a_cell) self.cell_map[str(excel_range.address)] = a_range return added_nodes
def test_flatten(): assert ['ddd'] == list(flatten(['ddd'])) assert ['ddd', 1, 2, 3] == list(flatten(['ddd', 1, (2, 3)])) assert ['ddd', 1, 2, 3] == list(flatten(['ddd', (1, (2, 3))])) assert ['ddd', 1, 2, 3] == list(flatten(['ddd', (1, 2), 3])) assert [None] == list(flatten(None)) assert [True] == list(flatten(True)) assert [1.0] == list(flatten(1.0))
def _numerics(*args, keep_bools=False): # ignore non numeric cells args = tuple(flatten(args)) error = next((x for x in args if x in ERROR_CODES), None) if error is not None: # return the first error in the list return error else: if not keep_bools: args = (a for a in args if not isinstance(a, bool)) return tuple(x for x in args if isinstance(x, (int, float)))
def concatenate(*args): # Excel reference: https://support.office.com/en-us/article/ # CONCATENATE-function-8F8AE884-2CA8-4F7A-B093-75D702BEA31D if tuple(flatten(args)) != args: return VALUE_ERROR error = next((x for x in args if x in ERROR_CODES), None) if error: return error return ''.join(coerce_to_string(a) for a in args)
def _clean_logicals(*args): """For logicals that take more than one argument, clean via excel rules""" values = tuple(flatten(args)) error = next((x for x in values if x in ERROR_CODES), None) if error is not None: # return the first error in the list return error else: values = tuple(x for x in values if not (x is None or isinstance(x, str))) return VALUE_ERROR if len(values) == 0 else values
def ln(number): """ Returns the natural logarithm of a number. Natural logarithms are based on the constant e (2.71828182845904). :param number: Required. The positive real number for which you want the natural logarithm. :return: the natural logarithm of the number. """ if isinstance(number, list_types): return [math.log(x) for x in flatten(number)] else: return math.log(number)
def xl_log(number, base=10): """ Returns the logarithm of a number to the base you specify. :param number: Required. The positive real number for which you want the logarithm. :param base: Optional. The base of the logarithm. If base is omitted, it is assumed to be 10. :return: the logarithm of the number. """ if isinstance(number, list_types): return [math.log(item, base) for item in flatten(number)] else: return math.log(number, base)
def set_value(self, address, value): """ Set the value of one or more cells or ranges :param address: `str`, `AddressRange`, `AddressCell` or a tuple, list or an iterable of these three :param value: value to set. This can be a value or a tuple/list which matches the shapes needed for the given address/addresses """ if list_like(value): value = tuple(flatten(value)) if list_like(address): address = (AddressCell(addr) for addr in flatten(address)) else: address = flatten(AddressRange(address).resolve_range) address = tuple(address) assert len(address) == len(value) for addr, val in zip(address, value): self.set_value(addr, val) return elif address not in self.cell_map: address = AddressRange.create(address).address assert address in self.cell_map cell_or_range = self.cell_map[address] if cell_or_range.value != value: # pragma: no branch # need to be able to 'set' an empty cell if cell_or_range.value is None: cell_or_range.value = value # reset the node + its dependencies self._reset(cell_or_range) # set the value cell_or_range.value = value
def wrapper(*args): for arg_num in param_indices: try: arg = args[arg_num] except IndexError: break if isinstance(arg, str) and arg in ERROR_CODES: return arg elif isinstance(arg, tuple): error = next((a for a in flatten(arg) if isinstance(a, str) and a in ERROR_CODES), None) if error is not None: return error return f(*args)
def test_trend_shapes(Y, X, new_X, expected): import numpy as np expected = tuple(flatten(expected)) result = np.array(trend(Y, X, new_X, True)).ravel() assert_np_close(result, expected) result = np.array(trend([[x] for x in Y[0]], X, new_X)).ravel() assert_np_close(result, expected) if X is not None and new_X is None: result = np.array( trend([[x] for x in Y[0]], np.array(X).transpose(), None)).ravel() assert_np_close(result, expected)
def sumproduct(*args): # Excel reference: https://support.office.com/en-us/article/ # SUMPRODUCT-function-16753E75-9F68-4874-94AC-4D2145A2FD2E # find any errors error = next((i for i in flatten(args) if i in ERROR_CODES), None) if error: return error # verify array sizes match sizes = set() for arg in args: assert isinstance(arg, tuple), isinstance(arg[0], tuple) sizes.add((len(arg), len(arg[0]))) if len(sizes) != 1: return VALUE_ERROR # put the values into numpy vectors values = np.array(tuple(tuple( x if isinstance(x, (float, int)) and not isinstance(x, bool) else 0 for x in flatten(arg)) for arg in args)) # return the sum product return np.sum(np.prod(values, axis=0))
def count(*args): # Excel reference: https://support.office.com/en-us/article/ # COUNT-function-a59cd7fc-b623-4d93-87a4-d23bf411294c total = 0 for arg in flatten(args): if isinstance(arg, list): # count inside a list total += len( [x for x in arg if is_number(x) and not isinstance(x, bool)]) else: total += int(is_number(arg)) return total
def __new__(cls, cells, cells_dataonly, address): formula = None value = cells[0][0].value if isinstance(value, str) and value.startswith(ARRAY_FORMULA_NAME): # if this range refers to a CSE Array Formula, get the formula front, *args = cells[0][0].value[:-1].rsplit(',', 4) # if this range corresponds to the top left of a CSE Array formula if (args[0] == args[1] == '1') and all( c.value and c.value.startswith(front) for c in flatten(cells)): # apply formula to the range formula = '={%s}' % front[len(ARRAY_FORMULA_NAME) + 1:] else: formula = tuple(tuple(cls.cell_to_formula(cell) for cell in row) for row in cells) values = tuple(tuple(cell.value for cell in row) for row in cells_dataonly) return ExcelWrapper.RangeData.__new__(cls, address, formula, values)
def _base2dec(value, base): value = list(flatten(value)) if len(value) != 1 or isinstance(value[0], bool): return VALUE_ERROR value = value[0] if value in ERROR_CODES: return value if value in (None, EMPTY): value = '0' elif isinstance(value, (int, float)) and value >= 0: if int(value) == value: value = str(int(value)) if isinstance(value, str) and len(value) <= 10: try: value, mask = int(value, base), _SIZE_MASK[base] if value >= 0: return (value & ~mask) - (value & mask) except ValueError: return NUM_ERROR return NUM_ERROR
def __iter__(self): return flatten(self.addresses)
def count(*args): # Excel reference: https://support.office.com/en-us/article/ # COUNT-function-a59cd7fc-b623-4d93-87a4-d23bf411294c return sum(1 for x in flatten(args) if isinstance(x, (int, float)) and not isinstance(x, bool))
def concat(*args): # Excel reference: https://support.office.com/en-us/article/ # concat-function-9b1a9a3f-94ff-41af-9736-694cbd6b4ca2 return concatenate(*tuple(flatten(args)))
def validate_calcs(self, output_addrs=None): """For each address, calc the value, and verify that it matches This is a debugging tool which will show which cells evaluate differently than they do for excel. :param output_addrs: The cells to evaluate from (defaults to all) :return: dict of addresses with good/bad values that failed to verify """ def close_enough(val1, val2): import pytest if isinstance(val1, (int, float)) and \ isinstance(val2, (int, float)): return val2 == pytest.approx(val1) else: return val1 == val2 Mismatch = collections.namedtuple('Mismatch', 'original calced formula') if output_addrs is None: to_verify = self._formula_cells elif list_like(output_addrs): to_verify = [AddressCell(addr) for addr in flatten(output_addrs)] else: to_verify = [AddressCell(output_addrs)] verified = set() failed = {} while to_verify: addr = to_verify.pop() try: self._gen_graph(addr) cell = self.cell_map[addr.address] if isinstance(cell, _Cell) and cell.python_code: original_value = cell.value if original_value == str(cell.formula): self.log.debug("No Orig data?: {}: {}".format( addr, cell.value)) continue cell.value = None self._evaluate(addr.address) # pragma: no branch if not close_enough(original_value, cell.value): failed.setdefault('mismatch', {})[str(addr)] = Mismatch( original_value, cell.value, cell.formula.base_formula) print('{} mismatch {} -> {} {}'.format( addr, original_value, cell.value, cell.formula.base_formula)) # do it again to allow easy breakpointing cell.value = None self._evaluate(cell.address.address) verified.add(addr) for addr in cell.needed_addresses: if addr not in verified: # pragma: no branch to_verify.append(addr) except Exception as exc: cell = self.cell_map.get(addr.address, None) formula = cell and cell.formula.base_formula exc_str = str(exc) exc_str_split = exc_str.split('\n') if 'has not been implemented' in exc_str: exc_str_key = exc_str.split('has not been implemented')[0] exc_str_key = exc_str_key.strip().rsplit(' ', 1)[1].upper() not_implemented = True else: if len(exc_str_split) == 1: exc_str_key = '{}: {}'.format( type(exc).__name__, exc_str) else: exc_str_key = exc_str_split[-2] # pragma: no cover not_implemented = exc_str_key.startswith( 'NotImplementedError: ') if not_implemented: failed.setdefault('not-implemented', {}).setdefault(exc_str_key, []).append( (str(addr), formula, exc_str)) else: failed.setdefault('exceptions', {}).setdefault(exc_str_key, []).append( (str(addr), formula, exc_str)) return failed
def average(*args): l = list(flatten(*args)) return sum(l) / len(l)
def validate_calcs(self, output_addrs=None, sheet=None, verify_tree=True): """For each address, calc the value, and verify that it matches This is a debugging tool which will show which cells evaluate differently than they do for excel. :param output_addrs: The cells to evaluate from (defaults to all) :param sheet: The sheet to evaluate from (defaults to all) :param verify_tree: Follow the tree to any precedent nodes :return: dict of addresses with good/bad values that failed to verify """ def close_enough(val1, val2): import pytest if isinstance(val1, (int, float)) and \ isinstance(val2, (int, float)): return val2 == pytest.approx(val1) else: return val1 == val2 Mismatch = collections.namedtuple('Mismatch', 'original calced formula') if output_addrs is None: to_verify = list(self.formula_cells(sheet)) print('Found {} formulas to evaluate'.format(len(to_verify))) elif list_like(output_addrs): to_verify = [AddressCell(addr) for addr in flatten(output_addrs)] else: to_verify = [AddressCell(output_addrs)] verified = set() failed = {} while to_verify: addr = to_verify.pop() if len(to_verify) % 100 == 0: print("{} formulas left to process".format(len(to_verify))) try: self._gen_graph(addr) cell = self.cell_map[addr.address] if isinstance(cell, _Cell) and cell.python_code: original_value = cell.value if original_value == str(cell.formula): self.log.debug( "No Orig data?: {}: {}".format(addr, cell.value)) continue cell.value = None self._evaluate(addr.address) if not (original_value is None or close_enough(original_value, cell.value)): failed.setdefault('mismatch', {})[str(addr)] = Mismatch( original_value, cell.value, cell.formula.base_formula) print('{} mismatch {} -> {} {}'.format( addr, original_value, cell.value, cell.formula.base_formula)) # do it again to allow easy breakpointing cell.value = None self._evaluate(cell.address.address) verified.add(addr) if verify_tree: # pragma: no branch for addr in cell.needed_addresses: if addr not in verified: # pragma: no branch to_verify.append(addr) except Exception as exc: cell = self.cell_map.get(addr.address, None) formula = cell and cell.formula.base_formula exc_str = str(exc) exc_str_split = exc_str.split('\n') if 'is not implemented' in exc_str: exc_str_key = exc_str.split('is not implemented')[0] exc_str_key = exc_str_key.strip().rsplit(' ', 1)[1].upper() not_implemented = True else: if len(exc_str_split) == 1: exc_str_key = '{}: {}'.format( type(exc).__name__, exc_str) else: exc_str_key = exc_str_split[-2] # pragma: no cover not_implemented = exc_str_key.startswith( 'NotImplementedError: ') if not_implemented: failed.setdefault('not-implemented', {}).setdefault( exc_str_key, []).append((str(addr), formula, exc_str)) else: failed.setdefault('exceptions', {}).setdefault( exc_str_key, []).append((str(addr), formula, exc_str)) return failed
def validate_calcs(self, output_addrs=None, sheet=None, verify_tree=True, tolerance=None, raise_exceptions=False): """For each address, calc the value, and verify that it matches This is a debugging tool which will show which cells evaluate differently than they do for excel. :param output_addrs: The cells to evaluate from (defaults to all) :param sheet: The sheet to evaluate from (defaults to all) :param verify_tree: Follow the tree to any precedent nodes :return: dict of addresses with good/bad values that failed to verify """ if output_addrs is None: to_verify = list(self.formula_cells(sheet)) print(f'Found {len(to_verify)} formulas to evaluate') elif list_like(output_addrs): to_verify = [AddressCell(addr) for addr in flatten(output_addrs)] else: to_verify = [AddressCell(output_addrs)] verified = set() failed = {} if self.cycles: iterative_eval_tracker(**self.cycles) while to_verify: addr = to_verify.pop() if len(to_verify) % 100 == 0: print(f"{len(to_verify)} formulas left to process") try: self._gen_graph(addr) cell = self.cell_map[addr.address] if isinstance(cell, _Cell) and cell.python_code and ( not cell.address.is_unbounded_range): original_value = cell.value if original_value == str(cell.formula): self.log.debug(f"No Orig data?: {addr}: {cell.value}") continue cell.value = None self.evaluate(addr.address) if not (original_value is None or cell.close_enough( original_value, tol=tolerance)): failed.setdefault('mismatch', {})[str(addr)] = Mismatch( original_value, cell.value, cell.formula.base_formula) print('{} mismatch {} -> {} {}'.format( addr, original_value, cell.value, cell.formula.base_formula)) # do it again to allow easy break-pointing cell.value = None self.evaluate(cell.address.address) verified.add(addr) if verify_tree: # pragma: no branch for addr in cell.needed_addresses: if addr not in verified: # pragma: no branch to_verify.append(addr) except Exception as exc: if raise_exceptions: raise cell = self.cell_map.get(addr.address, None) formula = cell and cell.formula.base_formula exc_str = str(exc) exc_str_split = exc_str.split('\n') if 'is not implemented' in exc_str: exc_str_key = exc_str.split('is not implemented')[0] exc_str_key = exc_str_key.strip().rsplit(' ', 1)[1].upper() not_implemented = True else: if len(exc_str_split) == 1: exc_str_key = f'{type(exc).__name__}: {exc_str}' else: exc_str_key = exc_str_split[-2] # pragma: no cover not_implemented = exc_str_key.startswith( 'NotImplementedError: ') if not_implemented: failed.setdefault('not-implemented', {}).setdefault(exc_str_key, []).append( (str(addr), formula, exc_str)) else: failed.setdefault('exceptions', {}).setdefault(exc_str_key, []).append( (str(addr), formula, exc_str)) return failed
def xlog(value): if list_like(value): return [math_wrap(log)(x) for x in flatten(value)] else: return math_wrap(log)(value)
def index(array, row_num, col_num=None): # Excel reference: https://support.microsoft.com/en-us/office/ # index-function-a5dcf0dd-996d-40a4-a822-b56b061328bd if not list_like(array): if array in ERROR_CODES: return array else: return VALUE_ERROR if not list_like(array[0]): return VALUE_ERROR if is_address(array[0][0]): assert len({a for a in flatten(array)}) == 1 _C_ = index.excel_func_meta['name_space']['_C_'] ref_addr = array[0][0].address_at_offset else: ref_addr = None def array_data(row, col): if ref_addr: return _C_(ref_addr(row, col).address) else: return array[row][col] try: # rectangular array if row_num and col_num: if row_num < 0 or col_num < 0: return VALUE_ERROR else: return array_data(row_num - 1, col_num - 1) elif row_num: if row_num < 0: return VALUE_ERROR elif len(array[0]) == 1: return array_data(row_num - 1, 0) elif len(array) == 1: return array_data(0, row_num - 1) elif isinstance(array, np.ndarray): return array[row_num - 1, :] else: return (tuple( array_data(row_num - 1, col) for col in range(len(array[0]))), ) elif col_num: if col_num < 0: return VALUE_ERROR elif len(array) == 1: return array_data(0, col_num - 1) elif len(array[0]) == 1: return array_data(col_num - 1, 0) elif isinstance(array, np.ndarray): result = array[:, col_num - 1] result.shape = result.shape + (1, ) return result else: return tuple((array_data(row, col_num - 1), ) for row in range(len(array))) except IndexError: return REF_ERROR else: return array
def xlog(a): if isinstance(a,(list,tuple,np.ndarray)): return [log(x) for x in flatten(a)] else: #print a return log(a)