def cell_add(self, cell, value=None): """ Adds a cell to the Spreadsheet. Either the cell argument can be specified, or any combination of the other arguments. :param address: the address of the cell :param cell: a Cell object to add :param value: (optional) a new value for the cell. In this case, the first argument cell is processed as an address. :param formula: """ if type(cell) != Cell: cell = Cell(cell, None, value=value, formula=None, is_range=False, is_named_range=False) addr = cell.address() if addr in self.cellmap: raise Exception('Cell %s already in cellmap' % addr) cellmap, G = graph_from_seeds([cell], self) self.cellmap = cellmap self.G = G print( "Graph construction updated, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()), len(G.edges()), len(cellmap)))
def cell_set_value(self, address, val): """ Set the value of a cell :param address: the address of a cell :param value: the new value """ self.reset_buffer = set() try: address = address.replace('$', '') address = address.replace("'", '') cell = self.cellmap[address] # when you set a value on cell, its should_eval flag is set to 'never' so its formula is not used until set free again => sp.activate_formula() self.cell_fix(address) # case where the address refers to a range if cell.is_range: cells_to_set = [] if not isinstance(val, list): val = [val] * len(cells_to_set) self.cell_reset(cell) cell.range.values = val # case where the address refers to a single value else: if address in self.named_ranges: # if the cell is a named range, we need to update and fix the reference cell ref_address = self.named_ranges[address] if ref_address in self.cellmap: ref_cell = self.cellmap[ref_address] else: ref_cell = Cell(ref_address, None, value=val, formula=None, is_range=False, is_named_range=False) self.cell_add(ref_cell) ref_cell.value = val if cell.value != val: if cell.value is None: cell.value = 'notNone' # hack to avoid the direct return in reset() when value is None # reset the node + its dependencies self.cell_reset(cell) # set the value cell.value = val for vol in self.pointers_to_reset: # reset all pointers self.cell_reset(self.cellmap[vol]) except KeyError: raise Exception('Cell %s not in cellmap' % address)
def set_value(self, address, val): self.reset_buffer = set() if address not in self.cellmap: raise Exception("Address not present in graph.") address = address.replace('$', '') cell = self.cellmap[address] # when you set a value on cell, its should_eval flag is set to 'never' so its formula is not used until set free again => sp.activate_formula() self.fix_cell(address) # case where the address refers to a range if self.cellmap[address].is_range: cells_to_set = [] # for a in self.cellmap[address].range.addresses: # if a in self.cellmap: # cells_to_set.append(self.cellmap[a]) # self.fix_cell(a) if type(val) != list: val = [val] * len(cells_to_set) self.reset(cell) cell.range.values = val # case where the address refers to a single value else: if address in self.named_ranges: # if the cell is a named range, we need to update and fix the reference cell ref_address = self.named_ranges[address] if ref_address in self.cellmap: ref_cell = self.cellmap[ref_address] else: ref_cell = Cell(ref_address, None, value=val, formula=None, is_range=False, is_named_range=False) self.add_cell(ref_cell) # self.fix_cell(ref_address) ref_cell.value = val if cell.value != val: if cell.value is None: cell.value = 'notNone' # hack to avoid the direct return in reset() when value is None # reset the node + its dependencies self.reset(cell) # set the value cell.value = val for vol in self.pointers_to_reset: # reset all pointers self.reset(self.cellmap[vol])
def cell_from_dict(d): cell_is_range = type(d["value"]) == dict if cell_is_range: range = d["value"] if len(range["values"]) == 0: range["values"] = [None] * len(range["cells"]) value = RangeCore(range["cells"], range["values"], nrows = range["nrows"], ncols = range["ncols"]) else: value = d["value"] new_cell = Cell(d["address"], None, value=value, formula=d["formula"], is_range = cell_is_range, is_named_range=d["is_named_range"], should_eval=d["should_eval"]) new_cell.python_expression = d["python_expression"] new_cell.compile() return {"id": new_cell}
def set_value(self, address, val): self.reset_buffer = set() if address not in self.cellmap: raise Exception("Address not present in graph.") address = address.replace('$','') cell = self.cellmap[address] # when you set a value on cell, its should_eval flag is set to 'never' so its formula is not used until set free again => sp.activate_formula() self.fix_cell(address) # case where the address refers to a range if self.cellmap[address].is_range: cells_to_set = [] # for a in self.cellmap[address].range.addresses: # if a in self.cellmap: # cells_to_set.append(self.cellmap[a]) # self.fix_cell(a) if type(val) != list: val = [val]*len(cells_to_set) self.reset(cell) cell.range.values = val # case where the address refers to a single value else: if address in self.named_ranges: # if the cell is a named range, we need to update and fix the reference cell ref_address = self.named_ranges[address] if ref_address in self.cellmap: ref_cell = self.cellmap[ref_address] else: ref_cell = Cell(ref_address, None, value = val, formula = None, is_range = False, is_named_range = False ) self.add_cell(ref_cell) # self.fix_cell(ref_address) ref_cell.value = val if cell.value != val: if cell.value is None: cell.value = 'notNone' # hack to avoid the direct return in reset() when value is None # reset the node + its dependencies self.reset(cell) # set the value cell.value = val for vol_range in self.volatiles: # reset all volatiles self.reset(self.cellmap[vol_range])
def add_cell(self, cell, value = None): if type(cell) != Cell: cell = Cell(cell, None, value = value, formula = None, is_range = False, is_named_range = False) addr = cell.address() if addr in self.cellmap: raise Exception('Cell %s already in cellmap' % addr) cellmap, G = graph_from_seeds([cell], self) self.cellmap = cellmap self.G = G print "Graph construction updated, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap))
def add_cell(self, cell, value=None): if type(cell) != Cell: cell = Cell(cell, None, value=value, formula=None, is_range=False, is_named_range=False) addr = cell.address() if addr in self.cellmap: raise Exception('Cell %s already in cellmap' % addr) cellmap, G = graph_from_seeds([cell], self) self.cellmap = cellmap self.G = G print "Graph construction updated, %s nodes, %s edges, %s cellmap entries" % ( len(G.nodes()), len(G.edges()), len(cellmap))
def graph_from_seeds(seeds, cell_source): """ This creates/updates a networkx graph from a list of cells. The graph is created when the cell_source is an instance of ExcelCompiler The graph is updated when the cell_source is an instance of Spreadsheet """ # when called from Spreadsheet instance, use the Spreadsheet cellmap and graph if hasattr(cell_source, 'G'): # ~ cell_source is a Spreadsheet cellmap = cell_source.cellmap cells = cellmap G = cell_source.G for c in seeds: G.add_node(c) cellmap[c.address()] = c # when called from ExcelCompiler instance, construct cellmap and graph from seeds else: # ~ cell_source is a ExcelCompiler cellmap = dict([(x.address(), x) for x in seeds]) cells = cell_source.cells # directed graph G = networkx.DiGraph() # match the info in cellmap for c in cellmap.values(): G.add_node(c) # cells to analyze: only formulas todo = [s for s in seeds if s.formula] steps = [i for i, s in enumerate(todo)] names = cell_source.named_ranges while todo: c1 = todo.pop() step = steps.pop() cursheet = c1.sheet ###### 1) looking for cell c1 dependencies #################### # print 'C1', c1.address() # in case a formula, get all cells that are arguments pystr, ast = cell2code(c1, names) # set the code & compile it (will flag problems sooner rather than later) c1.python_expression = pystr.replace('"', "'") # compilation is done later if 'OFFSET' in c1.formula or 'INDEX' in c1.formula: if c1.address( ) not in cell_source.named_ranges: # pointers names already treated in ExcelCompiler cell_source.pointers.add(c1.address()) # get all the cells/ranges this formula refers to deps = [x for x in ast.nodes() if isinstance(x, RangeNode)] # remove dupes deps = uniqueify(deps) ###### 2) connect dependencies in cells in graph #################### # ### LOG # tmp = [] # for dep in deps: # if dep not in names: # if "!" not in dep and cursheet != None: # dep = cursheet + "!" + dep # if dep not in cellmap: # tmp.append(dep) # #deps = tmp # logStep = "%s %s = %s " % ('|'*step, c1.address(), '',) # print logStep # if len(deps) > 1 and 'L' in deps[0] and deps[0] == deps[-1].replace('DG','L'): # print logStep, "[%s...%s]" % (deps[0], deps[-1]) # elif len(deps) > 0: # print logStep, "->", deps # else: # print logStep, "done" for dep in deps: dep_name = dep.tvalue.replace('$', '') # this is to avoid :A1 or A1: dep due to clean_pointers() returning an ExcelError if dep_name.startswith(':') or dep_name.endswith(':'): dep_name = dep_name.replace(':', '') # if not pointer, we need an absolute address if dep.tsubtype != 'pointer' and dep_name not in names and "!" not in dep_name and cursheet != None: dep_name = cursheet + "!" + dep_name # Named_ranges + ranges already parsed (previous iterations) if dep_name in cellmap: origins = [cellmap[dep_name]] target = cellmap[c1.address()] # if the dep_name is a multi-cell range, create a range object elif is_range(dep_name) or (dep_name in names and is_range(names[dep_name])): if dep_name in names: reference = names[dep_name] else: reference = dep_name if 'OFFSET' in reference or 'INDEX' in reference: start_end = prepare_pointer(reference, names, ref_cell=c1) rng = cell_source.range(start_end) if dep_name in names: # dep is a pointer range address = dep_name else: if c1.address( ) in names: # c1 holds is a pointer range address = c1.address() else: # a pointer range with no name, its address will be its name address = '%s:%s' % (start_end["start"], start_end["end"]) cell_source.pointers.add(address) else: address = dep_name # get a list of the addresses in this range that are not yet in the graph range_addresses = list( resolve_range(reference, should_flatten=True)[0]) cellmap_add_addresses = [ addr for addr in range_addresses if addr not in cellmap.keys() ] if len(cellmap_add_addresses) > 0: # this means there are cells to be added # get row and col dimensions for the sheet, assuming the whole range is in one sheet sheet_initial = split_address( cellmap_add_addresses[0])[0] max_rows, max_cols = max_dimension( cellmap, sheet_initial) # create empty cells that aren't in the cellmap for addr in cellmap_add_addresses: sheet_new, col_new, row_new = split_address(addr) # if somehow a new sheet comes up in the range, get the new dimensions if sheet_new != sheet_initial: sheet_initial = sheet_new max_rows, max_cols = max_dimension( cellmap, sheet_new) # add the empty cells if int(row_new) <= max_rows and int( col2num(col_new)) <= max_cols: # only add cells within the maximum bounds of the sheet to avoid too many evaluations # for A:A or 1:1 ranges cell_new = Cell(addr, sheet_new, value="", should_eval='False' ) # create new cell object cellmap[ addr] = cell_new # add it to the cellmap G.add_node(cell_new) # add it to the graph cell_source.cells[ addr] = cell_new # add it to the cell_source, used in this function rng = cell_source.range(reference) if address in cellmap: virtual_cell = cellmap[address] else: virtual_cell = Cell(address, None, value=rng, formula=reference, is_range=True, is_named_range=True) # save the range cellmap[address] = virtual_cell # add an edge from the range to the parent G.add_node(virtual_cell) # Cell(A1:A10) -> c1 or Cell(ExampleName) -> c1 G.add_edge(virtual_cell, c1) # cells in the range should point to the range as their parent target = virtual_cell origins = [] if len( list(rng.keys()) ) != 0: # could be better, but can't check on Exception types here... for child in rng.addresses: if child not in cellmap: origins.append(cells[child]) else: origins.append(cellmap[child]) else: # not a range if dep_name in names: reference = names[dep_name] else: reference = dep_name if reference in cells: if dep_name in names: virtual_cell = Cell(dep_name, None, value=cells[reference].value, formula=reference, is_range=False, is_named_range=True) G.add_node(virtual_cell) G.add_edge(cells[reference], virtual_cell) origins = [virtual_cell] else: cell = cells[reference] origins = [cell] cell = origins[0] if cell.formula is not None and ('OFFSET' in cell.formula or 'INDEX' in cell.formula): cell_source.pointers.add(cell.address()) else: virtual_cell = Cell(dep_name, None, value=None, formula=None, is_range=False, is_named_range=True) origins = [virtual_cell] target = c1 # process each cell for c2 in flatten(origins): # if we havent treated this cell allready if c2.address() not in cellmap: if c2.formula: # cell with a formula, needs to be added to the todo list todo.append(c2) steps.append(step + 1) else: # constant cell, no need for further processing, just remember to set the code pystr, ast = cell2code(c2, names) c2.python_expression = pystr c2.compile() # save in the cellmap cellmap[c2.address()] = c2 # add to the graph G.add_node(c2) # add an edge from the cell to the parent (range or cell) if (target != []): # print "Adding edge %s --> %s" % (c2.address(), target.address()) G.add_edge(c2, target) c1.compile( ) # cell compilation is done here because pointer ranges might update python_expressions return (cellmap, G)
def load(fname): def clean_bool(string): if string == "0": return None else: return string def to_bool(string): if string == "1" or string == "True": return True elif string == "0" or string == "False": return False else: return string def to_float(string): if string == "None": return None try: return float(string) except: return string mode = "node0" nodes = [] edges = [] volatiles = set() outputs = None inputs = None named_ranges = {} infile = gzip.GzipFile(fname, 'r') for line in infile.read().splitlines(): if line == "====": mode = "node0" continue if line == "-----": cellmap_temp = {n.address(): n for n in nodes} Range = RangeFactory(cellmap_temp) mode = "node0" continue elif line == "edges": cellmap = {n.address(): n for n in nodes} mode = "edges" continue elif line == "outputs": mode = "outputs" continue elif line == "inputs": mode = "inputs" continue elif line == "named_ranges": mode = "named_ranges" continue if mode == "node0": [address, formula, python_expression, is_range, is_named_range, is_volatile, should_eval] = line.split(SEP) formula = clean_bool(formula) python_expression = clean_bool(python_expression) is_range = to_bool(is_range) is_named_range = to_bool(is_named_range) is_volatile = to_bool(is_volatile) should_eval = should_eval mode = "node1" elif mode == "node1": if is_range: reference = json.loads(line) if is_volatile else line # in order to be able to parse dicts vv = Range(reference) if is_volatile: if not is_named_range: address = vv.name volatiles.add(address) cell = Cell(address, None, vv, formula, is_range, is_named_range, should_eval) cell.python_expression = python_expression nodes.append(cell) else: value = to_bool(to_float(line)) cell = Cell(address, None, value, formula, is_range, is_named_range, should_eval) cell.python_expression = python_expression if formula: if 'OFFSET' in formula or 'INDEX' in formula: volatiles.add(address) cell.compile() nodes.append(cell) elif mode == "edges": source, target = line.split(SEP) edges.append((cellmap[source], cellmap[target])) elif mode == "outputs": outputs = line.split(SEP) elif mode == "inputs": inputs = line.split(SEP) elif mode == "named_ranges": k,v = line.split(SEP) named_ranges[k] = v G = DiGraph(data = edges) print "Graph loading done, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap)) return (G, cellmap, named_ranges, volatiles, outputs, inputs)
def clean_pointer(self): print '___### Cleaning Pointers ###___' new_named_ranges = self.named_ranges.copy() new_cells = self.cellmap.copy() ### 1) create ranges for n in self.named_ranges: reference = self.named_ranges[n] if is_range(reference): if 'OFFSET' not in reference: my_range = self.Range(reference) self.cellmap[n] = Cell(n, None, value=my_range, formula=reference, is_range=True, is_named_range=True) else: self.cellmap[n] = Cell(n, None, value=None, formula=reference, is_range=False, is_named_range=True) else: if reference in self.cellmap: self.cellmap[n] = Cell(n, None, value=self.cellmap[reference].value, formula=reference, is_range=False, is_named_range=True) else: self.cellmap[n] = Cell(n, None, value=None, formula=reference, is_range=False, is_named_range=True) ### 2) gather all occurence of pointer functions in cells or named_range all_pointers = set() for pointer_name in self.pointer_to_remove: for k, v in self.named_ranges.items(): if pointer_name in v: all_pointers.add((v, k, None)) for k, cell in self.cellmap.items(): if cell.formula and pointer_name in cell.formula: all_pointers.add( (cell.formula, cell.address(), cell.sheet)) # print "%s %s to parse" % (str(len(all_pointers)), pointer_name) ### 3) evaluate all pointers for formula, address, sheet in all_pointers: if sheet: parsed = parse_cell_address(address) else: parsed = "" e = shunting_yard(formula, self.named_ranges, ref=parsed, tokenize_range=True) ast, root = build_ast(e) code = root.emit(ast) cell = {"formula": formula, "address": address, "sheet": sheet} replacements = self.eval_pointers_from_ast(ast, root, cell) new_formula = formula if type(replacements) == list: for repl in replacements: if type(repl["value"]) == ExcelError: if self.debug: print 'WARNING: Excel error found => replacing with #N/A' repl["value"] = "#N/A" if repl["expression_type"] == "value": new_formula = new_formula.replace( repl["formula"], str(repl["value"])) else: new_formula = new_formula.replace( repl["formula"], repl["value"]) else: new_formula = None if address in new_named_ranges: new_named_ranges[address] = new_formula else: old_cell = self.cellmap[address] new_cells[address] = Cell( old_cell.address(), old_cell.sheet, value=old_cell.value, formula=new_formula, is_range=old_cell.is_range, is_named_range=old_cell.is_named_range, should_eval=old_cell.should_eval) return new_cells, new_named_ranges
def prune_graph(self): print '___### Pruning Graph ###___' G = self.G # get all the cells impacted by inputs dependencies = set() for input_address in self.inputs: child = self.cellmap[input_address] if child == None: print "Not found ", input_address continue g = make_subgraph(G, child, "descending") dependencies = dependencies.union(g.nodes()) # print "%s cells depending on inputs" % str(len(dependencies)) # prune the graph and set all cell independent of input to const subgraph = networkx.DiGraph() new_cellmap = {} for output_address in self.outputs: new_cellmap[output_address] = self.cellmap[output_address] seed = self.cellmap[output_address] todo = map(lambda n: (seed, n), G.predecessors(seed)) done = set(todo) while len(todo) > 0: current, pred = todo.pop() # print "===========================" # print current.address(), pred.address() if current in dependencies: if pred in dependencies or isinstance( pred.value, RangeCore) or pred.is_named_range: subgraph.add_edge(pred, current) new_cellmap[pred.address()] = pred new_cellmap[current.address()] = current nexts = G.predecessors(pred) for n in nexts: if (pred, n) not in done: todo += [(pred, n)] done.add((pred, n)) else: if pred.address() not in new_cellmap: const_node = Cell( pred.address(), pred.sheet, value=pred.range if pred.is_range else pred.value, formula=None, is_range=isinstance(pred.range, RangeCore), is_named_range=pred.is_named_range, should_eval=pred.should_eval) # pystr,ast = cell2code(self.named_ranges, const_node, pred.sheet) # const_node.python_expression = pystr # const_node.compile() new_cellmap[pred.address()] = const_node const_node = new_cellmap[pred.address()] subgraph.add_edge(const_node, current) else: # case of range independant of input, we add all children as const if pred.address() not in new_cellmap: const_node = Cell( pred.address(), pred.sheet, value=pred.range if pred.is_range else pred.value, formula=None, is_range=pred.is_range, is_named_range=pred.is_named_range, should_eval=pred.should_eval) # pystr,ast = cell2code(self.named_ranges, const_node, pred.sheet) # const_node.python_expression = pystr # const_node.compile() new_cellmap[pred.address()] = const_node const_node = new_cellmap[pred.address()] subgraph.add_edge(const_node, current) print "Graph pruning done, %s nodes, %s edges, %s cellmap entries" % ( len(subgraph.nodes()), len(subgraph.edges()), len(new_cellmap)) undirected = networkx.Graph(subgraph) # print "Number of connected components %s", str(number_connected_components(undirected)) # print map(lambda x: x.address(), subgraph.nodes()) # add back inputs that have been pruned because they are outside of calculation chain for i in self.inputs: if i not in new_cellmap: if i in self.named_ranges: reference = self.named_ranges[i] if is_range(reference): rng = self.Range(reference) virtual_cell = Cell(i, None, value=rng, formula=reference, is_range=True, is_named_range=True) new_cellmap[i] = virtual_cell subgraph.add_node( virtual_cell ) # edges are not needed here since the input here is not in the calculation chain else: # might need to be changed to actual self.cells Cell, not a copy virtual_cell = Cell( i, None, value=self.cellmap[reference].value, formula=reference, is_range=False, is_named_range=True) new_cellmap[i] = virtual_cell subgraph.add_node( virtual_cell ) # edges are not needed here since the input here is not in the calculation chain else: if is_range(i): rng = self.Range(i) virtual_cell = Cell(i, None, value=rng, formula=o, is_range=True, is_named_range=True) new_cellmap[i] = virtual_cell subgraph.add_node( virtual_cell ) # edges are not needed here since the input here is not in the calculation chain else: new_cellmap[i] = self.cellmap[i] subgraph.add_node( self.cellmap[i] ) # edges are not needed here since the input here is not in the calculation chain return Spreadsheet(subgraph, new_cellmap, self.named_ranges, self.pointers, self.outputs, self.inputs, debug=self.debug)
def test_Modify_graph(self): self.sp.add_cell(Cell('Sheet1!A4', formula='A1 + 10')) self.sp.set_value('Sheet1!A1', 3) self.assertEqual(self.sp.evaluate('Sheet1!A4'), 13)
def read_cells(archive, ignore_sheets=[], ignore_hidden=False): global debug print('___### Reading Cells from XLSX ###___') cells = {} functions = set() cts = dict(read_content_types(archive)) strings_path = cts.get( SHARED_STRINGS ) # source: https://bitbucket.org/openpyxl/openpyxl/src/93604327bce7aac5e8270674579af76d390e09c0/openpyxl/reader/excel.py?at=default&fileviewer=file-view-default if strings_path is not None: if strings_path.startswith("/"): strings_path = strings_path[1:] shared_strings = read_string_table(archive.read(strings_path)) else: shared_strings = [] for sheet in detect_worksheets(archive): sheet_name = sheet['title'] function_map = {} if sheet_name in ignore_sheets: continue root = fromstring( archive.read(sheet['path']) ) # it is necessary to use cElementTree from xml module, otherwise root.findall doesn't work as it should hidden_cols = False nb_hidden = 0 if ignore_hidden: hidden_col_min = None hidden_col_max = None for col in root.findall('.//{%s}cols/*' % SHEET_MAIN_NS): if 'hidden' in col.attrib and col.attrib['hidden'] == '1': hidden_cols = True hidden_col_min = int(col.attrib['min']) hidden_col_max = int(col.attrib['max']) for c in root.findall('.//{%s}c/*/..' % SHEET_MAIN_NS): cell_data_type = c.get('t', 'n') # if no type assigned, assign 'number' cell_address = c.attrib['r'] skip = False if hidden_cols: found = re.search(CELL_REF_RE, cell_address) col = col2num(found.group(1)) if col >= hidden_col_min and col <= hidden_col_max: nb_hidden += 1 skip = True if not skip: cell = { 'a': '%s!%s' % (sheet_name, cell_address), 'f': None, 'v': None } if debug: print('Cell', cell['a']) for child in c: child_data_type = child.get( 't', 'n') # if no type assigned, assign 'number' if child.tag == '{%s}f' % SHEET_MAIN_NS: if 'ref' in child.attrib: # the first cell of a shared formula has a 'ref' attribute if debug: print( '*** Found definition of shared formula ***', child.text, child.attrib['ref']) if "si" in child.attrib: function_map[child.attrib['si']] = ( child.attrib['ref'], Translator(str('=' + child.text), cell_address) ) # translator of openpyxl needs a unicode argument that starts with '=' # else: # print "Encountered cell with ref but not si: ", sheet_name, child.attrib['ref'] if child_data_type == 'shared': if debug: print( '*** Found child %s of shared formula %s ***' % (cell_address, child.attrib['si'])) ref = function_map[child.attrib['si']][0] formula = function_map[child.attrib['si']][1] translated = formula.translate_formula( cell_address) cell['f'] = translated[ 1:] # we need to get rid of the '=' else: cell['f'] = child.text elif child.tag == '{%s}v' % SHEET_MAIN_NS: if cell_data_type == 's' or cell_data_type == 'str': # value is a string try: # if it fails, it means that cell content is a string calculated from a formula cell['v'] = shared_strings[int(child.text)] except: cell['v'] = child.text elif cell_data_type == 'b': cell['v'] = bool(int(child.text)) elif cell_data_type == 'n': cell['v'] = _cast_number(child.text) elif child.text is None: continue if cell['f'] is not None: pattern = re.compile(r"([A-Z][A-Z0-9]*)\(") found = re.findall(pattern, cell['f']) map(lambda x: functions.add(x), found) if cell['f'] is not None or cell['v'] is not None: should_eval = 'always' if cell[ 'f'] is not None and 'OFFSET' in cell['f'] else 'normal' # cleaned_formula = cell['f'] cleaned_formula = cell['f'].replace( ", ", ",") if cell['f'] is not None else None if "!" in cell_address: cells[cell_address] = Cell(cell_address, sheet_name, value=cell['v'], formula=cleaned_formula, should_eval=should_eval) else: cells[sheet_name + "!" + cell_address] = Cell( cell_address, sheet_name, value=cell['v'], formula=cleaned_formula, should_eval=should_eval) if nb_hidden > 0: print('Ignored %i hidden cells in sheet %s' % (nb_hidden, sheet_name)) print('Nb of different functions %i' % len(functions)) print(functions) for f in functions: if f not in existing: print('== Missing function: %s' % f) return cells
def load(fname): def clean_bool(string): if string == "0": return None else: return string def to_bool(string): if string == "1" or string == "True": return True elif string == "0" or string == "False": return False else: return string def to_float(string): if string == "None": return None try: return float(string) except: return string mode = "node0" nodes = [] edges = [] pointers = set() outputs = None inputs = None named_ranges = {} infile = gzip.GzipFile(fname, 'r') for line in infile.read().splitlines(): if line == "====": mode = "node0" continue if line == "-----": cellmap_temp = {n.address(): n for n in nodes} Range = RangeFactory(cellmap_temp) mode = "node0" continue elif line == "edges": cellmap = {n.address(): n for n in nodes} mode = "edges" continue elif line == "outputs": mode = "outputs" continue elif line == "inputs": mode = "inputs" continue elif line == "named_ranges": mode = "named_ranges" continue if mode == "node0": [ address, formula, python_expression, is_range, is_named_range, is_pointer, should_eval ] = line.split(SEP) formula = clean_bool(formula) python_expression = clean_bool(python_expression) is_range = to_bool(is_range) is_named_range = to_bool(is_named_range) is_pointer = to_bool(is_pointer) should_eval = should_eval mode = "node1" elif mode == "node1": if is_range: reference = json.loads( line ) if is_pointer else line # in order to be able to parse dicts vv = Range(reference) if is_pointer: if not is_named_range: address = vv.name pointers.add(address) cell = Cell(address, None, vv, formula, is_range, is_named_range, should_eval) cell.python_expression = python_expression nodes.append(cell) else: value = to_bool(to_float(line)) cell = Cell(address, None, value, formula, is_range, is_named_range, should_eval) cell.python_expression = python_expression if formula: if 'OFFSET' in formula or 'INDEX' in formula: pointers.add(address) cell.compile() nodes.append(cell) elif mode == "edges": source, target = line.split(SEP) edges.append((cellmap[source], cellmap[target])) elif mode == "outputs": outputs = line.split(SEP) elif mode == "inputs": inputs = line.split(SEP) elif mode == "named_ranges": k, v = line.split(SEP) named_ranges[k] = v G = DiGraph(data=edges) print "Graph loading done, %s nodes, %s edges, %s cellmap entries" % (len( G.nodes()), len(G.edges()), len(cellmap)) return (G, cellmap, named_ranges, pointers, outputs, inputs)
def gen_graph(self, outputs=[], inputs=[]): """ Generate the contents of the Spreadsheet from the read cells in the binary files. Specifically this function generates the graph. :param outputs: can be used to specify the outputs. All not affected cells are removed from the graph. :param inputs: can be used to specify the inputs. All not affected cells are removed from the graph. """ # print('___### Generating Graph ###___') if len(outputs) == 0: preseeds = set( list(flatten(self.cellmap.keys())) + list(self.named_ranges.keys())) # to have unicity else: preseeds = set(outputs) preseeds = list(preseeds) # to be able to modify the list seeds = [] for o in preseeds: if o in self.named_ranges: reference = self.named_ranges[o] if is_range(reference): if 'OFFSET' in reference or 'INDEX' in reference: start_end = prepare_pointer(reference, self.named_ranges) rng = self.range(start_end) self.pointers.add(o) else: rng = self.range(reference) for address in rng.addresses: # this is avoid pruning deletion preseeds.append(address) virtual_cell = Cell(o, None, value=rng, formula=reference, is_range=True, is_named_range=True) seeds.append(virtual_cell) else: # might need to be changed to actual self.cells Cell, not a copy if 'OFFSET' in reference or 'INDEX' in reference: self.pointers.add(o) value = self.cellmap[ reference].value if reference in self.cellmap else None virtual_cell = Cell(o, None, value=value, formula=reference, is_range=False, is_named_range=True) seeds.append(virtual_cell) else: if is_range(o): rng = self.range(o) for address in rng.addresses: # this is avoid pruning deletion preseeds.append(address) virtual_cell = Cell(o, None, value=rng, formula=o, is_range=True, is_named_range=True) seeds.append(virtual_cell) else: seeds.append(self.cellmap[o]) seeds = set(seeds) # print("Seeds %s cells" % len(seeds)) outputs = set(preseeds) if len(outputs) > 0 else [ ] # seeds and outputs are the same when you don't specify outputs cellmap, G = graph_from_seeds(seeds, self) if len( inputs ) != 0: # otherwise, we'll set inputs to cellmap inside Spreadsheet inputs = list(set(inputs)) # add inputs that are outside of calculation chain for i in inputs: if i not in cellmap: if i in self.named_ranges: reference = self.named_ranges[i] if is_range(reference): rng = self.range(reference) for address in rng.addresses: # this is avoid pruning deletion inputs.append(address) virtual_cell = Cell(i, None, value=rng, formula=reference, is_range=True, is_named_range=True) cellmap[i] = virtual_cell G.add_node( virtual_cell ) # edges are not needed here since the input here is not in the calculation chain else: # might need to be changed to actual self.cells Cell, not a copy virtual_cell = Cell( i, None, value=self.cellmap[reference].value, formula=reference, is_range=False, is_named_range=True) cellmap[i] = virtual_cell G.add_node( virtual_cell ) # edges are not needed here since the input here is not in the calculation chain else: if is_range(i): rng = self.range(i) for address in rng.addresses: # this is avoid pruning deletion inputs.append(address) virtual_cell = Cell(i, None, value=rng, formula=o, is_range=True, is_named_range=True) cellmap[i] = virtual_cell G.add_node( virtual_cell ) # edges are not needed here since the input here is not in the calculation chain else: cellmap[i] = self.cellmap[i] G.add_node( self.cellmap[i] ) # edges are not needed here since the input here is not in the calculation chain inputs = set(inputs) # print("Graph construction done, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap))) # undirected = networkx.Graph(G) # print "Number of connected components %s", str(number_connected_components(undirected)) return Spreadsheet(G, cellmap, self.named_ranges, pointers=self.pointers, outputs=outputs, inputs=inputs, debug=self.debug, excel_compiler=False)
def cell_from_dict(d): return Cell.from_dict(d, cellmap=cellmap)