class ExcelCompiler(object):
    """Deprecated wrapper kept for backward compatibility with ``Spreadsheet``.

    Every public method emits a ``PendingDeprecationWarning`` and then
    delegates to the ``Spreadsheet`` instance stored in ``self.spreadsheet``.
    """

    def __init__(self, file, ignore_sheets=None, ignore_hidden=False, debug=False):
        """Build the wrapped ``Spreadsheet``.

        Args:
            file: Forwarded to ``Spreadsheet`` as ``file``.
            ignore_sheets: Sheets to skip; ``None`` (the default) means an
                empty list.
            ignore_hidden: Forwarded unchanged to ``Spreadsheet``.
            debug: Forwarded unchanged to ``Spreadsheet``.
        """
        warnings.warn(
            "The ExcelCompiler class will disappear in a future version. Please use Spreadsheet instead.",
            PendingDeprecationWarning,
            stacklevel=2)  # attribute the warning to the caller, not to this shim

        # A fresh list per call: a shared ``[]`` default would be reused by
        # every instance and could be mutated downstream.
        if ignore_sheets is None:
            ignore_sheets = []

        self.spreadsheet = Spreadsheet(file=file,
                                       ignore_sheets=ignore_sheets,
                                       ignore_hidden=ignore_hidden,
                                       debug=debug)

    def clean_pointer(self):
        """Warn about the deprecation, then call ``Spreadsheet.clean_pointer``."""
        warnings.warn(
            "The ExcelCompiler class will disappear in a future version. Please use Spreadsheet.clean_pointer instead.",
            PendingDeprecationWarning,
            stacklevel=2)
        self.spreadsheet.clean_pointer()

    def gen_graph(self, outputs=None, inputs=None):
        """Warn about the deprecation and return the wrapped ``Spreadsheet``.

        Args:
            outputs: Accepted for backward compatibility; not used.
            inputs: Accepted for backward compatibility; not used.

        Returns:
            The ``Spreadsheet`` instance created in ``__init__``.
        """
        warnings.warn(
            "The ExcelCompiler class will disappear in a future version. Please use Spreadsheet.gen_graph() instead. "
            "Please also note that this function is now included in the init of Spreadsheet and therefore it shouldn't "
            "be called as such anymore.",
            PendingDeprecationWarning,
            stacklevel=2)
        return self.spreadsheet
def clean_pointer(self):
    """Replace cells and named ranges with their pointer-cleaned versions.

    A throwaway ``Spreadsheet`` is built on an empty ``networkx.DiGraph`` from
    the current cells and named ranges. The pair returned by its
    ``clean_pointer()`` overwrites ``self.cells`` and ``self.named_ranges``,
    and ``self.pointers`` is reset to an empty set.
    """
    scratch = Spreadsheet(networkx.DiGraph(), self.cells, self.named_ranges, debug=self.debug)
    self.cells, self.named_ranges = scratch.clean_pointer()
    self.pointers = set()
def clean_pointer(self):
    """Run ``Spreadsheet.clean_pointer`` over the current cells and names.

    The temporary ``Spreadsheet`` wraps a new, empty ``networkx.DiGraph``.
    Its result is unpacked into ``self.cells`` and ``self.named_ranges``;
    afterwards ``self.pointers`` is an empty set.
    """
    helper = Spreadsheet(
        networkx.DiGraph(),
        self.cells,
        self.named_ranges,
        debug=self.debug,
    )
    outcome = helper.clean_pointer()

    # The helper hands back exactly two items: cells first, names second.
    new_cells, new_named_ranges = outcome
    self.cells = new_cells
    self.named_ranges = new_named_ranges
    self.pointers = set()
def clean_volatile(self, subset, orig_sp):
    """Replace cells and named ranges with the result of ``clean_volatile``.

    A temporary ``Spreadsheet`` is built from the graph (``G``), cell map and
    named ranges of ``orig_sp``. The pair returned by its
    ``clean_volatile(subset)`` overwrites ``self.cells`` and
    ``self.named_ranges``, and ``self.volatiles`` is reset to an empty set.

    Args:
        subset: Forwarded unchanged to ``Spreadsheet.clean_volatile``.
        orig_sp: Object exposing ``G``, ``cellmap`` and ``named_ranges``.
    """
    graph, cell_lookup, names = orig_sp.G, orig_sp.cellmap, orig_sp.named_ranges
    worker = Spreadsheet(graph, cell_lookup, names, debug=self.debug)
    self.cells, self.named_ranges = worker.clean_volatile(subset)
    self.volatiles = set()
def __init__(self, file, ignore_sheets=None, ignore_hidden=False, debug=False):
    """Warn about the deprecation and build the wrapped ``Spreadsheet``.

    Args:
        file: Forwarded to ``Spreadsheet`` as ``file``.
        ignore_sheets: Sheets to skip; ``None`` (the default) means an empty
            list.
        ignore_hidden: Forwarded unchanged to ``Spreadsheet``.
        debug: Forwarded unchanged to ``Spreadsheet``.
    """
    warnings.warn(
        "The ExcelCompiler class will disappear in a future version. Please use Spreadsheet instead.",
        PendingDeprecationWarning,
        stacklevel=2)  # attribute the warning to the caller, not to this shim

    # A fresh list per call: a shared ``[]`` default would be reused by every
    # instance and could be mutated downstream.
    if ignore_sheets is None:
        ignore_sheets = []

    self.spreadsheet = Spreadsheet(file=file,
                                   ignore_sheets=ignore_sheets,
                                   ignore_hidden=ignore_hidden,
                                   debug=debug)
def reload_koala(file_name, ignore_sheets=None, bootstrap_equations=None):
    """Load an Excel workbook into the module-level ``excel_compiler``.

    The workbook is read with ``Spreadsheet.from_file_name``, its pointers are
    cleaned with ``clean_pointer()``, and progress messages are printed.

    Args:
        file_name: Path of the workbook, forwarded to
            ``Spreadsheet.from_file_name``.
        ignore_sheets: Forwarded as ``ignore_sheets``; also echoed in the
            final message.
        bootstrap_equations: Currently unused by this function.
    """
    global excel_compiler

    print("Loading workbook")
    excel_compiler = Spreadsheet.from_file_name(file_name, ignore_sheets=ignore_sheets)
    excel_compiler.clean_pointer()

    # Wrap each value in a 1-tuple: with a bare ``%`` operand, a tuple of
    # sheet names would be spread over the format fields (raising TypeError,
    # or printing only its single element) instead of being shown as a whole.
    print("Workbook '%s' has been loaded." % (file_name,))
    print("Ignored worksheets %s" % (ignore_sheets,))
def __init__(self, file, ignore_sheets=None, ignore_hidden=False, debug=False):
    """Warn about the deprecation and load the workbook into ``self.sp``.

    Args:
        file: Path of the workbook; it is made absolute before being passed
            to ``Spreadsheet.from_file_name``.
        ignore_sheets: Sheets to skip; ``None`` (the default) means an empty
            list.
        ignore_hidden: Forwarded unchanged to ``Spreadsheet.from_file_name``.
        debug: Forwarded unchanged to ``Spreadsheet.from_file_name``.
    """
    warnings.warn(
        "The ExcelCompiler class will disappear in a future version. Please use Spreadsheet instead.",
        PendingDeprecationWarning,
        stacklevel=2)  # attribute the warning to the caller, not to this shim

    # A fresh list per call: a shared ``[]`` default would be reused by every
    # instance and could be mutated downstream.
    if ignore_sheets is None:
        ignore_sheets = []

    self.sp = Spreadsheet.from_file_name(os.path.abspath(file),
                                         ignore_sheets=ignore_sheets,
                                         ignore_hidden=ignore_hidden,
                                         debug=debug,
                                         excel_compiler=True)
def run_model(model_id, input_dict, output_names):
    """Evaluate a stored, pre-compiled model against the given inputs.

    The serialised model is read from ``compiled_models/<model_id>`` in the
    module-level ``bucket`` and loaded with ``Spreadsheet.load``. Every entry
    of ``input_dict`` is applied with ``set_value`` and every name in
    ``output_names`` is evaluated.

    Args:
        model_id: Identifier used to build the object key of the compiled
            model.
        input_dict: Mapping of name -> value applied to the loaded sheet.
        output_names: Iterable of names to evaluate.

    Returns:
        A dict mapping each requested name to its evaluated value, with
        ``ExcelError`` results converted to ``str``. If fetching the model
        raises ``botocore.exceptions.ClientError``, the error code string
        from the response is returned instead.
    """
    import tempfile

    try:
        compiled_bytes = bucket.Object(
            'compiled_models/{}'.format(model_id)).get()['Body'].read()
    except botocore.exceptions.ClientError as err:
        return err.response['Error']['Code']

    # HACK: the model is handed to Spreadsheet.load as a file path, so the
    # downloaded bytes are spooled to disk first.
    # TODO: update koala.Spreadsheet / koala.serialize to take the contents
    # directly.
    # mkstemp gives a unique name, so concurrent calls for the same model_id
    # cannot clobber each other and model_id never becomes part of a path.
    fd, temp_path = tempfile.mkstemp(prefix='temp_', suffix='.gzip')
    try:
        with os.fdopen(fd, 'wb') as f:
            f.write(compiled_bytes)

        sheet = Spreadsheet.load(temp_path)

        # .items() works on Python 2 and 3 alike (iteritems is Python 2 only).
        for name, value in input_dict.items():
            sheet.set_value(name, value)

        results = {}
        for name in output_names:
            output_value = sheet.evaluate(name)
            if isinstance(output_value, ExcelError):
                output_value = str(output_value)
            results[name] = output_value
    finally:
        # Always remove the spooled file, even when loading or evaluation fails.
        os.remove(temp_path)

    return results
def gen_graph(self, outputs=None, inputs=None):
    """Build the calculation graph and wrap it in a ``Spreadsheet``.

    Seeds are derived from ``outputs`` (or from every cell and named range
    when no outputs are given), the graph is built with ``graph_from_seeds``,
    and any requested input missing from the resulting cell map is added as
    an isolated node.

    Args:
        outputs: Addresses, ranges or named ranges to use as seeds. ``None``
            or empty means "all cells and all named ranges".
        inputs: Addresses, ranges or named ranges that must be present in the
            cell map. ``None`` or empty leaves the choice to ``Spreadsheet``.

    Returns:
        A ``Spreadsheet`` built from the graph, the cell map, the named
        ranges, the collected pointers and the resolved outputs/inputs.
    """
    outputs = [] if outputs is None else outputs
    inputs = [] if inputs is None else inputs

    print('___### Generating Graph ###___')

    if len(outputs) == 0:
        # set() removes duplicates; list() around keys() keeps this working
        # where keys() returns a view rather than a list.
        preseeds = set(list(flatten(self.cells.keys())) + list(self.named_ranges.keys()))
    else:
        preseeds = set(outputs)

    # A list, because the loop below appends to it while iterating so that
    # the addresses of every range get visited as well.
    preseeds = list(preseeds)

    seeds = []
    for o in preseeds:
        if o in self.named_ranges:
            reference = self.named_ranges[o]

            if is_range(reference):
                if 'OFFSET' in reference or 'INDEX' in reference:
                    start_end = prepare_pointer(reference, self.named_ranges)
                    rng = self.Range(start_end)
                    self.pointers.add(o)
                else:
                    rng = self.Range(reference)

                for address in rng.addresses:  # this is to avoid pruning deletion
                    preseeds.append(address)

                virtual_cell = Cell(o, None, value=rng, formula=reference,
                                    is_range=True, is_named_range=True)
                seeds.append(virtual_cell)
            else:
                # might need to be changed to actual self.cells Cell, not a copy
                if 'OFFSET' in reference or 'INDEX' in reference:
                    self.pointers.add(o)

                value = self.cells[reference].value if reference in self.cells else None
                virtual_cell = Cell(o, None, value=value, formula=reference,
                                    is_range=False, is_named_range=True)
                seeds.append(virtual_cell)
        else:
            if is_range(o):
                rng = self.Range(o)
                for address in rng.addresses:  # this is to avoid pruning deletion
                    preseeds.append(address)

                virtual_cell = Cell(o, None, value=rng, formula=o,
                                    is_range=True, is_named_range=True)
                seeds.append(virtual_cell)
            else:
                seeds.append(self.cells[o])

    seeds = set(seeds)
    print("Seeds %s cells" % len(seeds))

    # seeds and outputs are the same when you don't specify outputs
    outputs = set(preseeds) if len(outputs) > 0 else []

    cellmap, G = graph_from_seeds(seeds, self)

    if len(inputs) != 0:  # otherwise, we'll set inputs to cellmap inside Spreadsheet
        inputs = list(set(inputs))

        # Add inputs that are outside of the calculation chain. No edges are
        # needed for them, precisely because nothing in the chain uses them.
        # The list grows while iterating so range addresses are handled too.
        for i in inputs:
            if i in cellmap:
                continue

            if i in self.named_ranges:
                reference = self.named_ranges[i]
                if is_range(reference):
                    rng = self.Range(reference)
                    for address in rng.addresses:  # this is to avoid pruning deletion
                        inputs.append(address)

                    virtual_cell = Cell(i, None, value=rng, formula=reference,
                                        is_range=True, is_named_range=True)
                else:
                    # might need to be changed to actual self.cells Cell, not a copy
                    # Same guarded lookup as for the seeds: a reference that is
                    # not a known cell yields None instead of a KeyError.
                    value = self.cells[reference].value if reference in self.cells else None
                    virtual_cell = Cell(i, None, value=value, formula=reference,
                                        is_range=False, is_named_range=True)

                cellmap[i] = virtual_cell
                G.add_node(virtual_cell)
            elif is_range(i):
                rng = self.Range(i)
                for address in rng.addresses:  # this is to avoid pruning deletion
                    inputs.append(address)

                # The formula of this virtual cell is the input range itself
                # (i), not the leftover variable of the seed loop above.
                virtual_cell = Cell(i, None, value=rng, formula=i,
                                    is_range=True, is_named_range=True)
                cellmap[i] = virtual_cell
                G.add_node(virtual_cell)
            else:
                cellmap[i] = self.cells[i]
                G.add_node(self.cells[i])

        inputs = set(inputs)

    print("Graph construction done, %s nodes, %s edges, %s cellmap entries" % (
        len(G.nodes()), len(G.edges()), len(cellmap)))

    return Spreadsheet(G, cellmap, self.named_ranges,
                       pointers=self.pointers,
                       outputs=outputs,
                       inputs=inputs,
                       debug=self.debug)
from koala.ExcelCompiler import ExcelCompiler
from koala.Spreadsheet import Spreadsheet

# Example: compile a workbook into a graph, dump it to disk, load it back
# and evaluate a cell after changing one input.
filename = "./examples/basic.xlsx"
print(filename)

# The serialized graph lives next to the workbook, with "xlsx" swapped for "gzip".
archive = filename.replace("xlsx", "gzip")

# --- Graph generation ---
c = ExcelCompiler(filename)
sp = c.gen_graph()

# --- Graph serialization ---
print("Serializing to disk...")
sp.dump(archive)

# --- Graph loading ---
print("Reading from disk...")
sp = Spreadsheet.load(archive)

# --- Graph evaluation ---
sp.set_value('Sheet1!A1', 10)
new_d1 = sp.evaluate('Sheet1!D1')
print('New D1 value: %s' % str(new_d1))
from koala.ExcelCompiler import ExcelCompiler
from koala.Spreadsheet import Spreadsheet

# Example script: build the graph of a workbook, serialize it, reload it and
# evaluate one cell. Every print call takes a single argument, so the output
# is the same whether print is a statement or a function.

# Named ``filename`` rather than ``file`` so the builtin is not shadowed.
filename = "./examples/basic.xlsx"
print(filename)

### Graph Generation ###
c = ExcelCompiler(filename)
sp = c.gen_graph()

### Graph Serialization ###
print("Serializing to disk...")
sp.dump(filename.replace("xlsx", "gzip"))

### Graph Loading ###
print("Reading from disk...")
sp = Spreadsheet.load(filename.replace("xlsx", "gzip"))

### Graph Evaluation ###
sp.set_value('Sheet1!A1', 10)
print('New D1 value: %s' % str(sp.evaluate('Sheet1!D1')))