Beispiel #1
0
def do_compilation(filename, seed, sheet=None):
    excel = ExcelComWrapper(filename, app=xl_app())
    c = ExcelCompiler(filename=filename, excel=excel)
    sp = c.gen_graph(seed, sheet=sheet)
    sp.save_to_file(filename + ".pickle")
    sp.export_to_gexf(filename + ".gexf")
    return sp
Beispiel #2
0
    def __init__(self, filename=None, excel=None, *args,**kwargs):

        super(ExcelCompiler,self).__init__()
        self.filename = filename
        
        if excel:
            # if we are running as an excel addin, this gets passed to us
            self.excel = excel
        else:
            # TODO: use a proper interface so we can (eventually) support loading from file (much faster)  Still need to find a good lib though.
            self.excel = ExcelWrapperImpl(filename=filename)
            self.excel.connect()
            
        self.log = logging.getLogger("decode.{0}".format(self.__class__.__name__))
Beispiel #3
0
    def __init__(self, filename=None, excel=None, *args,**kwargs):

        super(ExcelCompiler,self).__init__()
        self.filename = filename
        
        if excel:
            # if we are running as an excel addin, this gets passed to us
            self.excel = excel
        else:
            # TODO: use a proper interface so we can (eventually) support loading from file (much faster)  Still need to find a good lib though.
            self.excel = ExcelWrapperImpl(filename=filename)
            self.excel.connect()
            
        self.log = logging.getLogger("decode.{0}".format(self.__class__.__name__))
Beispiel #4
0
class ExcelCompiler(object):
    """Class responsible for taking an Excel spreadsheet and compiling it to a Spreadsheet instance
       that can be serialized to disk, and executed independently of excel.
    """

    def __init__(self, filename=None, excel=None, *args,**kwargs):

        super(ExcelCompiler,self).__init__()
        self.filename = filename
        
        if excel:
            # if we are running as an excel addin, this gets passed to us
            self.excel = excel
        else:
            # TODO: use a proper interface so we can (eventually) support loading from file (much faster)  Still need to find a good lib though.
            self.excel = ExcelWrapperImpl(filename=filename)
            self.excel.connect()
            
        self.log = logging.getLogger("decode.{0}".format(self.__class__.__name__))
        
    def cell2code(self,cell):
        """Generate python code for the given cell"""
        if cell.formula:
            e = shunting_yard(cell.formula or str(cell.value))
            ast,root = build_ast(e)
            code = root.emit(ast,context=Context(cell,self.excel))
        else:
            ast = None
            code = str('"' + cell.value + '"' if isinstance(cell.value,unicode) else cell.value)
            
        return code,ast

    def add_node_to_graph(self,G, n):
        G.add_node(n)
        G.node[n]['sheet'] = n.sheet
        
        if isinstance(n,Cell):
            G.node[n]['label'] = n.col + str(n.row)
        else:
            #strip the sheet
            G.node[n]['label'] = n.address()[n.address().find('!')+1:]
            
    def gen_graph(self, seed, sheet=None):
        """Given a starting point (e.g., A6, or A3:B7) on a particular sheet, generate
           a Spreadsheet instance that captures the logic and control flow of the equations."""

        # starting points
        cursheet = sheet if sheet else self.excel.get_active_sheet()
        self.excel.set_sheet(cursheet)
        
        seeds, nr, nc = Cell.make_cells(self.excel, seed, sheet=cursheet) # no need to output nr and nc here, since seed can be a list of unlinked cells
        seeds = list(flatten(seeds))
        
        print "Seed %s expanded into %s cells" % (seed,len(seeds))
        
        # only keep seeds with formulas or numbers
        seeds = [s for s in seeds if s.formula or isinstance(s.value,(int,float))]

        print "%s filtered seeds " % len(seeds)
        
        # cells to analyze: only formulas
        todo = [s for s in seeds if s.formula]

        print "%s cells on the todo list" % len(todo)

        # map of all cells
        cellmap = dict([(x.address(),x) for x in seeds])
        
        # directed graph
        G = nx.DiGraph()

        # match the info in cellmap
        for c in cellmap.itervalues(): self.add_node_to_graph(G, c)

        while todo:
            c1 = todo.pop()
            
            print "Handling ", c1.address()
            
            # set the current sheet so relative addresses resolve properly
            if c1.sheet != cursheet:
                cursheet = c1.sheet
                self.excel.set_sheet(cursheet)
            
            # parse the formula into code
            pystr,ast = self.cell2code(c1)

            # set the code & compile it (will flag problems sooner rather than later)
            c1.python_expression = pystr
            c1.compile()                
            
            # get all the cells/ranges this formula refers to
            deps = [x.tvalue.replace('$','') for x in ast.nodes() if isinstance(x,RangeNode)]
            
            # remove dupes
            deps = uniqueify(deps)
            
            for dep in deps:
                
                # if the dependency is a multi-cell range, create a range object
                if is_range(dep):
                    # this will make sure we always have an absolute address
                    rng = CellRange(dep,sheet=cursheet)
                    
                    if rng.address() in cellmap:
                        # already dealt with this range
                        # add an edge from the range to the parent
                        G.add_edge(cellmap[rng.address()],cellmap[c1.address()])
                        continue
                    else:
                        # turn into cell objects
                        cells,nrows,ncols = Cell.make_cells(self.excel,dep,sheet=cursheet)

                        # get the values so we can set the range value
                        if nrows == 1 or ncols == 1:
                            rng.value = [c.value for c in cells]
                        else:
                            rng.value = [ [c.value for c in cells[i]] for i in range(len(cells)) ] 

                        # save the range
                        cellmap[rng.address()] = rng
                        # add an edge from the range to the parent
                        self.add_node_to_graph(G, rng)
                        G.add_edge(rng,cellmap[c1.address()])
                        # cells in the range should point to the range as their parent
                        target = rng
                else:
                    # not a range, create the cell object
                    cells = [Cell.resolve_cell(self.excel, dep, sheet=cursheet)]
                    target = cellmap[c1.address()]

                # process each cell                    
                for c2 in flatten(cells):
                    # if we havent treated this cell allready
                    if c2.address() not in cellmap:
                        if c2.formula:
                            # cell with a formula, needs to be added to the todo list
                            todo.append(c2)
                            #print "appended ", c2.address()
                        else:
                            # constant cell, no need for further processing, just remember to set the code
                            pystr,ast = self.cell2code(c2)
                            c2.python_expression = pystr
                            c2.compile()     
                            #print "skipped ", c2.address()
                        
                        # save in the cellmap
                        cellmap[c2.address()] = c2
                        # add to the graph
                        self.add_node_to_graph(G, c2)
                        
                    # add an edge from the cell to the parent (range or cell)
                    G.add_edge(cellmap[c2.address()],target)
            
        print "Graph construction done, %s nodes, %s edges, %s cellmap entries" % (len(G.nodes()),len(G.edges()),len(cellmap))

        sp = Spreadsheet(G,cellmap)
        
        return sp
Beispiel #5
0
class ExcelCompiler(object):
    """Class responsible for taking an Excel spreadsheet and compiling it to a Spreadsheet instance
       that can be serialized to disk, and executed independently of excel.
    """
    def __init__(self, filename=None, excel=None, *args, **kwargs):

        super(ExcelCompiler, self).__init__()
        self.filename = filename

        if excel:
            # if we are running as an excel addin, this gets passed to us
            self.excel = excel
        else:
            # TODO: use a proper interface so we can (eventually) support loading from file (much faster)  Still need to find a good lib though.
            self.excel = ExcelWrapperImpl(filename=filename)
            self.excel.connect()
            self.names = dict(
                map(lambda x: (x[1], x[2].replace("$", "")),
                    self.excel.rangednames))

        self.log = logging.getLogger("decode.{0}".format(
            self.__class__.__name__))

    def cell2code(self, cell):
        """Generate python code for the given cell"""
        if cell.formula:
            e = shunting_yard(cell.formula or str(cell.value), self.names)
            ast, root = build_ast(e)
            code = root.emit(ast, context=Context(cell, self.excel))
        else:
            ast = None
            code = str('"' + cell.value +
                       '"' if isinstance(cell.value, unicode) else cell.value)
        return code, ast

    def add_node_to_graph(self, G, n):
        G.add_node(n)
        G.node[n]['sheet'] = n.sheet

        if isinstance(n, Cell):
            G.node[n]['label'] = n.col + str(n.row)
        else:
            #strip the sheet
            G.node[n]['label'] = n.address()[n.address().find('!') + 1:]

    def gen_graph(self, seed, sheet=None):
        """Given a starting point (e.g., A6, or A3:B7) on a particular sheet, generate
           a Spreadsheet instance that captures the logic and control flow of the equations."""

        # starting points
        cursheet = sheet if sheet else self.excel.get_active_sheet()
        self.excel.set_sheet(cursheet)

        seeds, nr, nc = Cell.make_cells(self.excel, seed, sheet=cursheet)
        seeds = list(flatten(seeds))

        print "Seed %s expanded into %s cells" % (seed, len(seeds))

        # only keep seeds with formulas or numbers
        seeds = [
            s for s in seeds if s.formula or isinstance(s.value, (int, float))
        ]

        print "%s filtered seeds " % len(seeds)

        # cells to analyze: only formulas
        todo = [s for s in seeds if s.formula]

        print "%s cells on the todo list" % len(todo)

        # map of all cells
        cellmap = dict([(x.address(), x) for x in seeds])

        # directed graph
        G = nx.DiGraph()

        # match the info in cellmap
        for c in cellmap.itervalues():
            self.add_node_to_graph(G, c)

        while todo:
            c1 = todo.pop()

            print "Handling ", c1.address()
            print c1.formula

            # set the current sheet so relative addresses resolve properly
            if c1.sheet != cursheet:
                cursheet = c1.sheet
                self.excel.set_sheet(cursheet)

            # parse the formula into code
            pystr, ast = self.cell2code(c1)

            # set the code & compile it (will flag problems sooner rather than later)
            c1.python_expression = pystr
            c1.compile()
            print c1.python_expression

            # get all the cells/ranges this formula refers to
            deps = [
                x.tvalue.replace('$', '') for x in ast.nodes()
                if isinstance(x, RangeNode)
            ]

            # remove dupes
            deps = uniqueify(deps)

            for dep in deps:

                # if the dependency is a multi-cell range, create a range object
                if is_range(dep):
                    # this will make sure we always have an absolute address
                    rng = CellRange(dep, sheet=cursheet)

                    if rng.address() in cellmap:
                        # already dealt with this range
                        # add an edge from the range to the parent
                        G.add_edge(cellmap[rng.address()],
                                   cellmap[c1.address()])
                        continue
                    else:
                        # turn into cell objects
                        cells, nrows, ncols = Cell.make_cells(self.excel,
                                                              dep,
                                                              sheet=cursheet)

                        # get the values so we can set the range value
                        if nrows == 1 or ncols == 1:
                            rng.value = [c.value for c in cells]
                        else:
                            rng.value = [[c.value for c in cells[i]]
                                         for i in range(len(cells))]

                        # save the range
                        cellmap[rng.address()] = rng
                        # add an edge from the range to the parent
                        self.add_node_to_graph(G, rng)
                        G.add_edge(rng, cellmap[c1.address()])
                        # cells in the range should point to the range as their parent
                        target = rng
                else:
                    # not a range, create the cell object
                    cells = [
                        Cell.resolve_cell(self.excel, dep, sheet=cursheet)
                    ]
                    target = cellmap[c1.address()]

                # process each cell
                for c2 in flatten(cells):
                    # if we havent treated this cell allready
                    if c2.address() not in cellmap:
                        if c2.formula:
                            # cell with a formula, needs to be added to the todo list
                            todo.append(c2)
                            #print "appended ", c2.address()
                        else:
                            # constant cell, no need for further processing, just remember to set the code
                            pystr, ast = self.cell2code(c2)
                            c2.python_expression = pystr
                            c2.compile()
                            #print "skipped ", c2.address()

                        # save in the cellmap
                        cellmap[c2.address()] = c2
                        # add to the graph
                        self.add_node_to_graph(G, c2)

                    # add an edge from the cell to the parent (range or cell)
                    G.add_edge(cellmap[c2.address()], target)

        print "Graph construction done, %s nodes, %s edges, %s cellmap entries" % (
            len(G.nodes()), len(G.edges()), len(cellmap))

        sp = Spreadsheet(G, cellmap)

        return sp
Beispiel #6
0
import os
import sys

dir = os.path.dirname(__file__)
path = os.path.join(dir, '../src')
sys.path.insert(0, path)

# We will choose our wrapper with os compatibility
try:
    import win32com.client
    import pythoncom
    from pycel.excelwrapper import ExcelComWrapper as ExcelWrapperImpl
except:
    print "Can\'t import win32com -> switch from Com to Openpyxl wrapping implementation"
    from pycel.excelwrapper import ExcelOpxWrapper as ExcelWrapperImpl

# RUN AT THE ROOT LEVEL
excel = ExcelWrapperImpl(os.path.join(dir, "../example/example.xlsx"))
excel.connect()


def simple_offset():
    formula = excel.get_formula(3, 4)
    parsed = excel.OffsetParser.parseOffsets(formula, excel.get_sheet().title)
    assert parsed[0] == '=Sheet1!B2'


simple_offset()