Beispiel #1
0
    def emit(self, ast, context=None, pointer=False):
        """Emit the Python source fragment for this cell/range reference.

        Args:
            ast: the formula graph; it is only handed to ``self.parent``,
                ``children``, ``find_special_function`` and
                ``has_ind_func_parent`` to locate this node.
            context: optional sheet name. ``context + "!"`` is prefixed to
                the address when no sheet could be split out of it.
            pointer: when truthy, the quoted address is returned as-is
                instead of being wrapped in an evaluation call.

        Returns:
            ``self.tvalue`` unchanged when it is an ``ExcelError``; otherwise
            a string that is either the quoted address, a ``resolve_range``
            call, or a ``self.eval_ref`` call (with or without ``ref``).
        """
        if isinstance(self.tvalue, ExcelError):
            if self.debug:
                print('WARNING: Excel Error Code found', self.tvalue)
            return self.tvalue

        is_a_named_range = self.tsubtype == "named_range"

        if is_a_named_range:
            my_str = '"%s"' % self.token.tvalue
        else:
            rng = self.tvalue.replace('$', '')
            sheet = context + "!" if context else ""
            is_a_range = is_range(rng)

            if self.tsubtype == 'pointer':
                my_str = '"' + rng + '"'
            else:
                if is_a_range:
                    sh, _, _ = split_range(rng)
                else:
                    try:
                        sh, _, _ = split_address(rng)
                    except Exception:
                        # Best effort: an address that cannot be split is
                        # still emitted, just without a sheet of its own.
                        # ``Exception`` (not a bare except) so that
                        # KeyboardInterrupt / SystemExit still propagate.
                        if self.debug:
                            print(
                                'WARNING: Unknown address: %s is not a cell/range reference, nor a named range'
                                % to_str(rng))
                        sh = None

                # Only fall back to the context sheet when the address does
                # not already carry one.
                if sh:
                    my_str = '"' + rng + '"'
                else:
                    my_str = '"' + sheet + rng + '"'

        parent = self.parent(ast)
        parent_name = parent.tvalue if parent is not None else None

        # Argument position only matters for these parents, so the sibling
        # list is fetched once and only for them.
        if parent_name in ('OFFSET', 'CHOOSE', 'INDEX', 'MATCH'):
            siblings = parent.children(ast)
        else:
            siblings = []

        def is_arg(position):
            # Bounds-checked so a parent with fewer arguments than expected
            # falls through to the generic handling instead of raising
            # IndexError.
            return len(siblings) > position and siblings[position] == self

        # Some formulas use the address itself and not its content: ":",
        # the first argument of OFFSET, and named ranges used as CHOOSE
        # alternatives (any argument but the first).
        uses_address = (
            pointer
            or parent_name == ':'
            or (parent_name == 'OFFSET' and is_arg(0))
            or (parent_name == 'CHOOSE' and not is_arg(0)
                and is_a_named_range)
        )
        if uses_address:
            return my_str

        # INDEX, first argument: eval_ref is not used here to avoid empty
        # cells (which are not included in Ranges).
        if parent_name == 'INDEX' and is_arg(0):
            if is_a_named_range:
                return 'resolve_range(self.named_ranges[%s])' % my_str
            return 'resolve_range(%s)' % my_str

        # These argument positions always get the ``ref`` form, even when
        # the special-function check below would otherwise drop it.
        if parent_name == 'OFFSET':
            keeps_ref = is_a_named_range and (is_arg(1) or is_arg(2))
        elif parent_name == 'INDEX':
            keeps_ref = is_a_named_range and (
                is_arg(1) or (len(siblings) == 3 and is_arg(2)))
        elif parent_name == 'MATCH':
            keeps_ref = is_arg(0) or (len(siblings) == 3 and is_arg(2))
        else:
            keeps_ref = False

        if not keeps_ref and (self.find_special_function(ast)
                              or self.has_ind_func_parent(ast)):
            return 'self.eval_ref(%s)' % my_str

        return 'self.eval_ref(%s, ref = %s)' % (my_str, to_str(self.ref))
Beispiel #2
0
    def emit(self, ast, context=None, pointer=False):
        """Emit the Python source fragment for this cell/range reference.

        Args:
            ast: the formula graph; it is only handed to ``self.parent``,
                ``children``, ``find_special_function`` and
                ``has_ind_func_parent`` to locate this node.
            context: optional sheet name. ``context + "!"`` is prefixed to
                the address when no sheet could be split out of it.
            pointer: when truthy, the quoted address is returned as-is
                instead of being wrapped in an evaluation call.

        Returns:
            ``self.tvalue`` unchanged when it is an ``ExcelError``; otherwise
            a string that is either the quoted address, a ``resolve_range``
            call, or a ``self.eval_ref`` call (with or without ``ref``).
        """
        if isinstance(self.tvalue, ExcelError):
            if self.debug:
                print('WARNING: Excel Error Code found', self.tvalue)
            return self.tvalue

        is_a_named_range = self.tsubtype == "named_range"

        if is_a_named_range:
            my_str = '"%s"' % self.token.tvalue
        else:
            rng = self.tvalue.replace('$', '')
            sheet = context + "!" if context else ""
            is_a_range = is_range(rng)

            if self.tsubtype == 'pointer':
                my_str = '"' + rng + '"'
            else:
                if is_a_range:
                    sh, _, _ = split_range(rng)
                else:
                    try:
                        sh, _, _ = split_address(rng)
                    except Exception:
                        # Best effort: an address that cannot be split is
                        # still emitted, just without a sheet of its own.
                        # ``Exception`` (not a bare except) so that
                        # KeyboardInterrupt / SystemExit still propagate.
                        if self.debug:
                            print('WARNING: Unknown address: %s is not a cell/range reference, nor a named range' % to_str(rng))
                        sh = None

                # Only fall back to the context sheet when the address does
                # not already carry one.
                if sh:
                    my_str = '"' + rng + '"'
                else:
                    my_str = '"' + sheet + rng + '"'

        parent = self.parent(ast)
        parent_name = parent.tvalue if parent is not None else None

        # Argument position only matters for these parents, so the sibling
        # list is fetched once and only for them.
        if parent_name in ('OFFSET', 'CHOOSE', 'INDEX', 'MATCH'):
            siblings = parent.children(ast)
        else:
            siblings = []

        def is_arg(position):
            # Bounds-checked so a parent with fewer arguments than expected
            # falls through to the generic handling instead of raising
            # IndexError.
            return len(siblings) > position and siblings[position] == self

        # Some formulas use the address itself and not its content: ":",
        # the first argument of OFFSET, and named ranges used as CHOOSE
        # alternatives (any argument but the first).
        uses_address = (
            pointer
            or parent_name == ':'
            or (parent_name == 'OFFSET' and is_arg(0))
            or (parent_name == 'CHOOSE' and not is_arg(0)
                and is_a_named_range)
        )
        if uses_address:
            return my_str

        # INDEX, first argument: eval_ref is not used here to avoid empty
        # cells (which are not included in Ranges).
        if parent_name == 'INDEX' and is_arg(0):
            if is_a_named_range:
                return 'resolve_range(self.named_ranges[%s])' % my_str
            return 'resolve_range(%s)' % my_str

        # These argument positions always get the ``ref`` form, even when
        # the special-function check below would otherwise drop it.
        if parent_name == 'OFFSET':
            keeps_ref = is_a_named_range and (is_arg(1) or is_arg(2))
        elif parent_name == 'INDEX':
            keeps_ref = is_a_named_range and (
                is_arg(1) or (len(siblings) == 3 and is_arg(2)))
        elif parent_name == 'MATCH':
            keeps_ref = is_arg(0) or (len(siblings) == 3 and is_arg(2))
        else:
            keeps_ref = False

        if not keeps_ref and (self.find_special_function(ast)
                              or self.has_ind_func_parent(ast)):
            return 'self.eval_ref(%s)' % my_str

        return 'self.eval_ref(%s, ref = %s)' % (my_str, to_str(self.ref))
Beispiel #3
0
def graph_from_seeds(seeds, cell_source):
    """
    Create or update a networkx graph from a list of seed cells.

    The graph is created when ``cell_source.excel_compiler`` is truthy
    (cell_source is an ExcelCompiler) and updated in place otherwise
    (cell_source is a Spreadsheet, whose ``cellmap`` and ``G`` are reused).

    Every seed holding a formula is translated with ``cell2code``; each
    ``RangeNode`` of the resulting AST is resolved to existing cells, to a
    virtual range cell or to a virtual named-range cell, and an edge
    ``dependency -> dependent`` is added. Formula cells discovered on the way
    are queued and processed the same way.

    Args:
        seeds: cells to start from. It is iterated more than once, so it
            must be a re-iterable collection.
        cell_source: object providing ``excel_compiler``, ``cellmap``,
            ``named_ranges``, ``pointers``, ``range()`` and, for the update
            case, ``G``.

    Returns:
        The tuple ``(cellmap, G)``.
    """

    if cell_source.excel_compiler:
        # ExcelCompiler: build a fresh cellmap and directed graph from seeds.
        cellmap = {x.address(): x for x in seeds}
        cells = cell_source.cellmap
        G = networkx.DiGraph()
        for c in cellmap.values():
            G.add_node(c)
    else:
        # Spreadsheet: extend the existing cellmap and graph in place.
        cellmap = cell_source.cellmap
        cells = cellmap
        G = cell_source.G
        for c in seeds:
            G.add_node(c)
            cellmap[c.address()] = c

    # Cells to analyze: only formulas.
    todo = [s for s in seeds if s.formula]
    # Parallel stack to ``todo``: the seed index for seeds, then the
    # discovery depth for queued cells. Pure bookkeeping.
    steps = list(range(len(todo)))
    names = cell_source.named_ranges

    while todo:
        c1 = todo.pop()
        step = steps.pop()
        cursheet = c1.sheet

        # ---- 1) look for the dependencies of c1 -------------------------
        pystr, ast = cell2code(c1, names)
        # Only the expression is stored here; compilation happens at the end
        # of the iteration.
        c1.python_expression = pystr.replace('"', "'")

        # Pointer names are already treated in ExcelCompiler.
        if (('OFFSET' in c1.formula or 'INDEX' in c1.formula)
                and c1.address() not in cell_source.named_ranges):
            cell_source.pointers.add(c1.address())

        # All the cells/ranges this formula refers to, without duplicates.
        deps = [x for x in ast.nodes() if isinstance(x, RangeNode)]
        deps = uniqueify(deps)

        # ---- 2) connect the dependencies in the graph -------------------
        for dep in deps:
            dep_name = dep.tvalue.replace('$', '')

            # Avoid ":A1" or "A1:" deps due to clean_pointers() returning an
            # ExcelError.
            if dep_name.startswith(':') or dep_name.endswith(':'):
                dep_name = dep_name.replace(':', '')

            # If not a pointer, we need an absolute address.
            if (dep.tsubtype != 'pointer' and dep_name not in names
                    and "!" not in dep_name and cursheet is not None):
                dep_name = cursheet + "!" + dep_name

            if dep_name in cellmap:
                # Named ranges + ranges already parsed (previous iterations).
                origins = [cellmap[dep_name]]
                target = cellmap[c1.address()]

            elif is_range(dep_name) or (dep_name in names
                                        and is_range(names[dep_name])):
                # Multi-cell range: represent it with a range object.
                reference = names[dep_name] if dep_name in names else dep_name

                if 'OFFSET' in reference or 'INDEX' in reference:
                    start_end = prepare_pointer(reference, names, ref_cell=c1)
                    rng = cell_source.range(start_end)

                    if dep_name in names:
                        # dep is a pointer range
                        address = dep_name
                    elif c1.address() in names:
                        # c1 holds a pointer range
                        address = c1.address()
                    else:
                        # A pointer range with no name: its address is its
                        # name.
                        address = '%s:%s' % (start_end["start"],
                                             start_end["end"])
                        cell_source.pointers.add(address)
                else:
                    address = dep_name

                    # Addresses of this range that are not yet in the graph.
                    range_addresses = list(
                        resolve_range(reference, should_flatten=True)[0])
                    cellmap_add_addresses = [
                        addr for addr in range_addresses
                        if addr not in cellmap
                    ]

                    if cellmap_add_addresses:
                        # Row and col dimensions for the sheet, assuming the
                        # whole range is in one sheet.
                        sheet_initial = split_address(
                            cellmap_add_addresses[0])[0]
                        max_rows, max_cols = max_dimension(
                            cellmap, sheet_initial)

                        # Create the empty cells missing from the cellmap.
                        for addr in cellmap_add_addresses:
                            sheet_new, col_new, row_new = split_address(addr)

                            # If somehow a new sheet comes up in the range,
                            # get the new dimensions.
                            if sheet_new != sheet_initial:
                                sheet_initial = sheet_new
                                max_rows, max_cols = max_dimension(
                                    cellmap, sheet_new)

                            # Only add cells within the maximum bounds of the
                            # sheet, to avoid too many evaluations for A:A or
                            # 1:1 ranges.
                            if (int(row_new) <= max_rows
                                    and int(col2num(col_new)) <= max_cols):
                                cell_new = Cell(addr,
                                                sheet_new,
                                                value="",
                                                should_eval='False')
                                cellmap[addr] = cell_new
                                G.add_node(cell_new)
                                # Also registered on the cell_source, which
                                # is read through ``cells`` below.
                                cell_source.cellmap[addr] = cell_new

                    rng = cell_source.range(reference)

                if address in cellmap:
                    virtual_cell = cellmap[address]
                else:
                    virtual_cell = Cell(address,
                                        None,
                                        value=rng,
                                        formula=reference,
                                        is_range=True,
                                        is_named_range=True)
                    # save the range
                    cellmap[address] = virtual_cell

                # Edge from the range to the parent:
                # Cell(A1:A10) -> c1 or Cell(ExampleName) -> c1
                G.add_node(virtual_cell)
                G.add_edge(virtual_cell, c1)
                # Cells in the range point to the range as their parent.
                target = virtual_cell
                origins = []

                # Could be better, but can't check on Exception types here.
                if list(rng.keys()):
                    origins = [
                        cellmap[child] if child in cellmap else cells[child]
                        for child in rng.addresses
                    ]

            else:
                # Not a range.
                reference = names[dep_name] if dep_name in names else dep_name

                if reference in cells:
                    if dep_name in names:
                        virtual_cell = Cell(dep_name,
                                            None,
                                            value=cells[reference].value,
                                            formula=reference,
                                            is_range=False,
                                            is_named_range=True)

                        G.add_node(virtual_cell)
                        G.add_edge(cells[reference], virtual_cell)

                        origins = [virtual_cell]
                    else:
                        origins = [cells[reference]]

                    cell = origins[0]

                    if cell.formula is not None and (
                            'OFFSET' in cell.formula
                            or 'INDEX' in cell.formula):
                        cell_source.pointers.add(cell.address())
                else:
                    virtual_cell = Cell(dep_name,
                                        None,
                                        value=None,
                                        formula=None,
                                        is_range=False,
                                        is_named_range=True)
                    origins = [virtual_cell]

                target = c1

            # Process each origin cell.
            for c2 in flatten(origins):

                # If we haven't treated this cell already.
                if c2.address() not in cellmap:
                    if c2.formula:
                        # Cell with a formula: queue it for processing.
                        todo.append(c2)
                        steps.append(step + 1)
                    else:
                        # Constant cell: no further processing, just set the
                        # code. Separate names are used so the formula's
                        # ``pystr``/``ast`` are not clobbered.
                        c2_code, _ = cell2code(c2, names)
                        c2.python_expression = c2_code
                        c2.compile()

                    # save in the cellmap
                    cellmap[c2.address()] = c2
                    # add to the graph
                    G.add_node(c2)

                # Edge from the cell to the parent (range or cell). Every
                # branch above assigns ``target``; the guard is retained
                # unchanged from the original.
                if target != []:
                    G.add_edge(c2, target)

        # Cell compilation is done here because pointer ranges might update
        # python_expressions.
        c1.compile()

    return (cellmap, G)