Ejemplo n.º 1
0
def graph_from_seeds(seeds, cell_source):
    """
    Create or update a networkx dependency graph from a list of seed cells.

    The graph is created when the cell_source is an instance of ExcelCompiler
    (it has no ``G`` attribute) and updated in place when the cell_source is
    an instance of Spreadsheet (it exposes ``G`` and ``cellmap``).

    Starting from the seeds that carry a formula, each formula is translated
    with ``cell2code``, the ``RangeNode`` references of its AST are collected,
    and an edge is added from every referenced cell/range to the cell (or
    range) that depends on it. Referenced formula cells that are not yet in
    the cellmap are queued and processed the same way.

    Args:
        seeds: list of cell objects exposing ``address()``, ``formula`` and
            ``sheet``.
        cell_source: object exposing ``named_ranges``, ``pointers`` and
            ``range()``, plus either ``G``/``cellmap`` or ``cells``.

    Returns:
        A ``(cellmap, G)`` tuple: the address -> cell mapping and the graph.

    Side effects:
        Sets ``python_expression`` and calls ``compile()`` on visited cells,
        adds addresses to ``cell_source.pointers`` and may insert empty cells
        into ``cell_source.cells``.
    """

    # when called from Spreadsheet instance, use the Spreadsheet cellmap and graph
    if hasattr(cell_source, 'G'):  # ~ cell_source is a Spreadsheet
        cellmap = cell_source.cellmap
        cells = cellmap
        G = cell_source.G
        for c in seeds:
            G.add_node(c)
            cellmap[c.address()] = c
    # when called from ExcelCompiler instance, construct cellmap and graph from seeds
    else:  # ~ cell_source is a ExcelCompiler
        cellmap = {x.address(): x for x in seeds}
        cells = cell_source.cells
        # directed graph
        G = networkx.DiGraph()
        # match the info in cellmap
        for c in cellmap.values():
            G.add_node(c)

    # cells to analyze: only formulas (used as a LIFO work list)
    todo = [s for s in seeds if s.formula]
    names = cell_source.named_ranges

    while todo:
        c1 = todo.pop()
        cursheet = c1.sheet

        ###### 1) looking for cell c1 dependencies ####################
        # translate the formula and get its AST
        pystr, ast = cell2code(c1, names)
        # only store the code here; compilation is done at the end of the
        # iteration
        c1.python_expression = pystr.replace('"', "'")

        # register c1 as a pointer, unless it is a named range
        # (pointers names already treated in ExcelCompiler)
        if (('OFFSET' in c1.formula or 'INDEX' in c1.formula)
                and c1.address() not in cell_source.named_ranges):
            cell_source.pointers.add(c1.address())

        # get all the cells/ranges this formula refers to
        deps = [x for x in ast.nodes() if isinstance(x, RangeNode)]
        # remove dupes
        deps = uniqueify(deps)

        ###### 2) connect dependencies in cells in graph ####################
        for dep in deps:
            dep_name = dep.tvalue.replace('$', '')

            # this is to avoid :A1 or A1: dep due to clean_pointers() returning an ExcelError
            if dep_name.startswith(':') or dep_name.endswith(':'):
                dep_name = dep_name.replace(':', '')

            # if not pointer, we need an absolute address
            if (dep.tsubtype != 'pointer' and dep_name not in names
                    and "!" not in dep_name and cursheet is not None):
                dep_name = cursheet + "!" + dep_name

            # Named_ranges + ranges already parsed (previous iterations)
            if dep_name in cellmap:
                origins = [cellmap[dep_name]]
                target = cellmap[c1.address()]
            # if the dep_name is a multi-cell range, create a range object
            elif is_range(dep_name) or (dep_name in names
                                        and is_range(names[dep_name])):
                reference = names[dep_name] if dep_name in names else dep_name

                if 'OFFSET' in reference or 'INDEX' in reference:
                    start_end = prepare_pointer(reference, names, ref_cell=c1)
                    rng = cell_source.range(start_end)

                    if dep_name in names:  # dep is a pointer range
                        address = dep_name
                    elif c1.address() in names:  # c1 holds a pointer range
                        address = c1.address()
                    else:  # a pointer range with no name, its address will be its name
                        address = '%s:%s' % (start_end["start"],
                                             start_end["end"])
                        cell_source.pointers.add(address)
                else:
                    address = dep_name

                    # get a list of the addresses in this range that are not yet in the cellmap
                    range_addresses = list(
                        resolve_range(reference, should_flatten=True)[0])
                    cellmap_add_addresses = [
                        addr for addr in range_addresses
                        if addr not in cellmap
                    ]

                    if cellmap_add_addresses:
                        # this means there are cells to be added

                        # get row and col dimensions for the sheet, assuming the whole range is in one sheet
                        sheet_initial = split_address(
                            cellmap_add_addresses[0])[0]
                        max_rows, max_cols = max_dimension(
                            cellmap, sheet_initial)

                        # create empty cells that aren't in the cellmap
                        for addr in cellmap_add_addresses:
                            sheet_new, col_new, row_new = split_address(addr)

                            # if somehow a new sheet comes up in the range, get the new dimensions
                            if sheet_new != sheet_initial:
                                sheet_initial = sheet_new
                                max_rows, max_cols = max_dimension(
                                    cellmap, sheet_new)

                            # only add cells within the maximum bounds of the sheet to avoid
                            # too many evaluations for A:A or 1:1 ranges
                            if (int(row_new) <= max_rows
                                    and int(col2num(col_new)) <= max_cols):
                                # create new cell object
                                cell_new = Cell(addr,
                                                sheet_new,
                                                value="",
                                                should_eval='False')
                                cellmap[addr] = cell_new  # add it to the cellmap
                                G.add_node(cell_new)  # add it to the graph
                                # add it to the cell_source, used in this function
                                cell_source.cells[addr] = cell_new

                    rng = cell_source.range(reference)

                if address in cellmap:
                    virtual_cell = cellmap[address]
                else:
                    virtual_cell = Cell(address,
                                        None,
                                        value=rng,
                                        formula=reference,
                                        is_range=True,
                                        is_named_range=True)
                    # save the range
                    cellmap[address] = virtual_cell

                # add an edge from the range to the parent
                G.add_node(virtual_cell)
                # Cell(A1:A10) -> c1 or Cell(ExampleName) -> c1
                G.add_edge(virtual_cell, c1)
                # cells in the range should point to the range as their parent
                target = virtual_cell
                origins = []

                # could be better, but can't check on Exception types here...
                if len(list(rng.keys())) != 0:
                    for child in rng.addresses:
                        if child not in cellmap:
                            origins.append(cells[child])
                        else:
                            origins.append(cellmap[child])
            else:
                # not a range
                reference = names[dep_name] if dep_name in names else dep_name

                if reference in cells:
                    if dep_name in names:
                        # single-cell named range: a virtual cell carrying the
                        # name sits between the real cell and c1
                        virtual_cell = Cell(dep_name,
                                            None,
                                            value=cells[reference].value,
                                            formula=reference,
                                            is_range=False,
                                            is_named_range=True)

                        G.add_node(virtual_cell)
                        G.add_edge(cells[reference], virtual_cell)

                        origins = [virtual_cell]
                    else:
                        origins = [cells[reference]]

                    cell = origins[0]

                    if cell.formula is not None and ('OFFSET' in cell.formula
                                                     or 'INDEX'
                                                     in cell.formula):
                        cell_source.pointers.add(cell.address())
                else:
                    # the reference is not a known cell: use an empty placeholder
                    virtual_cell = Cell(dep_name,
                                        None,
                                        value=None,
                                        formula=None,
                                        is_range=False,
                                        is_named_range=True)
                    origins = [virtual_cell]

                target = c1

            # process each cell
            for c2 in flatten(origins):

                # if we haven't treated this cell already
                if c2.address() not in cellmap:
                    if c2.formula:
                        # cell with a formula, needs to be added to the todo list
                        todo.append(c2)
                    else:
                        # constant cell, no need for further processing, just remember to set the code
                        pystr, _ = cell2code(c2, names)
                        c2.python_expression = pystr
                        c2.compile()

                    # save in the cellmap
                    cellmap[c2.address()] = c2
                    # add to the graph
                    G.add_node(c2)

                # add an edge from the cell to the parent (range or cell)
                # NOTE(review): target is always bound to a cell by the
                # branches above, so this guard looks always true - confirm
                # before removing it.
                if target != []:
                    G.add_edge(c2, target)

        # cell compilation is done here because pointer ranges might update python_expressions
        c1.compile()

    return (cellmap, G)
Ejemplo n.º 2
0
def graph_from_seeds(seeds, cell_source):
    """
    Create or update a networkx dependency graph from a list of seed cells.

    The graph is created when the cell_source is an instance of ExcelCompiler
    (it has no ``G`` attribute) and updated in place when the cell_source is
    an instance of Spreadsheet (it exposes ``G`` and ``cellmap``).

    Starting from the seeds that carry a formula, each formula is translated
    with ``cell2code``, the ``RangeNode`` references of its AST are collected,
    and an edge is added from every referenced cell/range to the cell (or
    range) that depends on it. Referenced formula cells that are not yet in
    the cellmap are queued and processed the same way.

    Args:
        seeds: list of cell objects exposing ``address()``, ``formula`` and
            ``sheet``.
        cell_source: object exposing ``named_ranges``, ``pointers`` and
            ``Range()``, plus either ``G``/``cellmap`` or ``cells``.

    Returns:
        A ``(cellmap, G)`` tuple: the address -> cell mapping and the graph.

    Side effects:
        Sets ``python_expression`` and calls ``compile()`` on visited cells
        and adds addresses to ``cell_source.pointers``.
    """

    # when called from Spreadsheet instance, use the Spreadsheet cellmap and graph
    if hasattr(cell_source, 'G'):  # ~ cell_source is a Spreadsheet
        cellmap = cell_source.cellmap
        cells = cellmap
        G = cell_source.G
        for c in seeds:
            G.add_node(c)
            cellmap[c.address()] = c
    # when called from ExcelCompiler instance, construct cellmap and graph from seeds
    else:  # ~ cell_source is a ExcelCompiler
        cellmap = {x.address(): x for x in seeds}
        cells = cell_source.cells
        # directed graph
        G = networkx.DiGraph()
        # match the info in cellmap
        for c in cellmap.values():
            G.add_node(c)

    # cells to analyze: only formulas (used as a LIFO work list)
    todo = [s for s in seeds if s.formula]
    names = cell_source.named_ranges

    while todo:
        c1 = todo.pop()
        cursheet = c1.sheet

        ###### 1) looking for cell c1 dependencies ####################
        # translate the formula and get its AST
        pystr, ast = cell2code(c1, names)
        # only store the code here; compilation is done at the end of the
        # iteration
        c1.python_expression = pystr.replace('"', "'")

        # register c1 as a pointer, unless it is a named range
        # (pointers names already treated in ExcelCompiler)
        if (('OFFSET' in c1.formula or 'INDEX' in c1.formula)
                and c1.address() not in cell_source.named_ranges):
            cell_source.pointers.add(c1.address())

        # get all the cells/ranges this formula refers to
        deps = [x for x in ast.nodes() if isinstance(x, RangeNode)]
        # remove dupes
        deps = uniqueify(deps)

        ###### 2) connect dependencies in cells in graph ####################
        for dep in deps:
            dep_name = dep.tvalue.replace('$', '')

            # this is to avoid :A1 or A1: dep due to clean_pointers() returning an ExcelError
            if dep_name.startswith(':') or dep_name.endswith(':'):
                dep_name = dep_name.replace(':', '')

            # if not pointer, we need an absolute address
            if (dep.tsubtype != 'pointer' and dep_name not in names
                    and "!" not in dep_name and cursheet is not None):
                dep_name = cursheet + "!" + dep_name

            # Named_ranges + ranges already parsed (previous iterations)
            if dep_name in cellmap:
                origins = [cellmap[dep_name]]
                target = cellmap[c1.address()]
            # if the dep_name is a multi-cell range, create a range object
            elif is_range(dep_name) or (dep_name in names and is_range(names[dep_name])):
                reference = names[dep_name] if dep_name in names else dep_name

                if 'OFFSET' in reference or 'INDEX' in reference:
                    start_end = prepare_pointer(reference, names, ref_cell=c1)
                    rng = cell_source.Range(start_end)

                    if dep_name in names:  # dep is a pointer range
                        address = dep_name
                    elif c1.address() in names:  # c1 holds a pointer range
                        address = c1.address()
                    else:  # a pointer range with no name, its address will be its name
                        address = '%s:%s' % (start_end["start"], start_end["end"])
                        cell_source.pointers.add(address)
                else:
                    address = dep_name
                    rng = cell_source.Range(reference)

                if address in cellmap:
                    virtual_cell = cellmap[address]
                else:
                    virtual_cell = Cell(address, None, value=rng, formula=reference,
                                        is_range=True, is_named_range=True)
                    # save the range
                    cellmap[address] = virtual_cell

                # add an edge from the range to the parent
                G.add_node(virtual_cell)
                # Cell(A1:A10) -> c1 or Cell(ExampleName) -> c1
                G.add_edge(virtual_cell, c1)
                # cells in the range should point to the range as their parent
                target = virtual_cell
                origins = []

                # could be better, but can't check on Exception types here...
                if len(list(rng.keys())) != 0:
                    for child in rng.addresses:
                        if child not in cellmap:
                            origins.append(cells[child])
                        else:
                            origins.append(cellmap[child])
            else:
                # not a range
                reference = names[dep_name] if dep_name in names else dep_name

                if reference in cells:
                    if dep_name in names:
                        # single-cell named range: a virtual cell carrying the
                        # name sits between the real cell and c1
                        virtual_cell = Cell(dep_name, None, value=cells[reference].value,
                                            formula=reference, is_range=False,
                                            is_named_range=True)

                        G.add_node(virtual_cell)
                        G.add_edge(cells[reference], virtual_cell)

                        origins = [virtual_cell]
                    else:
                        origins = [cells[reference]]

                    cell = origins[0]

                    if cell.formula is not None and ('OFFSET' in cell.formula or 'INDEX' in cell.formula):
                        cell_source.pointers.add(cell.address())
                else:
                    # the reference is not a known cell: use an empty placeholder
                    virtual_cell = Cell(dep_name, None, value=None, formula=None,
                                        is_range=False, is_named_range=True)
                    origins = [virtual_cell]

                target = c1

            # process each cell
            for c2 in flatten(origins):

                # if we haven't treated this cell already
                if c2.address() not in cellmap:
                    if c2.formula:
                        # cell with a formula, needs to be added to the todo list
                        todo.append(c2)
                    else:
                        # constant cell, no need for further processing, just remember to set the code
                        pystr, _ = cell2code(c2, names)
                        c2.python_expression = pystr
                        c2.compile()

                    # save in the cellmap
                    cellmap[c2.address()] = c2
                    # add to the graph
                    G.add_node(c2)

                # add an edge from the cell to the parent (range or cell)
                # NOTE(review): target is always bound to a cell by the
                # branches above, so this guard looks always true - confirm
                # before removing it.
                if target != []:
                    G.add_edge(c2, target)

        # cell compilation is done here because pointer ranges might update python_expressions
        c1.compile()

    return (cellmap, G)