Esempio n. 1
0
def memo(funcdesc, arglist_s):
    from rpython.annotator.model import SomePBC, SomeImpossibleValue, SomeBool
    from rpython.annotator.model import unionof
    # call the function now, and collect possible results
    argvalues = []
    for s in arglist_s:
        if s.is_constant():
            values = [s.const]
        elif isinstance(s, SomePBC):
            values = []
            assert not s.can_be_None, "memo call: cannot mix None and PBCs"
            for desc in s.descriptions:
                if desc.pyobj is None:
                    raise annmodel.AnnotatorError(
                        "memo call with a class or PBC that has no "
                        "corresponding Python object (%r)" % (desc,))
                values.append(desc.pyobj)
        elif isinstance(s, SomeImpossibleValue):
            return s    # we will probably get more possible args later
        elif isinstance(s, SomeBool):
            values = [False, True]
        else:
            raise annmodel.AnnotatorError("memo call: argument must be a class "
                                          "or a frozen PBC, got %r" % (s,))
        argvalues.append(values)
    # the list of all possible tuples of arguments to give to the memo function
    possiblevalues = cartesian_product(argvalues)

    # a MemoTable factory -- one MemoTable per family of arguments that can
    # be called together, merged via a UnionFind.
    bookkeeper = funcdesc.bookkeeper
    try:
        memotables = bookkeeper.all_specializations[funcdesc]
    except KeyError:
        func = funcdesc.pyobj
        if func is None:
            raise annmodel.AnnotatorError("memo call: no Python function object"
                                          "to call (%r)" % (funcdesc,))

        def compute_one_result(args):
            value = func(*args)
            memotable = MemoTable(funcdesc, args, value)
            memotable.register_finish()
            return memotable

        memotables = UnionFind(compute_one_result)
        bookkeeper.all_specializations[funcdesc] = memotables

    # merge the MemoTables for the individual argument combinations
    firstvalues = possiblevalues.next()
    _, _, memotable = memotables.find(firstvalues)
    for values in possiblevalues:
        _, _, memotable = memotables.union(firstvalues, values)

    if memotable.graph is not None:
        return memotable.graph   # if already computed
    else:
        # otherwise, for now, return the union of each possible result
        return unionof(*[bookkeeper.immutablevalue(v)
                         for v in memotable.table.values()])
Esempio n. 2
0
    def __init__(self, annotator):
        self.annotator = annotator
        self.policy = annotator.policy
        self.descs = {}  # map Python objects to their XxxDesc wrappers
        self.methoddescs = {}  # map (funcdesc, classdef) to the MethodDesc
        self.classdefs = []  # list of all ClassDefs
        self.seen_mutable = {}
        self.listdefs = {}  # map position_keys to ListDefs
        self.dictdefs = {}  # map position_keys to DictDefs
        self.immutable_cache = {}

        self.classpbc_attr_families = {
        }  # {'attr': UnionFind(ClassAttrFamily)}
        self.frozenpbc_attr_families = UnionFind(description.FrozenAttrFamily)
        self.pbc_maximal_call_families = UnionFind(description.CallFamily)

        self.emulated_pbc_calls = {}
        self.all_specializations = {}  # {FuncDesc: specialization-info}
        self.pending_specializations = []  # list of callbacks
        self.external_class_cache = {}  # cache of ExternalType classes

        self.needs_generic_instantiate = {}
        self.thread_local_fields = set()

        delayed_imports()
Esempio n. 3
0
def test_asymmetric_absorb():
    class Info(object):
        def __init__(self, obj):
            self.values = [obj]

        def absorb(self, other):
            self.values += other.values

    uf = UnionFind(Info)
    uf.union(2, 3)
    uf.union(1, 2)
    assert uf[1].values == uf[2].values == uf[3].values == [1, 2, 3]
Esempio n. 4
0
 def coalesce_variables(self):
     self._unionfind = UnionFind()
     pendingblocks = list(self.graph.iterblocks())
     while pendingblocks:
         block = pendingblocks.pop()
         # Aggressively try to coalesce each source variable with its
         # target.  We start from the end of the graph instead of
         # from the beginning.  This is a bit arbitrary, but the idea
         # is that the end of the graph runs typically more often
         # than the start, given that we resume execution from the
         # middle during blackholing.
         for link in block.exits:
             if link.last_exception is not None:
                 self._depgraph.add_node(link.last_exception)
             if link.last_exc_value is not None:
                 self._depgraph.add_node(link.last_exc_value)
             for i, v in enumerate(link.args):
                 self._try_coalesce(v, link.target.inputargs[i])
Esempio n. 5
0
def test_cleanup():
    state = []
    class ReferencedByExternalState(object):
        def __init__(self, obj):
            state.append(self)
            self.obj = obj

        def absorb(self, other):
            state.remove(other)

    uf = UnionFind(ReferencedByExternalState)
    uf.find(1)
    for i in xrange(1, 10, 2):
        uf.union(i, 1)
    uf.find(2)
    for i in xrange(2, 20, 2):
        uf.union(i, 2)
    assert len(state) == 2  # we have exactly 2 partitions
Esempio n. 6
0
def remove_identical_vars_SSA(graph):
    """When the same variable is passed multiple times into the next block,
    pass it only once.  This enables further optimizations by the annotator,
    which otherwise doesn't realize that tests performed on one of the copies
    of the variable also affect the other."""
    uf = UnionFind(Representative)
    entrymap = mkentrymap(graph)
    del entrymap[graph.startblock]
    entrymap.pop(graph.returnblock, None)
    entrymap.pop(graph.exceptblock, None)
    inputs = {}
    for block, links in entrymap.items():
        phis = zip(block.inputargs, zip(*[link.args for link in links]))
        inputs[block] = phis

    def simplify_phis(block):
        phis = inputs[block]
        to_remove = []
        unique_phis = {}
        for i, (input, phi_args) in enumerate(phis):
            new_args = [uf.find_rep(arg) for arg in phi_args]
            if all_equal(new_args) and not isspecialvar(new_args[0]):
                uf.union(new_args[0], input)
                to_remove.append(i)
            else:
                t = tuple(new_args)
                if t in unique_phis:
                    uf.union(unique_phis[t], input)
                    to_remove.append(i)
                else:
                    unique_phis[t] = input
        for i in reversed(to_remove):
            del phis[i]
        return bool(to_remove)

    progress = True
    while progress:
        progress = False
        for block in inputs:
            if simplify_phis(block):
                progress = True

    renaming = dict((key, uf[key].rep) for key in uf)
    for block, links in entrymap.items():
        if inputs[block]:
            new_inputs, new_args = zip(*inputs[block])
            new_args = map(list, zip(*new_args))
        else:
            new_inputs = []
            new_args = [[] for _ in links]
        block.inputargs = new_inputs
        assert len(links) == len(new_args)
        for link, args in zip(links, new_args):
            link.args = args
    for block in entrymap:
        block.renamevariables(renaming)
Esempio n. 7
0
 def get_classpbc_attr_families(self, attrname):
     """Return the UnionFind for the ClassAttrFamilies corresponding to
     attributes of the given name.
     """
     map = self.classpbc_attr_families
     try:
         access_sets = map[attrname]
     except KeyError:
         access_sets = map[attrname] = UnionFind(
             description.ClassAttrFamily)
     return access_sets
Esempio n. 8
0
def test_cleanup():
    state = []
    class ReferencedByExternalState(object):
        def __init__(self, obj):
            state.append(self)
            self.obj = obj

        def absorb(self, other):
            state.remove(other)

    uf = UnionFind(ReferencedByExternalState)
    uf.find(1)
    for i in xrange(1, 10, 2):
        uf.union(i, 1)
    uf.find(2)
    for i in xrange(2, 20, 2):
        uf.union(i, 2)
    assert len(state) == 2 # we have exactly 2 partitions
Esempio n. 9
0
def memo(funcdesc, args_s):
    # call the function now, and collect possible results

    # the list of all possible tuples of arguments to give to the memo function
    possiblevalues = cartesian_product([all_values(s_arg) for s_arg in args_s])

    # a MemoTable factory -- one MemoTable per family of arguments that can
    # be called together, merged via a UnionFind.
    bookkeeper = funcdesc.bookkeeper
    try:
        memotables = bookkeeper.all_specializations[funcdesc]
    except KeyError:
        func = funcdesc.pyobj
        if func is None:
            raise annmodel.AnnotatorError(
                "memo call: no Python function object"
                "to call (%r)" % (funcdesc, ))

        def compute_one_result(args):
            value = func(*args)
            memotable = MemoTable(funcdesc, args, value)
            memotable.register_finish()
            return memotable

        memotables = UnionFind(compute_one_result)
        bookkeeper.all_specializations[funcdesc] = memotables

    # merge the MemoTables for the individual argument combinations
    firstvalues = possiblevalues.next()
    _, _, memotable = memotables.find(firstvalues)
    for values in possiblevalues:
        _, _, memotable = memotables.union(firstvalues, values)

    if memotable.graph is not None:
        return memotable.graph  # if already computed
    else:
        # otherwise, for now, return the union of each possible result
        return unionof(
            *[bookkeeper.immutablevalue(v) for v in memotable.table.values()])
Esempio n. 10
0
def memo(funcdesc, args_s):
    # call the function now, and collect possible results

    # the list of all possible tuples of arguments to give to the memo function
    possiblevalues = cartesian_product([all_values(s_arg) for s_arg in args_s])

    # a MemoTable factory -- one MemoTable per family of arguments that can
    # be called together, merged via a UnionFind.
    bookkeeper = funcdesc.bookkeeper
    try:
        memotables = bookkeeper.all_specializations[funcdesc]
    except KeyError:
        func = funcdesc.pyobj
        if func is None:
            raise annmodel.AnnotatorError("memo call: no Python function object"
                                          "to call (%r)" % (funcdesc,))

        def compute_one_result(args):
            value = func(*args)
            memotable = MemoTable(funcdesc, args, value)
            memotable.register_finish()
            return memotable

        memotables = UnionFind(compute_one_result)
        bookkeeper.all_specializations[funcdesc] = memotables

    # merge the MemoTables for the individual argument combinations
    firstvalues = possiblevalues.next()
    _, _, memotable = memotables.find(firstvalues)
    for values in possiblevalues:
        _, _, memotable = memotables.union(firstvalues, values)

    if memotable.graph is not None:
        return memotable.graph   # if already computed
    else:
        # otherwise, for now, return the union of each possible result
        return unionof(*[bookkeeper.immutablevalue(v)
                         for v in memotable.table.values()])
Esempio n. 11
0
 def coalesce_variables(self):
     self._unionfind = UnionFind()
     pendingblocks = list(self.graph.iterblocks())
     while pendingblocks:
         block = pendingblocks.pop()
         # Aggressively try to coalesce each source variable with its
         # target.  We start from the end of the graph instead of
         # from the beginning.  This is a bit arbitrary, but the idea
         # is that the end of the graph runs typically more often
         # than the start, given that we resume execution from the
         # middle during blackholing.
         for link in block.exits:
             if link.last_exception is not None:
                 self._depgraph.add_node(link.last_exception)
             if link.last_exc_value is not None:
                 self._depgraph.add_node(link.last_exc_value)
             for i, v in enumerate(link.args):
                 self._try_coalesce(v, link.target.inputargs[i])
Esempio n. 12
0
class RegAllocator(object):
    DEBUG_REGALLOC = False

    def __init__(self, graph, consider_var, ListOfKind):
        self.graph = graph
        self.consider_var = consider_var
        self.ListOfKind = ListOfKind

    def make_dependencies(self):
        dg = DependencyGraph()
        for block in self.graph.iterblocks():
            # Compute die_at = {Variable: index_of_operation_with_last_usage}
            die_at = dict.fromkeys(block.inputargs, 0)
            for i, op in enumerate(block.operations):
                for v in op.args:
                    if isinstance(v, Variable):
                        die_at[v] = i
                    elif isinstance(v, self.ListOfKind):
                        for v1 in v:
                            if isinstance(v1, Variable):
                                die_at[v1] = i
                if op.result is not None:
                    die_at[op.result] = i + 1
            if isinstance(block.exitswitch, tuple):
                for x in block.exitswitch:
                    die_at.pop(x, None)
            else:
                die_at.pop(block.exitswitch, None)
            for link in block.exits:
                for v in link.args:
                    die_at.pop(v, None)
            die_at = [(value, key) for (key, value) in die_at.items()]
            die_at.sort()
            die_at.append((sys.maxint,))
            # Done.  XXX the code above this line runs 3 times
            # (for kind in KINDS) to produce the same result...
            livevars = [v for v in block.inputargs
                          if self.consider_var(v)]
            # Add the variables of this block to the dependency graph
            for i, v in enumerate(livevars):
                dg.add_node(v)
                for j in range(i):
                    dg.add_edge(livevars[j], v)
            livevars = set(livevars)
            die_index = 0
            for i, op in enumerate(block.operations):
                while die_at[die_index][0] == i:
                    try:
                        livevars.remove(die_at[die_index][1])
                    except KeyError:
                        pass
                    die_index += 1
                if (op.result is not None and
                        self.consider_var(op.result)):
                    dg.add_node(op.result)
                    for v in livevars:
                        if self.consider_var(v):
                            dg.add_edge(v, op.result)
                    livevars.add(op.result)
        self._depgraph = dg

    def coalesce_variables(self):
        self._unionfind = UnionFind()
        pendingblocks = list(self.graph.iterblocks())
        while pendingblocks:
            block = pendingblocks.pop()
            # Aggressively try to coalesce each source variable with its
            # target.  We start from the end of the graph instead of
            # from the beginning.  This is a bit arbitrary, but the idea
            # is that the end of the graph runs typically more often
            # than the start, given that we resume execution from the
            # middle during blackholing.
            for link in block.exits:
                if link.last_exception is not None:
                    self._depgraph.add_node(link.last_exception)
                if link.last_exc_value is not None:
                    self._depgraph.add_node(link.last_exc_value)
                for i, v in enumerate(link.args):
                    self._try_coalesce(v, link.target.inputargs[i])

    def _try_coalesce(self, v, w):
        if isinstance(v, Variable) and self.consider_var(v)  \
                                   and self.consider_var(w):
            dg = self._depgraph
            uf = self._unionfind
            v0 = uf.find_rep(v)
            w0 = uf.find_rep(w)
            if v0 is not w0 and v0 not in dg.neighbours[w0]:
                _, rep, _ = uf.union(v0, w0)
                assert uf.find_rep(v0) is uf.find_rep(w0) is rep
                if rep is v0:
                    dg.coalesce(w0, v0)
                else:
                    assert rep is w0
                    dg.coalesce(v0, w0)

    def find_node_coloring(self):
        self._coloring = self._depgraph.find_node_coloring()
        if self.DEBUG_REGALLOC:
            for block in self.graph.iterblocks():
                print block
                for v in block.getvariables():
                    print '\t', v, '\t', self.getcolor(v)

    def find_num_colors(self):
        if self._coloring:
            numcolors = max(self._coloring.values()) + 1
        else:
            numcolors = 0
        self.numcolors = numcolors

    def getcolor(self, v):
        return self._coloring[self._unionfind.find_rep(v)]

    def checkcolor(self, v, color):
        try:
            return self.getcolor(v) == color
        except KeyError:
            return False

    def swapcolors(self, col1, col2):
        for key, value in self._coloring.items():
            if value == col1:
                self._coloring[key] = col2
            elif value == col2:
                self._coloring[key] = col1
Esempio n. 13
0
 def __init__(self):
     self._analyzed_calls = UnionFind(lambda graph: Dependency(self))
Esempio n. 14
0
def memo(funcdesc, arglist_s):
    from rpython.annotator.model import SomePBC, SomeImpossibleValue, SomeBool
    from rpython.annotator.model import unionof
    # call the function now, and collect possible results
    argvalues = []
    for s in arglist_s:
        if s.is_constant():
            values = [s.const]
        elif isinstance(s, SomePBC):
            values = []
            assert not s.can_be_None, "memo call: cannot mix None and PBCs"
            for desc in s.descriptions:
                if desc.pyobj is None:
                    raise annmodel.AnnotatorError(
                        "memo call with a class or PBC that has no "
                        "corresponding Python object (%r)" % (desc, ))
                values.append(desc.pyobj)
        elif isinstance(s, SomeImpossibleValue):
            return s  # we will probably get more possible args later
        elif isinstance(s, SomeBool):
            values = [False, True]
        else:
            raise annmodel.AnnotatorError(
                "memo call: argument must be a class "
                "or a frozen PBC, got %r" % (s, ))
        argvalues.append(values)
    # the list of all possible tuples of arguments to give to the memo function
    possiblevalues = cartesian_product(argvalues)

    # a MemoTable factory -- one MemoTable per family of arguments that can
    # be called together, merged via a UnionFind.
    bookkeeper = funcdesc.bookkeeper
    try:
        memotables = bookkeeper.all_specializations[funcdesc]
    except KeyError:
        func = funcdesc.pyobj
        if func is None:
            raise annmodel.AnnotatorError(
                "memo call: no Python function object"
                "to call (%r)" % (funcdesc, ))

        def compute_one_result(args):
            value = func(*args)
            memotable = MemoTable(funcdesc, args, value)
            memotable.register_finish()
            return memotable

        memotables = UnionFind(compute_one_result)
        bookkeeper.all_specializations[funcdesc] = memotables

    # merge the MemoTables for the individual argument combinations
    firstvalues = possiblevalues.next()
    _, _, memotable = memotables.find(firstvalues)
    for values in possiblevalues:
        _, _, memotable = memotables.union(firstvalues, values)

    if memotable.graph is not None:
        return memotable.graph  # if already computed
    else:
        # otherwise, for now, return the union of each possible result
        return unionof(
            *[bookkeeper.immutablevalue(v) for v in memotable.table.values()])
Esempio n. 15
0
class RegAllocator(object):
    DEBUG_REGALLOC = False

    def __init__(self, graph, consider_var, ListOfKind):
        self.graph = graph
        self.consider_var = consider_var
        self.ListOfKind = ListOfKind

    def make_dependencies(self):
        dg = DependencyGraph()
        for block in self.graph.iterblocks():
            # Compute die_at = {Variable: index_of_operation_with_last_usage}
            die_at = dict.fromkeys(block.inputargs, 0)
            for i, op in enumerate(block.operations):
                for v in op.args:
                    if isinstance(v, Variable):
                        die_at[v] = i
                    elif isinstance(v, self.ListOfKind):
                        for v1 in v:
                            if isinstance(v1, Variable):
                                die_at[v1] = i
                if op.result is not None:
                    die_at[op.result] = i + 1
            if isinstance(block.exitswitch, tuple):
                for x in block.exitswitch:
                    die_at.pop(x, None)
            else:
                die_at.pop(block.exitswitch, None)
            for link in block.exits:
                for v in link.args:
                    die_at.pop(v, None)
            die_at = [(value, key) for (key, value) in die_at.items()]
            die_at.sort()
            die_at.append((sys.maxint,))
            # Done.  XXX the code above this line runs 3 times
            # (for kind in KINDS) to produce the same result...
            livevars = [v for v in block.inputargs
                          if self.consider_var(v)]
            # Add the variables of this block to the dependency graph
            for i, v in enumerate(livevars):
                dg.add_node(v)
                for j in range(i):
                    dg.add_edge(livevars[j], v)
            livevars = set(livevars)
            die_index = 0
            for i, op in enumerate(block.operations):
                while die_at[die_index][0] == i:
                    try:
                        livevars.remove(die_at[die_index][1])
                    except KeyError:
                        pass
                    die_index += 1
                if (op.result is not None and
                        self.consider_var(op.result)):
                    dg.add_node(op.result)
                    for v in livevars:
                        if self.consider_var(v):
                            dg.add_edge(v, op.result)
                    livevars.add(op.result)
        self._depgraph = dg

    def coalesce_variables(self):
        self._unionfind = UnionFind()
        pendingblocks = list(self.graph.iterblocks())
        while pendingblocks:
            block = pendingblocks.pop()
            # Aggressively try to coalesce each source variable with its
            # target.  We start from the end of the graph instead of
            # from the beginning.  This is a bit arbitrary, but the idea
            # is that the end of the graph runs typically more often
            # than the start, given that we resume execution from the
            # middle during blackholing.
            for link in block.exits:
                if link.last_exception is not None:
                    self._depgraph.add_node(link.last_exception)
                if link.last_exc_value is not None:
                    self._depgraph.add_node(link.last_exc_value)
                for i, v in enumerate(link.args):
                    self._try_coalesce(v, link.target.inputargs[i])

    def _try_coalesce(self, v, w):
        if isinstance(v, Variable) and self.consider_var(v):
            assert self.consider_var(w)
            dg = self._depgraph
            uf = self._unionfind
            v0 = uf.find_rep(v)
            w0 = uf.find_rep(w)
            if v0 is not w0 and v0 not in dg.neighbours[w0]:
                _, rep, _ = uf.union(v0, w0)
                assert uf.find_rep(v0) is uf.find_rep(w0) is rep
                if rep is v0:
                    dg.coalesce(w0, v0)
                else:
                    assert rep is w0
                    dg.coalesce(v0, w0)

    def find_node_coloring(self):
        self._coloring = self._depgraph.find_node_coloring()
        if self.DEBUG_REGALLOC:
            for block in self.graph.iterblocks():
                print block
                for v in block.getvariables():
                    print '\t', v, '\t', self.getcolor(v)

    def getcolor(self, v):
        return self._coloring[self._unionfind.find_rep(v)]

    def swapcolors(self, col1, col2):
        for key, value in self._coloring.items():
            if value == col1:
                self._coloring[key] = col2
            elif value == col2:
                self._coloring[key] = col1
Esempio n. 16
0
    def compute_lifetimes(self, graph):
        """Compute the static data flow of the graph: returns a list of LifeTime
        instances, each of which corresponds to a set of Variables from the graph.
        The variables are grouped in the same LifeTime if a value can pass from
        one to the other by following the links.  Each LifeTime also records all
        places where a Variable in the set is used (read) or build (created).
        """
        lifetimes = UnionFind(LifeTime)

        def set_creation_point(block, var, *cp):
            _, _, info = lifetimes.find((block, var))
            info.creationpoints.add(cp)

        def set_use_point(block, var, *up):
            _, _, info = lifetimes.find((block, var))
            info.usepoints.add(up)

        def union(block1, var1, block2, var2):
            if isinstance(var1, Variable):
                lifetimes.union((block1, var1), (block2, var2))
            elif isinstance(var1, Constant):
                set_creation_point(block2, var2, "constant", var1)
            else:
                raise TypeError(var1)

        for var in graph.startblock.inputargs:
            set_creation_point(graph.startblock, var, "inputargs")
        set_use_point(graph.returnblock, graph.returnblock.inputargs[0], "return")
        set_use_point(graph.exceptblock, graph.exceptblock.inputargs[0], "except")
        set_use_point(graph.exceptblock, graph.exceptblock.inputargs[1], "except")

        for node in graph.iterblocks():
                for op in node.operations:
                    if op.opname in self.IDENTITY_OPS:
                        # special-case these operations to identify their input
                        # and output variables
                        union(node, op.args[0], node, op.result)
                        continue
                    if op.opname in self.SUBSTRUCT_OPS:
                        if self.visit_substruct_op(node, union, op):
                            continue
                    for i in range(len(op.args)):
                        if isinstance(op.args[i], Variable):
                            set_use_point(node, op.args[i], "op", node, op, i)
                    set_creation_point(node, op.result, "op", node, op)
                if isinstance(node.exitswitch, Variable):
                    set_use_point(node, node.exitswitch, "exitswitch", node)

        for node in graph.iterlinks():
            if isinstance(node.last_exception, Variable):
                set_creation_point(node.prevblock, node.last_exception,
                                   "last_exception")
            if isinstance(node.last_exc_value, Variable):
                set_creation_point(node.prevblock, node.last_exc_value,
                                   "last_exc_value")
            d = set()
            for i, arg in enumerate(node.args):
                union(node.prevblock, arg,
                      node.target, node.target.inputargs[i])
                if isinstance(arg, Variable):
                    if arg in d:
                        # same variable present several times in link.args
                        # consider it as a 'use' of the variable, which
                        # will disable malloc optimization (aliasing problems)
                        set_use_point(node.prevblock, arg, "dup", node, i)
                    else:
                        d.add(arg)

        return lifetimes.infos()
Esempio n. 17
0
 def __init__(self, translator):
     self.translator = translator
     self._analyzed_calls = UnionFind(lambda graph: Dependency(self))
Esempio n. 18
0
def move_pushes_earlier(graph, regalloc):
    """gc_push_roots and gc_pop_roots are pushes/pops to the shadowstack,
    immediately enclosing the operation that needs them (typically a call).
    Here, we try to move individual pushes earlier.

    Should run after expand_push_roots(), but before expand_pop_roots(),
    so that it sees individual 'gc_save_root' operations but bulk
    'gc_pop_roots' operations.
    """
    # Concrete example (assembler tested on x86-64 gcc 5.3 and clang 3.7):
    #
    # ----original----           ----move_pushes_earlier----
    #
    # while (a > 10) {           *foo = b;
    #     *foo = b;              while (a > 10) {
    #     a = g(a);                  a = g(a);
    #     b = *foo;                  b = *foo;
    #                                // *foo = b;
    # }                          }
    # return b;                  return b;
    #
    # => the store and the       => the store is before, and gcc/clang
    # load are in the loop,      moves the load after the loop
    # even in the assembler      (the commented-out '*foo=b' is removed
    #                            here, but gcc/clang would also remove it)

    # Draft of the algorithm: see shadowcolor.txt

    if not regalloc:
        return

    entrymap = mkentrymap(graph)
    assert len(entrymap[graph.startblock]) == 1

    inputvars = {}    # {inputvar: (its block, its index in inputargs)}
    for block in graph.iterblocks():
        for i, v in enumerate(block.inputargs):
            inputvars[v] = (block, i)

    Plist = []

    for index in range(regalloc.numcolors):
        U = UnionFind()

        S = set()
        for block in graph.iterblocks():
            for op in reversed(block.operations):
                if op.opname == 'gc_pop_roots':
                    break
            else:
                continue   # no gc_pop_roots in this block
            for v in op.args:
                if isinstance(v, Variable) and regalloc.checkcolor(v, index):
                    break
            else:
                continue   # no variable goes into index i

            succ = set()
            pending_succ = [(block, v)]
            while pending_succ:
                block1, v1 = pending_succ.pop()
                assert regalloc.checkcolor(v1, index)
                for op1 in block1.operations:
                    if is_trivial_rewrite(op1) and op1.args[0] is v1:
                        if regalloc.checkcolor(op1.result, index):
                            pending_succ.append((block1, op1.result))
                for link1 in block1.exits:
                    for i2, v2 in enumerate(link1.args):
                        if v2 is not v1:
                            continue
                        block2 = link1.target
                        w2 = block2.inputargs[i2]
                        if w2 in succ or not regalloc.checkcolor(w2, index):
                            continue
                        succ.add(w2)
                        for op2 in block2.operations:
                            if op2.opname in ('gc_save_root', 'gc_pop_roots'):
                                break
                        else:
                            pending_succ.append((block2, w2))
            U.union_list(list(succ))
            S.update(succ)

        G = defaultdict(set)
        for block in graph.iterblocks():
            found = False
            for opindex, op in enumerate(block.operations):
                if op.opname == 'gc_save_root':
                    if (isinstance(op.args[1], Constant) and
                        op.args[1].concretetype == lltype.Signed):
                        break
                    elif op.args[0].value == index:
                        found = True
                        break
            if not found or not isinstance(op.args[1], Variable):
                continue   # no matching gc_save_root in this block

            key = (block, op)
            pred = set()
            pending_pred = [(block, op.args[1], opindex)]
            while pending_pred:
                block1, v1, opindex1 = pending_pred.pop()
                assert regalloc.getcolor(v1) == index
                for i in range(opindex1-1, -1, -1):
                    op1 = block1.operations[i]
                    if op1.opname == 'gc_pop_roots':
                        break    # stop
                    if op1.result is v1:
                        if not is_trivial_rewrite(op1):
                            break   # stop
                        if not regalloc.checkcolor(op1.args[0], index):
                            break   # stop
                        v1 = op1.args[0]
                else:
                    varindex = block1.inputargs.index(v1)
                    if v1 in pred:
                        continue    # already done
                    pred.add(v1)
                    for link1 in entrymap[block1]:
                        prevblock1 = link1.prevblock
                        if prevblock1 is not None:
                            w1 = link1.args[varindex]
                            if isinstance(w1, Variable) and w1 not in pred:
                                if regalloc.checkcolor(w1, index):
                                    pending_pred.append((prevblock1, w1,
                                                len(prevblock1.operations)))
            U.union_list(list(pred))
            for v1 in pred:
                G[v1].add(key)

        M = S.intersection(G)

        parts_target = {}
        for v in M:
            vp = U.find_rep(v)
            if vp not in parts_target:
                new_part = (index, set(), set())
                # (index,
                #  subset P of variables,
                #  set of (block, gc_save_root))
                Plist.append(new_part)
                parts_target[vp] = new_part
            part = parts_target[vp]
            part[1].add(v)
            part[2].update(G[v])

    # Sort P so that it prefers places that would avoid multiple
    # gcsaveroots (smaller 'heuristic' result, so first in sorted
    # order); but also prefers smaller overall pieces, because it
    # might be possible to remove several small-scale pieces instead
    # of one big-scale one.
    def heuristic((index, P, gcsaveroots)):
        return float(len(P)) / len(gcsaveroots)
    Plist.sort(key=heuristic)

    variables_along_changes = {}
    live_at_start_of_block = set()   # set of (block, index)
    insert_gc_push_root = defaultdict(list)

    for index, P, gcsaveroots in Plist:
        # if this Plist entry is not valid any more because of changes
        # done by the previous entries, drop it
        if any((inputvars[v][0], index) in live_at_start_of_block for v in P):
            continue
        if any(op not in block.operations for block, op in gcsaveroots):
            continue
        for v in P:
            assert regalloc.getcolor(v) == index
            assert v not in variables_along_changes

        success_count = 0
        mark = []

        for v in P:
            block, varindex = inputvars[v]
            for link in entrymap[block]:
                w = link.args[varindex]
                if link.prevblock is not None:
                    prevoperations = link.prevblock.operations
                else:
                    prevoperations = []
                for op in reversed(prevoperations):
                    if op.opname == 'gc_pop_roots':
                        # it is possible to have gc_pop_roots() without
                        # w in the args, if w is the result of the call
                        # that comes just before.
                        if (isinstance(w, Variable) and
                                w in op.args and
                                regalloc.checkcolor(w, index)):
                            success_count += 1
                        else:
                            mark.append((index, link, varindex))
                        break
                    if op.result is w:
                        if is_trivial_rewrite(op) and (
                                regalloc.checkcolor(op.args[0], index)):
                            w = op.args[0]
                        else:
                            mark.append((index, link, varindex))
                            break
                else:
                    if not isinstance(w, Variable) or w not in P:
                        mark.append((index, link, varindex))

        if success_count > 0:
            for block, op in gcsaveroots:
                newops = list(block.operations)
                newops.remove(op)
                block.operations = newops
            for index, link, varindex in mark:
                insert_gc_push_root[link].append((index, link.args[varindex]))
            for v in P:
                block, varindex = inputvars[v]
                variables_along_changes[v] = block, index
                live_at_start_of_block.add((block, index))

    for link in insert_gc_push_root:
        newops = [_gc_save_root(index, v)
                  for index, v in sorted(insert_gc_push_root[link])]
        insert_empty_block(link, newops=newops)