Beispiel #1
0
  def compile_meta_block( self, blocks ):
    """Compile a sequence of blocks into one generated ``meta_block<N>`` function.

    The generated function simply calls every block in ``blocks`` in order.
    ``N`` is the current value of ``self.meta_block_id``, which is
    incremented on every call so that each meta block gets a unique name.

    Args:
      blocks: sequence of callables; each is exposed to the generated code
        as the global ``blk<i>`` and invoked in list order.

    Returns:
      The compiled ``meta_block<N>`` function object.
    """

    meta_id = self.meta_block_id
    self.meta_block_id += 1

    # Create custom global dict for all blocks inside the meta block
    _globals = { f"blk{i}": b for i, b in enumerate( blocks ) }

    blk_srcs = []
    for i, b in enumerate(blocks):
      # This is a normal update block: annotate the call with its
      # branchiness / loop metadata (the annotation is only a comment in the
      # generated source)
      if b in self.branchiness:
        blk_srcs.append( f"blk{i}() # [br {self.branchiness[b]}, loop {int(self.only_loop_at_top[b])}] {b.__name__}" )
      # This is an SCC block which has zero BR and is a loop
      else:
        blk_srcs.append( f"blk{i}() # {b.__name__}" )

    # A function definition needs at least one statement, so an empty block
    # list compiles to a no-op instead of failing with a syntax error.
    gen_src = f"def meta_block{meta_id}():\n  "
    gen_src += "\n  ".join( blk_srcs or [ "pass" ] )

    # use custom_exec to compile the meta block
    _locals = {}
    custom_exec( py.code.Source( gen_src ).compile(), _globals, _locals )
    ret = _locals[ f'meta_block{meta_id}' ]
    if _DEBUG: print(gen_src)

    # We will use pypyjit.dont_trace_here to compile standalone traces for
    # each meta block. This is best-effort: when pypyjit cannot be imported
    # or the call fails we carry on silently. Only Exception is caught so
    # that KeyboardInterrupt / SystemExit still propagate.
    try:
      from pypyjit import dont_trace_here
      dont_trace_here( 0, False, ret.__code__ )
    except Exception:
      pass

    return ret
Beispiel #2
0
def mk_bits(nbits):
    """Return the ``Bits<nbits>`` class, generating it the first time it is asked for.

    Classes are looked up in ``_bits_types``. A missing width is created by
    formatting ``bits_template`` with ``nbits`` and executing the result,
    which registers the new class in ``_bits_types``.
    """
    assert nbits > 0, "We don't allow Bits0"
    # Note: no upper bound on the bit width is enforced here.

    # Fast path: the class has already been generated
    if nbits in _bits_types:
        return _bits_types[nbits]

    generated = compile(bits_template.format(nbits), f"Bits{nbits}", "exec")
    custom_exec(generated, globals(), {"nbits": nbits})
    return _bits_types[nbits]
Beispiel #3
0
        def gen_wrapped_SCCblk( s, scc, src ):
          """Execute `src` and return the `generated_block` object it defines.

          The source runs with `s`, `scc_tick_func` (the tick function that
          SimpleTickPass generates for `scc`), `deepcopy` and
          `UpblkCyclicError` available as globals.
          """

          # TODO mamba?
          tick = SimpleTickPass.gen_tick_function( scc )

          namespace = {
            's': s,
            'scc_tick_func': tick,
            'deepcopy': deepcopy,
            'UpblkCyclicError': UpblkCyclicError,
          }
          defined = {}

          code = py.code.Source( src ).compile()
          custom_exec( code, namespace, defined )
          return defined[ 'generated_block' ]
Beispiel #4
0
  def gen_hook_func( top, x, ports, case_file ):
    port_srcs = [ f"'h{{str(x.{p}.to_bits())}}" for p in ports ]

    src =  """
def dump_case():
  if top.sim_cycle_count() > 2: # skip the 2 cycles of reset
    print(f"`T({});", file=case_file, flush=True)
""".format( ",".join(port_srcs) )
    _locals = {}
    custom_exec( py.code.Source(src).compile(), {'top': top, 'x': x, 'case_file': case_file}, _locals)
    return _locals['dump_case']
Beispiel #5
0
def _create_fn(fn_name, args_lst, body_lst, _globals=None):
    """Assemble a function from source fragments, compile it and return it.

    Args:
        fn_name: name of the generated function.
        args_lst: iterable of parameter source strings; joined with ", ".
        body_lst: iterable of statement source strings; each one becomes a
            body line indented by two spaces. An empty iterable yields a
            function whose body is ``pass``.
        _globals: optional dict used as the global namespace of the
            generated function. A fresh empty dict is used when ``None``.

    Returns:
        The compiled function object named ``fn_name``.
    """
    # Assemble argument string and body string
    args = ', '.join(args_lst)
    # A `def` must contain at least one statement, so fall back to `pass`
    # rather than producing source that cannot be compiled.
    body = '\n'.join(f'  {statement}' for statement in body_lst) or '  pass'

    # Assemble the source code and execute it
    src = f'def {fn_name}({args}):\n{body}'
    if _globals is None:
        _globals = {}
    _locals = {}
    custom_exec(py.code.Source(src).compile(), _globals, _locals)
    return _locals[fn_name]
Beispiel #6
0
        def compile_scc(i):
            """Compile the i-th entry of `SCCs` into a single callable block.

            An SCC with exactly one block is returned unchanged. For a larger
            SCC this function:
              1. rejects SCCs that contain a block from `onces`;
              2. derives a linear intra-SCC schedule with a BFS;
              3. collects the variables of all intra-SCC constraints;
              4. generates, compiles and returns a `wrapped_SCC_<scc_id>`
                 function that re-runs the schedule until none of those
                 variables changed during an iteration.

            Raises:
              UpblkCyclicError: if a block from `onces` is in the SCC, or if
                the intra-SCC edges carry no variables. The generated
                function raises the same error at runtime after more than
                100 iterations.
            """
            nonlocal scc_id

            scc = SCCs[i]

            # A single-block SCC needs no wrapper loop
            if len(scc) == 1:
                return list(scc)[0]

            # Blocks in `onces` must not be part of a cycle; the message lists
            # every block of the offending SCC with its decorator kind and host
            for x in scc:
                if x in onces:
                    raise UpblkCyclicError("update_once blocks are not allowed to appear in a cycle. \n - " + \
                                    "\n - ".join( [
                                      f"{y.__name__} ({'@update_once' if y in onces else '@update'} " \
                                      f"in 'top.{repr(top.get_update_block_host_component(y))[2:]}')"
                                      for y in scc] ))

            scc_id += 1
            if _DEBUG: print(f"{'='*100}\n SCC{scc_id}\n{'='*100}")

            # For each non-trivial SCC, we need to figure out an intra-SCC
            # linear schedule that minimizes the time to re-execute this SCC
            # due to value changes. A bad schedule may inefficiently execute
            # the SCC many times, each of which changes only a few signals.
            # The current algorithm iteratively finds the "entry block" of
            # the SCC and expands its adjacent blocks. The implementation is
            # to first find the actual entry point, and then BFS to expand the
            # footprint until all nodes are visited.

            tmp_schedule = []
            Q = deque()

            if scc_pred[i] is None:
                # No predecessor SCC: pick the BFS start block from the
                # in-degree of each block counted over intra-SCC edges only.
                # NOTE(review): the original comment said the block with the
                # *least* number of input edges is chosen, but the code below
                # selects the block with the *largest* in-degree (max) --
                # confirm which one is intended.
                InD = {v: 0 for v in scc}
                for (u, v) in E:  # u -> v
                    if u in scc and v in scc:
                        InD[v] += 1
                Q.append(max(InD, key=InD.get))

            else:
                # We start bfs with the blocks that are successors of the
                # predecessor scc in the previous SCC-level topological sort.
                pred = set(SCCs[scc_pred[i]])
                # Sort by names for a fixed outcome
                for x in sorted(scc, key=lambda x: x.__name__):
                    for v in G_T[
                            x]:  # reversed edges that point back to the pred SCC
                        if v in pred:
                            Q.append(x)

            # Perform bfs to find a heuristic schedule
            visited = set(Q)
            while Q:
                u = Q.popleft()
                tmp_schedule.append(u)
                for v in G[u]:
                    if v in scc and v not in visited:
                        Q.append(v)
                        visited.add(v)

            variables = set()
            for (u, v) in E:
                # Collect all variables that trigger other blocks in the SCC
                if u in scc and v in scc:
                    variables.update(constraint_objs[(u, v)])

            if len(variables) == 0:
                raise UpblkCyclicError("There is a cyclic dependency without involving variables."
                                "Probably a loop that involves blocks that should be update_once:\n{}"\
                                .format(", ".join( [ x.__name__ for x in scc] )))

            # generate a loop for scc
            # Shunning: we just simply loop over the whole SCC block
            # TODO performance optimizations using Mamba techniques within a SCC block

            # Template placeholders:
            #   {0} SCC id
            #   {1} statements that snapshot the watched variables
            #   {3} calls to the scheduled blocks
            #   {2} checks that `continue` the loop if any variable changed
            #   {4} names of the blocks in the SCC (for the error message)
            template = """
from copy import deepcopy
def wrapped_SCC_{0}():
  N = 0
  while True:
    N += 1
    if N > 100:
      raise UpblkCyclicError("Combinational loop detected at runtime in {{{4}}} after 100 iters!")
    {1}
    {3}
    {2}
    # print( "SCC block{0} is executed", N, "times" )
    break
generated_block = wrapped_SCC_{0}
"""

            # clean up non-top variables if top is there. For slices of Bits
            # we directly use the top level wide Bits since Bits clone is
            # rpython code

            final_variables = set()

            for x in sorted(variables, key=repr):
                w = x.get_top_level_signal()
                if w is x:
                    final_variables.add(x)
                    continue

                # w is not x, i.e. x is a part of the top-level signal w
                if issubclass(w._dsl.Type, Bits):
                    # Watch the whole top-level Bits signal instead of x
                    if w not in final_variables:
                        final_variables.add(w)
                elif is_bitstruct_class(w._dsl.Type):
                    # Watch x itself unless its top-level signal is already watched
                    if w not in final_variables:
                        final_variables.add(x)
                else:
                    final_variables.add(x)

            # also group them by common ancestor to reduce byte code
            # TODO use longest-common-prefix (LCP) algorithms ...

            final_var_host = defaultdict(list)
            for x in final_variables:
                final_var_host[x.get_host_component()].append(x)

            # Then, we generate the Python code that saves variables at the
            # beginning of each SCC iteration and the code that checks if the
            # values of those variables have changed
            copy_srcs = []
            check_srcs = []

            # var_id numbers the temporaries t1, t2, ... across all hosts
            var_id = 0
            for host, var_list in final_var_host.items():
                hostlen = len(repr(host))

                # Both code sections rebind `host` before touching its variables
                copy_srcs.append(f"host = {host!r}")
                check_srcs.append(f"host = {host!r}")

                sub_check_srcs = []

                for var in var_list:
                    var_id += 1
                    # Name of the variable relative to its host: drop the
                    # host's repr and the following separator character
                    subname = repr(var)[hostlen + 1:]
                    if issubclass(var._dsl.Type, Bits):
                        copy_srcs.append(f"t{var_id}=host.{subname}.clone()")
                    elif is_bitstruct_class(var._dsl.Type):
                        copy_srcs.append(f"t{var_id}=host.{subname}.clone()")
                    else:
                        copy_srcs.append(f"t{var_id}=deepcopy(host.{subname})")

                    sub_check_srcs.append(f"host.{subname} != t{var_id}")

                # One check line per host: re-run the loop if anything changed
                check_srcs.append(
                    f"if { ' or '.join(sub_check_srcs)}: continue")

            # Divide all blks into meta blocks
            # Branchiness factor is the bound of branchiness in a meta block.
            branchiness_factor = 20
            branchy_block_factor = 6

            num_blks = 0  # sanity check
            cur_meta, cur_br, cur_count = [], 0, 0
            scc_schedule = []

            _globals = {'s': top, 'UpblkCyclicError': UpblkCyclicError}
            blk_srcs = []

            # With fewer than 10 blocks, call each block directly from the loop
            if len(tmp_schedule) < 10:
                blk_srcs = []
                for i, b in enumerate(tmp_schedule):
                    blk_srcs.append(
                        f"blk{i}() # [br {self.branchiness[b]}, loop {int(self.only_loop_at_top[b])}] {b.__name__}"
                    )
                    _globals[f"blk{i}"] = b  # expose the block to the generated code

            else:
                for i, blk in enumerate(tmp_schedule):
                    # Same here. If an update block only has top-level loop, br = 0
                    br = 0 if self.only_loop_at_top[blk] else self.branchiness[
                        blk]
                    if cur_br == 0:
                        cur_meta.append(blk)
                        cur_br += br
                        cur_count += (br > 0)
                        if cur_br >= branchiness_factor or cur_count >= branchy_block_factor:
                            num_blks += len(cur_meta)
                            scc_schedule.append(cur_meta)
                            cur_meta, cur_br, cur_count = [], 0, 0  # clear
                    else:
                        if br == 0:
                            # If no branchy block available, directly start a new metablock
                            num_blks += len(cur_meta)
                            scc_schedule.append(cur_meta)
                            cur_meta, cur_br, cur_count = [blk], br, (br > 0)
                        else:
                            cur_meta.append(blk)
                            cur_br += br
                            cur_count += (br > 0)

                            # NOTE(review): cur_br and cur_count were already
                            # updated with this block just above, so br (and
                            # the +1) is counted a second time in this
                            # threshold -- confirm this is intended.
                            if cur_br + br >= branchiness_factor or cur_count + 1 >= branchy_block_factor:
                                num_blks += len(cur_meta)
                                scc_schedule.append(cur_meta)
                                cur_meta, cur_br, cur_count = [], 0, 0  # clear

                # Flush the last, partially filled meta block
                if cur_meta:
                    num_blks += len(cur_meta)
                    scc_schedule.append(cur_meta)

                assert num_blks == len(tmp_schedule), f"Some blocks are missing during trace breaking of SCC "\
                                                      f"({num_blks} compiled, {len(tmp_schedule)} total)"

                blk_srcs = []

                # A single meta block is unrolled directly into the SCC loop
                if len(scc_schedule) == 1:
                    for i, b in enumerate(scc_schedule[-1]):
                        blk_srcs.append(
                            f"blk{i}() # [br {self.branchiness[b]}, loop {int(self.only_loop_at_top[b])}] {b.__name__}"
                        )
                        _globals[f"blk{i}"] = b

                else:
                    # TODO we might turn all meta blocks before the last one into meta
                    # blocks, and directly fold the last block into the main loop
                    # for i, meta in enumerate( scc_schedule[:-1] ):
                    # b = self.compile_meta_block( meta )
                    # blk_srcs.append( f"{b.__name__}()" )
                    # _globals[ b.__name__ ] = b

                    # for i, b in enumerate( scc_schedule[-1] ):
                    # blk_srcs.append( f"blk_of_last_meta{i}() # [br {self.branchiness[b]}, loop {int(self.only_loop_at_top[b])}] {b.__name__}" )
                    # _globals[ f"blk_of_last_meta{i}" ] = b

                    # Compile every meta block separately and call them in order
                    for i, meta in enumerate(scc_schedule):
                        b = self.compile_meta_block(meta)
                        blk_srcs.append(f"{b.__name__}()")
                        _globals[b.__name__] = b

            # Argument order matches the placeholder indices {0}..{4}
            scc_block_src = template.format(
                scc_id, "; ".join(copy_srcs), "\n    ".join(check_srcs),
                '\n    '.join(blk_srcs), ", ".join([x.__name__ for x in scc]))

            if _DEBUG: print(scc_block_src, "\n", "=" * 100)

            _locals = {}
            custom_exec(
                py.code.Source(scc_block_src).compile(), _globals, _locals)
            return _locals['generated_block']
Beispiel #7
0
  def schedule_posedge_flip( self, top ):
    """Generate the function that calls `_flip()` on every double-buffered signal.

    The result is stored as a one-element list in
    `top._sched.schedule_posedge_flip`. When no signal needs double
    buffering the element is a no-op function.

    Raises:
      Exception: if `top` has no `_sched` attribute.
    """

    if not hasattr( top, "_sched" ):
      raise Exception( "Please create top._sched pass metadata namespace first!" )

    # To keep compile time and bytecode size down, signals are grouped by a
    # common host object so that
    #   s.x.y.z._flip()
    #   s.x.y.zz._flip()
    # is emitted as
    #   x = s.x.y
    #   x.z._flip()
    #   x.zz._flip()

    def host_level( sig ):
      return sig.get_host_component().get_component_level()

    # Start with one group per host component, deepest hosts first
    hostobj_signals = defaultdict(list)
    for sig in reversed( sorted( top._dsl.all_signals, key=host_level ) ):
      if sig._dsl.needs_double_buffer:
        hostobj_signals[ sig.get_host_component() ].append( sig )

    # A lone signal under a non-top host is moved up to the parent object;
    # repeat until a full round moves nothing.
    while True:
      regrouped = defaultdict(list)
      moved = False

      for host, sigs in hostobj_signals.items():
        if len(sigs) > 1 or host is top:
          regrouped[ host ].extend( sigs )
        else:
          regrouped[ host.get_parent_object() ].append( sigs[0] )
          moved = True

      hostobj_signals = regrouped
      if not moved:
        break

    # Emit the body lines of the generated double_buffer function
    strs = []
    for host, sigs in hostobj_signals.items():
      if len(sigs) == 1:
        strs.append( f"    {repr(sigs[0])}._flip()" )
      elif host is top:
        strs.extend( f"    {repr(sig)}._flip()" for sig in sorted( sigs, key=repr ) )
      else:
        prefix = repr( host )
        cut = len( prefix ) + 1
        strs.append( f"    x = {prefix}" )
        strs.extend( f"    x.{repr(sig)[cut:]}._flip()" for sig in sorted( sigs, key=repr ) )

    if not strs:
      def no_double_buffer():
        pass
      top._sched.schedule_posedge_flip = [ no_double_buffer ]
      return

    lines = [
      'def compile_double_buffer( s ):',
      '  def double_buffer():',
      *strs,
      '  return double_buffer',
    ]

    # Shunning: exec(compile()) + linecache is used here instead of
    # py.code.Source because py.code.Source splits the full source into a
    # list of lines, which scales very poorly when the source is huge. For
    # designs with 10K+ flip-flops the overhead becomes significant.
    namespace = locals()
    custom_exec( compile( '\n'.join( lines ), filename='ff_flips', mode='exec' ), globals(), namespace )
    linecache.cache['ff_flips'] = (1, None, lines, 'ff_flips')
    top._sched.schedule_posedge_flip = [ namespace['compile_double_buffer']( top ) ]
Beispiel #8
0
 def compile_net_blk( _globals, src, writer ):
   """Compile the source of one net block and return the object it defines.

   Args:
     _globals: dict used as the global namespace when executing `src`.
     src: source code of the generated block.
     writer: writer of the net; only its repr is used, to build the pseudo
       filename of the generated code.

   Returns:
     The first object bound in the local namespace by executing `src`.
   """
   _locals = {}
   # Pseudo filename of the compiled code; the same string keys the
   # line_cache entry below. The closing parenthesis was missing before.
   fname = f"Net (writer is {writer!r})"
   custom_exec( compile( src, filename=fname, mode="exec"), _globals, _locals )
   line_cache[ fname ] = (len(src), None, src.splitlines(), fname )
   return list(_locals.values())[0]
Beispiel #9
0
    def schedule_posedge_flip(self, top):
        """Generate the function that calls `_flip()` on every double-buffered signal.

        The result is stored as a one-element list in
        `top._sched.schedule_posedge_flip`. When no signal needs double
        buffering the element is a no-op function.

        Raises:
            Exception: if `top` has no `_sched` attribute.
        """

        if not hasattr(top, "_sched"):
            raise Exception(
                "Please create top._sched pass metadata namespace first!")

        # To keep compile time and bytecode size down, signals are grouped by
        # a common host object so that
        #   s.x.y.z._flip()
        #   s.x.y.zz._flip()
        # is emitted as
        #   x = s.x.y
        #   x.z._flip()
        #   x.zz._flip()

        def host_level(sig):
            return sig.get_host_component().get_component_level()

        # Start with one group per host component, deepest hosts first
        hostobj_signals = defaultdict(list)
        for sig in reversed(sorted(top._dsl.all_signals, key=host_level)):
            if sig._dsl.needs_double_buffer:
                hostobj_signals[sig.get_host_component()].append(sig)

        # A lone signal under a non-top host is moved up to the parent
        # object; repeat until a full round moves nothing.
        while True:
            regrouped = defaultdict(list)
            moved = False

            for host, sigs in hostobj_signals.items():
                if len(sigs) > 1 or host is top:
                    regrouped[host].extend(sigs)
                else:
                    regrouped[host.get_parent_object()].append(sigs[0])
                    moved = True

            hostobj_signals = regrouped
            if not moved:
                break

        # Emit the body statements of the generated double_buffer function
        strs = []
        for host, sigs in hostobj_signals.items():
            if len(sigs) == 1:
                strs.append(f"{repr(sigs[0])}._flip()")
            elif host is top:
                strs.extend(f"{repr(sig)}._flip()"
                            for sig in sorted(sigs, key=repr))
            else:
                cut = len(repr(host)) + 1
                strs.append(f"x = {repr(host)}")
                strs.extend(f"x.{repr(sig)[cut:]}._flip()"
                            for sig in sorted(sigs, key=repr))

        if not strs:

            def no_double_buffer():
                pass

            top._sched.schedule_posedge_flip = [no_double_buffer]
            return

        # The join separator re-creates the indentation of the placeholder
        # line so that all statements line up inside double_buffer().
        src = """
      def compile_double_buffer( s ):
        def double_buffer():
          {}
        return double_buffer
      """.format("\n          ".join(strs))

        import py
        namespace = locals()
        custom_exec(py.code.Source(src).compile(), globals(), namespace)

        factory = namespace['compile_double_buffer']
        top._sched.schedule_posedge_flip = [factory(top)]
Beispiel #10
0
    def _create_assign_lambda(s, o, lamb):
        """Turn `<signal> //= lambda: <expr>` into a registered update block.

        The source of `lamb` is parsed, a function `_lambda__<signal name>`
        whose single statement is `<signal> @= <expr>` is built as an AST,
        compiled with the lambda's globals and free variables, registered via
        `ComponentLevel1._update` and `s._cache_func_meta`, and returned.

        Args:
            s: the component the block is registered on.
            o: the assignment target; must be a Signal.
            lamb: the lambda on the right-hand side; must take no arguments.

        Returns:
            The generated update block function.
        """
        assert isinstance(
            o, Signal
        ), "You can only assign(//=) a lambda function to a Wire/InPort/OutPort."

        srcs, line = inspect.getsourcelines(lamb)

        # Keep only group 2 of `compiled_re` and strip leading spaces so the
        # statement parses at column 0.
        # NOTE(review): `compiled_re` is defined outside this block;
        # presumably it isolates the `//=` statement -- confirm.
        src = compiled_re.sub(r'\2', ''.join(srcs)).lstrip(' ')
        root = ast.parse(src)
        assert isinstance(root, ast.Module) and len(
            root.body) == 1, "We only support single-statement lambda."

        # The single statement must be an augmented floor-division (`//=`)
        root = root.body[0]
        assert isinstance(root, ast.AugAssign) and isinstance(
            root.op, ast.FloorDiv)

        # lhs, rhs = root.target, root.value
        # Shunning: here we need to use ast from repr(o), because root.target
        # can be "m.in_" in some cases where we actually know what m is but the
        # source code still captures "m"
        lhs, rhs = ast.parse(
            f"s{repr(o)[len(repr(s)):]}").body[0].value, root.value
        lhs.ctx = ast.Store()
        # We expect the lambda to have no argument:
        # {'args': [], 'vararg': None, 'kwonlyargs': [], 'kw_defaults': [], 'kwarg': None, 'defaults': []}
        assert isinstance( rhs, ast.Lambda ) and not rhs.args.args and rhs.args.vararg is None, \
          "The lambda shouldn't contain any argument."

        # Only the lambda's body expression is used from here on
        rhs = rhs.body

        # Compose a new and valid function based on the lambda's lhs and rhs
        # Note that we don't need to add those source code of closure var
        # assignment to linecache. To get the matching line number in the
        # error message, we set the line number of update block

        # The block name embeds the signal name with '.', '[', ']' and ':'
        # replaced by '_' so that it is a valid identifier.
        blk_name = "_lambda__{}".format(
            repr(o).replace(".",
                            "_").replace("[",
                                         "_").replace("]",
                                                      "_").replace(":", "_"))
        # def <blk_name>():  <lhs> @= <rhs>
        # NOTE(review): newer Python versions add fields to these nodes
        # (e.g. `posonlyargs` on ast.arguments, `type_ignores` on ast.Module)
        # and deprecate ast.Num / ast.Index -- confirm which interpreter
        # versions this code is expected to run on.
        lambda_upblk = ast.FunctionDef(
            name=blk_name,
            args=ast.arguments(args=[],
                               vararg=None,
                               kwonlyargs=[],
                               kw_defaults=[],
                               kwarg=None,
                               defaults=[]),
            body=[
                ast.AugAssign(target=lhs,
                              op=ast.MatMult(),
                              value=rhs,
                              lineno=2,
                              col_offset=6)
            ],
            decorator_list=[],
            returns=None,
            lineno=1,
            col_offset=4,
        )
        # Stand-alone module holding only the block; passed to
        # _cache_func_meta below as the block's AST
        lambda_upblk_module = ast.Module(body=[lambda_upblk])

        # Manually wrap the lambda upblk with a closure function that adds the
        # desired variables to the closure of `_lambda__*`
        # We construct AST for the following function to add free variables in the
        # closure of the lambda function to the closure of the generated lambda
        # update block.
        #
        # def closure( lambda_closure ):
        #   <FreeVarName1> = lambda_closure[<Idx1>].cell_contents
        #   <FreeVarName2> = lambda_closure[<Idx2>].cell_contents
        #   ...
        #   <FreeVarNameN> = lambda_closure[<IdxN>].cell_contents
        #   def _lambda__<lambda_blk_name>():
        #     # the assignment statement appears here
        #   return _lambda__<lambda_blk_name>

        new_root = ast.Module(body=[
            ast.FunctionDef(
                name="closure",
                args=ast.arguments(args=[
                    ast.arg(arg="lambda_closure",
                            annotation=None,
                            lineno=1,
                            col_offset=12)
                ],
                                   vararg=None,
                                   kwonlyargs=[],
                                   kw_defaults=[],
                                   kwarg=None,
                                   defaults=[]),
                body=[
                    # One `<var> = lambda_closure[<idx>].cell_contents`
                    # statement per free variable of the lambda
                    ast.Assign(
                        targets=[
                            ast.Name(id=var,
                                     ctx=ast.Store(),
                                     lineno=1 + idx,
                                     col_offset=2)
                        ],
                        value=ast.Attribute(
                            value=ast.Subscript(
                                value=ast.Name(
                                    id='lambda_closure',
                                    ctx=ast.Load(),
                                    lineno=1 + idx,
                                    col_offset=5 + len(var),
                                ),
                                slice=ast.Index(value=ast.Num(
                                    n=idx,
                                    lineno=1 + idx,
                                    col_offset=19 + len(var),
                                ), ),
                                ctx=ast.Load(),
                                lineno=1 + idx,
                                col_offset=5 + len(var),
                            ),
                            attr='cell_contents',
                            ctx=ast.Load(),
                            lineno=1 + idx,
                            col_offset=5 + len(var),
                        ),
                        lineno=1 + idx,
                        col_offset=2,
                    ) for idx, var in enumerate(lamb.__code__.co_freevars)
                ] + [lambda_upblk] + [
                    # return <blk_name>
                    ast.Return(
                        value=ast.Name(
                            id=blk_name,
                            ctx=ast.Load(),
                            lineno=4 + len(lamb.__code__.co_freevars),
                            col_offset=9,
                        ),
                        lineno=4 + len(lamb.__code__.co_freevars),
                        col_offset=2,
                    )
                ],
                decorator_list=[],
                returns=None,
                lineno=1,
                col_offset=0,
            )
        ])

        # In Python 3 we need to supply a dict as local to get the newly
        # compiled function from closure.
        # Then `closure(lamb.__closure__)` returns the lambda update block with
        # the correct free variables in its closure.

        dict_local = {}
        custom_exec(compile(new_root, blk_name, "exec"), lamb.__globals__,
                    dict_local)
        blk = dict_local['closure'](lamb.__closure__)

        # Add the source code to linecache for the compiled function. The
        # cache key is the block name, which is also the filename that was
        # passed to compile() above.

        new_src = "def {}():\n {}\n".format(blk_name, src.replace("//=", "@="))
        linecache.cache[blk_name] = (len(new_src), None, new_src.splitlines(),
                                     blk_name)

        ComponentLevel1._update(s, blk)

        # This caching here does no caching because the block name contains
        # the signal name intentionally to avoid conflicts. With //= it is
        # more possible than normal update block to have conflicts:
        # if param == 1:  s.out //= s.in_ + 1
        # else:           s.out //= s.out + 100
        # Here these two blocks will implicitly have the same name but they
        # have different contents based on different param.
        # So the cache call here is just to reuse the existing interface to
        # register the AST/src of the generated block for elaborate or passes
        # to use.
        s._cache_func_meta(blk,
                           is_update_ff=False,
                           given=("".join(srcs), lambda_upblk_module, line,
                                  inspect.getsourcefile(lamb)))
        return blk
Beispiel #11
0
        # print("[default w/o Mamba] Use Python Bits")
        # Source template for one fixed-width Bits subclass. `{0}` is replaced
        # with the bit width via str.format. Executing the formatted source
        # defines `Bits<N>`, registers it in `_bits_types[N]` and binds the
        # short alias `b<N>`.
        # The action of a __slots__ declaration is limited to the class where it is defined.
        # As a result, subclasses will have a __dict__ unless they also define __slots__.
        bits_template = """
class Bits{0}(Bits):
  __slots__ = ( "_nbits", "_uint", "_next" )
  nbits = {0}
  def __init__( s, v=0, *, trunc_int=False ):
    return super().__init__( {0}, v, trunc_int )
_bits_types[{0}] = b{0} = Bits{0}
"""

# Bit widths whose Bits<N> classes are generated up front: 1..255, 384, 512.
_bitwidths = [*range(1, 256), 384, 512]
# Registry from bit width to the generated Bits<N> class.
_bits_types = {}

# Generate all up-front classes with a single compile/exec of the
# concatenated, formatted templates.
custom_exec(
    compile("".join(bits_template.format(width) for width in _bitwidths),
            "bits_import.py", "exec"),
    globals(), locals())


def mk_bits(nbits):
    """Return the ``Bits<nbits>`` class, generating it the first time it is asked for.

    Classes are looked up in ``_bits_types``. A missing width is created by
    formatting ``bits_template`` with ``nbits`` and executing the result,
    which registers the new class in ``_bits_types``.
    """
    assert nbits > 0, "We don't allow Bits0"
    # Note: no upper bound on the bit width is enforced here.

    # Fast path: the class has already been generated
    if nbits in _bits_types:
        return _bits_types[nbits]

    generated = compile(bits_template.format(nbits), f"Bits{nbits}", "exec")
    custom_exec(generated, globals(), {"nbits": nbits})
    return _bits_types[nbits]
Beispiel #12
0
    def lock_in_simulation():
      """Replace every Signal object under `top` with its simulation value.

      Steps:
        1. Walk the hierarchy from `top`; replace each Signal (attribute or
           list element) with `default_value()` and record where it lived in
           `signal_object_mapping`.
        2. For every value net, make all top-level signals of the net refer
           to one shared "residence" value object.
        3. Store the mapping on `top._sim`, mark the simulation as locked and
           generate `top._sim.check_top_level_inports`, which asserts that
           the top-level input ports of `top` still refer to the same value
           objects.
      """
      top._check_called_at_elaborate_top( "lock_in_simulation" )

      # Basically we want to avoid @= between elements in the same net since
      # we now use @=.
      # - First pass creates whole bunch of signals
      # Maps each Signal to (container, key_or_index, is_list, value)
      signal_object_mapping = {}

      # Worklist of (object to scan, host component); Q.pop() takes the most
      # recently appended entry
      Q = [ (top, top) ]
      while Q:
        current_obj, host = Q.pop()
        if isinstance( current_obj, list ):
          for i, obj in enumerate( current_obj ):
            if isinstance( obj, Signal ):
              try:
                value = obj.default_value()
                if obj._dsl.needs_double_buffer:
                  value <<= value
              except Exception as e:
                # Re-raise the same exception type with the signal name appended
                raise type(e)(str(e) + f' happens at {obj!r}')

              # Swap the Signal in the list for its value object
              current_obj[i] = value

              signal_object_mapping[ obj ] = (current_obj, i, True, value)

            elif isinstance( obj, Component ):
              Q.append( (obj, obj) )
            elif isinstance( obj, (Interface, list) ):
              Q.append( (obj, host) )

        elif isinstance( current_obj, NamedObject ):
          for i, obj in current_obj.__dict__.items():
            # Skip attributes whose name starts with an underscore
            if i[0] == '_': continue

            if isinstance( obj, Signal ):
              try:
                value = obj.default_value()
                if obj._dsl.needs_double_buffer:
                  value <<= value
              except Exception as e:
                # Re-raise the same exception type with the signal name appended
                raise type(e)(str(e) + f' happens at {obj!r}')

              # Swap the Signal attribute for its value object
              setattr( current_obj, i, value )
              signal_object_mapping[obj] = (current_obj, i, False, value)

            elif isinstance( obj, Component ):
              Q.append( (obj, obj) )
            elif isinstance( obj, (Interface, list) ):
              Q.append( (obj, host) )

      # Swap all Signal objects with actual data
      nets = top.get_all_value_nets()

      # First step is to consolidate all non-slice signals in the same net
      # by pointing them to the same object
      # TODO optimize for bitstruct fields. Essentially only sliced signals
      # should be excluded.
      for writer, signals in nets:
        residence = None

        # Find the residence value: the writer if it is a Const or a
        # top-level signal, otherwise the first top-level signal in the net
        if isinstance( writer, Const ) or writer.is_top_level_signal():
          residence = writer
        else:
          for x in signals:
            if x.is_top_level_signal():
              residence = x
              break

        if residence is None:
          continue # whole net is slice

        if isinstance( residence, Const ):
          residence_value = residence._dsl.const
        else:
          # Last tuple element is the value object created in the first pass
          residence_value = signal_object_mapping[ residence ][-1]

        # Replace top-level signals in the net with residence value

        for x in signals:
          if x is not residence and x.is_top_level_signal():
            # swap old value with new residence value

            current_obj, i, is_list, value = signal_object_mapping[ x ]
            signal_object_mapping[ x ] = (current_obj, i, is_list, residence_value)

            if is_list:
              current_obj[i] = residence_value
            else:
              setattr( current_obj, i, residence_value )

      top._sim.signal_object_mapping = signal_object_mapping
      top._sim.locked_simulation = True

      # Add the function that checks if the Bits objects of
      # top-level input ports are modified. If so, it's mostly because
      # the top-level ports are assigned with = instead of @=.

      inports = []
      objs    = []
      for x in top._dsl.all_signals:
        if x.is_input_value_port() and x.is_top_level_signal() and x.get_host_component() is top:
          inports.append( x )
          objs.append( signal_object_mapping[x][-1] )

      # One identity assert per input port; `obj<i>` is the value object
      # recorded for the i-th port and is supplied through _globals below
      src = """
def check_top_level_inports():
  {}
""".format( "\n  ".join([ f"assert {x} is obj{i}, 'Please use @= to assign top level InPort top.{repr(x)[2:]}'"
                            for i, x in enumerate(inports) ]) )
      _locals = {}
      _globals = { f"obj{i}" : x for i, x in enumerate(objs) }
      # The generated asserts refer to the top component as `s`
      _globals['s'] = top
      custom_exec( py.code.Source(src).compile(), _globals, _locals)
      top._sim.check_top_level_inports = _locals['check_top_level_inports']