Example #1
def compile_net_blk(_globals, src):
    _locals = {}
    fname = f"Net at {_globals['s']!r}"
    custom_exec(compile(src, filename=fname, mode="exec"), _globals, _locals)
    # Register the generated source so tracebacks and linecache lookups work
    line_cache[fname] = (len(src), None, src.splitlines(), fname)
    # src defines exactly one function; return it
    return list(_locals.values())[0]
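A minimal usage sketch, assuming custom_exec is a thin wrapper around the builtin exec and line_cache is a linecache-style dict (the component stand-in and net source below are hypothetical):

class _FakeComponent:                       # hypothetical stand-in for an elaborated component
    in_, out = 1, 0
_globals = {'s': _FakeComponent()}
up_net = compile_net_blk(_globals, "def up_net():\n  s.out = s.in_")
up_net()                                    # 's' resolves through _globals at call time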
Example #2
def _create_fn(fn_name, args_lst, body_lst, _globals=None, class_method=False):
    # Assemble argument string and body string
    args = ', '.join(args_lst)
    body = '\n'.join(f'  {statement}' for statement in body_lst)

    # Assemble the source code and execute it
    src = '@classmethod\n' if class_method else ''
    src += f'def {fn_name}({args}):\n{body}'
    if _globals is None: _globals = {}
    _locals = {}
    custom_exec(py.code.Source(src).compile(), _globals, _locals)
    return _locals[fn_name]
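A quick sketch of a call site (hypothetical function name and body; custom_exec is assumed to wrap the builtin exec):

add = _create_fn('add', ['x', 'y'], ['z = x + y', 'return z'])
assert add(3, 4) == 7   # src was "def add(x, y):\n  z = x + y\n  return z"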
Example #3
    def gen_hook_func(top, x, ports, case_file):
        port_srcs = [f"'h{{str(to_bits(x.{p}))}}" for p in ports]

        src = """
def dump_case():
  if top.simulated_cycles >= 2: # skip reset
    print(f"`T({});", file=case_file)
""".format(",".join(port_srcs))
        _locals = {}
        custom_exec(
            py.code.Source(src).compile(), {
                'top': top,
                'x': x,
                'to_bits': to_bits,
                'case_file': case_file
            }, _locals)
        return _locals['dump_case']
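With ports = ['a', 'b'] (hypothetical port names), the generated src is:

def dump_case():
  if top.simulated_cycles >= 2: # skip reset
    print(f"`T('h{str(to_bits(x.a))},'h{str(to_bits(x.b))});", file=case_file)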
Example #4
    def compile_meta_block(self, blocks):

        meta_id = self.meta_block_id
        self.meta_block_id += 1

        # Create custom global dict for all blocks inside the meta block
        _globals = {f"blk{i}": b for i, b in enumerate(blocks)}

        blk_srcs = []
        for i, b in enumerate(blocks):
            # This is a normal update block
            if b in self.branchiness:
                blk_srcs.append(
                    f"blk{i}() # [br {self.branchiness[b]}, loop {int(self.only_loop_at_top[b])}] {b.__name__}"
                )
            # This is an SCC block which has zero BR and is a loop
            else:
                blk_srcs.append(f"blk{i}() # {b.__name__}")

        gen_src = f"def meta_block{meta_id}():\n  "
        gen_src += "\n  ".join(blk_srcs)

        # use custom_exec to compile the meta block
        _locals = {}
        custom_exec(py.code.Source(gen_src).compile(), _globals, _locals)
        ret = _locals[f'meta_block{meta_id}']
        if _DEBUG: print(gen_src)

        # We will use pypyjit.dont_trace_here to compile standalone traces for
        # each meta block
        try:
            from pypyjit import dont_trace_here
            dont_trace_here(0, False, ret.__code__)
        except Exception:  # not running under PyPy
            pass

        return ret
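For a meta block over two update blocks (hypothetical block names), the generated source looks like:

def meta_block0():
  blk0() # [br 2, loop 0] up_alu
  blk1() # up_scc_1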
Example #5
    def schedule_posedge_flip(self, top):

        if not hasattr(top, "_sched"):
            raise Exception(
                "Please create top._sched pass metadata namespace first!")

        # To reduce compile time and the amount of bytecode, I use a
        # heuristic to group signals that share a host object, so that
        #   s.x.y.z._flip()
        #   s.x.y.zz._flip()
        # becomes
        #   x = s.x.y
        #   x.z._flip()
        #   x.zz._flip()

        hostobj_signals = defaultdict(list)
        for x in reversed(sorted(top._dsl.all_signals,
                                 key=lambda x: x.get_host_component().get_component_level())):
            if x._dsl.needs_double_buffer:
                hostobj_signals[x.get_host_component()].append(x)

        # Iteratively hoist lone signals up to the parent host object so they
        # can merge with other signals and share a common prefix assignment
        done = False
        while not done:
            next_hostobj_signals = defaultdict(list)
            done = True

            for x, y in hostobj_signals.items():
                if len(y) > 1:
                    next_hostobj_signals[x].extend(y)
                elif x is top:
                    next_hostobj_signals[x].extend(y)
                else:
                    x = x.get_parent_object()
                    next_hostobj_signals[x].append(y[0])
                    done = False
            hostobj_signals = next_hostobj_signals

        strs = []
        for x, y in hostobj_signals.items():
            if len(y) == 1:
                strs.append(f"{repr(y[0])}._flip()")
            elif x is top:
                for z in sorted(y, key=repr):
                    strs.append(f"{repr(z)}._flip()")
            else:
                pos = len(repr(x)) + 1
                strs.append(f"x = {repr(x)}")

                for z in sorted(y, key=repr):
                    strs.append(f"x.{repr(z)[pos:]}._flip()")

        if not strs:

            def no_double_buffer():
                pass

            top._sched.schedule_posedge_flip = [no_double_buffer]

        else:
            src = """
      def compile_double_buffer( s ):
        def double_buffer():
          {}
        return double_buffer
      """.format("\n          ".join(strs))

            import py
            # print(src)
            _locals = {}
            custom_exec(py.code.Source(src).compile(), globals(), _locals)

            top._sched.schedule_posedge_flip = [
                _locals['compile_double_buffer'](top)
            ]
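For two double-buffered signals under s.x.y plus a lone s.clk2 (hypothetical hierarchy), the heuristic produces source along these lines:

def compile_double_buffer( s ):
  def double_buffer():
    s.clk2._flip()
    x = s.x.y
    x.z._flip()
    x.zz._flip()
  return double_buffer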
Example #6
        def compile_scc(i):
            nonlocal scc_id

            scc = SCCs[i]

            if len(scc) == 1:
                return list(scc)[0]

            scc_id += 1
            if _DEBUG: print(f"{'='*100}\n SCC{scc_id}\n{'='*100}")

            # For each non-trivial SCC, we need to figure out an intra-SCC
            # linear schedule that minimizes the time to re-execute this SCC
            # when values change. A bad schedule may execute the SCC many
            # times, each pass changing only a few signals.
            # The current algorithm iteratively finds the "entry block" of
            # the SCC and expands its adjacent blocks. The implementation
            # first finds the actual entry point, and then BFS-expands the
            # footprint until all nodes are visited.

            tmp_schedule = []
            Q = deque()

            if scc_pred[i] is None:
                # We start bfs from the block that has the most intra-SCC
                # input edges (the max() below)
                InD = {v: 0 for v in scc}
                for (u, v) in E:  # u -> v
                    if u in scc and v in scc:
                        InD[v] += 1
                Q.append(max(InD, key=InD.get))

            else:
                # We start bfs with the blocks that are successors of the
                # predecessor scc in the previous SCC-level topological sort.
                pred = set(SCCs[scc_pred[i]])
                # Sort by name for a deterministic outcome
                for x in sorted(scc, key=lambda x: x.__name__):
                    # Find reversed edges that point back to the pred SCC
                    for v in G_T[x]:
                        if v in pred:
                            Q.append(x)

            # Perform bfs to find a heuristic schedule
            visited = set(Q)
            while Q:
                u = Q.popleft()
                tmp_schedule.append(u)
                for v in G[u]:
                    if v in scc and v not in visited:
                        Q.append(v)
                        visited.add(v)

            variables = set()
            for (u, v) in E:
                # Collect all variables that trigger other blocks in the SCC
                if u in scc and v in scc:
                    variables.update(constraint_objs[(u, v)])

            if len(variables) == 0:
                raise Exception(
                    "There is a cyclic dependency without involving variables. "
                    "Probably a loop that involves update_once:\n{}".format(
                        ", ".join([x.__name__ for x in scc])))

            # Generate a loop for the SCC.
            # Shunning: we just simply loop over the whole SCC block
            # TODO performance optimizations using Mamba techniques within an SCC block

            template = """
from copy import deepcopy
def wrapped_SCC_{0}():
  N = 0
  while True:
    N += 1
    if N > 100:
      raise Exception("Combinational loop detected at runtime in {{{4}}} after 100 iters!")
    {1}
    {3}
    {2}
    # print( "SCC block{0} is executed", num_iters, "times" )
    break
generated_block = wrapped_SCC_{0}
"""

            # Clean up non-top-level variables when the top-level signal is
            # also collected. For slices of Bits we directly use the top-level
            # wide Bits, since Bits clone is RPython code

            final_variables = set()

            for x in sorted(variables, key=repr):
                w = x.get_top_level_signal()
                if w is x:
                    final_variables.add(x)
                    continue

                # w is not x
                if issubclass(w._dsl.Type, Bits):
                    if w not in final_variables:
                        final_variables.add(w)
                elif is_bitstruct_class(w._dsl.Type):
                    if w not in final_variables:
                        final_variables.add(x)
                else:
                    final_variables.add(x)

            # also group them by common ancestor to reduce byte code
            # TODO use longest-common-prefix (LCP) algorithms ...

            final_var_host = defaultdict(list)
            for x in final_variables:
                final_var_host[x.get_host_component()].append(x)

            # Then, we generate the Python code that saves variables at the
            # beginning of each SCC iteration and the code that checks if the
            # values of those variables have changed
            copy_srcs = []
            check_srcs = []

            var_id = 0
            for host, var_list in final_var_host.items():
                hostlen = len(repr(host))

                copy_srcs.append(f"host = {host!r}")
                check_srcs.append(f"host = {host!r}")

                sub_check_srcs = []

                for var in var_list:
                    var_id += 1
                    subname = repr(var)[hostlen + 1:]
                    if issubclass(var._dsl.Type, Bits):
                        copy_srcs.append(f"t{var_id}=host.{subname}.clone()")
                    elif is_bitstruct_class(var._dsl.Type):
                        copy_srcs.append(f"t{var_id}=host.{subname}.clone()")
                    else:
                        copy_srcs.append(f"t{var_id}=deepcopy(host.{subname})")

                    sub_check_srcs.append(f"host.{subname} != t{var_id}")

                check_srcs.append(
                    f"if { ' or '.join(sub_check_srcs)}: continue")

            # Divide all blks into meta blocks.
            # branchiness_factor bounds the total branchiness of a meta block;
            # branchy_block_factor bounds the number of branchy blocks in one.
            branchiness_factor = 20
            branchy_block_factor = 6

            num_blks = 0  # sanity check
            cur_meta, cur_br, cur_count = [], 0, 0
            scc_schedule = []

            _globals = {'s': top}
            blk_srcs = []

            # If there are fewer than 10 blocks, we directly unroll them
            if len(tmp_schedule) < 10:
                blk_srcs = []
                for i, b in enumerate(tmp_schedule):
                    blk_srcs.append(
                        f"blk{i}() # [br {self.branchiness[b]}, loop {int(self.only_loop_at_top[b])}] {b.__name__}"
                    )
                    _globals[f"blk{i}"] = b  # put it into the block's closure

            else:
                for i, blk in enumerate(tmp_schedule):
                    # Same here: if an update block only has a top-level loop, br = 0
                    br = 0 if self.only_loop_at_top[blk] else self.branchiness[blk]
                    if cur_br == 0:
                        cur_meta.append(blk)
                        cur_br += br
                        cur_count += (br > 0)
                        if cur_br >= branchiness_factor or cur_count >= branchy_block_factor:
                            num_blks += len(cur_meta)
                            scc_schedule.append(cur_meta)
                            cur_meta, cur_br, cur_count = [], 0, 0  # clear
                    else:
                        if br == 0:
                            # If no branchy block available, directly start a new metablock
                            num_blks += len(cur_meta)
                            scc_schedule.append(cur_meta)
                            cur_meta, cur_br, cur_count = [blk], br, (br > 0)
                        else:
                            cur_meta.append(blk)
                            cur_br += br
                            cur_count += (br > 0)

                            if cur_br + br >= branchiness_factor or cur_count + 1 >= branchy_block_factor:
                                num_blks += len(cur_meta)
                                scc_schedule.append(cur_meta)
                                cur_meta, cur_br, cur_count = [], 0, 0  # clear

                if cur_meta:
                    num_blks += len(cur_meta)
                    scc_schedule.append(cur_meta)

                assert num_blks == len(tmp_schedule), f"Some blocks are missing during trace breaking of SCC "\
                                                      f"({num_blks} compiled, {len(tmp_schedule)} total)"

                blk_srcs = []

                if len(scc_schedule) == 1:
                    for i, b in enumerate(scc_schedule[-1]):
                        blk_srcs.append(
                            f"blk{i}() # [br {self.branchiness[b]}, loop {int(self.only_loop_at_top[b])}] {b.__name__}"
                        )
                        _globals[f"blk{i}"] = b

                else:
                    # TODO we might compile every meta block except the last
                    # one, and directly fold the last meta block into the main loop
                    # for i, meta in enumerate( scc_schedule[:-1] ):
                    # b = self.compile_meta_block( meta )
                    # blk_srcs.append( f"{b.__name__}()" )
                    # _globals[ b.__name__ ] = b

                    # for i, b in enumerate( scc_schedule[-1] ):
                    # blk_srcs.append( f"blk_of_last_meta{i}() # [br {self.branchiness[b]}, loop {int(self.only_loop_at_top[b])}] {b.__name__}" )
                    # _globals[ f"blk_of_last_meta{i}" ] = b

                    for i, meta in enumerate(scc_schedule):
                        b = self.compile_meta_block(meta)
                        blk_srcs.append(f"{b.__name__}()")
                        _globals[b.__name__] = b

            scc_block_src = template.format(
                scc_id, "; ".join(copy_srcs), "\n    ".join(check_srcs),
                '\n    '.join(blk_srcs), ", ".join([x.__name__ for x in scc]))

            if _DEBUG: print(scc_block_src, "\n", "=" * 100)

            _locals = {}
            custom_exec(
                py.code.Source(scc_block_src).compile(), _globals, _locals)
            return _locals['generated_block']
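Filled in for a hypothetical two-block SCC over a single Bits signal s.x.out, the generated source reads:

from copy import deepcopy
def wrapped_SCC_1():
  N = 0
  while True:
    N += 1
    if N > 100:
      raise Exception("Combinational loop detected at runtime in {up_a, up_b} after 100 iters!")
    host = s.x; t1=host.out.clone()
    blk0() # [br 0, loop 0] up_a
    blk1() # [br 0, loop 0] up_b
    host = s.x
    if host.out != t1: continue
    break
generated_block = wrapped_SCC_1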
Example #7
    bits_template = """
class Bits{0}(Bits):
  nbits = {0}
  def __new__( cls, value=0 ):
    return Bits.__new__( cls, {0}, value )
_bits_types[{0}] = b{0} = Bits{0}
"""
  except ImportError:
    from .PythonBits import Bits
    # print "[default w/o Mamba] Use Python Bits"
    bits_template = """
class Bits{0}(Bits):
  nbits = {0}
  def __init__( s, value=0 ):
    return super().__init__( {0}, value )
_bits_types[{0}] = b{0} = Bits{0}
"""

_bitwidths  = list(range(1, 256)) + [ 384, 512 ]
_bits_types = dict()

custom_exec(compile( "".join([ bits_template.format(nbits) for nbits in _bitwidths ]),
                     filename="bits_import.py", mode="exec"), globals(), locals() )

def mk_bits( nbits ):
  # assert nbits < 512, "We don't allow bitwidth to exceed 512."
  if nbits not in _bits_types:
    custom_exec(compile( bits_template.format(nbits), filename=f"Bits{nbits}", mode="exec" ),
                globals(), locals() )
  return _bits_types[nbits]
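Usage sketch: widths 1-255 plus 384 and 512 are pre-generated above, so mk_bits only compiles new classes for other widths and caches them:

Bits8   = mk_bits(8)     # served from the _bits_types cache
Bits300 = mk_bits(300)   # compiled on the fly from bits_template
x = Bits8(255)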
Example #8
  def _create_assign_lambda( s, o, lamb ):
    assert isinstance( o, Signal ), "You can only assign(//=) a lambda function to a Wire/InPort/OutPort."

    srcs, line = inspect.getsourcelines( lamb )

    src  = compiled_re.sub( r'\2', ''.join(srcs) ).lstrip(' ')
    root = ast.parse(src)
    assert isinstance( root, ast.Module ) and len(root.body) == 1, "We only support single-statement lambda."

    root = root.body[0]
    assert isinstance( root, ast.AugAssign ) and isinstance( root.op, ast.FloorDiv )

    lhs, rhs = root.target, root.value
    # We expect the lambda to have no argument:
    # {'args': [], 'vararg': None, 'kwonlyargs': [], 'kw_defaults': [], 'kwarg': None, 'defaults': []}
    assert isinstance( rhs, ast.Lambda ) and not rhs.args.args and rhs.args.vararg is None, \
      "The lambda shouldn't contain any argument."

    rhs = rhs.body

    # Compose a new, valid function based on the lambda's lhs and rhs.
    # Note that we don't need to add the source code of the closure-variable
    # assignments to linecache; to get the matching line number in the
    # error message, we explicitly set the line number of the update block.
    # Shunning: bugfix:

    blk_name = "_lambda__{}".format( repr(o).replace(".","_").replace("[", "_").replace("]", "_").replace(":", "_") )
    lambda_upblk = ast.FunctionDef(
      name=blk_name,
      args=ast.arguments(posonlyargs=[],  # required field on Python 3.8+
                         args=[], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]),
      body=[ast.Assign(targets=[lhs], value=rhs, lineno=2, col_offset=6)],
      decorator_list=[],
      returns=None,
      lineno=1, col_offset=4,
    )
    lambda_upblk_module = ast.Module( body=[ lambda_upblk ], type_ignores=[] )

    # Manually wrap the lambda upblk with a closure function that adds the
    # desired variables to the closure of `_lambda__*`
    # We construct AST for the following function to add free variables in the
    # closure of the lambda function to the closure of the generated lambda
    # update block.
    #
    # def closure( lambda_closure ):
    #   <FreeVarName1> = lambda_closure[<Idx1>].cell_contents
    #   <FreeVarName2> = lambda_closure[<Idx2>].cell_contents
    #   ...
    #   <FreeVarNameN> = lambda_closure[<IdxN>].cell_contents
    #   def _lambda__<lambda_blk_name>():
    #     # the assignment statement appears here
    #   return _lambda__<lambda_blk_name>

    new_root = ast.Module( body=[
      ast.FunctionDef(
          name="closure",
          args=ast.arguments(posonlyargs=[],  # required field on Python 3.8+
                             args=[ast.arg(arg="lambda_closure", annotation=None, lineno=1, col_offset=12)],
                             vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]),
          body=[
            ast.Assign(
              targets=[ast.Name(id=var, ctx=ast.Store(), lineno=1+idx, col_offset=2)],
              value=ast.Attribute(
                value=ast.Subscript(
                  value=ast.Name(
                    id='lambda_closure',
                    ctx=ast.Load(),
                    lineno=1+idx, col_offset=5+len(var),
                  ),
                  slice=ast.Index(
                    value=ast.Num(
                      n=idx,
                      lineno=1+idx, col_offset=19+len(var),
                    ),
                  ),
                  ctx=ast.Load(),
                  lineno=1+idx, col_offset=5+len(var),
                ),
                attr='cell_contents',
                ctx=ast.Load(),
                lineno=1+idx, col_offset=5+len(var),
              ),
              lineno=1+idx, col_offset=2,
            ) for idx, var in enumerate(lamb.__code__.co_freevars)
          ] + [ lambda_upblk ] + [
            ast.Return(
              value=ast.Name(
                id=blk_name,
                ctx=ast.Load(),
                lineno=4+len(lamb.__code__.co_freevars), col_offset=9,
              ),
              lineno=4+len(lamb.__code__.co_freevars), col_offset=2,
            )
          ],
          decorator_list=[],
          returns=None,
          lineno=1, col_offset=0,
        )
    ], type_ignores=[] )  # type_ignores is required by compile() on Python 3.8+

    # In Python 3 we need to supply a dict as locals to retrieve the newly
    # compiled function from the closure.
    # Then `closure(lamb.__closure__)` returns the lambda update block with
    # the correct free variables in its closure.

    dict_local = {}
    custom_exec( compile(new_root, blk_name, "exec"), lamb.__globals__, dict_local )
    blk = dict_local[ 'closure' ]( lamb.__closure__ )

    # Add the source code to linecache for the compiled function

    new_src = "def {}():\n {}\n".format( blk_name, src.replace("//=", "=") )
    linecache.cache[ blk_name ] = (len(new_src), None, new_src.splitlines(), blk_name)

    ComponentLevel1.update( s, blk )

    # This call does no actual caching, because the block name intentionally
    # contains the signal name to avoid conflicts. With //= conflicts are
    # more likely than with a normal update block:
    # if param == 1:  s.out //= s.in_ + 1
    # else:           s.out //= s.out + 100
    # Here the two blocks would implicitly have the same name, but their
    # contents differ based on param.
    # So the cache call here just reuses the existing interface to register
    # the AST/src of the generated block for elaboration or passes to use.
    s._cache_func_meta( blk, is_update_ff=False,
      given=("".join(srcs), lambda_upblk_module, line, inspect.getsourcefile( lamb )) )
    return blk
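Usage sketch (hypothetical component with ports s.in_ and s.out; //= with a lambda is the assign syntax this method implements):

s.out //= lambda: s.in_ + 1
# _create_assign_lambda rewrites the statement into an update block roughly
# equivalent to:
#   def _lambda__s_out():
#     s.out = s.in_ + 1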