def compile_net_blk( _globals, src ):
  _locals = {}
  fname = f"Net at {_globals['s']!r}"
  custom_exec( compile( src, filename=fname, mode="exec" ), _globals, _locals )
  # Register the generated source in the line cache so tracebacks and
  # debuggers can show readable source for the compiled net block
  line_cache[ fname ] = (len(src), None, src.splitlines(), fname)
  # The exec'ed source defines exactly one function; return it
  return list(_locals.values())[0]

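# A minimal sketch (not from the original source) of driving compile_net_blk:
# the caller renders one tiny update block as a string and gets back the only
# function the exec'ed source defined. `top` is a hypothetical elaborated
# component, and the statement inside `src` is illustrative.
def _example_compile_net_blk( top ):
  src = ( "def up_net_writer():\n"
          "  s.sink.in_ = s.src.out\n" )
  return compile_net_blk( { 's': top }, src )
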
def _create_fn( fn_name, args_lst, body_lst, _globals=None, class_method=False ):
  # Assemble argument string and body string
  args = ', '.join(args_lst)
  body = '\n'.join(f'  {statement}' for statement in body_lst)

  # Assemble the source code and execute it
  src = '@classmethod\n' if class_method else ''
  src += f'def {fn_name}({args}):\n{body}'

  if _globals is None:
    _globals = {}
  _locals = {}
  custom_exec( py.code.Source(src).compile(), _globals, _locals )
  return _locals[fn_name]

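# A hypothetical usage sketch of _create_fn (not in the original source):
# build a tiny accessor at runtime. For this call the assembled source is
#   def get_nbits(self):
#     return self.nbits
def _example_create_fn():
  return _create_fn( 'get_nbits', ['self'], ['return self.nbits'] )
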
def gen_hook_func( top, x, ports, case_file ):
  # Each entry renders to a fragment like 'h{str(to_bits(x.port))}; the
  # doubled braces survive this f-string so they remain live placeholders
  # in the f-string of the generated function below
  port_srcs = [ f"'h{{str(to_bits(x.{p}))}}" for p in ports ]
  src = """
def dump_case():
  if top.simulated_cycles >= 2: # skip reset
    print(f"`T({});", file=case_file)
""".format( ",".join(port_srcs) )
  _locals = {}
  custom_exec( py.code.Source(src).compile(),
               { 'top': top, 'x': x, 'to_bits': to_bits, 'case_file': case_file },
               _locals )
  return _locals['dump_case']

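# Illustrative rendering (hypothetical ports ['in_', 'out']) of the source
# that gen_hook_func compiles; the port fragments land inside the generated
# f-string and are evaluated on every call:
#
#   def dump_case():
#     if top.simulated_cycles >= 2: # skip reset
#       print(f"`T('h{str(to_bits(x.in_))},'h{str(to_bits(x.out))});", file=case_file)
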
def compile_meta_block( self, blocks ):
  meta_id = self.meta_block_id
  self.meta_block_id += 1

  # Create custom global dict for all blocks inside the meta block
  _globals = { f"blk{i}": b for i, b in enumerate(blocks) }

  blk_srcs = []
  for i, b in enumerate(blocks):
    if b in self.branchiness:
      # This is a normal update block
      blk_srcs.append( f"blk{i}() # [br {self.branchiness[b]}, loop {int(self.only_loop_at_top[b])}] {b.__name__}" )
    else:
      # This is an SCC block which has zero BR and is a loop
      blk_srcs.append( f"blk{i}() # {b.__name__}" )

  gen_src = f"def meta_block{meta_id}():\n  "
  gen_src += "\n  ".join(blk_srcs)

  # Use custom_exec to compile the meta block
  _locals = {}
  custom_exec( py.code.Source(gen_src).compile(), _globals, _locals )
  ret = _locals[ f'meta_block{meta_id}' ]

  if _DEBUG: print(gen_src)

  # We use pypyjit.dont_trace_here to compile a standalone trace for
  # each meta block
  try:
    from pypyjit import dont_trace_here
    dont_trace_here( 0, False, ret.__code__ )
  except ImportError:
    pass

  return ret

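# Illustrative rendering (hypothetical block names) of the source this method
# generates for a three-block meta block; each blk{i} call resolves through
# the custom _globals dict built above:
#
#   def meta_block0():
#     blk0() # [br 2, loop 0] up_decode
#     blk1() # [br 0, loop 1] up_regfile
#     blk2() # up_scc_1
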
def schedule_posedge_flip( self, top ):
  if not hasattr( top, "_sched" ):
    raise Exception("Please create top._sched pass metadata namespace first!")

  # To reduce compile time and the amount of bytecode, we use a heuristic
  # to group signals that belong to the same host object:
  #   s.x.y.z._flip()
  #   s.x.y.zz._flip()
  # becomes
  #   x = s.x.y
  #   x.z._flip()
  #   x.zz._flip()
  hostobj_signals = defaultdict(list)
  for x in reversed(sorted( top._dsl.all_signals,
                            key=lambda x: x.get_host_component().get_component_level() )):
    if x._dsl.needs_double_buffer:
      hostobj_signals[ x.get_host_component() ].append( x )

  # Lift a host object's only signal up to its parent until each host
  # either owns more than one signal or is the top component
  done = False
  while not done:
    next_hostobj_signals = defaultdict(list)
    done = True
    for x, y in hostobj_signals.items():
      if len(y) > 1:
        next_hostobj_signals[x].extend(y)
      elif x is top:
        next_hostobj_signals[x].extend(y)
      else:
        x = x.get_parent_object()
        next_hostobj_signals[x].append(y[0])
        done = False
    hostobj_signals = next_hostobj_signals

  strs = []
  for x, y in hostobj_signals.items():
    if len(y) == 1:
      strs.append( f"{repr(y[0])}._flip()" )
    elif x is top:
      for z in sorted(y, key=repr):
        strs.append( f"{repr(z)}._flip()" )
    else:
      pos = len(repr(x)) + 1
      strs.append( f"x = {repr(x)}" )
      for z in sorted(y, key=repr):
        strs.append( f"x.{repr(z)[pos:]}._flip()" )

  if not strs:
    def no_double_buffer():
      pass
    top._sched.schedule_posedge_flip = [ no_double_buffer ]
  else:
    src = """
def compile_double_buffer( s ):
  def double_buffer():
    {}
  return double_buffer
""".format( "\n    ".join(strs) )
    import py
    # print(src)
    l = locals()
    custom_exec( py.code.Source(src).compile(), globals(), l )
    top._sched.schedule_posedge_flip = [ l['compile_double_buffer'](top) ]

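# Illustrative rendering (hypothetical signal names) of the generated source:
# the grouping heuristic hoists the common prefix into `x` so each _flip()
# line stays short:
#
#   def compile_double_buffer( s ):
#     def double_buffer():
#       s.ctrl.state._flip()
#       x = s.dpath.rf
#       x.regs[0]._flip()
#       x.regs[1]._flip()
#     return double_buffer
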
def compile_scc( i ):
  nonlocal scc_id
  scc = SCCs[i]

  if len(scc) == 1:
    return list(scc)[0]

  scc_id += 1
  if _DEBUG: print( f"{'='*100}\n SCC{scc_id}\n{'='*100}" )

  # For each non-trivial SCC, we need to figure out an intra-SCC linear
  # schedule that minimizes the time to re-execute this SCC due to value
  # changes. A bad schedule may execute the SCC many times, with each pass
  # changing only a few signals. The current algorithm iteratively finds
  # the "entry block" of the SCC and expands its adjacent blocks. The
  # implementation first finds the actual entry point, and then does a BFS
  # to expand the footprint until all nodes are visited.
  tmp_schedule = []
  Q = deque()

  if scc_pred[i] is None:
    # We start BFS from the block that has the most input edges in the SCC
    InD = { v: 0 for v in scc }
    for (u, v) in E:  # u -> v
      if u in scc and v in scc:
        InD[v] += 1
    Q.append( max(InD, key=InD.get) )
  else:
    # We start BFS with the blocks that are successors of the
    # predecessor SCC in the previous SCC-level topological sort.
    pred = set( SCCs[ scc_pred[i] ] )
    # Sort by name for a deterministic outcome
    for x in sorted( scc, key=lambda x: x.__name__ ):
      for v in G_T[x]:  # find reversed edges that point back to the pred SCC
        if v in pred:
          Q.append( x )

  # Perform BFS to find a heuristic schedule
  visited = set(Q)
  while Q:
    u = Q.popleft()
    tmp_schedule.append( u )
    for v in G[u]:
      if v in scc and v not in visited:
        Q.append( v )
        visited.add( v )

  # Collect all variables that trigger other blocks in the SCC
  variables = set()
  for (u, v) in E:
    if u in scc and v in scc:
      variables.update( constraint_objs[ (u, v) ] )

  if len(variables) == 0:
    raise Exception( "There is a cyclic dependency without involving variables. "
                     "Probably a loop that involves update_once:\n{}".format(
                       ", ".join( [ x.__name__ for x in scc ] ) ) )

  # Generate a loop for the SCC.
  # Shunning: we just simply loop over the whole SCC block
  # TODO performance optimizations using Mamba techniques within an SCC block
  template = """
from copy import deepcopy
def wrapped_SCC_{0}():
  N = 0
  while True:
    N += 1
    if N > 100:
      raise Exception("Combinational loop detected at runtime in {{{4}}} after 100 iters!")
    {1}
    {3}
    {2}
    # print( "SCC block{0} is executed", N, "times" )
    break
generated_block = wrapped_SCC_{0}
"""

  # Clean up non-top variables if the top-level signal is there. For
  # slices of Bits we directly use the top-level wide Bits, since Bits
  # clone is RPython code.
  final_variables = set()

  for x in sorted( variables, key=repr ):
    w = x.get_top_level_signal()
    if w is x:
      final_variables.add( x )
      continue

    # w is not x
    if issubclass( w._dsl.Type, Bits ):
      if w not in final_variables:
        final_variables.add( w )
    elif is_bitstruct_class( w._dsl.Type ):
      if w not in final_variables:
        final_variables.add( x )
    else:
      final_variables.add( x )

  # Also group them by common ancestor to reduce bytecode
  # TODO use longest-common-prefix (LCP) algorithms ...
  final_var_host = defaultdict(list)
  for x in final_variables:
    final_var_host[ x.get_host_component() ].append( x )

  # Then, we generate the Python code that saves the values of those
  # variables at the beginning of each SCC iteration, and the code that
  # checks whether any of those values has changed
  copy_srcs  = []
  check_srcs = []

  var_id = 0
  for host, var_list in final_var_host.items():
    hostlen = len(repr(host))

    copy_srcs.append( f"host = {host!r}" )
    check_srcs.append( f"host = {host!r}" )

    sub_check_srcs = []
    for var in var_list:
      var_id += 1
      subname = repr(var)[hostlen+1:]
      if issubclass( var._dsl.Type, Bits ):
        copy_srcs.append( f"t{var_id}=host.{subname}.clone()" )
      elif is_bitstruct_class( var._dsl.Type ):
        copy_srcs.append( f"t{var_id}=host.{subname}.clone()" )
      else:
        copy_srcs.append( f"t{var_id}=deepcopy(host.{subname})" )
      sub_check_srcs.append( f"host.{subname} != t{var_id}" )

    check_srcs.append( f"if {' or '.join(sub_check_srcs)}: continue" )

  # Divide all blocks into meta blocks. branchiness_factor bounds the
  # total branchiness of a meta block; branchy_block_factor bounds the
  # number of branchy blocks inside one.
  branchiness_factor   = 20
  branchy_block_factor = 6

  num_blks = 0  # sanity check
  cur_meta, cur_br, cur_count = [], 0, 0
  scc_schedule = []

  _globals = { 's': top }
  blk_srcs = []

  # If there are fewer than 10 blocks, we directly unroll them
  if len(tmp_schedule) < 10:
    blk_srcs = []
    for i, b in enumerate(tmp_schedule):
      blk_srcs.append( f"blk{i}() # [br {self.branchiness[b]}, loop {int(self.only_loop_at_top[b])}] {b.__name__}" )
      _globals[ f"blk{i}" ] = b  # put it into the block's closure
  else:
    for i, blk in enumerate(tmp_schedule):
      # Same here. If an update block only has a top-level loop, br = 0
      br = 0 if self.only_loop_at_top[blk] else self.branchiness[blk]
      if cur_br == 0:
        cur_meta.append( blk )
        cur_br += br
        cur_count += (br > 0)
        if cur_br >= branchiness_factor or cur_count >= branchy_block_factor:
          num_blks += len(cur_meta)
          scc_schedule.append( cur_meta )
          cur_meta, cur_br, cur_count = [], 0, 0  # clear
      else:
        if br == 0:
          # If no branchy block is available, directly start a new meta block
          num_blks += len(cur_meta)
          scc_schedule.append( cur_meta )
          cur_meta, cur_br, cur_count = [ blk ], br, (br > 0)
        else:
          cur_meta.append( blk )
          cur_br += br
          cur_count += (br > 0)
          if cur_br + br >= branchiness_factor or cur_count + 1 >= branchy_block_factor:
            num_blks += len(cur_meta)
            scc_schedule.append( cur_meta )
            cur_meta, cur_br, cur_count = [], 0, 0  # clear

    if cur_meta:
      num_blks += len(cur_meta)
      scc_schedule.append( cur_meta )

    assert num_blks == len(tmp_schedule), \
      "Some blocks are missing during trace breaking of SCC " \
      f"({num_blks} compiled, {len(tmp_schedule)} total)"

    blk_srcs = []
    if len(scc_schedule) == 1:
      for i, b in enumerate(scc_schedule[-1]):
        blk_srcs.append( f"blk{i}() # [br {self.branchiness[b]}, loop {int(self.only_loop_at_top[b])}] {b.__name__}" )
        _globals[ f"blk{i}" ] = b
    else:
      # TODO we might turn all meta blocks before the last one into meta
      # blocks, and directly fold the last block into the main loop
      # for i, meta in enumerate( scc_schedule[:-1] ):
      #   b = self.compile_meta_block( meta )
      #   blk_srcs.append( f"{b.__name__}()" )
      #   _globals[ b.__name__ ] = b
      # for i, b in enumerate( scc_schedule[-1] ):
      #   blk_srcs.append( f"blk_of_last_meta{i}() # [br {self.branchiness[b]}, loop {int(self.only_loop_at_top[b])}] {b.__name__}" )
      #   _globals[ f"blk_of_last_meta{i}" ] = b
      for i, meta in enumerate(scc_schedule):
        b = self.compile_meta_block( meta )
        blk_srcs.append( f"{b.__name__}()" )
        _globals[ b.__name__ ] = b

  scc_block_src = template.format( scc_id,
                                   "; ".join(copy_srcs),
                                   "\n    ".join(check_srcs),
                                   "\n    ".join(blk_srcs),
                                   ", ".join([ x.__name__ for x in scc ]) )
scc])) if _DEBUG: print(scc_block_src, "\n", "=" * 100) _locals = {} custom_exec( py.code.Source(scc_block_src).compile(), _globals, _locals) return _locals['generated_block']
bits_template = """ class Bits{0}(Bits): nbits = {0} def __new__( cls, value=0 ): return Bits.__new__( cls, {0}, value ) _bits_types[{0}] = b{0} = Bits{0} """ except ImportError: from .PythonBits import Bits # print "[default w/o Mamba] Use Python Bits" bits_template = """ class Bits{0}(Bits): nbits = {0} def __init__( s, value=0 ): return super().__init__( {0}, value ) _bits_types[{0}] = b{0} = Bits{0} """ _bitwidths = list(range(1, 256)) + [ 384, 512 ] _bits_types = dict() custom_exec(compile( "".join([ bits_template.format(nbits) for nbits in _bitwidths ]), filename="bits_import.py", mode="exec"), globals(), locals() ) def mk_bits( nbits ): # assert nbits < 512, "We don't allow bitwidth to exceed 512." if nbits not in _bits_types: custom_exec(compile( bits_template.format(nbits), filename=f"Bits{nbits}", mode="exec" ), globals(), locals() ) return _bits_types[nbits]
def _create_assign_lambda( s, o, lamb ):
  assert isinstance( o, Signal ), "You can only assign(//=) a lambda function to a Wire/InPort/OutPort."

  srcs, line = inspect.getsourcelines( lamb )

  src  = compiled_re.sub( r'\2', ''.join(srcs) ).lstrip(' ')
  root = ast.parse(src)
  assert isinstance( root, ast.Module ) and len(root.body) == 1, "We only support single-statement lambda."

  root = root.body[0]
  assert isinstance( root, ast.AugAssign ) and isinstance( root.op, ast.FloorDiv )

  lhs, rhs = root.target, root.value
  # We expect the lambda to have no argument:
  # {'args': [], 'vararg': None, 'kwonlyargs': [], 'kw_defaults': [], 'kwarg': None, 'defaults': []}
  assert isinstance( rhs, ast.Lambda ) and not rhs.args.args and rhs.args.vararg is None, \
    "The lambda shouldn't contain any argument."

  rhs = rhs.body

  # Compose a new and valid function based on the lambda's lhs and rhs.
  # Note that we don't need to add the source code of the closure-variable
  # assignments to linecache. To get the matching line number in the error
  # message, we set the line number of the update block.
  # Shunning: bugfix: mangle the signal name into the block name
  blk_name = "_lambda__{}".format( repr(o).replace(".","_").replace("[", "_").replace("]", "_").replace(":", "_") )

  lambda_upblk = ast.FunctionDef(
    name=blk_name,
    args=ast.arguments(args=[], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]),
    body=[ast.Assign(targets=[lhs], value=rhs, lineno=2, col_offset=6)],
    decorator_list=[],
    returns=None,
    lineno=1, col_offset=4,
  )
  lambda_upblk_module = ast.Module(body=[ lambda_upblk ])

  # Manually wrap the lambda upblk with a closure function that adds the
  # desired variables to the closure of `_lambda__*`.
  # We construct AST for the following function to add free variables in
  # the closure of the lambda function to the closure of the generated
  # lambda update block.
  #
  # def closure( lambda_closure ):
  #   <FreeVarName1> = lambda_closure[<Idx1>].cell_contents
  #   <FreeVarName2> = lambda_closure[<Idx2>].cell_contents
  #   ...
  #   <FreeVarNameN> = lambda_closure[<IdxN>].cell_contents
  #   def _lambda__<lambda_blk_name>():
  #     # the assignment statement appears here
  #   return _lambda__<lambda_blk_name>

  new_root = ast.Module( body=[
    ast.FunctionDef(
      name="closure",
      args=ast.arguments(
        args=[ast.arg(arg="lambda_closure", annotation=None, lineno=1, col_offset=12)],
        vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]),
      body=[
        ast.Assign(
          targets=[ast.Name(id=var, ctx=ast.Store(), lineno=1+idx, col_offset=2)],
          value=ast.Attribute(
            value=ast.Subscript(
              value=ast.Name( id='lambda_closure', ctx=ast.Load(), lineno=1+idx, col_offset=5+len(var) ),
              slice=ast.Index( value=ast.Num( n=idx, lineno=1+idx, col_offset=19+len(var) ) ),
              ctx=ast.Load(), lineno=1+idx, col_offset=5+len(var),
            ),
            attr='cell_contents', ctx=ast.Load(), lineno=1+idx, col_offset=5+len(var),
          ),
          lineno=1+idx, col_offset=2,
        ) for idx, var in enumerate(lamb.__code__.co_freevars)
      ] + [ lambda_upblk ] + [
        ast.Return(
          value=ast.Name( id=blk_name, ctx=ast.Load(),
                          lineno=4+len(lamb.__code__.co_freevars), col_offset=9 ),
          lineno=4+len(lamb.__code__.co_freevars), col_offset=2,
        )
      ],
      decorator_list=[],
      returns=None,
      lineno=1, col_offset=0,
    )
  ] )

  # In Python 3 we need to supply a dict as local to get the newly
  # compiled function from closure.
  # Then `closure(lamb.__closure__)` returns the lambda update block with
  # the correct free variables in its closure.
  dict_local = {}
  custom_exec( compile(new_root, blk_name, "exec"), lamb.__globals__, dict_local )
  blk = dict_local[ 'closure' ]( lamb.__closure__ )

  # Add the source code to linecache for the compiled function
  new_src = "def {}():\n  {}\n".format( blk_name, src.replace("//=", "=") )
  linecache.cache[ blk_name ] = (len(new_src), None, new_src.splitlines(), blk_name)

  ComponentLevel1.update( s, blk )

  # This caching here does no caching, because the block name contains the
  # signal name intentionally to avoid conflicts. With //= conflicts are
  # more likely than with normal update blocks:
  #   if param == 1: s.out //= s.in_ + 1
  #   else:          s.out //= s.out + 100
  # Here these two blocks would implicitly have the same name, yet they
  # have different contents depending on param. So the cache call here
  # just reuses the existing interface to register the AST/src of the
  # generated block for elaboration or passes to use.
  s._cache_func_meta( blk, is_update_ff=False,
    given=( "".join(srcs), lambda_upblk_module, line, inspect.getsourcefile( lamb ) ) )
  return blk

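# A usage sketch (hypothetical component): the `//=` below triggers
# _create_assign_lambda, which rewrites the lambda into an update block named
# _lambda__s_out whose body is `s.out = s.in_ + shamt`, with `shamt` and `s`
# recovered from the lambda's closure cells.
#
#   class Incr( Component ):
#     def construct( s, shamt ):
#       s.in_ = InPort ( 32 )
#       s.out = OutPort( 32 )
#       s.out //= lambda: s.in_ + shamt
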