def print_code(stencil: core.Stencil, host_file: TextIO) -> None:
    """Print the host-side C++ source code for the given stencil.

    The output is produced in this order: whatever `print_header` prints,
    the `soda::app` namespace containing the app-specific constants, the
    type aliases and the output of `print_func`, and finally the output of
    `print_test`.

    Args:
        stencil: Stencil object to generate host code for.
        host_file: Text file object to write to; its `name` is logged.
    """
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logger.info('generate host source code as %s', host_file.name)
    printer = util.CppPrinter(host_file)

    print_header(printer)

    # One width constant and one type alias is emitted per statement, in
    # input, output, param order.
    all_stmts = stencil.input_stmts + stencil.output_stmts + stencil.param_stmts

    printer.printlns(
        'namespace soda {',
        'namespace app {',
        '// app-specific constants',
        *(f'constexpr int {STENCIL_DIM_FMT[i]} = {d};'
          for i, d in enumerate(core.get_stencil_dim(stencil.stencil_window))),
        f'constexpr int kStencilDistance = {stencil.stencil_distance};',
        *(f'constexpr int {WIDTH_FMT[x.name]} = {x.width_in_bits};'
          for x in all_stmts),
        '',
        '// type alias',
        *(f'using {TYPE_FMT[x.name]} = {x.c_type};' for x in all_stmts),
        '',
    )

    print_func(printer, stencil)

    printer.printlns(
        '}  // namespace app',
        '}  // namespace soda',
        '',
    )

    print_test(printer, stencil)
def print_code(stencil, header_file):
    """Print the C header declaring the host entry point of the stencil.

    The header consists of an include guard derived from the upper-cased
    application name, a `HALIDE_ATTRIBUTE_ALIGN` macro definition, a guarded
    `buffer_t` struct typedef, a default (empty) `HALIDE_FUNCTION_ATTRS`
    macro, and the prototype of the function named after the application.
    The prototype takes one `buffer_t *` per input, output and param name,
    followed by the `xclbin` path.

    Args:
        stencil: Stencil object; `app_name`, `input_names`, `output_names`
            and `param_names` are read.
        header_file: File object to write to; its `name` is logged.
    """
    logger.info('generate host header code as %s' % header_file.name)
    out = util.CppPrinter(header_file)

    def emit(*lines):
        # Print each given line at the printer's current indentation.
        for line in lines:
            out.println(line)

    include_guard = 'HALIDE_%s_H_' % stencil.app_name.upper()

    emit(f'#ifndef {include_guard}', f'#define {include_guard}')
    out.println()

    # Alignment attribute macro, selected by compiler.
    out.println('#ifndef HALIDE_ATTRIBUTE_ALIGN')
    out.do_indent()
    out.println('#ifdef _MSC_VER')
    out.do_indent()
    out.println('#define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x))')
    out.un_indent()
    out.println('#else')
    out.do_indent()
    out.println('#define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x)))')
    out.un_indent()
    out.println('#endif')
    out.un_indent()
    out.println('#endif//HALIDE_ATTRIBUTE_ALIGN')
    out.println()

    # buffer_t definition, skipped if another header already provided it.
    emit(
        '#ifndef BUFFER_T_DEFINED',
        '#define BUFFER_T_DEFINED',
        '#include<stdbool.h>',
        '#include<stdint.h>',
        'typedef struct buffer_t {',
    )
    out.do_indent()
    emit(
        'uint64_t dev;',
        'uint8_t* host;',
        'int32_t extent[4];',
        'int32_t stride[4];',
        'int32_t min[4];',
        'int32_t elem_size;',
        'HALIDE_ATTRIBUTE_ALIGN(1) bool host_dirty;',
        'HALIDE_ATTRIBUTE_ALIGN(1) bool dev_dirty;',
        'HALIDE_ATTRIBUTE_ALIGN(1) uint8_t _padding[10 - sizeof(void *)];',
    )
    out.un_indent()
    emit('} buffer_t;', '#endif//BUFFER_T_DEFINED')
    out.println()

    emit(
        '#ifndef HALIDE_FUNCTION_ATTRS',
        '#define HALIDE_FUNCTION_ATTRS',
        '#endif//HALIDE_FUNCTION_ATTRS',
    )
    out.println()

    # Entry point prototype: one buffer per tensor, then the xclbin path.
    tensors = stencil.input_names + stencil.output_names + stencil.param_names
    buffer_params = ''.join(
        f'buffer_t *var_{tensor}_buffer, ' for tensor in tensors)
    out.println(f'int {stencil.app_name}({buffer_params}const char* xclbin) '
                'HALIDE_FUNCTION_ATTRS;')
    out.println()

    out.println(f'#endif//{include_guard}')
    out.println()
def print_code(stencil: core.Stencil, output_file: TextIO) -> None:
    """Prints the top-level code with the given arguments.

    Prints the OpenCL kernels with proper pragmas and channel declarations and
    references.

    Args:
        stencil: Stencil object to print.
        output_file: TextIO to write to.
    """
    _logger.info('generate kernel code as %s' % output_file.name)
    printer = util.CppPrinter(output_file)

    printer.println('#include <ihc_apint.h>', indent=0)
    printer.println()
    printer.println('#pragma OPENCL EXTENSION cl_intel_channels : enable',
                    indent=0)
    printer.println()

    dataflow = stencil.dataflow_super_source

    # internal fifos: one channel declaration per fifo of every valid node
    for node in dataflow.tpo_valid_node_gen():
        for fifo in node.fifos:
            printer.println(f'channel {fifo.cl_type} {fifo.cl_expr};')
    printer.println()

    # in generated bitstream, kernels are sorted in alphabetical order
    # SODA relies on the correct ordering for memory channels of each tensor
    # so here we make sure the kernel names in alphabetical order
    # (the running index is zero-padded to the width of the largest index)
    node_count = sum(1 for _ in dataflow.tpo_valid_node_gen())
    width = len(str(node_count - 1))

    # number of kernels already printed for each module trait id
    instance_idx: Dict[int, int] = collections.defaultdict(int)
    for overall_idx, node in enumerate(dataflow.tpo_valid_node_gen()):
        module_trait, module_trait_id = dataflow.module_table[node]
        kernel_name = (f'{stencil.app_name}_{overall_idx:0{width}}_'
                       f'module_{module_trait_id}_'
                       f'instance_{instance_idx[module_trait_id]}')
        print_kernel(
            kernel_name,
            printer,
            node,
            module_trait,
            module_trait_id,
            burst_width=stencil.burst_width,
        )
        instance_idx[module_trait_id] += 1
        # NOTE(review): the original layout was collapsed; confirm this
        # separator line belongs inside the loop (one per kernel).
        printer.println()
def print_code(
    stencil: core.Stencil,
    output_file: IO[str],
    interface: str = SUPPORTED_INTERFACES[0],
):
    """Print the kernel C++ code of the stencil for the given interface.

    After validating `interface`, this prints the header, a banner that
    embeds the stencil itself in a block comment, the interface-specific
    helper definitions, one definition per module trait, and finally the
    top-level interface.

    Args:
        stencil: Stencil object to print.
        output_file: Text file object to write to; its `name` is logged.
        interface: Interface type, checked by `_check_interface`. The
            'm_axi' and 'axis' values get extra helper definitions and a
            simulation-only warning.
    """
    _check_interface(interface)
    _logger.info('generate kernel code as %s' % output_file.name)

    printer = util.CppPrinter(output_file)
    print_header(printer, interface)

    # Helper printers required by each interface, in emission order.
    helper_printers = {
        'm_axi': (
            _print_reinterpret,
            _print_data_struct,
            _print_read_data_m_axi,
            _print_write_data_m_axi,
            _print_burst_read_m_axi,
            _print_burst_write_m_axi,
        ),
        'axis': (
            _print_reinterpret,
            _print_read_data_axis,
            _print_write_data_axis,
        ),
    }

    if interface in helper_printers:
        printer.printlns(
            '#ifdef __SYNTHESIS__',
            '#warning this file should be used for simulation only',
            '#warning synthesis result may be sub-optimal',
            '#endif  // __SYNTHESIS__',
            '',
        )

    printer.printlns(
        '// this file can be generated from the following SODA DSL',
        f'/*\n{stencil}\n*/',
        '',
        '// stencil window size: '
        f'{tuple(core.get_stencil_dim(stencil.stencil_window))}',
        f'// stencil distace: {stencil.stencil_distance}',
        '// data layout is documented at',
        '// https://github.com/Blaok/soda/blob/master/docs/data-layout.md',
        '',
    )

    for print_helper in helper_printers.get(interface, ()):
        print_helper(printer)

    for trait_id, trait in enumerate(stencil.module_traits):
        print_module_definition(
            printer,
            trait,
            trait_id,
            stencil.burst_width,
            interface,
        )

    # One (name, type, bank) entry per DRAM bank of every output/input
    # tensor; params are appended to the inputs with bank 0.
    outputs = [(stmt.name, stmt.haoda_type, bank)
               for stmt in stencil.output_stmts
               for bank in stmt.dram]
    inputs = [(stmt.name, stmt.haoda_type, bank)
              for stmt in stencil.input_stmts
              for bank in stmt.dram]
    inputs.extend(
        ('var_%s' % stmt.name, stmt.type, 0) for stmt in stencil.param_stmts)

    _print_interface(printer, stencil, inputs, outputs,
                     stencil.dataflow_super_source, interface)
def print_code(
    stencil: core.Stencil,
    xo_file: IO[bytes],
    device_info: Dict[str, str],
    jobs: Optional[int] = os.cpu_count(),
    rpt_file: Optional[str] = None,
    interface: str = 'm_axi',
) -> None:
    """Generate hardware object file for the given Stencil.

    Working `vivado` and `vivado_hls` is required in the PATH.

    All intermediate files live in a temporary directory. The steps are:
    write `kernel.xml` and `kernel.cpp`, run one `synthesis_module` job per
    module trait (plus one for the dataflow interface when `interface` is
    'm_axi') in a thread pool, read the HLS reports, print the top-level
    Verilog module, package everything with `backend.PackageXo`, and copy the
    resulting `.xo` file into `xo_file`.

    Args:
        stencil: Stencil object to generate from.
        xo_file: file object to write to.
        device_info: dict of 'part_num' and 'clock_period'.
        jobs: maximum number of jobs running in parallel.
        rpt_file: path of the generated report; None disables report
            generation.
        interface: interface type, supported values are 'm_axi' and 'axis'.

    Raises:
        hls_report.BadReport: re-raised after logging when the performance
            section of a module report cannot be read.
        SystemExit: raised through `sys.exit` with the job's return code
            when a synthesis job returns non-zero.
    """
    iface_names = []  # for axis
    m_axi_names = []  # for m_axi
    inputs = []
    outputs = []
    # Outputs are collected before inputs so the name lists keep that order.
    for stmts, ports in (
        (stencil.output_stmts, outputs),
        (stencil.input_stmts, inputs),
    ):
        for stmt in stmts:
            for bank in stmt.dram:
                port_name = util.get_port_name(stmt.name, bank)
                bundle_name = util.get_bundle_name(stmt.name, bank)
                iface_names.append(port_name)
                m_axi_names.append(bundle_name)
                ports.append((
                    port_name,
                    bundle_name,
                    stencil.burst_width,
                    util.get_port_buf_name(stmt.name, bank),
                ))

    top_name = stencil.kernel_name

    with tempfile.TemporaryDirectory(prefix='sodac-xrtl-') as tmpdir:
        kernel_xml = os.path.join(tmpdir, 'kernel.xml')
        with open(kernel_xml, 'w') as kernel_xml_obj:
            print_kernel_xml(top_name, inputs, outputs, kernel_xml_obj,
                             interface)

        kernel_file = os.path.join(tmpdir, 'kernel.cpp')
        with open(kernel_file, 'w') as kernel_fileobj:
            hls_kernel.print_code(stencil, kernel_fileobj)

        # Each entry is (source length, callable, *arguments of callable).
        # The length is only used as the sort key below.
        args = []
        for module_trait_id, module_trait in enumerate(stencil.module_traits):
            sio = io.StringIO()
            hls_kernel.print_module_definition(
                util.CppPrinter(sio),
                module_trait,
                module_trait_id,
                burst_width=stencil.burst_width,
            )
            args.append((
                len(sio.getvalue()),
                synthesis_module,
                tmpdir,
                [kernel_file],
                util.get_func_name(module_trait_id),
                device_info,
            ))

        if interface == 'm_axi':
            sio = io.StringIO()
            print_dataflow_hls_interface(util.CppPrinter(sio), top_name,
                                         inputs, outputs)
            dataflow_kernel = os.path.join(tmpdir, 'dataflow_kernel.cpp')
            with open(dataflow_kernel, 'w') as dataflow_kernel_obj:
                dataflow_kernel_obj.write(sio.getvalue())
            args.append((
                len(sio.getvalue()),
                synthesis_module,
                tmpdir,
                [dataflow_kernel],
                top_name,
                device_info,
            ))

        # Submit the jobs with the longest generated source first.
        args.sort(key=lambda job: job[0], reverse=True)

        super_source = stencil.dataflow_super_source

        # NOTE(review): presumably the slot is handed back to an outer job
        # server while the worker threads run -- confirm against util.
        job_server = util.release_job_slot()
        with concurrent.futures.ThreadPoolExecutor(
                max_workers=jobs) as executor:
            threads = [executor.submit(*job[1:]) for job in args]
            for future in concurrent.futures.as_completed(threads):
                returncode, stdout, stderr = future.result()
                log_func = _logger.error if returncode != 0 else _logger.debug
                if stdout:
                    log_func(stdout.decode())
                if stderr:
                    log_func(stderr.decode())
                if returncode != 0:
                    util.pause_for_debugging()
                    sys.exit(returncode)
        util.acquire_job_slot(job_server)

        # generate HLS report
        depths: Dict[int, int] = {}
        hls_resources = hls_report.HlsResources()
        if interface == 'm_axi':
            # Start from the top-level report minus the Dataflow report.
            hls_resources = hls_report.resources(
                os.path.join(tmpdir, 'report', top_name + '_csynth.xml'))
            hls_resources -= hls_report.resources(
                os.path.join(tmpdir, 'report', 'Dataflow_csynth.xml'))
            # NOTE(review): the original layout was collapsed; confirm this
            # log call belongs under the m_axi branch.
            _logger.info(hls_resources)
        for module_id, nodes in enumerate(
                super_source.module_trait_table.values()):
            module_name = util.get_func_name(module_id)
            report_file = os.path.join(tmpdir, 'report',
                                       module_name + '_csynth.xml')
            hls_resource = hls_report.resources(report_file)
            use_count = len(nodes)
            try:
                perf = hls_report.performance(report_file)
                _logger.info('%s, usage: %5d times, II: %3d, Depth: %3d',
                             hls_resource, use_count, perf.ii, perf.depth)
                depths[module_id] = perf.depth
            except hls_report.BadReport as e:
                # Logger.warn was a deprecated alias and is removed in
                # Python 3.13; Logger.warning is the supported spelling.
                _logger.warning('%s in %s report (%s)', e, module_name,
                                report_file)
                _logger.info('%s, usage: %5d times', hls_resource, use_count)
                raise
            hls_resources += hls_resource * use_count
        _logger.info('total usage:')
        _logger.info(hls_resources)
        if rpt_file:
            rpt_json = collections.OrderedDict([('name', top_name)] +
                                               list(hls_resources))
            with open(rpt_file, mode='w') as rpt_fileobj:
                json.dump(rpt_json, rpt_fileobj, indent=2)

        # update the module pipeline depths
        stencil.dataflow_super_source.update_module_depths(depths)

        hdl_dir = os.path.join(tmpdir, 'hdl')
        # With 'axis' the printed module takes the top-level name; otherwise
        # it is named 'Dataflow'.
        module_name = 'Dataflow'
        if interface == 'axis':
            module_name = top_name
        with open(os.path.join(hdl_dir, f'{module_name}.v'),
                  mode='w') as fileobj:
            print_top_module(
                backend.VerilogPrinter(fileobj),
                stencil.dataflow_super_source,
                inputs,
                outputs,
                module_name,
                interface,
            )

        util.pause_for_debugging()

        xo_filename = os.path.join(tmpdir, stencil.app_name + '.xo')
        kwargs = {}
        if interface == 'm_axi':
            kwargs['m_axi_names'] = m_axi_names
        elif interface == 'axis':
            kwargs['iface_names'] = iface_names
        with backend.PackageXo(
                xo_filename,
                top_name,
                kernel_xml,
                hdl_dir,
                **kwargs,
        ) as proc:
            stdout, stderr = proc.communicate()
        log_func = _logger.error if proc.returncode != 0 else _logger.debug
        log_func(stdout.decode())
        log_func(stderr.decode())
        # Copy the packaged object out before the temporary directory is
        # removed.
        with open(xo_filename, mode='rb') as xo_fileobj:
            shutil.copyfileobj(xo_fileobj, xo_file)