Ejemplo n.º 1
0
def print_code(stencil: core.Stencil, host_file: TextIO) -> None:
    """Print the host-side C++ source for ``stencil`` to ``host_file``.

    The output consists of, in order: whatever ``print_header`` emits, the
    ``soda::app`` namespace (app-specific constants, type aliases, and the
    output of ``print_func``), and finally whatever ``print_test`` emits.

    Args:
        stencil: Stencil object to generate host code for.
        host_file: Text file object to write to; its ``name`` is logged.
    """
    # Pass the argument to the logger instead of pre-formatting with `%`, so
    # the message is only built when the record is actually emitted.
    logger.info('generate host source code as %s', host_file.name)
    printer = util.CppPrinter(host_file)

    print_header(printer)

    # Width constants and type aliases are emitted for inputs, outputs, and
    # params alike, in that order.
    all_stmts = stencil.input_stmts + stencil.output_stmts + stencil.param_stmts

    printer.printlns(
        'namespace soda {',
        'namespace app {',
        '// app-specific constants',
        *(f'constexpr int {STENCIL_DIM_FMT[i]} = {d};'
          for i, d in enumerate(core.get_stencil_dim(stencil.stencil_window))),
        f'constexpr int kStencilDistance = {stencil.stencil_distance};',
        *(f'constexpr int {WIDTH_FMT[x.name]} = {x.width_in_bits};'
          for x in all_stmts),
        '',
        '// type alias',
        *(f'using {TYPE_FMT[x.name]} = {x.c_type};' for x in all_stmts),
        '',
    )
    print_func(printer, stencil)
    printer.printlns(
        '} // namespace app',
        '} // namespace soda',
        '',
    )

    print_test(printer, stencil)
Ejemplo n.º 2
0
def print_code(stencil, header_file):
    """Write the C header for the stencil application to ``header_file``.

    The header contains an include guard derived from the upper-cased
    ``stencil.app_name``, the ``HALIDE_ATTRIBUTE_ALIGN`` macro, a guarded
    ``buffer_t`` struct definition, the ``HALIDE_FUNCTION_ATTRS`` macro, and
    the declaration of the application entry point, which takes one
    ``buffer_t*`` per input, output, and param name followed by the xclbin
    path.

    Args:
        stencil: Object providing ``app_name``, ``input_names``,
            ``output_names`` and ``param_names``.
        header_file: File object to write to; its ``name`` is logged.
    """
    logger.info('generate host header code as %s' % header_file.name)
    printer = util.CppPrinter(header_file)
    emit = printer.println
    indent = printer.do_indent
    dedent = printer.un_indent

    # Include guard (closed at the very end of the header).
    guard = 'HALIDE_%s_H_' % stencil.app_name.upper()
    emit('#ifndef ' + guard)
    emit('#define ' + guard)
    emit()

    # Alignment attribute macro, with one definition per compiler branch.
    emit('#ifndef HALIDE_ATTRIBUTE_ALIGN')
    indent()
    for directive, definition in (
        ('#ifdef _MSC_VER',
         '#define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x))'),
        ('#else',
         '#define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x)))'),
    ):
        emit(directive)
        indent()
        emit(definition)
        dedent()
    emit('#endif')
    dedent()
    emit('#endif//HALIDE_ATTRIBUTE_ALIGN')
    emit()

    # buffer_t definition, guarded so that it is only defined once.
    for line in (
        '#ifndef BUFFER_T_DEFINED',
        '#define BUFFER_T_DEFINED',
        '#include<stdbool.h>',
        '#include<stdint.h>',
        'typedef struct buffer_t {',
    ):
        emit(line)
    indent()
    for field in (
        'uint64_t dev;',
        'uint8_t* host;',
        'int32_t extent[4];',
        'int32_t stride[4];',
        'int32_t min[4];',
        'int32_t elem_size;',
        'HALIDE_ATTRIBUTE_ALIGN(1) bool host_dirty;',
        'HALIDE_ATTRIBUTE_ALIGN(1) bool dev_dirty;',
        'HALIDE_ATTRIBUTE_ALIGN(1) uint8_t _padding[10 - sizeof(void *)];',
    ):
        emit(field)
    dedent()
    emit('} buffer_t;')
    emit('#endif//BUFFER_T_DEFINED')
    emit()

    for line in (
        '#ifndef HALIDE_FUNCTION_ATTRS',
        '#define HALIDE_FUNCTION_ATTRS',
        '#endif//HALIDE_FUNCTION_ATTRS',
    ):
        emit(line)
    emit()

    # Entry point declaration: one buffer per tensor, then the xclbin path.
    tensor_names = (stencil.input_names + stencil.output_names +
                    stencil.param_names)
    buffer_params = ''.join(
        'buffer_t *var_{}_buffer, '.format(name) for name in tensor_names)
    emit('int {}({}const char* xclbin) HALIDE_FUNCTION_ATTRS;'.format(
        stencil.app_name, buffer_params))
    emit()

    emit('#endif//' + guard)
    emit()
Ejemplo n.º 3
0
def print_code(stencil: core.Stencil, output_file: TextIO) -> None:
    """Print the top-level OpenCL kernel code for ``stencil``.

    Emits the include and channel-extension pragma, one channel declaration
    per FIFO of every valid dataflow node, and then one kernel per node via
    ``print_kernel``.

    Args:
        stencil: Stencil object to print.
        output_file: TextIO to write to; its ``name`` is logged.
    """
    _logger.info('generate kernel code as %s' % output_file.name)
    printer = util.CppPrinter(output_file)
    emit = printer.println

    # Preamble lines are printed at indent 0, each followed by a blank line.
    for preamble in (
        '#include <ihc_apint.h>',
        '#pragma OPENCL EXTENSION cl_intel_channels : enable',
    ):
        emit(preamble, indent=0)
        emit()

    # Channel declarations for the internal FIFOs.
    dataflow = stencil.dataflow_super_source
    for node in dataflow.tpo_valid_node_gen():
        for fifo in node.fifos:
            emit(f'channel {fifo.cl_type} {fifo.cl_expr};')

    emit()

    # In the generated bitstream, kernels are sorted in alphabetical order.
    # SODA relies on the correct ordering for memory channels of each tensor,
    # so kernel names carry a zero-padded running index that keeps them in
    # alphabetical order.
    node_count = sum(1 for _ in dataflow.tpo_valid_node_gen())
    width = len(str(node_count - 1))
    instances_seen: Dict[int, int] = collections.defaultdict(int)
    for overall_idx, node in enumerate(dataflow.tpo_valid_node_gen()):
        module_trait, module_trait_id = dataflow.module_table[node]
        instance = instances_seen[module_trait_id]
        kernel_name = (f'{stencil.app_name}_{overall_idx:0{width}}_'
                       f'module_{module_trait_id}_'
                       f'instance_{instance}')
        print_kernel(
            kernel_name,
            printer,
            node,
            module_trait,
            module_trait_id,
            burst_width=stencil.burst_width,
        )
        instances_seen[module_trait_id] = instance + 1
        emit()
Ejemplo n.º 4
0
def print_code(
    stencil: core.Stencil,
    output_file: IO[str],
    interface: str = SUPPORTED_INTERFACES[0],
):
  """Print the HLS kernel code for ``stencil`` to ``output_file``.

  After validating ``interface``, this prints the header, a comment block
  reproducing the SODA DSL and the stencil geometry, the interface-specific
  helper functions, one definition per module trait, and finally the
  top-level interface.

  Args:
    stencil: Stencil object to print.
    output_file: Text file object to write to; its ``name`` is logged.
    interface: Interface type; checked by ``_check_interface``.
  """
  _check_interface(interface)

  _logger.info('generate kernel code as %s' % output_file.name)
  printer = util.CppPrinter(output_file)

  print_header(printer, interface)

  # 'm_axi' and 'axis' share the synthesis warning and the reinterpret helper.
  has_axi_interface = interface in {'m_axi', 'axis'}

  if has_axi_interface:
    printer.printlns(
        '#ifdef __SYNTHESIS__',
        '#warning this file should be used for simulation only',
        '#warning synthesis result may be sub-optimal',
        '#endif  // __SYNTHESIS__',
        '',
    )

  dsl_comment = f'/*\n{stencil}\n*/'
  window_dims = tuple(core.get_stencil_dim(stencil.stencil_window))
  printer.printlns(
      '// this file can be generated from the following SODA DSL',
      dsl_comment,
      '',
      f'// stencil window size: {window_dims}',
      f'// stencil distace: {stencil.stencil_distance}',
      '// data layout is documented at',
      '// https://github.com/Blaok/soda/blob/master/docs/data-layout.md',
      '',
  )

  if has_axi_interface:
    _print_reinterpret(printer)

  if interface == 'm_axi':
    _print_data_struct(printer)
    _print_read_data_m_axi(printer)
    _print_write_data_m_axi(printer)
    _print_burst_read_m_axi(printer)
    _print_burst_write_m_axi(printer)
  elif interface == 'axis':
    _print_read_data_axis(printer)
    _print_write_data_axis(printer)

  for trait_id, trait in enumerate(stencil.module_traits):
    print_module_definition(
        printer,
        trait,
        trait_id,
        stencil.burst_width,
        interface,
    )

  # One (name, type, bank) entry per DRAM bank of every output/input tensor.
  outputs = [(stmt.name, stmt.haoda_type, bank)
             for stmt in stencil.output_stmts
             for bank in stmt.dram]
  inputs = [(stmt.name, stmt.haoda_type, bank)
            for stmt in stencil.input_stmts
            for bank in stmt.dram]
  # Params are appended to the inputs with a `var_` prefix and bank 0.
  inputs.extend(
      ('var_%s' % stmt.name, stmt.type, 0) for stmt in stencil.param_stmts)
  _print_interface(printer, stencil, inputs, outputs,
                   stencil.dataflow_super_source, interface)
Ejemplo n.º 5
0
def print_code(
    stencil: core.Stencil,
    xo_file: IO[bytes],
    device_info: Dict[str, str],
    jobs: Optional[int] = os.cpu_count(),
    rpt_file: Optional[str] = None,
    interface: str = 'm_axi',
) -> None:
    """Generate hardware object file for the given Stencil.

    Working `vivado` and `vivado_hls` is required in the PATH.

    All intermediate files live in a temporary directory that is removed when
    the function returns; only the packaged object is copied to ``xo_file``.

    Args:
        stencil: Stencil object to generate from.
        xo_file: file object to write to.
        device_info: dict of 'part_num' and 'clock_period'.
        jobs: maximum number of jobs running in parallel.
        rpt_file: path of the generated report; None disables report
            generation.
        interface: interface type, supported values are 'm_axi' and 'axis'.

    Raises:
        hls_report.BadReport: re-raised if the performance section of a
            module's HLS report cannot be read.
        SystemExit: raised via ``sys.exit`` with the synthesis return code if
            any synthesis job returns non-zero.
    """

    iface_names = []  # for axis
    m_axi_names = []  # for m_axi
    inputs = []
    outputs = []

    # Collect one port per DRAM bank; outputs are visited before inputs so the
    # name lists keep the same order as the port tuples.
    for stmts, ports in ((stencil.output_stmts, outputs),
                         (stencil.input_stmts, inputs)):
        for stmt in stmts:
            for bank in stmt.dram:
                port_name = util.get_port_name(stmt.name, bank)
                bundle_name = util.get_bundle_name(stmt.name, bank)
                iface_names.append(port_name)
                m_axi_names.append(bundle_name)
                ports.append((port_name, bundle_name, stencil.burst_width,
                              util.get_port_buf_name(stmt.name, bank)))

    top_name = stencil.kernel_name
    with tempfile.TemporaryDirectory(prefix='sodac-xrtl-') as tmpdir:
        kernel_xml = os.path.join(tmpdir, 'kernel.xml')
        with open(kernel_xml, 'w') as kernel_xml_obj:
            print_kernel_xml(top_name, inputs, outputs, kernel_xml_obj,
                             interface)

        kernel_file = os.path.join(tmpdir, 'kernel.cpp')
        with open(kernel_file, 'w') as kernel_fileobj:
            hls_kernel.print_code(stencil, kernel_fileobj)

        # Each entry is (source length, callable, *callable arguments); the
        # length is only used as the sort key below.
        args = []
        for module_trait_id, module_trait in enumerate(stencil.module_traits):
            sio = io.StringIO()
            hls_kernel.print_module_definition(util.CppPrinter(sio),
                                               module_trait,
                                               module_trait_id,
                                               burst_width=stencil.burst_width)
            args.append(
                (len(sio.getvalue()), synthesis_module, tmpdir, [kernel_file],
                 util.get_func_name(module_trait_id), device_info))

        if interface == 'm_axi':
            sio = io.StringIO()
            print_dataflow_hls_interface(util.CppPrinter(sio), top_name,
                                         inputs, outputs)
            dataflow_kernel = os.path.join(tmpdir, 'dataflow_kernel.cpp')
            with open(dataflow_kernel, 'w') as dataflow_kernel_obj:
                dataflow_kernel_obj.write(sio.getvalue())
            args.append((len(sio.getvalue()), synthesis_module, tmpdir,
                         [dataflow_kernel], top_name, device_info))
        # Submit the modules with the longest generated source first
        # (presumably a proxy for synthesis time -- TODO confirm).
        args.sort(key=lambda x: x[0], reverse=True)

        super_source = stencil.dataflow_super_source
        # The job slot is released while the synthesis jobs run and acquired
        # again once they have all completed.
        job_server = util.release_job_slot()
        with concurrent.futures.ThreadPoolExecutor(
                max_workers=jobs) as executor:
            threads = [executor.submit(*x[1:]) for x in args]
            for future in concurrent.futures.as_completed(threads):
                returncode, stdout, stderr = future.result()
                log_func = _logger.error if returncode != 0 else _logger.debug
                if stdout:
                    log_func(stdout.decode())
                if stderr:
                    log_func(stderr.decode())
                if returncode != 0:
                    util.pause_for_debugging()
                    sys.exit(returncode)
        util.acquire_job_slot(job_server)

        # generate HLS report
        depths: Dict[int, int] = {}
        hls_resources = hls_report.HlsResources()
        if interface == 'm_axi':
            # Start from the top-level report minus the 'Dataflow' report; the
            # per-module usage is added back in the loop below.
            hls_resources = hls_report.resources(
                os.path.join(tmpdir, 'report', top_name + '_csynth.xml'))
            hls_resources -= hls_report.resources(
                os.path.join(tmpdir, 'report', 'Dataflow_csynth.xml'))

        _logger.info(hls_resources)
        for module_id, nodes in enumerate(
                super_source.module_trait_table.values()):
            module_name = util.get_func_name(module_id)
            report_file = os.path.join(tmpdir, 'report',
                                       module_name + '_csynth.xml')
            hls_resource = hls_report.resources(report_file)
            use_count = len(nodes)
            try:
                perf = hls_report.performance(report_file)
                _logger.info('%s, usage: %5d times, II: %3d, Depth: %3d',
                             hls_resource, use_count, perf.ii, perf.depth)
                depths[module_id] = perf.depth
            except hls_report.BadReport as e:
                # `Logger.warn` is a deprecated alias; use `warning`.
                _logger.warning('%s in %s report (%s)', e, module_name,
                                report_file)
                _logger.info('%s, usage: %5d times', hls_resource, use_count)
                # Bare `raise` re-raises the active exception as-is.
                raise
            hls_resources += hls_resource * use_count
        _logger.info('total usage:')
        _logger.info(hls_resources)
        if rpt_file:
            rpt_json = collections.OrderedDict([('name', top_name)] +
                                               list(hls_resources))
            with open(rpt_file, mode='w') as rpt_fileobj:
                json.dump(rpt_json, rpt_fileobj, indent=2)

        # update the module pipeline depths
        stencil.dataflow_super_source.update_module_depths(depths)

        # The top-level Verilog module is named 'Dataflow' unless the
        # interface is 'axis', in which case it takes the kernel name.
        hdl_dir = os.path.join(tmpdir, 'hdl')
        module_name = 'Dataflow'
        if interface == 'axis':
            module_name = top_name
        with open(os.path.join(hdl_dir, f'{module_name}.v'),
                  mode='w') as fileobj:
            print_top_module(
                backend.VerilogPrinter(fileobj),
                stencil.dataflow_super_source,
                inputs,
                outputs,
                module_name,
                interface,
            )

        util.pause_for_debugging()

        # Package everything into an .xo file inside the temporary directory,
        # then copy its bytes to the caller-supplied file object.
        xo_filename = os.path.join(tmpdir, stencil.app_name + '.xo')
        kwargs = {}
        if interface == 'm_axi':
            kwargs['m_axi_names'] = m_axi_names
        elif interface == 'axis':
            kwargs['iface_names'] = iface_names
        with backend.PackageXo(
                xo_filename,
                top_name,
                kernel_xml,
                hdl_dir,
                **kwargs,
        ) as proc:
            stdout, stderr = proc.communicate()
        log_func = _logger.error if proc.returncode != 0 else _logger.debug
        log_func(stdout.decode())
        log_func(stderr.decode())
        with open(xo_filename, mode='rb') as xo_fileobj:
            shutil.copyfileobj(xo_fileobj, xo_file)