Example #1
0
def do_native_translation(code, **kwargs):
    """Wrap a generated kernel body into the source of a Poplar ``Vertex`` class.

    The text between the first ``') {\\n'`` and the last ``'}\\n'`` of ``code``
    is taken as the kernel body, passed through ``remove_local_cache`` and
    placed inside ``compute()`` of a class named ``Vuid_<sha1 of the body>``.
    Every input buffer whose name does not start with ``_`` becomes an
    ``Input<Vector<T>>`` field and every output buffer an
    ``Output<Vector<T>>`` field.  The entries of ``range_book.json`` are
    serialized into the leading ``// Antares Property`` comment.

    Args:
      code: Source text of the generated kernel function.
      **kwargs: Must contain ``attrs``; its ``ir`` and ``blend`` attributes
        are read here.

    Returns:
      The codelet source as a string.

    Raises:
      Exception: if ``'einstein_v2'`` is not in ``attrs.ir``, or if
        ``range_book.json`` cannot be found.
      ValueError: if ``code`` lacks the ``') {\\n'`` / ``'}\\n'`` markers.
    """
    arg_bufs = AntaresGlobal.local_arg_pros

    if 'einstein_v2' not in kwargs['attrs'].ir:
      raise Exception("Program for graphcore must be based on Antares IR")

    # Keep only the function body: drop the signature and the closing brace.
    code = code[code.index(') {\n') + len(') {\n'):code.rindex('}\n')]
    code = remove_local_cache(code, arg_bufs)

    field_decls = []
    for buf in arg_bufs['_in']:
      # Inputs whose name starts with '_' are not exposed as vertex fields.
      if buf['name'].startswith('_'):
        continue
      field_decls.append(' Input<Vector<%s>> %s; // local size: %s\n' % (_native_dtype(buf['dtype']), buf['name'], buf['shape']))
    for buf in arg_bufs['_out']:
      field_decls.append(' Output<Vector<%s>> %s; // local size: %s\n' % (_native_dtype(buf['dtype']), buf['name'], buf['shape']))
    func_args = ''.join(field_decls)

    # The class name is derived from the body text.
    codelet_name = 'Vuid_%s' % hashlib.sha1(code.encode()).hexdigest()

    # Embed the user supplied helper code in an anonymous namespace.  The
    # template has to be formatted with the blend text; otherwise a literal
    # "%s" is emitted into the C++ source and the helpers are lost.
    blend_code = kwargs['attrs'].blend.strip()
    blend_code = ('namespace {\n%s\n}\n\n' % blend_code) if blend_code else ''

    from antares.common import local_get_dir_file
    try:
      with open(local_get_dir_file('range_book.json'), 'r') as fp:
        range_book = json.load(fp)
    except FileNotFoundError:
      raise Exception("TODO: Graphcore code generation is not completely implemented in new emit_tvm_ir_v2()")

    # Serialize the book as "key/a,b,c/d,e,f;key2/..." where every inner list
    # is rendered without brackets and without spaces after the commas.
    props = []
    for key, rows in range_book['book'].items():
      rows_text = '/'.join(str(row)[1:-1].replace(', ', ',') for row in rows)
      props.append(key + '/' + rows_text)
    props = ';'.join(props)

    code = '''
// Antares Property (k * ax_id + l .. r): %s

#include <poplar/Vertex.hpp>

using namespace poplar;

%s
class %s: public Vertex {
public:
 bool compute() {
%s
  return true;
 }

%s
};
''' % (props, blend_code, codelet_name, code, func_args)
    return code
Example #2
0
def remove_local_cache(code, arg_bufs):
    """Replace bare local array declarations with pointers into the first output.

    A line counts as a declaration when it ends with ``'];'`` and contains no
    ``'='``.  Such a line is rewritten to ``  <type> *<name> = &<out>[0];``
    where ``<out>`` is the name of ``arg_bufs['_out'][0]``.  All other lines
    are passed through unchanged.

    Raises:
      Exception: if the first token of a declaration line differs from the
        native dtype of the first output buffer.
    """
    def _rewrite(line):
      if not line.endswith('];') or '=' in line:
        return line
      out_buf = arg_bufs['_out'][0]
      declared_type = line.split()[0]
      print(declared_type, out_buf['dtype'])
      if declared_type != _native_dtype(out_buf['dtype']):
        raise Exception("This backend doesn't support injective computation modifying the output type")
      # "float cache" -> "float *cache": text before '[' with spaces turned into ' *'.
      pointer_decl = line.split('[')[0].strip().replace(' ', ' *')
      return '  ' + pointer_decl + ' = &' + out_buf['name'] + '[0];'

    return '\n'.join([_rewrite(line) for line in code.split('\n')])
Example #3
0
def do_native_translation(code, **kwargs):
    """Rewrite an ``extern "C" __global__`` kernel as a plain ``kernel_main`` function.

    The original signature is replaced by one built from the input buffers
    followed by the output buffers (each as ``<native dtype>* <name>``),
    ``threadIdx.x`` becomes ``__rank__``, the `` __global__ `` and
    `` __restrict__ `` qualifiers are dropped, and a fixed include prelude
    plus ``kwargs['attrs'].blend`` is put in front.  The result is finally
    passed through ``remove_local_cache``.

    Raises:
      ValueError: if the expected signature markers are missing from ``code``.
    """
    arg_bufs = AntaresGlobal.current_arg_bufs

    # Locate where the kernel body starts, searching after the opening '(' of
    # the kernel signature.
    body_marker = ') {\n'
    params_begin = code.index('(', code.index('extern "C" __global__ ')) + 1
    body_begin = code.index(body_marker, params_begin) + len(body_marker)

    typed_names = [
        (_native_dtype(buf['dtype']), buf['name'])
        for group in ('_in', '_out')
        for buf in arg_bufs[group]
    ]
    params = ', '.join([ctype + '* ' + name for ctype, name in typed_names])

    header = 'extern "C" void kernel_main(%s) {\n  // [thread_compute]\n' % params
    code = header + code[body_begin:]

    for old, new in (('threadIdx.x', '__rank__'),
                     (' __global__ ', ' '),
                     (' __restrict__ ', ' ')):
        code = code.replace(old, new)

    prelude = '#include <math.h>\n#include <algorithm>\nusing namespace std;\n\n'
    code = prelude + kwargs['attrs'].blend + '\n' + code
    return remove_local_cache(code, arg_bufs)
Example #4
0
def do_native_translation(code, **kwargs):
    """Emit a C++ function that registers and wires up a Poplar codelet.

    The kernel body found in ``code`` is wrapped into a ``Vertex`` class, and
    that class source is embedded as a raw string in a generated C++ function
    named ``compute_task``.  The generated function adds the codelet to the
    graph, creates one vertex per entry of ``slices.json``'s ``'slices'``
    list, connects input/output tensor slices to it and returns the result
    tensor.

    Args:
      code: Source text of the generated kernel function.
      **kwargs: Must contain ``attrs``; its ``ir`` and ``blend`` attributes
        are read here.

    Returns:
      The C++ source of ``compute_task`` as a string.

    Raises:
      Exception: if ``'einstein_v2'`` is not in ``attrs.ir``.
      ValueError: if ``code`` lacks the ``') {\\n'`` / ``'}\\n'`` markers.
    """
    arg_bufs = AntaresGlobal.current_arg_bufs

    if 'einstein_v2' not in kwargs['attrs'].ir:
      raise Exception("Program for graphcore must be based on Antares IR")

    # Keep only the function body: drop the signature and the closing brace.
    code = code[code.index(') {\n') + len(') {\n'):code.rindex('}\n')]
    code = remove_local_cache(code, arg_bufs)

    # Build the vertex field declarations.  Inputs whose name starts with '_'
    # are set aside and declared as plain ints further below.
    func_args, delta_args = '', []
    for buf in arg_bufs['_in']:
      if buf['name'].startswith('_'):
        delta_args.append(buf['name'])
        continue
      func_args += ' Input<Vector<%s>> %s; // local: %s\n' % (_native_dtype(buf['dtype']), buf['name'], buf['shape'])
    for buf in arg_bufs['_out']:
      func_args += ' Output<Vector<%s>> %s; // local: %s\n' % (_native_dtype(buf['dtype']), buf['name'], buf['shape'])

    # The body indexes the '_' inputs as arrays ("name[0]"); since they are
    # declared as int fields here, the indexing is stripped.
    for arg in delta_args:
      code = code.replace(arg + '[0]', arg)
      func_args += '\n int %s; // delta scaler' % arg

    # The codelet class name is derived from the (already rewritten) body text.
    function_name, codelet_name = 'compute_task', 'Vuid_%s' % hashlib.sha1(code.encode()).hexdigest()

    code = '''
#include <poplar/Vertex.hpp>

using namespace poplar;

namespace {
%s
}

class %s: public Vertex {
public:
 bool compute() {
%s
  return true;
 }

%s
};
''' % (kwargs['attrs'].blend, codelet_name, code, func_args)

    # Attach rules of input slices
    from antares.common import local_get_dir_file
    with open(local_get_dir_file('slices.json'), 'r') as fp:
      slices_info = json.load(fp)

    # codelet_buf collects the statements of the generated C++ function.  The
    # leading '' keeps index 1 free for the result-tensor declaration that is
    # inserted after the loop, once the result shape is known.
    codelet_buf = ['']
    codelet_buf.append('std::stringstream codelet;')
    codelet_buf.append('codelet << R"(%s)";' % code)
    codelet_buf.append('g.addCodelets(codelet);')
    codelet_buf.append('')

    codelet_buf.append('poplar::VertexRef v;')
    codelet_buf.append('auto compset = g.addComputeSet(__func__);')
    codelet_buf.append('prog.add(poplar::program::Execute(compset));')
    codelet_buf.append('')

    global_result_shape = None
    output_props = arg_bufs['_out'][0]
    ax_names = [x['name'] for x in slices_info['data_axes']]

    # Each entry of 'slices' is a pair (axis ranges, per-tensor ranges) and
    # yields one vertex.  Ranges are [l, r] pairs with r inclusive, hence the
    # "+ 1" when they are turned into slice end bounds.
    for rank, (axis, tensor) in enumerate(slices_info['slices']):
      codelet_buf.append('v = g.addVertex(compset, "%s");' % codelet_name)
      codelet_buf.append('if (g.getTarget().getTargetType() == poplar::TargetType::IPU_MODEL) g.setCycleEstimate(v, 10);')

      # Vertex field "_<axis name>" receives the lower bound of that axis range.
      for ax in ax_names:
        codelet_buf.append('g.setInitialValue(v["_%s"], %d);' % (ax, axis[ax][0]))

      for k in tensor:
        ls, rs = [], []
        for l, r in tensor[k]:
          ls.append(l)
          rs.append(r + 1)
        # Note: the "i" inside this string is the C++ map parameter of the
        # generated function, not the Python loop variable used below.
        codelet_buf.append('g.connect(v["%s"], i.find("%s")->second.slice({%s}, {%s}).flatten());' % (k, k, str(ls)[1:-1], str(rs)[1:-1]))
        # Strides over the slice extents (last dimension has stride 1); the
        # dot product with the lower bounds is stored in field "_<tensor name>".
        stride = [1] * len(ls)
        for i in reversed(range(len(stride) - 1)):
          stride[i] = stride[i + 1] * (rs[i + 1] - ls[i + 1])
        delta_val = int(np.dot(ls, stride))
        codelet_buf.append('g.setInitialValue(v["_%s"], %d);' % (k, delta_val))
      # Output slice bounds come from the axis ranges, in 'data_axes' order.
      ls, rs = [], []
      for ax in ax_names:
        l, r = axis[ax]
        ls.append(l)
        rs.append(r + 1)
      # Overwritten on every iteration, so the end bounds of the last slice
      # are what is used as the overall result shape.
      global_result_shape = rs
      output_slice = 'result.slice({%s}, {%s}).flatten()' % (str(ls)[1:-1], str(rs)[1:-1])
      codelet_buf.append('g.connect(v["%s"], %s);' % (output_props['name'], output_slice))
      # NOTE(review): 1216 is presumably the tile count of the target device - confirm.
      codelet_buf.append('g.setTileMapping(%s, %d);' % (output_slice, rank % 1216))
      codelet_buf.append('g.setTileMapping(v, %d);' % (rank % 1216))
      codelet_buf.append('')

    # NOTE(review): if 'slices' is empty, global_result_shape is still None
    # here and the shape text below is rendered from str(None) - confirm that
    # an empty list cannot occur.
    codelet_buf.insert(1, 'poplar::Tensor result = g.addVariable(poplar::%s, poplar::ArrayRef<std::size_t>({%s}), "%s");' % (_native_dtype(output_props['dtype']).upper(), str(global_result_shape)[1:-1], output_props['name']))
    codelet_buf.append('return std::move(result);')

    codelet_buf = '\n  '.join(codelet_buf)
    code = 'poplar::Tensor %s(poplar::Graph &g, poplar::program::Sequence &prog, const std::unordered_map<std::string, poplar::Tensor> &i) {%s\n}' % (function_name, codelet_buf)
    return code