Example #1
def do_native_translation(code, **kwargs):
    arg_bufs = AntaresGlobal.local_arg_pros

    if 'einstein_v2' not in kwargs['attrs'].ir:
      raise Exception("Program for graphcore must be based on Antares IR")

    # Keep only the kernel body between the end of the signature (') {') and the final closing brace
    code = code[code.index(') {\n') + len(') {\n'):code.rindex('}\n')]
    code = remove_local_cache(code, arg_bufs)

    func_args, delta_args = '', []
    for buf in arg_bufs['_in']:
      if buf['name'].startswith('_'):
        delta_args.append(buf['name'])
        continue
      func_args += ' Input<Vector<%s>> %s; // local size: %s\n' % (_native_dtype(buf['dtype']), buf['name'], buf['shape'])
    for buf in arg_bufs['_out']:
      func_args += ' Output<Vector<%s>> %s; // local size: %s\n' % (_native_dtype(buf['dtype']), buf['name'], buf['shape'])

    function_name, codelet_name = 'compute_task', 'Vuid_%s' % hashlib.sha1(code.encode()).hexdigest()
    blend_code = kwargs['attrs'].blend.strip()
    blend_code = 'namespace {\n%s\n}\n\n' % blend_code if blend_code else ''

    from antares.common import local_get_dir_file
    try:
      with open(local_get_dir_file('range_book.json'), 'r') as fp:
        range_book = json.load(fp)
    except FileNotFoundError:
      raise Exception("TODO: Graphcore code generation is not completely implemented in new emit_tvm_ir_v2()")

    props = []
    for k in range_book['book']:
      arr2d = range_book['book'][k]
      arr2d = [str(x)[1:-1].replace(', ', ',') for x in arr2d]
      arr2d = '/'.join(arr2d)
      props.append(k + '/' + arr2d)
    props = ';'.join(props)

    code = '''
// Antares Property (k * ax_id + l .. r): %s

#include <poplar/Vertex.hpp>

using namespace poplar;

%s
class %s: public Vertex {
public:
 bool compute() {
%s
  return true;
 }

%s
};
''' % (props, blend_code, codelet_name, code, func_args)
    return code
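
The slicing on code.index(') {\n') and code.rindex('}\n') strips the generated C kernel down to its body so it can be re-wrapped inside a poplar Vertex. A minimal standalone sketch of that extraction, using a made-up kernel string rather than real Antares codegen output:

demo_kernel = 'extern "C" void kernel(float* A, float* B) {\n  B[0] = A[0] + 1;\n}\n'
# Keep everything between the end of the signature (') {') and the last closing brace.
body = demo_kernel[demo_kernel.index(') {\n') + len(') {\n'):demo_kernel.rindex('}\n')]
print(body)  # prints "  B[0] = A[0] + 1;"
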
Example #2
def run_pass_v2(ast_seq, global_input_dict, global_output_dict):
    if backend not in ['c-gc']:
        return

    if len(ast_seq) > 1:
        raise Exception(
            "TODO: Graphcore backend not handling multiple IR statements.")
    ast = ast_seq[0]

    steps = int(os.environ.get('STEP', '0'))
    pieces = os.environ.get('CONFIG', '').strip()

    data_axes = ast['props']['data_axes']
    if not pieces and steps > 0:
        return

    try:
        pieces = json.loads(pieces)
        pieces = [pieces['axis_%d' % i][-1] for i in range(len(data_axes))]
    except:
        pieces = [1] * len(data_axes)

    assert 'injective' not in ast, "Unhandled injective case for graphcore."
    range_book = {}
    walk_in_ast(ast['root'], scan_items, [ast, range_book], ast, 'root')
    ast['props']['shard'] = {'nparts': pieces, 'book': range_book}

    # AST props: ast['props']['data_axes'], ast['props']['input_dict']
    for i in range(len(pieces)):
        assert data_axes[i]['range'] % pieces[i] == 0, \
            "Axis sharding must be exactly divided, while requesting %d // %d." % (data_axes[i]['range'], pieces[i])
        data_axes[i]['range'] //= pieces[i]

    for k in ast['props']['input_dict']:
        input_item = ast['props']['input_dict'][k]
        sub_shape = []
        for it in range_book[k]:
            bias_diff = it[3] - it[2] + 1
            if it[1] < 0 or it[0] == 0:
                sub_shape.append(bias_diff)
            elif it[0] > 0:
                sub_shape.append(it[0] * (data_axes[it[1]]['range'] - 1) +
                                 bias_diff)
            else:
                raise Exception('Unhandled book case:', it)
        input_item['shape'] = sub_shape

    from antares.common import local_get_dir_file
    output_key = ast['props']['output_name']
    ast['props']['shard']['local_shape'] = [
        x['range'] for x in ast['props']['data_axes']
    ]
    with open(local_get_dir_file('range_book.json'), 'w') as fp:
        json.dump(ast['props']['shard'], fp)
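
Each entry in range_book is a 4-tuple (k, ax_id, l, r) describing an index expression of the form k * ax_id + l .. r; the per-shard input extent is r - l + 1 when the axis coefficient is zero or no axis is involved, and k * (axis_range - 1) + (r - l + 1) otherwise. A standalone sketch of that arithmetic with made-up values:

# Hypothetical sharded axis ranges and one input's range_book rows (k, ax_id, l, r).
data_axes = [{'range': 8}, {'range': 16}]
book_rows = [(1, 0, 0, 0),   # follows axis 0 with coefficient 1 -> 1 * (8 - 1) + 1 = 8
             (0, -1, 2, 5)]  # constant window, no axis involved -> 5 - 2 + 1 = 4

sub_shape = []
for k, ax_id, l, r in book_rows:
  bias_diff = r - l + 1
  if ax_id < 0 or k == 0:
    sub_shape.append(bias_diff)
  else:
    sub_shape.append(k * (data_axes[ax_id]['range'] - 1) + bias_diff)
print(sub_shape)  # [8, 4]
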
Example #3
def local_builder_build(inputs, timeout, n_parallel, build_func="default", verbose=1):
  build_results = []
  for i in range(len(inputs)):
    cfg_path = local_get_dir_file('my_kernel.cfg', i)
    try:
      os.remove(cfg_path)
    except:
      pass
    tvm.auto_scheduler.measure_record.save_records(cfg_path, [inputs[i]], [tvm.auto_scheduler.measure.MeasureResult([0.0], 0, 0, 0, 0)])
    build_results.append(tvm.auto_scheduler.measure.BuildResult(cfg_path, (), 0, 0, 0))
  return build_results
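
local_builder_build skips real compilation: it just serializes each MeasureInput into a per-candidate .cfg record and returns placeholder BuildResult objects pointing at those files. A minimal read-back sketch, assuming TVM's auto_scheduler record API and a build_results list produced by the call above:

import tvm

for res in build_results:
  # The first BuildResult field (filename) is the cfg path written by save_records above.
  inp, _ = next(iter(tvm.auto_scheduler.load_records(res.filename)))
  print(res.filename, inp.state)
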
Example #4
def compute(ast):
  if backend not in ['c-gc']:
    return

  steps = int(os.environ.get('STEP', '0'))
  pieces = os.environ.get('CONFIG', '').strip()

  data_axes = ast['props']['data_axes']
  if not pieces and steps > 0:
    return

  try:
    pieces = json.loads(pieces)
    pieces = [pieces['axis_%d' % i][-1] for i in range(len(data_axes))]
  except:
    pieces = [1] * len(data_axes)

  assert 'injective' not in ast, "Unhandled injective case for graphcore."
  range_book = {}
  walk_in_ast(ast['root'], scan_items, [ast, range_book], ast, 'root')
  ast['props']['shard'] = {'nparts': pieces, 'book': range_book}

  # AST props: ast['props']['data_axes'], ast['props']['output_dict'], ast['props']['input_dict']
  for i in range(len(pieces)):
    assert data_axes[i]['range'] % pieces[i] == 0
    data_axes[i]['range'] //= pieces[i]
    for k in ast['props']['output_dict']:
      output_item = ast['props']['output_dict'][k]
      assert output_item['shape'][i] % pieces[i] == 0
      output_item['shape'][i] //= pieces[i]
  for k in ast['props']['input_dict']:
    input_item = ast['props']['input_dict'][k]
    sub_shape = []
    for it in range_book[k]:
      bias_diff = it[3] - it[2] + 1
      if it[1] < 0 or it[0] == 0:
        sub_shape.append(bias_diff)
      elif it[0] > 0:
        sub_shape.append(it[0] * (data_axes[it[1]]['range'] - 1) + bias_diff)
      else:
        raise Exception('Unhandled book case:', it)
    input_item['shape'] = sub_shape

  from antares.common import local_get_dir_file
  output_key = next(iter(ast['props']['output_dict']))
  ast['props']['shard']['local_shape'] = ast['props']['output_dict'][output_key]['shape']
  with open(local_get_dir_file('range_book.json'), 'w') as fp:
    json.dump(ast['props']['shard'], fp)
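
The sharding loop above divides both the data axes and every output tensor's shape by the chosen piece counts, so each tile works on 1/pieces[i] of every output extent. A small standalone sketch with made-up shapes:

# Hypothetical axis ranges, output shapes and piece counts (each must divide evenly).
data_axes = [{'range': 64}, {'range': 32}]
output_dict = {'output0': {'shape': [64, 32]}}
pieces = [4, 2]

for i, p in enumerate(pieces):
  assert data_axes[i]['range'] % p == 0 and output_dict['output0']['shape'][i] % p == 0
  data_axes[i]['range'] //= p
  output_dict['output0']['shape'][i] //= p

print([ax['range'] for ax in data_axes], output_dict['output0']['shape'])  # [16, 16] [16, 16]
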
Example #5
def emit_tvm_ir(exprss, input_dict):
  ast = build_fused_ast(exprss, input_dict)
  from lang.auto_shard import auto_shard_on_ast
  auto_shard_on_ast(ast)
  bias_axis_body = ''
  if 'slices' in ast['props']:
    axis_dict, tensor_dict = ast['props']['slices'][0]
    for k in axis_dict:
      bias_axis_body += '_%s = input("_%s", [1], dtype="int32")[0]; ' % (k, k)
    for k in tensor_dict:
      bias_axis_body += '_%s = input("_%s", [1], dtype="int32")[0]; ' % (k, k)

    slices_info = {
      'data_axes': ast['props']['data_axes'],
      'slices': ast['props']['slices'],
    }
    from antares.common import local_get_dir_file
    with open(local_get_dir_file('slices.json'), 'w') as fp:
      json.dump(slices_info, fp)

  def emit_input_body(input_dict):
    input_body = ''
    for key in input_dict:
      input_info = input_dict[key]
      input_body += '%s = input("%s", %s, dtype="%s"); ' % (key, key, input_info['shape'], input_info['dtype'])
    return input_body

  def emit_reduce_body(ast):
    reduce_body, reduce_set = '', []
    props = ast['props']
    if props['reduce_axes']:
      for x in props['reduce_axes']:
        axis_name = warp_axis(x['name'])
        reduce_set.append(axis_name)
        reduce_body += '%s = loop(%d); ' % (axis_name, x['range'])
      reduce_maps = {'+': 'te.sum', '>': 'te.max', '<': 'te.min'}
      if props['reduce_type'] in reduce_maps:
        reduce_func = reduce_maps[props['reduce_type']]
      else:
        spec_idx = props['reduce_type'].find('(')
        if spec_idx >= 0:
          reduce_func = 'common_reduce("%s", %s)' % (props['reduce_type'][:spec_idx], props['reduce_type'][spec_idx:])
        else:
          reduce_func = 'common_reduce("%s")' % props['reduce_type']
      reduce_pattern = '%s(' % reduce_func + '%s' + ', axis=[%s])' % ', '.join(reduce_set)
    else:
      reduce_pattern = '%s'
    return reduce_body, reduce_pattern

  def emit_output_body(ast, reduce_pattern, final_output=True, injective=False):
    root, props = ast['root'], ast['props']
    output_shape = [x['range'] for x in props['data_axes']]
    output_name = next(iter(props['output_dict']))
    all_axis_range = np.product(output_shape) * np.product([x['range'] for x in props['reduce_axes']])
    output_begin = '%s = output(shape=%s, flops=(%d * %d), func=lambda %s: ' % (output_name, output_shape, props['flopbase'], all_axis_range, ', '.join([warp_axis(x['name']) for x in props['data_axes']]))
    basic_body = emit_tvm_body(root, props)
    output_end = ', dtype="%s", tag="%s", name="%s", final_output=%s); ' % (props['output_dict'][output_name]['dtype'], 'antares_injective' if injective else '', output_name, final_output)
    return output_begin + reduce_pattern % basic_body + output_end

  final_body = bias_axis_body + emit_input_body(ast['props']['input_dict'])

  has_injective = 'injective' in ast
  reduce_body, reduce_pattern = emit_reduce_body(ast)
  final_body += reduce_body + emit_output_body(ast, reduce_pattern, final_output=(not has_injective), injective=False)
  if has_injective:
    final_body += emit_output_body(ast['injective'], '%s', final_output=True, injective=True)
  return final_body
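
emit_reduce_body returns reduce_pattern with a literal %s left in the middle so the caller can later wrap the element-wise expression inside the reduction call. A standalone sketch of that composition, with invented axis and tensor names:

reduce_set = ['K']
reduce_func = 'te.sum'
# Same composition as in emit_reduce_body: only the outer %s placeholders are filled here.
reduce_pattern = '%s(' % reduce_func + '%s' + ', axis=[%s])' % ', '.join(reduce_set)
basic_body = 'A[N, K] * B[K, M]'
print(reduce_pattern % basic_body)  # te.sum(A[N, K] * B[K, M], axis=[K])
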
Example #6
def update_ast(config, ast_seq, global_input_dict, global_output_dict):
    if len(ast_seq) > 1:
        raise Exception(
            "TODO: Graphcore backend not handling multiple IR statements.")
    ast = ast_seq[0]

    data_axes = ast['props']['data_axes']

    try:
        pieces = config
        pieces = [(pieces['tile_%d' % i][1] * pieces['tile_%d' % i][2])
                  for i in range(len(data_axes))]
    except:
        pieces = [1] * len(data_axes)
    for i in range(len(pieces)):
        assert data_axes[i]['range'] % pieces[i] == 0
        pieces[i] = data_axes[i]['range'] // pieces[i]

    assert 'injective' not in ast, "Unhandled injective case for graphcore."
    range_book = {}
    walk_in_ast(ast, 'root', scan_items, [ast, range_book])
    ast['props']['shard'] = {'nparts': pieces, 'book': range_book}

    # AST props: ast['props']['data_axes'], ast['props']['input_dict']
    for i in range(len(pieces)):
        assert data_axes[i]['range'] % pieces[i] == 0, \
            "Axis sharding must be exactly divided, while requesting %d // %d." % (data_axes[i]['range'], pieces[i])
        data_axes[i]['range'] //= pieces[i]

    for k in ast['props']['input_dict']:
        input_item = ast['props']['input_dict'][k]
        sub_shape = []
        for it in range_book[k]:
            bias_diff = it[3] - it[2] + 1
            if it[1] < 0 or it[0] == 0:
                sub_shape.append(bias_diff)
            elif it[0] > 0:
                sub_shape.append(it[0] * (data_axes[it[1]]['range'] - 1) +
                                 bias_diff)
            else:
                raise Exception('Unhandled book case:', it)
        input_item['shape'] = sub_shape

    from antares.common import local_get_dir_file
    output_key = ast['props']['output_name']
    ast['props']['shard']['local_shape'] = [
        x['range'] for x in ast['props']['data_axes']
    ]
    with open(local_get_dir_file('range_book.json'), 'w') as fp:
        json.dump(ast['props']['shard'], fp)
    for k in global_input_dict:
        if k in ast['props']['input_dict']:
            global_input_dict[k] = ast['props']['input_dict'][k]

    assert len(global_output_dict) == 1
    for k in global_output_dict:
        global_output_dict[k]['shape'] = [
            x['range'] for x in ast['props']['data_axes']
        ]
        break
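
In update_ast the per-axis shard count comes from the tuner config: tile_i[1] * tile_i[2] is treated as the local (per-shard) extent of axis i, and dividing the full range by it yields nparts for that axis. A standalone sketch with a made-up config:

# Hypothetical tuner config and axis ranges.
config = {'tile_0': [-1, 4, 2], 'tile_1': [-1, 1, 8]}
data_axes = [{'range': 64}, {'range': 32}]

pieces = [config['tile_%d' % i][1] * config['tile_%d' % i][2] for i in range(len(data_axes))]
pieces = [data_axes[i]['range'] // pieces[i] for i in range(len(pieces))]  # parts per axis
print(pieces)  # [8, 4]
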
Example #7
def do_native_translation(code, **kwargs):
    arg_bufs = AntaresGlobal.current_arg_bufs

    if 'einstein_v2' not in kwargs['attrs'].ir:
      raise Exception("Program for graphcore must be based on Antares IR")

    code = code[code.index(') {\n') + len(') {\n'):code.rindex('}\n')]
    code = remove_local_cache(code, arg_bufs)

    func_args, delta_args = '', []
    for buf in arg_bufs['_in']:
      if buf['name'].startswith('_'):
        delta_args.append(buf['name'])
        continue
      func_args += ' Input<Vector<%s>> %s; // local: %s\n' % (_native_dtype(buf['dtype']), buf['name'], buf['shape'])
    for buf in arg_bufs['_out']:
      func_args += ' Output<Vector<%s>> %s; // local: %s\n' % (_native_dtype(buf['dtype']), buf['name'], buf['shape'])

    for arg in delta_args:
      code = code.replace(arg + '[0]', arg)
      func_args += '\n int %s; // delta scalar' % arg

    function_name, codelet_name = 'compute_task', 'Vuid_%s' % hashlib.sha1(code.encode()).hexdigest()

    code = '''
#include <poplar/Vertex.hpp>

using namespace poplar;

namespace {
%s
}

class %s: public Vertex {
public:
 bool compute() {
%s
  return true;
 }

%s
};
''' % (kwargs['attrs'].blend, codelet_name, code, func_args)

    # Attach rules of input slices
    from antares.common import local_get_dir_file
    with open(local_get_dir_file('slices.json'), 'r') as fp:
      slices_info = json.load(fp)

    codelet_buf = ['']
    codelet_buf.append('std::stringstream codelet;')
    codelet_buf.append('codelet << R"(%s)";' % code)
    codelet_buf.append('g.addCodelets(codelet);')
    codelet_buf.append('')

    codelet_buf.append('poplar::VertexRef v;')
    codelet_buf.append('auto compset = g.addComputeSet(__func__);')
    codelet_buf.append('prog.add(poplar::program::Execute(compset));')
    codelet_buf.append('')

    global_result_shape = None
    output_props = arg_bufs['_out'][0]
    ax_names = [x['name'] for x in slices_info['data_axes']]

    for rank, (axis, tensor) in enumerate(slices_info['slices']):
      codelet_buf.append('v = g.addVertex(compset, "%s");' % codelet_name)
      codelet_buf.append('if (g.getTarget().getTargetType() == poplar::TargetType::IPU_MODEL) g.setCycleEstimate(v, 10);')

      for ax in ax_names:
        codelet_buf.append('g.setInitialValue(v["_%s"], %d);' % (ax, axis[ax][0]))

      for k in tensor:
        ls, rs = [], []
        for l, r in tensor[k]:
          ls.append(l)
          rs.append(r + 1)
        codelet_buf.append('g.connect(v["%s"], i.find("%s")->second.slice({%s}, {%s}).flatten());' % (k, k, str(ls)[1:-1], str(rs)[1:-1]))
        stride = [1] * len(ls)
        for i in reversed(range(len(stride) - 1)):
          stride[i] = stride[i + 1] * (rs[i + 1] - ls[i + 1])
        delta_val = int(np.dot(ls, stride))
        codelet_buf.append('g.setInitialValue(v["_%s"], %d);' % (k, delta_val))
      ls, rs = [], []
      for ax in ax_names:
        l, r = axis[ax]
        ls.append(l)
        rs.append(r + 1)
      global_result_shape = rs
      output_slice = 'result.slice({%s}, {%s}).flatten()' % (str(ls)[1:-1], str(rs)[1:-1])
      codelet_buf.append('g.connect(v["%s"], %s);' % (output_props['name'], output_slice))
      codelet_buf.append('g.setTileMapping(%s, %d);' % (output_slice, rank % 1216))
      codelet_buf.append('g.setTileMapping(v, %d);' % (rank % 1216))
      codelet_buf.append('')

    codelet_buf.insert(1, 'poplar::Tensor result = g.addVariable(poplar::%s, poplar::ArrayRef<std::size_t>({%s}), "%s");' % (_native_dtype(output_props['dtype']).upper(), str(global_result_shape)[1:-1], output_props['name']))
    codelet_buf.append('return std::move(result);')

    codelet_buf = '\n  '.join(codelet_buf)
    code = 'poplar::Tensor %s(poplar::Graph &g, poplar::program::Sequence &prog, const std::unordered_map<std::string, poplar::Tensor> &i) {%s\n}' % (function_name, codelet_buf)
    return code
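
For every connected input slice the vertex also receives a scalar _<name> value via setInitialValue: the slice origin ls dotted with row-major strides derived from the slice extents (presumably to rebase indexing onto the local slice). A standalone numpy sketch with made-up slice bounds:

import numpy as np

# Hypothetical slice of an input tensor: rows 2..3, columns 4..7 (rs holds exclusive upper bounds).
ls, rs = [2, 4], [4, 8]
stride = [1] * len(ls)
for i in reversed(range(len(stride) - 1)):
  stride[i] = stride[i + 1] * (rs[i + 1] - ls[i + 1])  # strides over the slice shape, as above
delta_val = int(np.dot(ls, stride))
print(stride, delta_val)  # [4, 1] 12
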
Example #8
def do_native_translation_v2(codeset, **kwargs):
    if 'einstein_v2' not in kwargs['attrs'].ir:
        raise Exception("Program for graphcore must be based on Antares IR")

    kernel_name, in_args, out_args, body = codeset

    func_args, delta_args = '', []
    for buf in in_args:
        if buf[1].startswith('_'):
            delta_args.append(buf[1])
            continue
        func_args += ' Input<Vector<%s>> %s;\n' % (buf[0], buf[1])
    for buf in out_args:
        func_args += ' Output<Vector<%s>> %s;\n' % (buf[0], buf[1])

    blend_code = kwargs['attrs'].blend.strip()
    blend_code = 'namespace {\n%s\n}\n\n' % blend_code if blend_code else ''

    from antares.common import local_get_dir_file
    try:
        with open(local_get_dir_file('range_book.json'), 'r') as fp:
            range_book = json.load(fp)
    except FileNotFoundError:
        raise Exception(
            "TODO: Graphcore body generation is not completely implemented in new emit_tvm_ir_v2()"
        )

    props = []
    for k in range_book['book']:
        arr2d = range_book['book'][k]
        arr2d = [str(x)[1:-1].replace(', ', ',') for x in arr2d]
        arr2d = '/'.join(arr2d)
        props.append(k + '/' + arr2d)
    props = ';'.join(props)

    full_body = f'''// Antares Property (k * ax_id + l .. r): {props}

#include <poplar/Vertex.hpp>

using namespace poplar;

#define int8 char
#define int16 short
#define int32 int
#define int64 long
#define float16 half
#define float32 float
#define float64 double

#define min(x, y) ((x) < (y) ? (x) : (y))
#define max(x, y) ((x) > (y) ? (x) : (y))

{blend_code}
class CODELET_{kernel_name}: public Vertex {{

public:
 bool compute() {{
{body}
  return true;
 }}

{func_args}}};
'''
    return full_body
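
do_native_translation_v2 receives the kernel as a (kernel_name, in_args, out_args, body) tuple and turns every tensor argument into an Input/Output Vector field of the codelet, while arguments whose names start with an underscore are collected separately as scalar deltas. A standalone sketch of that field generation with an invented codeset (argument entries assumed to be (dtype, name) pairs):

kernel_name = 'my_kernel_0'
in_args = [('float', 'input0'), ('int', '_input0_offset')]
out_args = [('float', 'output0')]

func_args, delta_args = '', []
for dtype, name in in_args:
  if name.startswith('_'):   # leading underscore marks a scalar delta/offset argument
    delta_args.append(name)
    continue
  func_args += ' Input<Vector<%s>> %s;\n' % (dtype, name)
for dtype, name in out_args:
  func_args += ' Output<Vector<%s>> %s;\n' % (dtype, name)
print(func_args, delta_args)
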