def do_native_translation(code, **kwargs):
    """Wrap a generated kernel body into Poplar ``Vertex`` codelet source.

    Args:
        code: Generated kernel source. Only the text after the first
            ``) {`` line ending and before the last ``}`` line is kept and
            used as the body of ``compute()``.
        **kwargs: Must contain ``attrs``, an object exposing ``ir`` and
            ``blend``.

    Returns:
        The codelet source text. Its first comment line carries the range
        book loaded from ``range_book.json``.

    Raises:
        Exception: If the program is not based on Antares IR, or if
            ``range_book.json`` does not exist.
        ValueError: If the body delimiters cannot be found in ``code``.
    """
    arg_bufs = AntaresGlobal.local_arg_pros

    if 'einstein_v2' not in kwargs['attrs'].ir:
        raise Exception("Program for graphcore must be based on Antares IR")

    body_open = ') {\n'
    code = code[code.index(body_open) + len(body_open):code.rindex('}\n')]
    code = remove_local_cache(code, arg_bufs)

    # Inputs whose name starts with '_' are not declared as vertex fields.
    func_args = ''.join(
        ' Input<Vector<%s>> %s; // local size: %s\n' % (_native_dtype(buf['dtype']), buf['name'], buf['shape'])
        for buf in arg_bufs['_in'] if not buf['name'].startswith('_'))
    func_args += ''.join(
        ' Output<Vector<%s>> %s; // local size: %s\n' % (_native_dtype(buf['dtype']), buf['name'], buf['shape'])
        for buf in arg_bufs['_out'])

    # The class name is derived from the body so identical bodies share a name.
    codelet_name = 'Vuid_%s' % hashlib.sha1(code.encode()).hexdigest()

    # Wrap user blend code in an anonymous namespace; emit nothing when empty.
    blend_code = kwargs['attrs'].blend.strip()
    blend_code = ('namespace {\n%s\n}\n\n' % blend_code) if blend_code else ''

    from antares.common import local_get_dir_file
    try:
        with open(local_get_dir_file('range_book.json'), 'r') as fp:
            range_book = json.load(fp)
    except FileNotFoundError as err:
        raise Exception("TODO: Graphcore code generation is not completely implemented in new emit_tvm_ir_v2()") from err

    # Serialize as "<name>/<row>/<row>...;<name>/..." where each row is the
    # list's items joined by ',' (list brackets and spaces stripped).
    props = ';'.join(
        k + '/' + '/'.join(str(row)[1:-1].replace(', ', ',') for row in range_book['book'][k])
        for k in range_book['book'])

    return '''
// Antares Property (k * ax_id + l .. r): %s

#include <poplar/Vertex.hpp>

using namespace poplar;

%s
class %s: public Vertex {

public:
  bool compute() {
%s
    return true;
  }

%s
};
''' % (props, blend_code, codelet_name, code, func_args)
def run_pass_v2(ast_seq, global_input_dict, global_output_dict):
    """Shard the single AST for the ``c-gc`` backend and save the range book.

    The number of parts per data axis is read from the ``CONFIG`` environment
    variable (a JSON object whose ``axis_<i>`` entry ends with the part count
    for axis ``i``). When ``CONFIG`` cannot be parsed that way, every axis
    uses one part. The AST's data-axis ranges and input shapes are rewritten
    in place to their per-shard values, and ``ast['props']['shard']`` is
    dumped to ``range_book.json``.

    Args:
        ast_seq: Sequence of ASTs; at most one is supported.
        global_input_dict: Not used by this function.
        global_output_dict: Not used by this function.

    Returns:
        None. Returns early for other backends, or when ``CONFIG`` is empty
        while ``STEP`` is positive.

    Raises:
        Exception: On multiple IR statements or an unhandled book entry.
        AssertionError: On an injective AST or a non-divisible axis range.
    """
    if backend not in ['c-gc']:
        return
    if len(ast_seq) > 1:
        raise Exception("TODO: Graphcore backend not handling multiple IR statements.")
    ast = ast_seq[0]

    steps = int(os.environ.get('STEP', '0'))
    pieces = os.environ.get('CONFIG', '').strip()
    data_axes = ast['props']['data_axes']
    if not pieces and steps > 0:
        return

    try:
        config = json.loads(pieces)
        pieces = [config['axis_%d' % i][-1] for i in range(len(data_axes))]
    except Exception:
        # Best-effort: an empty or malformed CONFIG means "do not shard".
        pieces = [1] * len(data_axes)

    assert 'injective' not in ast, "Unhandled injective case for graphcore."
    range_book = {}
    walk_in_ast(ast['root'], scan_items, [ast, range_book], ast, 'root')
    ast['props']['shard'] = {'nparts': pieces, 'book': range_book}

    # AST props: ast['props']['data_axes'], ast['props']['input_dict']
    for axis, nparts in zip(data_axes, pieces):
        assert axis['range'] % nparts == 0, \
            "Axis sharding must be exactly divided, while requesting %d // %d." % (axis['range'], nparts)
        axis['range'] //= nparts

    for k, input_item in ast['props']['input_dict'].items():
        sub_shape = []
        # NOTE(review): entries look like (k, ax_id, l, r) - confirm against scan_items.
        for it in range_book[k]:
            bias_diff = it[3] - it[2] + 1
            if it[1] < 0 or it[0] == 0:
                sub_shape.append(bias_diff)
            elif it[0] > 0:
                sub_shape.append(it[0] * (data_axes[it[1]]['range'] - 1) + bias_diff)
            else:
                raise Exception('Unhandled book case:', it)
        input_item['shape'] = sub_shape

    from antares.common import local_get_dir_file
    ast['props']['shard']['local_shape'] = [x['range'] for x in ast['props']['data_axes']]
    with open(local_get_dir_file('range_book.json'), 'w') as fp:
        json.dump(ast['props']['shard'], fp)
def local_builder_build(inputs, timeout, n_parallel, build_func="default", verbose=1):
    """Write one record file per measure input and return build results.

    For each input, any existing ``my_kernel.cfg`` file for that index is
    removed, the input is saved there with a placeholder measure result, and
    a ``BuildResult`` pointing at the file is collected.

    Args:
        inputs: Sequence of measure inputs to record.
        timeout: Not used by this function.
        n_parallel: Not used by this function.
        build_func: Not used by this function.
        verbose: Not used by this function.

    Returns:
        A list with one ``BuildResult`` per input, in input order.
    """
    measure = tvm.auto_scheduler.measure if inputs else None
    build_results = []
    for i, measure_input in enumerate(inputs):
        cfg_path = local_get_dir_file('my_kernel.cfg', i)
        # Best-effort cleanup of a previous record; a missing file is fine.
        try:
            os.remove(cfg_path)
        except OSError:
            pass
        tvm.auto_scheduler.measure_record.save_records(
            cfg_path, [measure_input], [measure.MeasureResult([0.0], 0, 0, 0, 0)])
        build_results.append(measure.BuildResult(cfg_path, (), 0, 0, 0))
    return build_results
def compute(ast):
    """Shard ``ast`` for the ``c-gc`` backend and save the range book.

    The number of parts per data axis is read from the ``CONFIG`` environment
    variable (a JSON object whose ``axis_<i>`` entry ends with the part count
    for axis ``i``). When ``CONFIG`` cannot be parsed that way, every axis
    uses one part. Data-axis ranges, output shapes and input shapes in
    ``ast['props']`` are rewritten in place to their per-shard values, and
    ``ast['props']['shard']`` is dumped to ``range_book.json``.

    Args:
        ast: The AST dictionary to shard.

    Returns:
        None. Returns early for other backends, or when ``CONFIG`` is empty
        while ``STEP`` is positive.

    Raises:
        Exception: On an unhandled book entry.
        AssertionError: On an injective AST or a non-divisible axis/shape.
    """
    if backend not in ['c-gc']:
        return

    steps = int(os.environ.get('STEP', '0'))
    pieces = os.environ.get('CONFIG', '').strip()
    data_axes = ast['props']['data_axes']
    if not pieces and steps > 0:
        return

    try:
        config = json.loads(pieces)
        pieces = [config['axis_%d' % i][-1] for i in range(len(data_axes))]
    except Exception:
        # Best-effort: an empty or malformed CONFIG means "do not shard".
        pieces = [1] * len(data_axes)

    assert 'injective' not in ast, "Unhandled injective case for graphcore."
    range_book = {}
    walk_in_ast(ast['root'], scan_items, [ast, range_book], ast, 'root')
    ast['props']['shard'] = {'nparts': pieces, 'book': range_book}

    # AST props: ast['props']['data_axes'], ast['props']['output_dict'], ast['props']['input_dict']
    for i, nparts in enumerate(pieces):
        assert data_axes[i]['range'] % nparts == 0
        data_axes[i]['range'] //= nparts
        # Every output is divided along the same axis index.
        for output_item in ast['props']['output_dict'].values():
            assert output_item['shape'][i] % nparts == 0
            output_item['shape'][i] //= nparts

    for k, input_item in ast['props']['input_dict'].items():
        sub_shape = []
        # NOTE(review): entries look like (k, ax_id, l, r) - confirm against scan_items.
        for it in range_book[k]:
            bias_diff = it[3] - it[2] + 1
            if it[1] < 0 or it[0] == 0:
                sub_shape.append(bias_diff)
            elif it[0] > 0:
                sub_shape.append(it[0] * (data_axes[it[1]]['range'] - 1) + bias_diff)
            else:
                raise Exception('Unhandled book case:', it)
        input_item['shape'] = sub_shape

    from antares.common import local_get_dir_file
    # The local shape is taken from the first output entry.
    output_key = next(iter(ast['props']['output_dict']))
    ast['props']['shard']['local_shape'] = ast['props']['output_dict'][output_key]['shape']
    with open(local_get_dir_file('range_book.json'), 'w') as fp:
        json.dump(ast['props']['shard'], fp)
def emit_tvm_ir(exprss, input_dict):
    """Build the fused AST for ``exprss`` and emit its statement string.

    The result concatenates, in order: scalar ``int32`` bias inputs (only
    when the AST has ``slices``), the tensor inputs, the reduce-axis loops,
    the main output statement, and the injective output statement if present.
    When ``slices`` exist, the data axes and slices are also dumped to
    ``slices.json``.

    Args:
        exprss: Expression source passed to ``build_fused_ast``.
        input_dict: Input description passed to ``build_fused_ast``.

    Returns:
        The emitted statement string.
    """
    ast = build_fused_ast(exprss, input_dict)
    from lang.auto_shard import auto_shard_on_ast
    auto_shard_on_ast(ast)

    bias_axis_body = ''
    if 'slices' in ast['props']:
        axis_dict, tensor_dict = ast['props']['slices'][0]
        # One scalar bias input per axis name, then one per tensor name.
        for names in (axis_dict, tensor_dict):
            for k in names:
                bias_axis_body += '_%s = input("_%s", [1], dtype="int32")[0]; ' % (k, k)

        slices_info = {
            'data_axes': ast['props']['data_axes'],
            'slices': ast['props']['slices'],
        }
        from antares.common import local_get_dir_file
        with open(local_get_dir_file('slices.json'), 'w') as fp:
            json.dump(slices_info, fp)

    def emit_input_body(input_dict):
        # One `name = input(...)` statement per input tensor.
        return ''.join(
            '%s = input("%s", %s, dtype="%s"); ' % (key, key, info['shape'], info['dtype'])
            for key, info in input_dict.items())

    def emit_reduce_body(ast):
        # Returns the reduce-axis loop statements and a '%s' pattern that
        # wraps the element expression in the reduce call.
        props = ast['props']
        if not props['reduce_axes']:
            return '', '%s'

        reduce_body, reduce_set = '', []
        for x in props['reduce_axes']:
            axis_name = warp_axis(x['name'])
            reduce_set.append(axis_name)
            reduce_body += '%s = loop(%d); ' % (axis_name, x['range'])

        reduce_type = props['reduce_type']
        reduce_maps = {'+': 'te.sum', '>': 'te.max', '<': 'te.min'}
        if reduce_type in reduce_maps:
            reduce_func = reduce_maps[reduce_type]
        else:
            # A "name(args)" reduce type forwards its parenthesized part.
            spec_idx = reduce_type.find('(')
            if spec_idx >= 0:
                reduce_func = 'common_reduce("%s", %s)' % (reduce_type[:spec_idx], reduce_type[spec_idx:])
            else:
                reduce_func = 'common_reduce("%s")' % reduce_type
        reduce_pattern = '%s(' % reduce_func + '%s' + ', axis=[%s])' % ', '.join(reduce_set)
        return reduce_body, reduce_pattern

    def emit_output_body(ast, reduce_pattern, final_output=True, injective=False):
        root, props = ast['root'], ast['props']
        output_shape = [x['range'] for x in props['data_axes']]
        output_name = next(iter(props['output_dict']))
        # np.prod replaces np.product, which was removed in NumPy 2.0.
        all_axis_range = np.prod(output_shape) * np.prod([x['range'] for x in props['reduce_axes']])
        output_begin = '%s = output(shape=%s, flops=(%d * %d), func=lambda %s: ' % (
            output_name, output_shape, props['flopbase'], all_axis_range,
            ', '.join([warp_axis(x['name']) for x in props['data_axes']]))
        basic_body = emit_tvm_body(root, props)
        output_end = ', dtype="%s", tag="%s", name="%s", final_output=%s); ' % (
            props['output_dict'][output_name]['dtype'],
            'antares_injective' if injective else '', output_name, final_output)
        return output_begin + reduce_pattern % basic_body + output_end

    final_body = bias_axis_body + emit_input_body(ast['props']['input_dict'])

    has_injective = 'injective' in ast
    reduce_body, reduce_pattern = emit_reduce_body(ast)
    # The main output is final only when no injective stage follows it.
    final_body += reduce_body + emit_output_body(
        ast, reduce_pattern, final_output=(not has_injective), injective=False)
    if has_injective:
        final_body += emit_output_body(ast['injective'], '%s', final_output=True, injective=True)
    return final_body
def update_ast(config, ast_seq, global_input_dict, global_output_dict):
    """Shard the single AST according to ``config`` and save the range book.

    The per-axis local size is ``config['tile_<i>'][1] * config['tile_<i>'][2]``;
    if ``config`` cannot be read that way, a local size of 1 is used for every
    axis. The number of parts is the axis range divided by that local size.
    Data-axis ranges and input shapes in the AST are rewritten in place to
    their per-shard values, ``ast['props']['shard']`` is dumped to
    ``range_book.json``, and the global input/output dictionaries are updated
    to match.

    Args:
        config: Mapping-like object indexed by ``tile_<i>`` keys.
        ast_seq: Sequence of ASTs; at most one is supported.
        global_input_dict: Entries also present in the AST's ``input_dict``
            are replaced by the AST's (resized) entries.
        global_output_dict: Must have exactly one entry; its ``shape`` is set
            to the per-shard data-axis ranges.

    Raises:
        Exception: On multiple IR statements or an unhandled book entry.
        AssertionError: On an injective AST, a non-divisible axis range, or a
            ``global_output_dict`` that does not have exactly one entry.
    """
    if len(ast_seq) > 1:
        raise Exception("TODO: Graphcore backend not handling multiple IR statements.")
    ast = ast_seq[0]
    data_axes = ast['props']['data_axes']

    try:
        pieces = [(config['tile_%d' % i][1] * config['tile_%d' % i][2]) for i in range(len(data_axes))]
    except Exception:
        # Best-effort: a missing or incompatible config falls back to size 1.
        pieces = [1] * len(data_axes)

    # Convert local sizes into part counts.
    for i in range(len(pieces)):
        assert data_axes[i]['range'] % pieces[i] == 0
        pieces[i] = data_axes[i]['range'] // pieces[i]

    assert 'injective' not in ast, "Unhandled injective case for graphcore."
    range_book = {}
    walk_in_ast(ast, 'root', scan_items, [ast, range_book])
    ast['props']['shard'] = {'nparts': pieces, 'book': range_book}

    # AST props: ast['props']['data_axes'], ast['props']['input_dict']
    for axis, nparts in zip(data_axes, pieces):
        assert axis['range'] % nparts == 0, \
            "Axis sharding must be exactly divided, while requesting %d // %d." % (axis['range'], nparts)
        axis['range'] //= nparts

    for k, input_item in ast['props']['input_dict'].items():
        sub_shape = []
        # NOTE(review): entries look like (k, ax_id, l, r) - confirm against scan_items.
        for it in range_book[k]:
            bias_diff = it[3] - it[2] + 1
            if it[1] < 0 or it[0] == 0:
                sub_shape.append(bias_diff)
            elif it[0] > 0:
                sub_shape.append(it[0] * (data_axes[it[1]]['range'] - 1) + bias_diff)
            else:
                raise Exception('Unhandled book case:', it)
        input_item['shape'] = sub_shape

    from antares.common import local_get_dir_file
    ast['props']['shard']['local_shape'] = [x['range'] for x in ast['props']['data_axes']]
    with open(local_get_dir_file('range_book.json'), 'w') as fp:
        json.dump(ast['props']['shard'], fp)

    for k in global_input_dict:
        if k in ast['props']['input_dict']:
            global_input_dict[k] = ast['props']['input_dict'][k]

    assert len(global_output_dict) == 1
    for k in global_output_dict:
        global_output_dict[k]['shape'] = [x['range'] for x in ast['props']['data_axes']]
        break
def do_native_translation(code, **kwargs):
    """Translate a generated kernel into a Poplar graph-construction function.

    The kernel body is wrapped in a ``Vertex`` codelet class, and C++ source
    is emitted for a ``compute_task`` function that registers the codelet,
    adds one vertex per entry of ``slices.json``, connects input/output
    slices, and returns the result tensor.

    Args:
        code: Generated kernel source; only the text between the first
            ``) {`` line ending and the last ``}`` line is kept.
        **kwargs: Must contain ``attrs``, an object exposing ``ir`` and
            ``blend``.

    Returns:
        The C++ source of the ``compute_task`` function as a string.

    Raises:
        Exception: If the program is not based on Antares IR.
    """
    arg_bufs = AntaresGlobal.current_arg_bufs
    if 'einstein_v2' not in kwargs['attrs'].ir:
        raise Exception("Program for graphcore must be based on Antares IR")
    # Keep only the function body of the generated kernel.
    code = code[code.index(') {\n') + len(') {\n'):code.rindex('}\n')]
    code = remove_local_cache(code, arg_bufs)
    func_args, delta_args = '', []
    for buf in arg_bufs['_in']:
        # Inputs named with a leading '_' are handled as scalar members below.
        if buf['name'].startswith('_'):
            delta_args.append(buf['name'])
            continue
        func_args += ' Input<Vector<%s>> %s; // local: %s\n' % (_native_dtype(buf['dtype']), buf['name'], buf['shape'])
    for buf in arg_bufs['_out']:
        func_args += ' Output<Vector<%s>> %s; // local: %s\n' % (_native_dtype(buf['dtype']), buf['name'], buf['shape'])
    for arg in delta_args:
        # The scalar is a plain int member, so drop the `[0]` indexing.
        code = code.replace(arg + '[0]', arg)
        func_args += '\n int %s; // delta scaler' % arg
    # The codelet class name is derived from the (rewritten) body text.
    function_name, codelet_name = 'compute_task', 'Vuid_%s' % hashlib.sha1(code.encode()).hexdigest()
    code = '''
#include <poplar/Vertex.hpp>

using namespace poplar;

namespace {
%s
}

class %s: public Vertex {
public:
  bool compute() {
%s
    return true;
  }

%s
};
''' % (kwargs['attrs'].blend, codelet_name, code, func_args)
    # Attach rules of input slices
    from antares.common import local_get_dir_file
    with open(local_get_dir_file('slices.json'), 'r') as fp:
        slices_info = json.load(fp)
    # codelet_buf collects the lines of the emitted C++ function body; index 0
    # is an empty string so the joined text starts with a line break.
    codelet_buf = ['']
    codelet_buf.append('std::stringstream codelet;')
    codelet_buf.append('codelet << R"(%s)";' % code)
    codelet_buf.append('g.addCodelets(codelet);')
    codelet_buf.append('')
    codelet_buf.append('poplar::VertexRef v;')
    codelet_buf.append('auto compset = g.addComputeSet(__func__);')
    codelet_buf.append('prog.add(poplar::program::Execute(compset));')
    codelet_buf.append('')
    global_result_shape = None
    # Only the first output buffer is connected.
    output_props = arg_bufs['_out'][0]
    ax_names = [x['name'] for x in slices_info['data_axes']]
    for rank, (axis, tensor) in enumerate(slices_info['slices']):
        codelet_buf.append('v = g.addVertex(compset, "%s");' % codelet_name)
        codelet_buf.append('if (g.getTarget().getTargetType() == poplar::TargetType::IPU_MODEL) g.setCycleEstimate(v, 10);')
        # Per-axis scalar: the lower bound of this slice along each data axis.
        for ax in ax_names:
            codelet_buf.append('g.setInitialValue(v["_%s"], %d);' % (ax, axis[ax][0]))
        for k in tensor:
            # tensor[k] holds inclusive (l, r) bounds per dimension; the slice
            # call takes an exclusive end, hence r + 1.
            ls, rs = [], []
            for l, r in tensor[k]:
                ls.append(l)
                rs.append(r + 1)
            codelet_buf.append('g.connect(v["%s"], i.find("%s")->second.slice({%s}, {%s}).flatten());' % (k, k, str(ls)[1:-1], str(rs)[1:-1]))
            # Row-major strides computed from the slice extents; delta_val is
            # the dot product of the lower bounds with those strides.
            stride = [1] * len(ls)
            for i in reversed(range(len(stride) - 1)):
                stride[i] = stride[i + 1] * (rs[i + 1] - ls[i + 1])
            delta_val = int(np.dot(ls, stride))
            codelet_buf.append('g.setInitialValue(v["_%s"], %d);' % (k, delta_val))
        # Output slice bounds for this vertex, in data-axis order.
        ls, rs = [], []
        for ax in ax_names:
            l, r = axis[ax]
            ls.append(l)
            rs.append(r + 1)
        # Overwritten every iteration, so the last slice's exclusive upper
        # bounds end up as the shape of the result variable.
        global_result_shape = rs
        output_slice = 'result.slice({%s}, {%s}).flatten()' % (str(ls)[1:-1], str(rs)[1:-1])
        codelet_buf.append('g.connect(v["%s"], %s);' % (output_props['name'], output_slice))
        # NOTE(review): 1216 is presumably the tile count of the target device - confirm.
        codelet_buf.append('g.setTileMapping(%s, %d);' % (output_slice, rank % 1216))
        codelet_buf.append('g.setTileMapping(v, %d);' % (rank % 1216))
        codelet_buf.append('')
    # The result variable is declared near the top (index 1) because its shape
    # is only known after the loop above.
    codelet_buf.insert(1, 'poplar::Tensor result = g.addVariable(poplar::%s, poplar::ArrayRef<std::size_t>({%s}), "%s");' % (_native_dtype(output_props['dtype']).upper(), str(global_result_shape)[1:-1], output_props['name']))
    codelet_buf.append('return std::move(result);')
    codelet_buf = '\n '.join(codelet_buf)
    code = 'poplar::Tensor %s(poplar::Graph &g, poplar::program::Sequence &prog, const std::unordered_map<std::string, poplar::Tensor> &i) {%s\n}' % (function_name, codelet_buf)
    return code
def do_native_translation_v2(codeset, **kwargs):
    """Wrap a kernel body into Poplar ``Vertex`` codelet source.

    Args:
        codeset: A 4-item sequence ``(kernel_name, in_args, out_args, body)``.
            Each entry of ``in_args`` / ``out_args`` is indexed as
            ``(dtype, name)``.
        **kwargs: Must contain ``attrs``, an object exposing ``ir`` and
            ``blend``.

    Returns:
        The codelet source text. Its first comment line carries the range
        book loaded from ``range_book.json``.

    Raises:
        Exception: If the program is not based on Antares IR, or if
            ``range_book.json`` does not exist.
    """
    if 'einstein_v2' not in kwargs['attrs'].ir:
        raise Exception("Program for graphcore must be based on Antares IR")
    kernel_name, in_args, out_args, body = codeset

    # Inputs whose name starts with '_' are not declared as vertex fields.
    func_args = ''.join(
        ' Input<Vector<%s>> %s;\n' % (buf[0], buf[1])
        for buf in in_args if not buf[1].startswith('_'))
    func_args += ''.join(
        ' Output<Vector<%s>> %s;\n' % (buf[0], buf[1]) for buf in out_args)

    # Wrap user blend code in an anonymous namespace; emit nothing when empty.
    blend_code = kwargs['attrs'].blend.strip()
    blend_code = ('namespace {\n%s\n}\n\n' % blend_code) if blend_code else ''

    from antares.common import local_get_dir_file
    try:
        with open(local_get_dir_file('range_book.json'), 'r') as fp:
            range_book = json.load(fp)
    except FileNotFoundError as err:
        raise Exception(
            "TODO: Graphcore body generation is not completely implemented in new emit_tvm_ir_v2()"
        ) from err

    # Serialize as "<name>/<row>/<row>...;<name>/..." where each row is the
    # list's items joined by ',' (list brackets and spaces stripped).
    props = ';'.join(
        k + '/' + '/'.join(str(row)[1:-1].replace(', ', ',') for row in range_book['book'][k])
        for k in range_book['book'])

    full_body = f'''// Antares Property (k * ax_id + l .. r): {props}

#include <poplar/Vertex.hpp>

using namespace poplar;

#define int8 char
#define int16 short
#define int32 int
#define int64 long
#define float16 half
#define float32 float
#define float64 double

#define min(x, y) ((x) < (y) ? (x) : (y))
#define max(x, y) ((x) > (y) ? (x) : (y))

{blend_code}
class CODELET_{kernel_name}: public Vertex {{

public:
  bool compute() {{
{body}
    return true;
  }}

{func_args}}};
'''
    return full_body