def _specialize_iet(self, iet, **kwargs):
    """
    Wrap the Iteration/Expression tree with the OPS set-up and tear-down calls.

    For every group of affine trees found in ``iet``, the symbols used by the
    first tree (minus those it defines) are collected and, unless they are
    constants, turned into ``ops_dat`` declarations via ``create_ops_dat``.
    Each group is then handed to ``opsit``; the kernel it returns is appended
    to ``self._ops_kernels``.

    Parameters
    ----------
    iet
        The Iteration/Expression tree to specialize.
    **kwargs
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    ``iet`` unchanged if it contains no affine trees; otherwise a ``List``
    whose body is: the ``ops_init`` call, the OPS block declaration, the
    statements produced by ``create_ops_dat``/``opsit``, the ``ops_partition``
    call, ``iet`` and the ``ops_exit`` call.

    Raises
    ------
    AssertionError
        If the affine trees do not all have the same number of dimensions.
    """
    warning("The OPS backend is still work-in-progress")

    # Computed once and reused by both passes below
    affine_trees = find_affine_trees(iet)

    # Without affine trees there is nothing to generate (and no
    # dimensionality to declare the OPS block with)
    if not affine_trees:
        return iet

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])

    ops_block = OpsBlock('block')

    # Extract all symbols that need to be converted to ops_dat
    dims = []
    to_dat = set()
    for section, trees in affine_trees.items():
        dims.append(len(trees[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(trees[0].root))
        symbols -= set(FindSymbols('defines').visit(trees[0].root))
        to_dat |= symbols

    # `all(...)` is required here: asserting the bare generator expression
    # would always succeed, since a generator object is truthy
    assert all(d == dims[0] for d in dims), (
        "The OPS backend currently assumes that all kernels "
        "have the same number of dimensions")

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue

        pre_time_loop.extend(create_ops_dat(f, name_to_ops_dat, ops_block))

    for n, (section, trees) in enumerate(affine_trees.items()):
        pre_loop, ops_kernel = opsit(trees, n)

        pre_time_loop.extend(pre_loop)
        self._ops_kernels.append(ops_kernel)

    ops_block_init = Expression(
        ClusterizedEq(
            Eq(ops_block,
               namespace['ops_decl_block'](dims[0], Literal('"block"')))))

    self._headers.append(namespace['ops_define_dimension'](dims[0]))
    self._includes.append('stdio.h')

    body = [
        ops_init,
        ops_block_init,
        *pre_time_loop,
        ops_partition,
        iet,
        ops_exit
    ]

    return List(body=body)
def _specialize_iet(self, iet, **kwargs):
    """
    Replace each group of affine trees in ``iet`` with the parallel-loop call
    block returned by ``opsit`` and wrap the result between OPS calls.

    Side effects: ``'ops_seq.h'`` is appended to ``self._includes``; every
    callable kernel returned by ``opsit`` is appended to
    ``self._header_functions``; an ``OPS_<n>D`` define is appended to
    ``self._headers`` when at least one group of trees was processed.

    Parameters
    ----------
    iet
        The Iteration/Expression tree to specialize.
    **kwargs
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    A ``List`` whose body is: the ``ops_init`` call, the declarations returned
    by ``opsit``, the transformed ``iet``, the ``ops_timing_output`` call and
    the ``ops_exit`` call.
    """
    mapper = {}

    self._includes.append('ops_seq.h')

    ops_init = Call("ops_init", [0, 0, 2])
    ops_timing = Call("ops_timing_output", [FunctionPointer("stdout")])
    ops_exit = Call("ops_exit")

    global_declarations = []
    # Stays None if no affine tree is found; otherwise holds the value
    # returned by the last `opsit` call
    dims = None
    for n, (section, trees) in enumerate(find_affine_trees(iet).items()):
        callable_kernel, declarations, par_loop_call_block, dims = opsit(
            trees, n)
        global_declarations.extend(declarations)

        self._header_functions.append(callable_kernel)
        mapper[trees[0].root] = par_loop_call_block
        mapper.update({i.root: mapper.get(i.root) for i in trees})  # Drop trees

    # Only emit the define when a dimensionality is actually known, to avoid
    # generating a meaningless `#define OPS_NoneD`
    if dims is not None:
        self._headers.append('#define OPS_%sD' % dims)

    warning("The OPS backend is still work-in-progress")

    global_declarations.append(Transformer(mapper).visit(iet))

    return List(
        body=[ops_init, *global_declarations, ops_timing, ops_exit])
def _specialize_iet(self, iet, **kwargs):
    """
    Offload one or more loop nests of the Iteration/Expression tree to YASK.

    The YASK compiler is invoked to generate YASK code for each candidate
    group of affine trees; the generated code is then called from within the
    transformed Iteration/Expression tree. Candidates for which a
    ``NotImplementedError`` is raised are logged and skipped. Whatever is not
    offloaded is finally passed on to the parent class' ``_specialize_iet``.
    """
    self.yk_solns = OrderedDict()
    replacements = {}

    candidates = find_affine_trees(iet)
    for idx, (section, nests) in enumerate(candidates.items()):
        root_dims = (it.dim.root for it in flatten(nests))
        dimensions = tuple(filter_ordered(root_dims))
        context = contexts.fetch(dimensions, self._dtype)

        # A unique name for the 'real' compiler and kernel solutions
        digest = Signer._digest(configuration, *[nest.root for nest in nests])
        name = namespace['jit-soln'](digest)

        # The YASK compiler solution backing this Operator
        yc_soln = context.make_yc_solution(name)

        try:
            # Create the YASK grids and fill `yc_soln` with the equations
            local_grids = yaskit(nests, yc_soln)

            # The IET nodes replacing the offloaded trees
            yk_soln_obj = YaskSolnObject(namespace['code-soln-name'](idx))
            run_name = namespace['code-soln-run']
            funcall = make_sharedptr_funcall(run_name, ['time'], yk_soln_obj)
            funcall = Offloaded(funcall, self._dtype)
            replacements[nests[0].root] = funcall
            # Drop trees: any root without a replacement is mapped to None
            for nest in nests:
                replacements.setdefault(nest.root, None)

            # `funcall` is an external function call
            self._func_table[run_name] = MetaCall(None, False)

            # JIT-compile the YASK kernel just created
            yk_soln = context.make_yk_solution(name, yc_soln, local_grids)
            self.yk_solns[(dimensions, yk_soln_obj)] = yk_soln

            # Report some information about the new solution
            summary = (yc_soln.get_name(),
                       yc_soln.get_num_grids(),
                       yc_soln.get_num_equations())
            log("Solution '%s' contains %d grid(s) and %d equation(s)." % summary)
        except NotImplementedError as e:
            log("Unable to offload a candidate tree. Reason: [%s]" % str(e))

    iet = Transformer(replacements).visit(iet)

    if not self.yk_solns:
        log("No offloadable trees found")

    # Some Iteration/Expression trees are not offloaded to YASK and may
    # require further processing to be executed in YASK, due to the differences
    # in storage layout employed by Devito and YASK
    yk_grid_objs = {}
    for i in self._input:
        if i.from_YASK:
            yk_grid_objs[i.name] = YaskGridObject(i.name)
    for i in self._local_grids:
        yk_grid_objs[i] = YaskGridObject(i)
    iet = make_grid_accesses(iet, yk_grid_objs)

    # Finally, let the parent class optimize all non-yaskized loops
    return super(OperatorYASK, self)._specialize_iet(iet, **kwargs)
def _specialize_iet(self, iet, **kwargs):
    """
    Replace the affine trees in ``iet`` with OPS parallel-loop calls and wrap
    the result with the OPS set-up, data declaration, data fetch and
    tear-down statements.

    Side effects: each kernel returned by ``opsit`` is registered in
    ``self._func_table``; the OPS dimension define is appended to
    ``self._headers``; ``'stdio.h'`` and ``'ops_seq.h'`` are appended to
    ``self._includes``.

    Parameters
    ----------
    iet
        The Iteration/Expression tree to specialize.
    **kwargs
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    ``iet`` unchanged if it contains no affine trees; otherwise a ``List``
    whose body is: the ``ops_init`` call, the OPS block declaration, the
    statements produced by ``create_ops_dat``/``opsit``, the ``ops_partition``
    call, the transformed ``iet``, the statements produced by
    ``create_ops_fetch`` and the ``ops_exit`` call.

    Raises
    ------
    AssertionError
        If the affine trees do not all have the same number of dimensions.
    """
    warning("The OPS backend is still work-in-progress")

    affine_trees = find_affine_trees(iet).items()

    # If there is no affine trees, then there is no loop to be optimized using OPS.
    if not affine_trees:
        return iet

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])

    # Extract all symbols that need to be converted to ops_dat
    dims = []
    to_dat = set()
    for _, tree in affine_trees:
        dims.append(len(tree[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(tree[0].root))
        symbols -= set(FindSymbols('defines').visit(tree[0].root))
        to_dat |= symbols

    # Checked before `dims[0]` is relied upon. `all(...)` is required:
    # asserting the bare generator expression would always succeed, since a
    # generator object is truthy
    assert all(d == dims[0] for d in dims), (
        "The OPS backend currently assumes that all kernels "
        "have the same number of dimensions")

    # Create the OPS block for this problem
    ops_block = OpsBlock('block')
    ops_block_init = Expression(
        ClusterizedEq(
            Eq(ops_block,
               namespace['ops_decl_block'](dims[0], Literal('"block"')))))

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    after_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue

        pre_time_loop.extend(create_ops_dat(f, name_to_ops_dat, ops_block))
        # To return the result to Devito, it is necessary to copy the data
        # from the dat object back to the CPU memory.
        after_time_loop.extend(
            create_ops_fetch(f, name_to_ops_dat,
                             self.time_dimension.extreme_max))

    # Generate ops kernels for each offloadable iteration tree
    mapper = {}
    for n, (_, tree) in enumerate(affine_trees):
        pre_loop, ops_kernel, ops_par_loop_call = opsit(
            tree, n, name_to_ops_dat, ops_block, dims[0])

        pre_time_loop.extend(pre_loop)
        self._func_table[namespace['ops_kernel_file'](ops_kernel.name)] = \
            MetaCall(ops_kernel, False)
        mapper[tree[0].root] = ops_par_loop_call
        mapper.update({i.root: mapper.get(i.root) for i in tree})  # Drop trees

    iet = Transformer(mapper).visit(iet)

    self._headers.append(namespace['ops_define_dimension'](dims[0]))
    self._includes.extend(['stdio.h', 'ops_seq.h'])

    body = [
        ops_init,
        ops_block_init,
        *pre_time_loop,
        ops_partition,
        iet,
        *after_time_loop,
        ops_exit
    ]

    return List(body=body)
def make_ops_kernels(iet):
    """
    Replace the affine trees in ``iet`` with OPS parallel-loop calls and
    rebuild ``iet`` with the OPS set-up, data declaration, data fetch and
    tear-down statements around its original body.

    Parameters
    ----------
    iet
        The Iteration/Expression tree to process.

    Returns
    -------
    tuple
        ``(iet, metadata)``. If ``iet`` contains no affine trees, it is
        returned unchanged together with an empty dict. Otherwise ``metadata``
        has the keys ``'includes'`` (``'stdio.h'`` and ``'ops_seq.h'``),
        ``'ffuncs'`` (the kernels returned by ``opsit``) and ``'headers'``
        (the OPS dimension define).

    Raises
    ------
    AssertionError
        If the affine trees do not all have the same number of dimensions.
    """
    warning("The OPS backend is still work-in-progress")

    affine_trees = find_affine_trees(iet).items()

    # If there is no affine trees, then there is no loop to be optimized using OPS.
    if not affine_trees:
        return iet, {}

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])

    # Extract all symbols that need to be converted to ops_dat
    dims = []
    to_dat = set()
    for _, tree in affine_trees:
        dims.append(len(tree[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(tree[0].root))
        symbols -= set(FindSymbols('defines').visit(tree[0].root))
        to_dat |= symbols

    # Checked before `dims[0]` is relied upon. `all(...)` is required:
    # asserting the bare generator expression would always succeed, since a
    # generator object is truthy
    assert all(d == dims[0] for d in dims), (
        "The OPS backend currently assumes that all kernels "
        "have the same number of dimensions")

    # Create the OPS block for this problem
    ops_block = OpsBlock('block')
    ops_block_init = Expression(
        ClusterizedEq(
            Eq(ops_block,
               namespace['ops_decl_block'](dims[0], Literal('"block"')))))

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    after_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue

        pre_time_loop.extend(create_ops_dat(f, name_to_ops_dat, ops_block))
        # Copy data from device to host
        after_time_loop.extend(
            create_ops_fetch(f, name_to_ops_dat, f.grid.time_dim.extreme_max))

    # Generate ops kernels for each offloadable iteration tree
    mapper = {}
    ffuncs = []
    for n, (_, tree) in enumerate(affine_trees):
        pre_loop, ops_kernel, ops_par_loop_call = opsit(
            tree, n, name_to_ops_dat, ops_block, dims[0])

        pre_time_loop.extend(pre_loop)
        ffuncs.append(ops_kernel)
        mapper[tree[0].root] = ops_par_loop_call
        mapper.update({i.root: mapper.get(i.root) for i in tree})  # Drop trees

    iet = Transformer(mapper).visit(iet)

    iet = iet._rebuild(body=flatten([
        ops_init,
        ops_block_init,
        pre_time_loop,
        ops_partition,
        iet.body,
        after_time_loop,
        ops_exit
    ]))

    return iet, {
        'includes': ['stdio.h', 'ops_seq.h'],
        'ffuncs': ffuncs,
        'headers': [namespace['ops_define_dimension'](dims[0])]
    }
def make_yask_kernels(iet, **kwargs):
    """
    Offload the affine trees found in ``iet`` to YASK.

    Each offloaded group of trees is replaced by a call running the
    corresponding YASK kernel solution, which is also stored in the
    ``yk_solns`` mapping supplied via ``kwargs``. Groups mixing several
    dtypes, or for which a ``NotImplementedError`` is raised, are logged and
    left untouched.

    Parameters
    ----------
    iet
        The Iteration/Expression tree to process.
    **kwargs
        Must contain ``yk_solns``, a mapping that is updated in place with
        ``(dimensions, solution object) -> kernel solution`` entries.

    Returns
    -------
    tuple
        The rebuilt ``iet`` and an empty dict.
    """
    yk_solns = kwargs.pop('yk_solns')
    replacements = {}

    candidates = find_affine_trees(iet)
    for idx, (section, nests) in enumerate(candidates.items()):
        root_dims = (it.dim.root for it in flatten(nests))
        dimensions = tuple(filter_ordered(root_dims))

        # Retrieve the section dtype
        dtypes = set(expr.dtype for expr in FindNodes(Expression).visit(section))
        if len(dtypes) != 1:
            log("Unable to offload in presence of mixed-precision arithmetic")
            continue
        dtype = next(iter(dtypes))

        context = contexts.fetch(dimensions, dtype)

        # A unique name for the 'real' compiler and kernel solutions
        digest = Signer._digest(configuration, *[nest.root for nest in nests])
        name = namespace['jit-soln'](digest)

        # The YASK compiler solution backing this Operator
        yc_soln = context.make_yc_solution(name)

        try:
            # Create the YASK vars and fill `yc_soln` with the equations
            local_vars = yaskit(nests, yc_soln)

            # The IET nodes replacing the offloaded trees
            yk_soln_obj = YASKSolnObject(namespace['code-soln-name'](idx))
            funcall = make_sharedptr_funcall(namespace['code-soln-run'],
                                             ['time'], yk_soln_obj)
            funcall = Offloaded(funcall, dtype)
            replacements[nests[0].root] = funcall
            # Drop trees: any root without a replacement is mapped to None
            for nest in nests:
                replacements.setdefault(nest.root, None)

            # JIT-compile the YASK kernel just created
            yk_soln = context.make_yk_solution(name, yc_soln, local_vars)
            yk_solns[(dimensions, yk_soln_obj)] = yk_soln

            # Report some information about the new solution
            summary = (yc_soln.get_name(),
                       yc_soln.get_num_vars(),
                       yc_soln.get_num_equations())
            log("Solution '%s' contains %d var(s) and %d equation(s)." % summary)
        except NotImplementedError as e:
            log("Unable to offload a candidate tree. Reason: [%s]" % str(e))

    iet = Transformer(replacements).visit(iet)

    if not yk_solns:
        log("No offloadable trees found")

    # Some Iteration/Expression trees are not offloaded to YASK and may
    # require further processing to be executed through YASK, due to the
    # different storage layout
    yk_var_objs = {}
    for i in FindSymbols().visit(iet):
        if i.from_YASK:
            yk_var_objs[i.name] = YASKVarObject(i.name)
    for i in get_local_vars(yk_solns):
        yk_var_objs[i] = YASKVarObject(i)
    iet = make_var_accesses(iet, yk_var_objs)

    # The signature needs to be updated
    # TODO: this could be done automagically through the iet pass engine, but
    # currently it only supports *appending* to the parameters list. While here
    # we actually need to change it as some parameters may disappear (x_m, x_M, ...)
    iet = iet._rebuild(parameters=derive_parameters(iet, True))

    return iet, {}
def _specialize_iet(self, iet, **kwargs):
    """
    Rewrite the Iteration/Expression tree so that the computation of one or
    more loop nests is carried out by YASK.

    The YASK compiler generates YASK code for every candidate group of affine
    trees, and the transformed tree calls into that code. A candidate raising
    ``NotImplementedError`` is logged and not offloaded. The remaining loops
    are handed over to the parent class' ``_specialize_iet``.
    """
    mapper = {}
    self.yk_solns = OrderedDict()

    numbered_trees = enumerate(find_affine_trees(iet).items())
    for n, (section, trees) in numbered_trees:
        iterations = flatten(trees)
        dimensions = tuple(filter_ordered(i.dim.root for i in iterations))
        context = contexts.fetch(dimensions, self._dtype)

        # Name uniquely identifying the 'real' compiler and kernel solutions
        tree_roots = [i.root for i in trees]
        name = namespace['jit-soln'](Signer._digest(configuration, *tree_roots))

        # YASK compiler solution for this Operator
        yc_soln = context.make_yc_solution(name)

        try:
            # Generate the YASK grids; `yc_soln` gets populated with equations
            local_grids = yaskit(trees, yc_soln)

            # Build the IET nodes that take the place of the trees
            soln_name = namespace['code-soln-name'](n)
            yk_soln_obj = YaskSolnObject(soln_name)
            funcall = Offloaded(
                make_sharedptr_funcall(namespace['code-soln-run'], ['time'],
                                       yk_soln_obj),
                self._dtype
            )
            mapper[trees[0].root] = funcall
            dropped = {i.root: mapper.get(i.root) for i in trees}
            mapper.update(dropped)  # Drop trees

            # Register `funcall` as an external function call
            self._func_table[namespace['code-soln-run']] = MetaCall(None, False)

            # JIT-compile the new YASK kernel and keep track of it
            key = (dimensions, yk_soln_obj)
            self.yk_solns[key] = context.make_yk_solution(name, yc_soln,
                                                          local_grids)

            # Log a short description of the newly constructed solution
            log("Solution '%s' contains %d grid(s) and %d equation(s)." %
                (yc_soln.get_name(), yc_soln.get_num_grids(),
                 yc_soln.get_num_equations()))
        except NotImplementedError as e:
            log("Unable to offload a candidate tree. Reason: [%s]" % str(e))

    iet = Transformer(mapper).visit(iet)

    if not self.yk_solns:
        log("No offloadable trees found")

    # Some Iteration/Expression trees are not offloaded to YASK and may
    # require further processing to be executed in YASK, due to the differences
    # in storage layout employed by Devito and YASK
    from_yask = [i.name for i in self._input if i.from_YASK]
    yk_grid_objs = {name: YaskGridObject(name) for name in from_yask}
    yk_grid_objs.update({i: YaskGridObject(i) for i in self._local_grids})
    iet = make_grid_accesses(iet, yk_grid_objs)

    # Finally optimize all non-yaskized loops
    iet = super(OperatorYASK, self)._specialize_iet(iet, **kwargs)

    return iet
def _specialize_iet(cls, iet, **kwargs):
    """
    Offload one or more loop nests of the Iteration/Expression tree to YASK.

    The YASK compiler is invoked to generate YASK code for each candidate
    group of affine trees; the generated code is then called from within the
    transformed Iteration/Expression tree. The kernel solutions are stored in
    the ``yk_solns`` mapping, which must be supplied via ``kwargs`` and is
    removed from it before delegating to the parent class'
    ``_specialize_iet``. Groups mixing several dtypes, or for which a
    ``NotImplementedError`` is raised, are logged and not offloaded.
    """
    yk_solns = kwargs.pop('yk_solns')
    replacements = {}

    candidates = find_affine_trees(iet)
    for idx, (section, nests) in enumerate(candidates.items()):
        root_dims = (it.dim.root for it in flatten(nests))
        dimensions = tuple(filter_ordered(root_dims))

        # Retrieve the section dtype
        dtypes = set(expr.dtype for expr in FindNodes(Expression).visit(section))
        if len(dtypes) != 1:
            log("Unable to offload in presence of mixed-precision arithmetic")
            continue
        dtype = next(iter(dtypes))

        context = contexts.fetch(dimensions, dtype)

        # A unique name for the 'real' compiler and kernel solutions
        digest = Signer._digest(configuration, *[nest.root for nest in nests])
        name = namespace['jit-soln'](digest)

        # The YASK compiler solution backing this Operator
        yc_soln = context.make_yc_solution(name)

        try:
            # Create the YASK vars and fill `yc_soln` with the equations
            local_vars = yaskit(nests, yc_soln)

            # The IET nodes replacing the offloaded trees
            yk_soln_obj = YaskSolnObject(namespace['code-soln-name'](idx))
            funcall = make_sharedptr_funcall(namespace['code-soln-run'],
                                             ['time'], yk_soln_obj)
            funcall = Offloaded(funcall, dtype)
            replacements[nests[0].root] = funcall
            # Drop trees: any root without a replacement is mapped to None
            for nest in nests:
                replacements.setdefault(nest.root, None)

            # JIT-compile the YASK kernel just created
            yk_soln = context.make_yk_solution(name, yc_soln, local_vars)
            yk_solns[(dimensions, yk_soln_obj)] = yk_soln

            # Report some information about the new solution
            summary = (yc_soln.get_name(),
                       yc_soln.get_num_vars(),
                       yc_soln.get_num_equations())
            log("Solution '%s' contains %d var(s) and %d equation(s)." % summary)
        except NotImplementedError as e:
            log("Unable to offload a candidate tree. Reason: [%s]" % str(e))

    iet = Transformer(replacements).visit(iet)

    if not yk_solns:
        log("No offloadable trees found")

    # Some Iteration/Expression trees are not offloaded to YASK and may
    # require further processing to be executed through YASK, due to the
    # different storage layout
    yk_var_objs = {}
    for i in FindSymbols().visit(iet):
        if i.from_YASK:
            yk_var_objs[i.name] = YaskVarObject(i.name)
    for i in cls._get_local_vars(yk_solns):
        yk_var_objs[i] = YaskVarObject(i)
    iet = make_var_accesses(iet, yk_var_objs)

    # The signature needs to be updated
    iet = iet._rebuild(parameters=derive_parameters(iet, True))

    return super(OperatorYASK, cls)._specialize_iet(iet, **kwargs)