def __get_spec_lib(self, state, opts):
    """
    Generate the species-rates kernel sources into the store's build directory.

    Parameters
    ----------
    state : mapping
        The current option-loop state; only the ``'conp'`` entry is read.
    opts :
        The options object forwarded unchanged to the kernel creator and to
        the auxiliary header writer.
    """
    store = self.store
    out_path = store.build_dir
    is_conp = state['conp']

    # create the generator and emit its sources
    generator = get_specrates_kernel(
        store.reacs, store.specs, opts, conp=is_conp)
    generator.generate(out_path)

    # the auxiliary header is written next to the generated sources
    write_aux(out_path, opts, store.specs, store.reacs)
def test_compile_jacobian(self, state):
    """
    Generate, wrap and import a Jacobian kernel for every option-loop state.

    For each state the build is cleaned, the selected Jacobian generator is
    run, the auxiliary header is written, a Python wrapper is generated and a
    small import script is executed in a child interpreter.

    Parameters
    ----------
    state : mapping
        Test parameters; the ``'lang'`` and ``'jac_type'`` entries are read.

    Raises
    ------
    subprocess.CalledProcessError
        If the generated import script exits with a non-zero status.
    """
    lang = state['lang']
    jac_type = state['jac_type']
    opts, oploop = self.__get_objs(lang=lang)
    build_dir = self.store.build_dir
    obj_dir = self.store.obj_dir
    lib_dir = self.store.lib_dir
    packages = {'c': 'pyjac_c', 'opencl': 'pyjac_ocl'}

    # the generator choice only depends on jac_type, so select it once
    method = get_jacobian_kernel
    if jac_type == 'finite_difference':
        method = finite_difference_jacobian

    import_script = os.path.join(lib_dir, 'test_import.py')

    # NOTE: a distinct name is used for the loop state so that the `state`
    # parameter is not silently overwritten while iterating
    for loop_state in oploop:
        # clean old
        self.__cleanup()
        # create / write files
        conp = loop_state['conp']
        kgen = method(self.store.reacs, self.store.specs, opts, conp=conp)
        # generate
        kgen.generate(build_dir)
        # write header
        write_aux(build_dir, opts, self.store.specs, self.store.reacs)
        # test wrapper generation
        generate_wrapper(opts.lang, build_dir, obj_dir=obj_dir,
                         out_dir=lib_dir, btype=build_type.jacobian)
        # create the test importer, and run
        imp = test_utils.get_import_source()
        with open(import_script, 'w') as handle:
            handle.write(imp.substitute(path=lib_dir,
                                        package=packages[lang]))
        # run with the interpreter executing this test rather than whatever
        # 'pythonX.Y' happens to resolve to on PATH -- the wrapper was built
        # against the current environment
        subprocess.check_call([sys.executable, import_script])
def test_lockstep_driver(self):
    """
    Exercise the lockstep driver with a two-kernel dummy generator.

    For every option set yielded by the option loop, a reset kernel and an
    'inner' kernel (adds the pressure array to the state array, then adds the
    state array onto the Jacobian array) are wrapped in a kernel generator
    created with ``DriverType.lockstep``.  Input arrays are saved as ``.npy``
    files, the code and Python wrapper are generated, a calling script is
    written from the ``get_run_source()`` template and executed, and the
    Jacobian written back is compared (``np.array_equal``) to an answer
    computed with NumPy.

    Raises
    ------
    AssertionError
        If the calling script fails or the output differs from the answer.
    """
    # get rate info
    rate_info = determine_jac_inds(self.store.reacs, self.store.specs,
                                   RateSpecialization.fixed)
    mod_test = get_run_source()

    for kind, loopy_opts in OptionLoopWrapper.from_get_oploop(
            self, do_ratespec=False, langs=get_test_langs(),
            do_vector=True, yield_index=True):

        # make namestore
        namestore = arc.NameStore(loopy_opts, rate_info)

        # kernel 1 - need the jacobian reset kernel
        reset = reset_arrays(loopy_opts, namestore)
        # kernel 2 - incrementer
        # make mapstore, arrays and kernel info
        mapstore = arc.MapStore(loopy_opts, namestore.phi_inds, None)

        # use arrays of 2 & 3 dimensions to test the driver's copying
        # NOTE(review): base_phi_shape is not referenced again in this
        # function as shown
        base_phi_shape = namestore.n_arr.shape
        P_lp, P_str = mapstore.apply_maps(namestore.P_arr, arc.global_ind)
        phi_lp, phi_str = mapstore.apply_maps(namestore.n_arr, arc.global_ind,
                                              arc.var_name)
        inputs = [P_lp.name, phi_lp.name]
        base_jac_shape = namestore.jac.shape
        jac_lp, jac_str = mapstore.apply_maps(namestore.jac, arc.global_ind,
                                              arc.var_name, arc.var_name)
        outputs = [jac_lp.name]
        kernel_data = [P_lp, phi_lp, jac_lp]
        kernel_data.extend(arc.initial_condition_dimension_vars(
            loopy_opts, None))
        # the template is filled from locals(): the names phi_str, P_str and
        # jac_str above must therefore keep exactly these spellings
        instructions = Template("""
            ${phi_str} = ${phi_str} + ${P_str} {id=0, dep=*}
            ${jac_str} = ${jac_str} + ${phi_str} {id=1, dep=0, nosync=0}
        """).safe_substitute(**locals())

        # handle atomicity
        can_vec, vec_spec = ic.get_deep_specializer(
            loopy_opts, atomic_ids=['1'])
        barriers = []
        if loopy_opts.depth:
            # need a barrier between the reset & the kernel
            barriers = [(0, 1, 'global')]

        inner_kernel = k_gen.knl_info(
            name='inner',
            instructions=instructions,
            mapstore=mapstore,
            var_name=arc.var_name,
            kernel_data=kernel_data,
            silenced_warnings=['write_race(0)', 'write_race(1)'],
            can_vectorize=can_vec,
            vectorization_specializer=vec_spec)

        # put it in a generator
        generator = k_gen.make_kernel_generator(
            loopy_opts, kernel_type=KernelType.dummy,
            name='inner_kernel', kernels=[reset, inner_kernel],
            namestore=namestore,
            input_arrays=inputs[:],
            output_arrays=outputs[:],
            is_validation=True,
            driver_type=DriverType.lockstep,
            barriers=barriers)

        # use a "weird" (non-evenly divisibly by vector width) test-size to
        # properly test the copy-in / copy-out
        test_size = self.store.test_size - 37
        if test_size <= 0:
            test_size = self.store.test_size - 1
        assert test_size > 0
        # and make
        with temporary_build_dirs() as (build, obj, lib):

            # flattened copies of every saved array, in kernel_data order;
            # indexed positionally below when computing the answer
            numpy_arrays = []

            def __save(shape, name, zero=False):
                # write an array of the given shape to <lib>/<name>.npy;
                # filled with 0..N-1 unless `zero` is set
                data = np.zeros(shape)
                if not zero:
                    # make it a simple range
                    data.flat[:] = np.arange(np.prod(shape))
                # save
                myname = pjoin(lib, name + '.npy')
                # need to split inputs / answer
                np.save(myname, data.flatten('K'))
                numpy_arrays.append(data.flatten('K'))

            # write 'data'
            import loopy as lp
            for arr in kernel_data:
                if not isinstance(arr, lp.ValueArg):
                    # output arrays are saved zeroed, inputs as a range
                    __save((test_size,) + arr.shape[1:], arr.name,
                           arr.name in outputs)

            # and a parameter
            # NOTE(review): param is not referenced again in this function
            # as shown
            param = np.zeros((test_size,))
            param[:] = np.arange(test_size)

            # build code
            generator.generate(build,
                               data_order=loopy_opts.order,
                               data_filename='data.bin',
                               for_validation=True)

            # write header
            write_aux(build, loopy_opts, self.store.specs, self.store.reacs)

            # generate wrapper
            pywrap(loopy_opts.lang, build,
                   obj_dir=obj, out_dir=lib,
                   ktype=KernelType.dummy,
                   file_base=generator.name,
                   additional_inputs=inputs[:],
                   additional_outputs=outputs[:])

            # and calling script
            test = pjoin(lib, 'test.py')

            # NOTE: `inputs` is rebound here from a list of names to the
            # stringified list of .npy paths
            inputs = utils.stringify_args(
                [pjoin(lib, inp + '.npy') for inp in inputs], use_quotes=True)
            str_outputs = utils.stringify_args(
                [pjoin(lib, inp + '.npy') for inp in outputs],
                use_quotes=True)

            num_threads = _get_test_input(
                'num_threads', psutil.cpu_count(logical=False))
            with open(test, 'w') as file:
                file.write(mod_test.safe_substitute(
                    package='pyjac_{lang}'.format(
                        lang=utils.package_lang[loopy_opts.lang]),
                    input_args=inputs,
                    test_arrays=str_outputs,
                    output_files=str_outputs,
                    looser_tols='[]',
                    loose_rtol=0,
                    loose_atol=0,
                    rtol=0,
                    atol=0,
                    non_array_args='{}, {}'.format(
                        test_size, num_threads),
                    kernel_name=generator.name.title(),))

            try:
                utils.run_with_our_python([test])
            except subprocess.CalledProcessError:
                # log the option set that failed before failing the test
                logger = logging.getLogger(__name__)
                logger.debug(utils.stringify_args(vars(loopy_opts), kwd=True))
                assert False, 'lockstep_driver error'

            # calculate answers
            ns = base_jac_shape[1]
            # pressure is added to phi
            phi = numpy_arrays[1].reshape((test_size, ns),
                                          order=loopy_opts.order)
            p_arr = numpy_arrays[0]
            phi = phi + p_arr[:, np.newaxis]
            jac = numpy_arrays[2].reshape((test_size, ns, ns),
                                          order=loopy_opts.order)
            # and the diagonal of the jacobian has the updated pressure added
            jac[:, range(ns), range(ns)] += phi[:, range(ns)]
            # and read in outputs
            # NOTE: `test` is rebound here from the script path to the array
            # loaded from the output file
            test = np.load(pjoin(lib, outputs[0] + '.npy')).reshape(
                jac.shape, order=loopy_opts.order)
            assert np.array_equal(test, jac)
def __run_test(self, method, test_python_wrapper=True,
               ktype=KernelType.species_rates, **oploop_keywords):
    """
    Generate code with `method` for each option set and build it.

    Parameters
    ----------
    method : callable
        Called as ``method(reacs, specs, opts, conp=...)``; must return an
        object with a ``generate`` method.
    test_python_wrapper : bool
        If True, build the Python wrapper and run a generated import script.
        Otherwise compile a library, looping over shared / static builds.
    ktype :
        The kernel type forwarded to the wrapper / library generation.
    **oploop_keywords
        Extra keywords forwarded to ``OptionLoopWrapper.from_get_oploop``.
    """
    # library builds additionally iterate over shared / static linkage
    extra_axes = {} if test_python_wrapper else {'shared': [True, False]}
    oploop_keywords.update(extra_axes)
    skipped_state_keys = ['conp'] + list(extra_axes.keys())

    option_loop = OptionLoopWrapper.from_get_oploop(
        self, ignored_state_vals=skipped_state_keys, do_conp=False,
        **oploop_keywords)

    for opts in option_loop:
        with temporary_build_dirs() as (build_dir, obj_dir, lib_dir):
            # write files
            is_conp = option_loop.state['conp']
            generator = method(self.store.reacs, self.store.specs, opts,
                               conp=is_conp)
            # generate
            names = [species.name for species in self.store.specs]
            reactions = [str(rxn) for rxn in self.store.reacs]
            generator.generate(build_dir, species_names=names,
                               rxn_strings=reactions)
            # write header
            write_aux(build_dir, opts, self.store.specs, self.store.reacs)

            if not test_python_wrapper:
                # compile
                generate_library(opts.lang, build_dir, obj_dir=obj_dir,
                                 out_dir=lib_dir,
                                 shared=option_loop.state['shared'],
                                 ktype=ktype)
                continue

            package = 'pyjac_{}'.format(utils.package_lang[opts.lang])
            # test wrapper generation
            pywrap(opts.lang, build_dir, obj_dir=obj_dir, out_dir=lib_dir,
                   ktype=ktype)

            # fill in and run the import script
            template = test_utils.get_import_source()
            script_text = template.substitute(
                path=lib_dir,
                package=package,
                kernel=utils.enum_to_string(ktype).title(),
                nsp=len(self.store.specs),
                nrxn=len(self.store.reacs))
            with open(os.path.join(lib_dir, 'test_import.py'), 'w') as out:
                out.write(script_text)

            utils.run_with_our_python(
                [os.path.join(lib_dir, 'test_import.py')])
def test_read_initial_conditions(self):
    """
    Build and run the generated initial-conditions reader for each option set.

    A dummy single-instruction kernel generator is created so that the common
    sources (including the ``read_initial_conditions`` file) are generated.
    That file is compiled, a Cython wrapper is built through a generated
    ``setup.py``, the tester script is produced with Cog, reference data is
    written to disk and the tester is executed.
    """
    setup_template = test_utils.get_read_ics_source()
    option_loop = OptionLoopWrapper.from_get_oploop(self, do_conp=True)

    for opts in option_loop:
        with temporary_build_dirs() as (build_dir, obj_dir, lib_dir):
            conp = option_loop.state['conp']
            utils_dir = os.path.join(self.store.script_dir, 'test_utils')
            setup_script = os.path.join(build_dir, 'setup.py')

            # make a dummy generator
            insns = (""" {spec} = {param} {{id=0}} """)
            index_domain = arc.creator(
                'domain', arc.kint_type, (10, ), 'C',
                initializer=np.arange(10, dtype=arc.kint_type))
            mapstore = arc.MapStore(opts, index_domain, None)

            # create global args
            array_shape = (arc.problem_size.name, 10)
            param_arr = arc.creator(arc.pressure_array, np.float64,
                                    array_shape, opts.order)
            state_arr = arc.creator(arc.state_vector, np.float64,
                                    array_shape, opts.order)
            namestore = type('', (object, ), {'jac': ''})

            # create array / array strings
            param_lp, param_str = mapstore.apply_maps(param_arr, 'j', 'i')
            spec_lp, spec_str = mapstore.apply_maps(state_arr, 'j', 'i')

            # create kernel info
            instructions = insns.format(param=param_str, spec=spec_str)
            info = knl_info('spec_eval', instructions, mapstore,
                            kernel_data=[spec_lp, param_lp, arc.work_size],
                            silenced_warnings=['write_race(0)'])

            # create the generator
            kgen = make_kernel_generator(
                opts, KernelType.dummy, [info], namestore,
                input_arrays=[param_arr.name, state_arr.name],
                output_arrays=[state_arr.name],
                name='ric_tester')

            # make kernels, then generate the common files
            kgen._make_kernels()
            _, record, _ = kgen._generate_wrapping_kernel(build_dir)
            kgen._generate_common(build_dir, record)
            ric_source = os.path.join(
                build_dir,
                'read_initial_conditions' + utils.file_ext[opts.lang])

            # write header
            write_aux(build_dir, opts, self.store.specs, self.store.reacs)

            # write the setup script
            setup_text = setup_template.safe_substitute(
                buildpath=build_dir, obj_dir=obj_dir)
            with open(setup_script, 'w') as out:
                out.write(setup_text)

            # and compile
            from pyjac.libgen import compile, get_toolchain
            toolchain = get_toolchain(opts.lang)
            compile(opts.lang, toolchain, [ric_source], obj_dir=obj_dir)

            # write wrapper
            self.__write_with_subs(
                'read_ic_wrapper.pyx', utils_dir, build_dir,
                header_ext=utils.header_ext[opts.lang])

            # setup
            utils.run_with_our_python(
                [setup_script, 'build_ext', '--build-lib', lib_dir])

            infile = os.path.join(utils_dir, 'ric_tester.py.in')
            outfile = os.path.join(lib_dir, 'ric_tester.py')

            # cogify
            cog_args = ['cogapp', '-e', '-d', '-Dconp={}'.format(conp),
                        '-o', outfile, infile]
            try:
                Cog().callableMain(cog_args)
            except Exception:
                import logging
                logger = logging.getLogger(__name__)
                logger.error(
                    'Error generating initial conditions reader: {}'.format(
                        outfile))
                raise

            # save phi, param in correct order
            if conp:
                phi = self.store.phi_cp
            else:
                phi = self.store.phi_cv
            flat_phi = phi.flatten(opts.order)
            if conp:
                param_values = self.store.P
            else:
                param_values = self.store.V
            flat_phi.tofile(os.path.join(lib_dir, 'phi_test.npy'))
            param_values.tofile(os.path.join(lib_dir, 'param_test.npy'))

            # save bin file
            temperature = np.reshape(phi[:, 0], (-1, 1))
            parameter = np.reshape(param_values, (-1, 1))
            species = phi[:, 1:]
            binary_data = np.concatenate(
                (temperature, parameter, species), axis=1)
            binary_data = binary_data.flatten('K')
            with open(os.path.join(lib_dir, 'data.bin'), 'wb') as out:
                binary_data.tofile(out)

            # and run
            utils.run_with_our_python(
                [outfile, opts.order, str(self.store.test_size)])
def test_strided_copy():
    """
    Test the generated host <-> device memory copies with a non-full last run.

    For every option set from ``__test_cases()``, a set of range-filled test
    arrays is described by a ``CallgenResult``, which is pickled and consumed
    by a cog template that emits a small C / OpenCL program.  The program
    copies the saved host arrays in, copies them back out in chunks of
    ``per_run`` and asserts element-wise equality.  The program is compiled
    together with the copied ``kernel_utils`` dependencies, linked, and run.

    Raises
    ------
    subprocess.CalledProcessError
        If the built test executable exits with a non-zero status.
    """
    wrapper = __test_cases()
    for opts in wrapper:
        lang = opts.lang
        order = opts.order
        depth = opts.depth
        width = opts.width

        with temporary_build_dirs() as (build_dir, obj_dir, lib_dir):
            # the active vector size: depth takes precedence over width,
            # and zero means no vectorization
            vec_size = depth if depth else (width if width else 0)
            # set max per run such that we will have a non-full run (1024 - 1008)
            # this should also be evenly divisible by depth and width
            # (as should the non full run)
            max_per_run = 16
            # number of ics should be divisibly by depth and width
            ics = max_per_run * 8 + vec_size
            if vec_size:
                assert ics % vec_size == 0
                assert max_per_run % vec_size == 0
                assert int(np.floor(ics / max_per_run) * max_per_run) % vec_size == 0

            # build initial callgen
            callgen = CallgenResult(
                order=opts.order, lang=opts.lang,
                dev_mem_type=wrapper.state['dev_mem_type'],
                type_map=type_map(opts.lang))

            # set type
            dtype = np.dtype('float64')

            # create test arrays
            def __create(shape):
                # build a range-filled array of shape (ics,) + shape
                if not isinstance(shape, tuple):
                    shape = (shape,)
                shape = (ics,) + shape
                arr = np.zeros(shape, dtype=dtype, order=order)
                arr.flat[:] = np.arange(np.prod(shape))
                return arr
            arrays = [__create(16), __create(10), __create(20), __create((20, 20)),
                      __create(())]
            const = [np.arange(10, dtype=dtype)]

            # max size for initialization in kernel
            max_size = max([x.size for x in arrays])

            def _get_dtype(dtype):
                # convert a numpy dtype to the loopy type for this language
                return lp.to_loopy_type(
                    dtype, target=get_target(opts.lang))

            # global args a0..a4 for the test arrays, followed by a read-only
            # temporary (a5) for the constant
            lp_arrays = [lp.GlobalArg('a{}'.format(i),
                                      shape=(arc.problem_size.name,) + a.shape[1:],
                                      order=order,
                                      dtype=_get_dtype(arrays[i].dtype))
                         for i, a in enumerate(arrays)] + \
                [lp.TemporaryVariable(
                    'a{}'.format(i + len(arrays)),
                    dtype=_get_dtype(dtype), order=order,
                    initializer=const[i],
                    read_only=True, shape=const[i].shape)
                 for i in range(len(const))]
            # NOTE: `const` is rebound from numpy arrays to the loopy
            # temporaries created above
            const = lp_arrays[len(arrays):]

            # now update args
            callgen = callgen.copy(name='test',
                                   input_args={'test': [x for x in lp_arrays
                                                        if x not in const]},
                                   output_args={'test' : []},
                                   host_constants={'test': const})

            temp_fname = os.path.join(build_dir, 'in' + utils.file_ext[lang])
            fname = os.path.join(build_dir, 'test' + utils.file_ext[lang])
            # the cog template below is runtime text: it is written to
            # temp_fname and expanded by Cog into fname further down
            with open(temp_fname, 'w') as file:
                file.write(dedent("""
                /*[[[cog
                     # expected globals:
                     # callgen - path to serialized callgen object
                     # lang - the language to use
                     # problem_size - the problem size
                     # max_per_run - the run-size
                     # max_size - the maximum array size
                     # order - The data ordering

                     import cog
                     import os
                     import numpy as np
                     from six.moves import cPickle as pickle

                     # unserialize the callgen
                     with open(callgen, 'rb') as file:
                         callgen = pickle.load(file)

                     # determine the headers to include
                     lang_headers = []
                     if lang == 'opencl':
                         lang_headers.extend([
                                         '#include "memcpy_2d.oclh"',
                                         '#include "vectorization.oclh"',
                                         '#include <CL/cl.h>',
                                         '#include "error_check.oclh"'])
                     elif lang == 'c':
                         lang_headers.extend([
                             '#include "memcpy_2d.hpp"',
                             '#include "error_check.hpp"'])
                     cog.outl('\\n'.join(lang_headers))
                 ]]]
                 [[[end]]]*/

                // normal headers
                #include <stdlib.h>
                #include <string.h>
                #include <assert.h>

                int main()
                {
                    /*[[[cog
                        if lang == 'opencl':
                            cog.outl(
                            'double* h_temp_d;\\n'
                            'int* h_temp_i;\\n'
                            '// create a context / queue\\n'
                            'int lim = 10;\\n'
                            'cl_uint num_platforms;\\n'
                            'cl_uint num_devices;\\n'
                            'cl_platform_id platform [lim];\\n'
                            'cl_device_id device [lim];\\n'
                            'cl_int return_code;\\n'
                            'cl_context context;\\n'
                            'cl_command_queue queue;\\n'
                            'check_err(clGetPlatformIDs(lim, platform, &num_platforms));\\n'
                            'for (int i = 0; i < num_platforms; ++i)\\n'
                            '{\\n'
                            ' check_err(clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_ALL, '
                            ' lim, device, &num_devices));\\n'
                            ' if(num_devices > 0)\\n'
                            ' break;\\n'
                            '}\\n'
                            'context = clCreateContext(NULL, 1, &device[0], NULL, NULL, '
                            '&return_code);\\n'
                            'check_err(return_code);\\n'
                            '//create queue\\n'
                            'queue = clCreateCommandQueue(context, device[0], 0, '
                            '&return_code);\\n'
                            'check_err(return_code);\\n')
                    ]]]
                    [[[end]]]*/

                    /*[[[cog
                        # determine maximum array size
                        cog.outl('double zero [{max_size}] = {{0}};'.format(
                            max_size=max_size))

                        # init variables
                        cog.outl('int problem_size = {};'.format(problem_size))
                        cog.outl('int per_run = {};'.format(max_per_run))
                    ]]]
                    [[[end]]]*/

                    /*[[[cog
                        # create memory tool
                        from string import Template
                        import loopy as lp
                        from pyjac.kernel_utils.memory_tools import get_memory
                        from pyjac.kernel_utils.memory_tools import HostNamer
                        from pyjac.kernel_utils.memory_tools import DeviceNamer
                        mem = get_memory(callgen, host_namer=HostNamer(),
                                         device_namer=DeviceNamer())

                        # declare host and device arrays
                        for arr in callgen.kernel_args['test'] + callgen.work_arrays:
                            if not isinstance(arr, lp.ValueArg):
                                cog.outl(mem.define(False, arr))
                                cog.outl(mem.define(True, arr))
                        # define host constants
                        for arr in callgen.host_constants['test']:
                            cog.outl(mem.define(False, arr, host_constant=True,
                                                force_no_const=True))
                            cog.outl(mem.define(True, arr))

                        # and declare the temporary array
                        cog.outl(mem.define(True, lp.GlobalArg(
                            'temp_d', dtype=lp.to_loopy_type(np.float64))))

                        # allocate host and device arrays
                        for arr in callgen.kernel_args['test'] + callgen.work_arrays:
                            if not isinstance(arr, lp.ValueArg):
                                cog.outl(mem.alloc(False, arr))
                                cog.outl(mem.alloc(True, arr))
                        for arr in callgen.host_constants['test']:
                            # alloc device version of host constant
                            cog.outl(mem.alloc(True, arr))
                            # copy host constants
                            cog.outl(mem.copy(True, arr, host_constant=True))

                        def _get_size(arr):
                            size = 1
                            for x in arr.shape:
                                if not isinstance(x, int):
                                    assert x.name == 'problem_size'
                                    size *= int(problem_size)
                                else:
                                    size *= x
                            return size

                        # save copies of host arrays
                        host_copies = [Template(
                            '${type} ${save} [${size}] = {${vals}};\\n'
                            'memset(${host}, 0, ${size} * sizeof(${type}));'
                            ).safe_substitute(
                                save='h_' + arr.name + '_save',
                                host='h_' + arr.name,
                                size=_get_size(arr),
                                vals=', '.join([str(x) for x in np.arange(
                                    _get_size(arr)).flatten(order)]),
                                type=callgen.type_map[arr.dtype])
                            for arr in callgen.kernel_args['test'] + callgen.host_constants['test']]
                        for hc in host_copies:
                            cog.outl(hc)
                    ]]]
                    [[[end]]]*/

                    // kernel
                    for (size_t offset = 0; offset < problem_size; offset += per_run)
                    {
                        int this_run = problem_size - offset < per_run ? \
                            problem_size - offset : per_run;
                        /* Memory Transfers into the kernel, if any */
                        /*[[[cog
                            mem2 = get_memory(callgen, host_namer=HostNamer(postfix='_save'),
                                              device_namer=DeviceNamer())
                            for arr in callgen.kernel_args['test']:
                                cog.outl(mem2.copy(True, arr))
                        ]]]
                        [[[end]]]*/

                        /* Memory Transfers out */
                        /*[[[cog
                            for arr in callgen.kernel_args['test']:
                                cog.outl(mem.copy(False, arr))
                        ]]]
                        [[[end]]]*/
                    }

                    /*[[[cog
                        # and finally check
                        check_template = Template(
                            'for(int i = 0; i < ${size}; ++i)\\n'
                            '{\\n'
                            ' assert(${host}[i] == ${save}[i]);\\n'
                            '}\\n')
                        checks = [check_template.safe_substitute(
                            host=mem.get_name(False, arr),
                            save=mem2.get_name(False, arr),
                            size=_get_size(arr))
                            for arr in callgen.kernel_args['test']]
                        for check in checks:
                            cog.outl(check)
                    ]]]
                    [[[end]]]*/

                    /*[[[cog
                        if lang == 'opencl':
                            cog.outl('check_err(clFlush(queue));')
                            cog.outl('check_err(clReleaseCommandQueue(queue));')
                            cog.outl('check_err(clReleaseContext(context));')
                    ]]]
                    [[[end]]]*/
                    return 0;
                }
                """.strip()))

            # serialize callgen
            with open(os.path.join(build_dir, 'callgen.pickle'), 'wb') as file:
                pickle.dump(callgen, file)

            # cogify
            from cogapp import Cog
            # the -D definitions supply the globals the template expects
            cmd = [
                'cog', '-e', '-d', '-Dcallgen={}'.format(
                    os.path.join(build_dir, 'callgen.pickle')),
                '-Dmax_per_run={}'.format(max_per_run),
                '-Dproblem_size={}'.format(ics),
                '-Dmax_size={}'.format(max_size),
                '-Dlang={}'.format(lang),
                '-Dorder={}'.format(order),
                '-o', fname, temp_fname]
            Cog().callableMain(cmd)

            files = [fname]

            # write aux
            write_aux(build_dir, opts, [], [])

            # copy any deps
            def __copy_deps(lang, scan_path, out_path, change_extension=True,
                            ffilt=None, nfilt=None):
                # copy the regular files in scan_path (skipping '*.in') to
                # out_path, optionally keeping only names containing `ffilt`
                # and dropping names containing `nfilt`; returns the copied
                # non-header files
                deps = [x for x in os.listdir(scan_path) if os.path.isfile(
                    os.path.join(scan_path, x)) and not x.endswith('.in')]
                if ffilt is not None:
                    deps = [x for x in deps if ffilt in x]
                if nfilt is not None:
                    deps = [x for x in deps if nfilt not in x]
                files = []
                for dep in deps:
                    dep_dest = dep
                    dep_is_header = dep.endswith(utils.header_ext[lang])
                    ext = (utils.file_ext[lang] if not dep_is_header
                           else utils.header_ext[lang])
                    if change_extension and not dep.endswith(ext):
                        # swap the extension for the language's own
                        dep_dest = dep[:dep.rfind('.')] + ext
                    shutil.copyfile(os.path.join(scan_path, dep),
                                    os.path.join(out_path, dep_dest))
                    if not dep_is_header:
                        files.append(os.path.join(out_path, dep_dest))
                return files

            scan = os.path.join(script_dir, os.pardir, 'kernel_utils', lang)
            files += __copy_deps(lang, scan, build_dir, nfilt='.py')
            scan = os.path.join(script_dir, os.pardir, 'kernel_utils', 'common')
            files += __copy_deps(host_langs[lang], scan, build_dir,
                                 change_extension=False, ffilt='memcpy_2d')

            # build
            toolchain = get_toolchain(lang)
            obj_files = compile(
                lang, toolchain, files, source_dir=build_dir, obj_dir=obj_dir)
            lib = link(toolchain, obj_files, 'memory_test', lib_dir=lib_dir)
            # and run
            subprocess.check_call(lib)
def test_strided_copy(state):
    """
    Test the memory manager's host <-> device copies with a non-full last run.

    Range-filled test arrays are split with an ``array_splitter``, registered
    with a ``memory_manager``, and a small C / OpenCL program is assembled
    from string templates.  The program copies the saved host arrays in,
    copies them back out in chunks of ``per_run`` and asserts element-wise
    equality.  It is then compiled with the copied ``kernel_utils``
    dependencies, linked, and run.

    Parameters
    ----------
    state : mapping
        Test parameters; the ``'lang'``, ``'order'``, ``'depth'``,
        ``'width'`` and ``'device_type'`` entries are read.

    Raises
    ------
    subprocess.CalledProcessError
        If the built test executable exits with a non-zero status.
    """
    lang = state['lang']
    order = state['order']
    depth = state['depth']
    width = state['width']

    # cleanup
    clean_dir(build_dir)
    clean_dir(obj_dir)
    clean_dir(lib_dir)

    # create
    utils.create_dir(build_dir)
    utils.create_dir(obj_dir)
    utils.create_dir(lib_dir)

    # the active vector size: depth takes precedence over width, and zero
    # means no vectorization
    vec_size = depth if depth else (width if width else 0)
    # set max per run such that we will have a non-full run (1024 - 1008)
    # this should also be evenly divisible by depth and width
    # (as should the non full run)
    max_per_run = 16
    # number of ics should be divisibly by depth and width
    ics = max_per_run * 8 + vec_size
    if vec_size:
        assert ics % vec_size == 0
        assert max_per_run % vec_size == 0
        assert int(np.floor(ics / max_per_run) * max_per_run) % vec_size == 0
    dtype = np.dtype('float64')

    # create test arrays
    def __create(shape):
        # build a range-filled array of shape (ics,) + shape
        if not isinstance(shape, tuple):
            shape = (shape, )
        shape = (ics, ) + shape
        arr = np.zeros(shape, dtype=dtype, order=order)
        arr.flat[:] = np.arange(np.prod(shape))
        return arr

    arrays = [
        __create(16),
        __create(10),
        __create(20),
        __create((20, 20)),
        __create(())
    ]
    const = [np.arange(10, dtype=dtype)]
    # global args a0..a4 for the test arrays, followed by a read-only
    # temporary (a5) for the constant
    lp_arrays = [lp.GlobalArg('a{}'.format(i),
                              shape=('problem_size',) + a.shape[1:],
                              order=order,
                              dtype=(arrays + const)[i].dtype)
                 for i, a in enumerate(arrays)] + \
        [lp.TemporaryVariable('a{}'.format(i + len(arrays)),
                              dtype=dtype, order=order,
                              initializer=const[i],
                              read_only=True, shape=const[i].shape)
         for i in range(len(const))]
    # NOTE: `const` is rebound from numpy arrays to the loopy temporaries
    const = lp_arrays[len(arrays):]

    # NOTE: `dtype` is rebound from the numpy dtype to the C type name used
    # in the generated source below
    dtype = 'double'

    # create array splitter
    opts = type('', (object, ), {
        'width': width,
        'depth': depth,
        'order': order,
        'lang': lang
    })
    asplit = array_splitter(opts)

    # split numpy
    arrays = asplit.split_numpy_arrays(arrays)
    # make dummy knl
    knl = lp.make_kernel(
        '{[i]: 0 <= i <= 1}', """
        if i > 1
            a0[i, i] = 0
            a1[i, i] = 0
            a2[i, i] = 0
            a3[i, i, i] = 0
            a4[i] = 0
            <> k = a5[i]
        end
        """, lp_arrays)
    # split loopy
    lp_arrays = asplit.split_loopy_arrays(knl).args

    # now create a simple library
    mem = memory_manager(opts.lang,
                         opts.order,
                         asplit._have_split(),
                         dev_type=state['device_type'],
                         strided_c_copy=lang == 'c')
    mem.add_arrays([x for x in lp_arrays],
                   in_arrays=[x.name for x in lp_arrays if x not in const],
                   out_arrays=[x.name for x in lp_arrays if x not in const],
                   host_constants=const)

    # create "kernel"
    size_type = 'int'
    lang_headers = []
    if lang == 'opencl':
        lang_headers.extend([
            '#include "memcpy_2d.oclh"', '#include "vectorization.oclh"',
            '#include <CL/cl.h>', '#include "ocl_errorcheck.oclh"'
        ])
        size_type = 'cl_uint'
    elif lang == 'c':
        lang_headers.extend(
            ['#include "memcpy_2d.h"', '#include "error_check.h"'])

    # kernel must copy in and out, using the mem_manager's format
    # NOTE: `knl` is rebound from the loopy kernel to the C source text of
    # the copy loop
    knl = Template("""
    for (size_t offset = 0; offset < problem_size; offset += per_run)
    {
        ${type} this_run = problem_size - offset < per_run ? \
            problem_size - offset : per_run;
        /* Memory Transfers into the kernel, if any */
        ${mem_transfers_in}

        /* Memory Transfers out */
        ${mem_transfers_out}
    }
    """).safe_substitute(type=size_type,
                         mem_transfers_in=mem._mem_transfers(
                             to_device=True, host_postfix='_save'),
                         mem_transfers_out=mem.get_mem_transfers_out(),
                         problem_size=ics)

    # create the host memory allocations
    host_names = ['h_' + arr.name for arr in lp_arrays]
    host_allocs = mem.get_mem_allocs(True, host_postfix='')

    # device memory allocations
    device_allocs = mem.get_mem_allocs(False)

    # copy to save for test
    host_name_saves = ['h_' + a.name + '_save' for a in lp_arrays]
    host_const_allocs = mem.get_host_constants()
    # one initialized '<name>_save' array per test array, plus a memset that
    # zeroes the corresponding host array
    host_copies = [
        Template("""
        ${type} ${save} [${size}] = {${vals}};
        memset(${host}, 0, ${size} * sizeof(${type}));
        """).safe_substitute(save='h_' + lp_arrays[i].name + '_save',
                             host='h_' + lp_arrays[i].name,
                             size=arrays[i].size,
                             vals=', '.join(
                                 [str(x) for x in arrays[i].flatten()]),
                             type=dtype) for i in range(len(arrays))
    ]

    # and finally checks
    check_template = Template("""
        for(int i = 0; i < ${size}; ++i)
        {
            assert(${host}[i] == ${save}[i]);
        }
    """)
    checks = [
        check_template.safe_substitute(host=host_names[i],
                                       save=host_name_saves[i],
                                       size=arrays[i].size)
        for i in range(len(arrays))
    ]

    # and preambles
    ocl_preamble = """
    double* temp_d;
    int* temp_i;
    // create a context / queue
    int lim = 10;
    cl_uint num_platforms;
    cl_uint num_devices;
    cl_platform_id platform [lim];
    cl_device_id device [lim];
    cl_int return_code;
    cl_context context;
    cl_command_queue queue;
    check_err(clGetPlatformIDs(lim, platform, &num_platforms));
    for (int i = 0; i < num_platforms; ++i)
    {
        check_err(clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_ALL, lim, device,
            &num_devices));
        if(num_devices > 0)
            break;
    }
    context = clCreateContext(NULL, 1, &device[0], NULL, NULL, &return_code);
    check_err(return_code);

    //create queue
    queue = clCreateCommandQueue(context, device[0], 0, &return_code);
    check_err(return_code);
    """
    preamble = ''
    if lang == 'opencl':
        preamble = ocl_preamble

    end = ''
    if lang == 'opencl':
        end = """
        check_err(clFlush(queue));
        check_err(clReleaseCommandQueue(queue));
        check_err(clReleaseContext(context));
    """

    # NOTE(review): the generated program declares `void main()` and leaves
    # via exit(0); standard C expects `int main` -- confirm this is intended
    file_src = Template("""
${lang_headers}
#include <stdlib.h>
#include <string.h>
#include <assert.h>

void main()
{
    ${preamble}

    double zero [${max_dim}] = {0};

    ${size_type} problem_size = ${problem_size};
    ${size_type} per_run = ${max_per_run};

    ${host_allocs}
    ${host_const_allocs}
    ${mem_declares}
    ${device_allocs}

    ${mem_saves}

    ${host_constant_copy}

    ${knl}

    ${checks}

    ${end}

    exit(0);
}
""").safe_substitute(lang_headers='\n'.join(lang_headers),
                     mem_declares=mem.get_defns(),
                     host_allocs=host_allocs,
                     host_const_allocs=host_const_allocs,
                     device_allocs=device_allocs,
                     mem_saves='\n'.join(host_copies),
                     host_constant_copy=mem.get_host_constants_in(),
                     checks='\n'.join(checks),
                     knl=knl,
                     preamble=preamble,
                     end=end,
                     size_type=size_type,
                     max_per_run=max_per_run,
                     problem_size=ics,
                     max_dim=max([x.size for x in arrays]))

    # write file
    fname = os.path.join(build_dir, 'test' + utils.file_ext[lang])
    with open(fname, 'w') as file:
        file.write(file_src)
    files = [fname]

    # write aux
    write_aux(build_dir, opts, [], [])

    # copy any deps
    def __copy_deps(lang, scan_path, out_path, change_extension=True,
                    ffilt=None):
        # copy the regular files in scan_path (skipping '*.in') to out_path,
        # optionally keeping only names containing `ffilt`; returns the
        # copied non-header files
        deps = [
            x for x in os.listdir(scan_path)
            if os.path.isfile(os.path.join(scan_path, x))
            and not x.endswith('.in')
        ]
        if ffilt is not None:
            deps = [x for x in deps if ffilt in x]
        files = []
        for dep in deps:
            dep_dest = dep
            dep_is_header = dep.endswith(utils.header_ext[lang])
            ext = (utils.file_ext[lang]
                   if not dep_is_header else utils.header_ext[lang])
            if change_extension and not dep.endswith(ext):
                # swap the extension for the language's own
                dep_dest = dep[:dep.rfind('.')] + ext
            shutil.copyfile(os.path.join(scan_path, dep),
                            os.path.join(out_path, dep_dest))
            if not dep_is_header:
                files.append(os.path.join(out_path, dep_dest))
        return files

    scan = os.path.join(script_dir, os.pardir, 'kernel_utils', lang)
    files += __copy_deps(lang, scan, build_dir)
    scan = os.path.join(script_dir, os.pardir, 'kernel_utils', 'common')
    files += __copy_deps(host_langs[lang], scan, build_dir,
                         change_extension=False, ffilt='memcpy_2d')

    # build
    # NOTE: `files` is rebound from a list of paths to a list of file_struct
    # objects (paths with their extension stripped)
    files = [
        file_struct(lang, lang, f[:f.rindex('.')], [build_dir], [], build_dir,
                    obj_dir, True, True) for f in files
    ]
    # the compiler result is asserted falsy for every file
    assert not any(compiler(x) for x in files)
    lib = libgen(lang, obj_dir, lib_dir, [x.filename for x in files], True,
                 False, True)
    lib = os.path.join(lib_dir, lib)
    # and run
    subprocess.check_call(lib)
def test_read_initial_conditions(self):
    """
    Build and run the initial-conditions reader for each C option set.

    For every (order, depth, width) combination -- skipping states where both
    depth and width are set -- the ``read_initial_conditions`` C template is
    filled in and written to the build directory together with the auxiliary
    header, a generated ``setup.py`` and the Cython wrapper.  The extension is
    built, reference data is written to the library directory and the
    ``ric_tester.py`` script is executed.

    Raises
    ------
    subprocess.CalledProcessError
        If building the extension or running the tester script fails.
    """
    build_dir = self.store.build_dir
    obj_dir = self.store.obj_dir
    lib_dir = self.store.lib_dir
    setup = test_utils.get_read_ics_source()
    utils.create_dir(build_dir)
    utils.create_dir(obj_dir)
    utils.create_dir(lib_dir)

    # loop-invariant locations of the shared sources and test helpers
    common_dir = os.path.join(self.store.script_dir, os.pardir,
                              'kernel_utils', 'common')
    test_utils_dir = os.path.join(self.store.script_dir, 'test_utils')
    setup_script = os.path.join(build_dir, 'setup.py')
    tester_script = os.path.join(lib_dir, 'ric_tester.py')

    oploop = OptionLoop(
        OrderedDict([
            # no need to test conv
            ('conp', [True]),
            ('order', ['C', 'F']),
            ('depth', [4, None]),
            ('width', [4, None]),
            ('lang', ['c'])
        ]))
    for state in oploop:
        # depth and width are mutually exclusive here
        if state['depth'] and state['width']:
            continue
        self.__cleanup(False)
        # create dummy loopy opts
        opts = type('', (object, ), state)()
        asplit = array_splitter(opts)

        # get source
        path = os.path.realpath(
            os.path.join(common_dir, 'read_initial_conditions.c.in'))

        with open(path, 'r') as source:
            ric = Template(source.read())
        # subs
        ric = ric.safe_substitute(mechanism='mechanism.h',
                                  vectorization='vectorization.h')
        # write
        with open(os.path.join(build_dir, 'read_initial_conditions.c'),
                  'w') as out:
            out.write(ric)
        # write header
        write_aux(build_dir, opts, self.store.specs, self.store.reacs)

        # write setup
        with open(setup_script, 'w') as out:
            out.write(setup.safe_substitute(buildpath=build_dir))

        # copy read ics header to final dest
        shutil.copyfile(
            os.path.join(common_dir, 'read_initial_conditions.h'),
            os.path.join(build_dir, 'read_initial_conditions.h'))
        # copy wrapper
        shutil.copyfile(
            os.path.join(test_utils_dir, 'read_ic_wrapper.pyx'),
            os.path.join(build_dir, 'read_ic_wrapper.pyx'))
        # setup -- use the interpreter running this test rather than
        # whatever 'pythonX.Y' resolves to on PATH, so the extension is
        # built and imported by the same environment
        subprocess.check_call([
            sys.executable, setup_script,
            'build_ext', '--build-lib', lib_dir
        ])
        # copy in tester
        shutil.copyfile(
            os.path.join(test_utils_dir, 'ric_tester.py'),
            tester_script)

        # For simplicity (and really, lack of need) we test CONP only
        # hence, the extra variable is the volume, while the fixed parameter
        # is the pressure

        # save phi, param in correct order
        phi = (self.store.phi_cp if opts.conp else self.store.phi_cv)
        save_phi, = asplit.split_numpy_arrays(phi)
        save_phi = save_phi.flatten(opts.order)
        param = self.store.P if opts.conp else self.store.V
        save_phi.tofile(os.path.join(lib_dir, 'phi_test.npy'))
        param.tofile(os.path.join(lib_dir, 'param_test.npy'))

        # save bin file
        out_file = np.concatenate(
            (
                np.reshape(phi[:, 0], (-1, 1)),  # temperature
                np.reshape(param, (-1, 1)),  # param
                phi[:, 1:]),
            axis=1  # species
        )
        out_file = out_file.flatten('K')
        with open(os.path.join(lib_dir, 'data.bin'), 'wb') as out:
            out_file.tofile(out)

        # and run
        subprocess.check_call([
            sys.executable, tester_script,
            opts.order, str(self.store.test_size)
        ])