def override_log(key, old, new):
    logging.info(
        'Replacing {} for test type: {}. Old value:'
        ' ({}), New value: ({})'.format(
            key,
            stringify_args(
                [ttype, test['eval-type'], jtype, stype], joiner='.'),
            stringify_args(listify(old)),
            stringify_args(listify(new))))
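# For illustration only (all values hypothetical), the resulting log line
# looks like:
#   Replacing num_cores for test type: jacobian.performance.exact.sparse.
#   Old value: (1, 2), New value: (4)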
def _raise(desc, inp, nameref, shape):
    logger = logging.getLogger(__name__)
    logger.debug('{} array for driver kernel {} does not '
                 'match expected shape (from array {}). '
                 'Expected: ({}), got: ({})'.format(
                     desc, inp.name, nameref,
                     stringify_args(shape),
                     stringify_args(inp.shape)))
    raise InvalidInputSpecificationException(inp.name)
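# A self-contained sketch of the shape rule that triggers _raise (the helper
# name here is illustrative, not part of pyJac): driver arrays must agree in
# all leading dimensions, while the last dimension may differ and is widened
# to the maximum of the two.
def _compatible_shape(expected, candidate):
    if len(expected) != len(candidate):
        return None  # rank mismatch -> would _raise
    if any(x != y for x, y in zip(expected[:-1], candidate[:-1])):
        return None  # leading-dimension mismatch -> would _raise
    return expected[:-1] + (max(expected[-1], candidate[-1]),)

assert _compatible_shape((100, 16), (100, 12)) == (100, 16)
assert _compatible_shape((100, 16, 3), (100, 12, 3)) is None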
def __init__(self, bad_inputs):
    from pyjac.utils import stringify_args, listify
    self.message = (
        'Inputs: ({}) were incorrectly or conflictingly specified. '
        'See debug output for more information'.format(
            stringify_args(listify(bad_inputs))))
    super(InvalidInputSpecificationException, self).__init__(self.message)
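# Usage for illustration (hypothetical array names): the constructor accepts
# a single name or a list, via listify:
#
#   raise InvalidInputSpecificationException(['phi', 'jac'])
#   -> 'Inputs: (phi, jac) were incorrectly or conflictingly specified. ...'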
def __internal_validator(self, field, valuelist, valid, message,
                         necessary=True):
    valuelist = listify(valuelist)
    if six.callable(valid):
        badvals = [x for x in valuelist if not valid(x)]
    else:
        badvals = [x for x in valuelist if x not in valid]
    if badvals and necessary:
        args = (badvals,)
        if not six.callable(valid):
            args = (badvals, valid)
        self._error(
            field, message.format(*tuple(stringify_args(x) for x in args)))
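# A minimal sketch of the two validation modes above (names illustrative):
# ``valid`` may be either a container of allowed values or a callable
# predicate.
values = [1, 2, 17]
allowed = [1, 2, 3]
badvals = [x for x in values if x not in allowed]   # container -> [17]
positive = lambda x: x > 0
badvals = [x for x in values if not positive(x)]    # callable  -> []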
def test_lockstep_driver(self):
    # get rate info
    rate_info = determine_jac_inds(self.store.reacs, self.store.specs,
                                   RateSpecialization.fixed)
    mod_test = get_run_source()
    for kind, loopy_opts in OptionLoopWrapper.from_get_oploop(
            self, do_ratespec=False, langs=get_test_langs(),
            do_vector=True, yield_index=True):
        # make namestore
        namestore = arc.NameStore(loopy_opts, rate_info)
        # kernel 1 - need the jacobian reset kernel
        reset = reset_arrays(loopy_opts, namestore)

        # kernel 2 - incrementer
        # make mapstore, arrays and kernel info
        mapstore = arc.MapStore(loopy_opts, namestore.phi_inds, None)

        # use arrays of 2 & 3 dimensions to test the driver's copying
        base_phi_shape = namestore.n_arr.shape
        P_lp, P_str = mapstore.apply_maps(namestore.P_arr, arc.global_ind)
        phi_lp, phi_str = mapstore.apply_maps(namestore.n_arr,
                                              arc.global_ind, arc.var_name)
        inputs = [P_lp.name, phi_lp.name]
        base_jac_shape = namestore.jac.shape
        jac_lp, jac_str = mapstore.apply_maps(namestore.jac, arc.global_ind,
                                              arc.var_name, arc.var_name)
        outputs = [jac_lp.name]
        kernel_data = [P_lp, phi_lp, jac_lp]
        kernel_data.extend(arc.initial_condition_dimension_vars(
            loopy_opts, None))
        instructions = Template("""
            ${phi_str} = ${phi_str} + ${P_str} {id=0, dep=*}
            ${jac_str} = ${jac_str} + ${phi_str} {id=1, dep=0, nosync=0}
        """).safe_substitute(**locals())

        # handle atomicity
        can_vec, vec_spec = ic.get_deep_specializer(
            loopy_opts, atomic_ids=['1'])
        barriers = []
        if loopy_opts.depth:
            # need a barrier between the reset & the kernel
            barriers = [(0, 1, 'global')]

        inner_kernel = k_gen.knl_info(
            name='inner',
            instructions=instructions,
            mapstore=mapstore,
            var_name=arc.var_name,
            kernel_data=kernel_data,
            silenced_warnings=['write_race(0)', 'write_race(1)'],
            can_vectorize=can_vec,
            vectorization_specializer=vec_spec)

        # put it in a generator
        generator = k_gen.make_kernel_generator(
            loopy_opts, kernel_type=KernelType.dummy,
            name='inner_kernel', kernels=[reset, inner_kernel],
            namestore=namestore,
            input_arrays=inputs[:], output_arrays=outputs[:],
            is_validation=True,
            driver_type=DriverType.lockstep,
            barriers=barriers)

        # use a "weird" (not evenly divisible by the vector width) test-size
        # to properly test the copy-in / copy-out
        test_size = self.store.test_size - 37
        if test_size <= 0:
            test_size = self.store.test_size - 1
        assert test_size > 0

        # and make
        with temporary_build_dirs() as (build, obj, lib):
            numpy_arrays = []

            def __save(shape, name, zero=False):
                data = np.zeros(shape)
                if not zero:
                    # make it a simple range
                    data.flat[:] = np.arange(np.prod(shape))
                # save
                myname = pjoin(lib, name + '.npy')
                # need to split inputs / answer
                np.save(myname, data.flatten('K'))
                numpy_arrays.append(data.flatten('K'))

            # write 'data'
            import loopy as lp
            for arr in kernel_data:
                if not isinstance(arr, lp.ValueArg):
                    __save((test_size,) + arr.shape[1:], arr.name,
                           arr.name in outputs)

            # and a parameter
            param = np.zeros((test_size,))
            param[:] = np.arange(test_size)

            # build code
            generator.generate(build, data_order=loopy_opts.order,
                               data_filename='data.bin',
                               for_validation=True)

            # write header
            write_aux(build, loopy_opts, self.store.specs, self.store.reacs)

            # generate wrapper
            pywrap(loopy_opts.lang, build, obj_dir=obj, out_dir=lib,
                   ktype=KernelType.dummy,
                   file_base=generator.name,
                   additional_inputs=inputs[:],
                   additional_outputs=outputs[:])

            # and calling script
            test = pjoin(lib, 'test.py')
            inputs = utils.stringify_args(
                [pjoin(lib, inp + '.npy') for inp in inputs],
                use_quotes=True)
            str_outputs = utils.stringify_args(
                [pjoin(lib, inp + '.npy') for inp in outputs],
                use_quotes=True)

            num_threads = _get_test_input(
                'num_threads', psutil.cpu_count(logical=False))
            with open(test, 'w') as file:
                file.write(mod_test.safe_substitute(
                    package='pyjac_{lang}'.format(
                        lang=utils.package_lang[loopy_opts.lang]),
                    input_args=inputs,
                    test_arrays=str_outputs,
                    output_files=str_outputs,
                    looser_tols='[]',
                    loose_rtol=0,
                    loose_atol=0,
                    rtol=0,
                    atol=0,
                    non_array_args='{}, {}'.format(test_size, num_threads),
                    kernel_name=generator.name.title()))

            try:
                utils.run_with_our_python([test])
            except subprocess.CalledProcessError:
                logger = logging.getLogger(__name__)
                logger.debug(utils.stringify_args(vars(loopy_opts), kwd=True))
                assert False, 'lockstep_driver error'

            # calculate answers
            ns = base_jac_shape[1]
            # pressure is added to phi
            phi = numpy_arrays[1].reshape((test_size, ns),
                                          order=loopy_opts.order)
            p_arr = numpy_arrays[0]
            phi = phi + p_arr[:, np.newaxis]
            jac = numpy_arrays[2].reshape((test_size, ns, ns),
                                          order=loopy_opts.order)
            # and the diagonal of the jacobian has the updated pressure added
            jac[:, range(ns), range(ns)] += phi[:, range(ns)]
            # and read in outputs
            test = np.load(pjoin(lib, outputs[0] + '.npy')).reshape(
                jac.shape, order=loopy_opts.order)
            assert np.array_equal(test, jac)
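# A self-contained numpy sketch of the reference answer computed above, with
# illustrative sizes (test_size=4, ns=3): the driven kernel adds the pressure
# parameter to every entry of phi, then adds phi to the Jacobian diagonal.
import numpy as np
test_size, ns = 4, 3
p = np.arange(test_size, dtype=np.float64)
phi = np.arange(test_size * ns, dtype=np.float64).reshape((test_size, ns))
jac = np.zeros((test_size, ns, ns))
phi = phi + p[:, np.newaxis]                         # phi += P (broadcast)
jac[:, range(ns), range(ns)] += phi[:, range(ns)]    # diag(jac) += phi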
def __init__(self, otype, value, allowed):
    from pyjac.utils import stringify_args
    self.message = ('Value "{}" for override type "{}" is not allowed. '
                    'Allowed values are: {}'.format(
                        value, otype, stringify_args(allowed)))
    super(InvalidOverrideException, self).__init__(self.message)
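# For illustration (hypothetical values), an unknown model in a 'models'
# override is reported as:
#
#   raise InvalidOverrideException('models', 'CH4_bad', ('H2', 'CH4'))
#   -> 'Value "CH4_bad" for override type "models" is not allowed. '
#      'Allowed values are: H2, CH4'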
def get_test_matrix(work_dir, test_type, test_matrix, for_validation,
                    raise_on_missing=True, langs=get_test_langs()):
    """Builds the set of mechanisms and an ordered dictionary of test
    parameters for performance and functional testing

    Parameters
    ----------
    work_dir : str
        Working directory with mechanisms and for data
    test_type: :class:`build_type.jacobian`
        Controls some testing options (e.g., whether to do a sparse matrix
        or not)
    test_matrix: str
        The test matrix file to load
    for_validation: bool
        Determines which test type to load from the test matrix,
        validation or performance
    raise_on_missing: bool
        Raise an exception if the specified :param:`test_matrix` file is
        not found
    langs: list of str
        The allowed languages, modifiable by the :envvar:`TEST_LANGS` or
        test_langs in :file:`test_setup.py`

    Returns
    -------
    mechanisms : dict
        A dictionary indicating which mechanisms are available for testing.
        The structure is as follows:
            mech_name : {'mech': file path to the Cantera mechanism
                         'ns': number of species in the mechanism
                         'limits': {'full': XXX, 'sparse': XXX}}:
                a dictionary of limits on the number of conditions that can
                be evaluated for this mechanism (full & sparse Jacobian,
                respectively) due to memory constraints
    params : OrderedDict
        The parameters to put in an oploop
    max_vec_width : int
        The maximum vector width to test
    """
    work_dir = abspath(work_dir)

    # validate the test matrix
    matrix_name = test_matrix
    test_matrix = build_and_validate('test_matrix_schema.yaml', test_matrix)

    # check that we have the working directory
    if not exists(work_dir):
        raise Exception('Work directory {} for testing not found, '
                        'exiting...'.format(work_dir))

    # load the models
    models = load_models(work_dir, test_matrix)
    assert isinstance(test_type, build_type)

    # load tests
    tests = load_tests(test_matrix, matrix_name)
    # filter those that match the test type
    valid_str = 'validation' if for_validation else 'performance'
    tests = [test for test in tests if test['test-type'] == valid_str]
    tests = [test for test in tests
             if test['eval-type'] == enum_to_string(test_type)
             or test['eval-type'] == 'both']
    # and dictify
    tests = [OrderedDict(test) for test in tests]
    if not tests:
        raise Exception('No tests found in matrix {} for {} test of {}, '
                        'exiting...'.format(matrix_name, valid_str,
                                            enum_to_string(test_type)))

    # get defaults we haven't migrated to schema yet
    rate_spec = ['fixed', 'hybrid'] if test_type != build_type.jacobian \
        else ['fixed']
    sparse = ([enum_to_string(JacobianFormat.sparse),
               enum_to_string(JacobianFormat.full)]
              if test_type == build_type.jacobian
              else [enum_to_string(JacobianFormat.full)])
    jac_types = ([enum_to_string(JacobianType.exact),
                  enum_to_string(JacobianType.finite_difference)]
                 if (test_type == build_type.jacobian and not for_validation)
                 else [enum_to_string(JacobianType.exact)])
    split_kernels = [False]

    # and default # of cores, this may be overridden
    default_num_cores, can_override_cores = num_cores_default()

    # load platforms
    platforms = load_platforms(test_matrix, langs=langs,
                               raise_on_empty=raise_on_missing)
    platforms = [OrderedDict(platform) for platform in platforms]
    out_params = []
    logger = logging.getLogger(__name__)
    for test in tests:
        # filter platforms
        plats = [p.copy() for p in platforms]
        if 'platforms' in test:
            plats = [plat for plat in plats
                     if plat['platform'] in test['platforms']]
            if len(plats) < len(platforms):
                logger.info(
                    'Platforms ({}) filtered out for test type: {}'.format(
                        ', '.join([p['platform'] for p in platforms
                                   if p not in plats]),
                        ' - '.join([test['test-type'], test['eval-type']])))
        if not len(plats):
            logger.warn('No platforms found for test {}, skipping...'.format(
                ' - '.join([test['test-type'], test['eval-type']])))
            continue

        for plookup in plats:
            clean = plookup.copy()
            # get default number of cores
            cores = default_num_cores[:]
            # get default vector widths
            widths = plookup['width']
            is_wide = widths is not None
            depths = plookup['depth']
            is_deep = depths is not None
            if is_deep and not is_wide:
                widths = depths[:]
            # sanity check
            if is_wide or is_deep:
                assert widths is not None
            # special gpu handling for cores
            is_gpu = False
            # test platform type
            if platform_is_gpu(plookup['platform']):
                # set cores to 1
                is_gpu = True
                cores = [1]

            def apply_vectypes(lookup, widths, is_wide=is_wide,
                               is_deep=is_deep):
                if is_wide or is_deep:
                    # set vec widths
                    use_par = None in widths or (is_wide and is_deep)
                    lookup['vecsize'] = [x for x in widths[:]
                                         if x is not None]
                    base = [True] if not use_par else [True, False]
                    if is_wide:
                        lookup['wide'] = base[:]
                        base.pop()
                    if is_deep:
                        lookup['deep'] = base[:]
                else:
                    lookup['vecsize'] = [None]
                    lookup['wide'] = [False]
                    lookup['deep'] = [False]
                del lookup['width']
                del lookup['depth']
            apply_vectypes(plookup, widths)

            # default is both conp / conv
            conp = [True, False]
            order = ['C', 'F']

            # loop over possible overrides
            oploop = OptionLoop(OrderedDict([
                ('ttype', [enum_to_string(test_type)]),
                ('jtype', jac_types),
                ('stype', sparse)]))
            for i, state in enumerate(oploop):
                ttype = state['ttype']
                jtype = state['jtype']
                stype = state['stype']

                def override_log(key, old, new):
                    logging.info(
                        'Replacing {} for test type: {}. Old value:'
                        ' ({}), New value: ({})'.format(
                            key,
                            stringify_args(
                                [ttype, test['eval-type'], jtype, stype],
                                joiner='.'),
                            stringify_args(listify(old)),
                            stringify_args(listify(new))))

                # copy defaults
                icores = cores[:]
                iorder = order[:]
                iconp = conp[:]
                ivecsizes = widths[:] if widths is not None else [None]
                imodels = tuple(models.keys())
                # load overrides
                overrides = get_overrides(test, ttype, jtype, stype)
                # check that we can apply
                if 'num_cores' in overrides and not can_override_cores:
                    raise InvalidTestEnivironmentException(
                        ttype, 'num_cores', matrix_name, 'num_threads')
                elif 'num_cores' in overrides and is_gpu:
                    logger = logging.getLogger(__name__)
                    logger.info(
                        'Discarding unused "num_cores" override for GPU '
                        'platform {}'.format(plookup['platform']))
                    del overrides['num_cores']

                # now apply overrides; recognized keys are:
                # 'num_cores', 'order', 'conp', 'vecsize', 'vectype'
                # (plus their gpu- and models- variants)
                outplat = plookup.copy()
                ivectypes_override = None
                for override in overrides:
                    if override == 'num_cores':
                        override_log('num_cores', icores, overrides[override])
                        icores = overrides[override]
                    elif override == 'order' and not is_gpu:
                        override_log('order', iorder, overrides[override])
                        iorder = overrides[override]
                    elif override == 'gpuorder' and is_gpu:
                        override_log('order', iorder, overrides[override])
                        iorder = overrides[override]
                    elif override == 'conp':
                        iconp_save = iconp[:]
                        iconp = []
                        if 'conp' in overrides[override]:
                            iconp.append(True)
                        if 'conv' in overrides[override]:
                            iconp.append(False)
                        override_log('conp', iconp_save, iconp)
                    elif override == 'vecsize' and not is_gpu:
                        override_log('vecsize', ivecsizes,
                                     overrides[override])
                        outplat['vecsize'] = listify(overrides[override])
                    elif override == 'gpuvecsize' and is_gpu:
                        override_log('gpuvecsize', ivecsizes,
                                     overrides[override])
                        outplat['vecsize'] = listify(overrides[override])
                    elif override == 'vectype' and not is_gpu:
                        # we have to do this at the end
                        ivectypes_override = overrides[override]
                    elif override == 'gpuvectype' and is_gpu:
                        ivectypes_override = overrides[override]
                    elif override == 'models':
                        # check that all models are valid
                        for model in overrides[override]:
                            if model not in imodels:
                                raise InvalidOverrideException(
                                    override, model, imodels)
                        # and replace
                        override_log('models', stringify_args(imodels),
                                     stringify_args(overrides[override]))
                        imodels = tuple(overrides[override])

                if ivectypes_override is not None:
                    c = clean.copy()
                    apply_vectypes(c, outplat['vecsize'],
                                   is_wide='wide' in ivectypes_override,
                                   is_deep='deep' in ivectypes_override)
                    # and copy into working
                    outplat['wide'] = c['wide'] if 'wide' in c else [False]
                    outplat['deep'] = c['deep'] if 'deep' in c else [False]
                    outplat['vecsize'] = c['vecsize']
                    old = ['']
                    if is_wide:
                        old += ['wide']
                    if is_deep:
                        old += ['deep']
                    elif not is_wide:
                        old += ['par']
                    override_log('vecsize', old, ivectypes_override)

                # and finally, convert back to an option loop format
                out_params.append(
                    [('num_cores', icores),
                     ('order', iorder),
                     ('rate_spec', rate_spec),
                     ('split_kernels', split_kernels),
                     ('conp', iconp),
                     ('sparse', [stype]),
                     ('jac_type', [jtype]),
                     ('models', [imodels])] +
                    [(key, value) for key, value in six.iteritems(outplat)])

    max_vec_width = 1
    vector_params = [dict(p)['vecsize'] for p in out_params
                     if 'vecsize' in dict(p)
                     and dict(p)['vecsize'] != [None]]
    if vector_params:
        max_vec_width = max(max_vec_width,
                            max([max(x) for x in vector_params]))
    from . import reduce_oploop
    loop = reduce_oploop(out_params)
    return models, loop, max_vec_width
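# A hedged usage sketch (directory and matrix file names are hypothetical):
#
#   models, loop, max_vec_width = get_test_matrix(
#       'perf_tests', build_type.jacobian, 'test_matrix.yaml',
#       for_validation=False)
#   for state in loop:
#       # each state is one combination of num_cores / order / conp /
#       # vectorization settings / models to run
#       ...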
def get_driver(loopy_opts, namestore, inputs, outputs, driven,
               test_size=None):
    """
    Implements a driver function for kernel evaluation.
    This allows pyJac to utilize a smaller working buffer (sized to the
    global work size), and implements a static-like scheduling algorithm

    Notes
    -----
    Currently Loopy doesn't have the machinery to enable native calling of
    other loopy kernels, so we have to fudge this a bit (and this can't be
    used for unit-tests). Future versions will allow us to natively wrap
    test functions (i.e., once the new function calling interface is in
    place in Loopy)

    :see:`driver-function` for more information

    Parameters
    ----------
    loopy_opts: :class:`loopy_options`
        The loopy options specifying how to create this kernel
    namestore: :class:`NameStore`
        The namestore class that owns our arrays
    inputs: list of :class:`lp.KernelArgument`
        The arrays that should be copied into internal working buffers
        before calling subfunctions
    outputs: list of :class:`lp.KernelArgument`
        The arrays that should be copied back into global memory after
        calling subfunctions
    driven: :class:`kernel_generator`
        The kernel generator to wrap in the driver
    test_size: int, optional
        The problem size, used in creation of the :class:`MapStore`s

    Returns
    -------
    knl_list : list of :class:`knl_info`
        The generated infos for feeding into the kernel generator
    """

    # We have to do some shenanigans here to get this to work in loopy:
    #
    # 1. Loopy currently doesn't allow you to alter the for-loop increment
    #    size, so for OpenCL, where we must increment by the global work
    #    size, we have to put a dummy for-loop in, and teach the kernel
    #    generator to work around it
    #
    # 2. Additionally, the OpenMP target in Loopy is Coming Soon (TM), hence
    #    we need our own dynamic scheduling preamble for the driver loop
    #    (if we're operating in queue-form)
    #
    # 3. Finally, Loopy is just now supporting the ability to natively call
    #    other kernels, so for the moment we still need to utilize the dummy
    #    function calling we have set up for the finite difference Jacobian

    # first, get our input / output arrays
    arrays = {}
    to_find = set(listify(inputs)) | set(listify(outputs))
    # create mapping of array names
    array_names = {v.name: v for k, v in six.iteritems(vars(namestore))
                   if isinstance(v, arc.creator)
                   and not (v.fixed_indicies or v.affine)}
    for arr in to_find:
        arr_creator = next(
            (array_names[x] for x in array_names if x == arr), None)
        if arr_creator is None:
            continue
        arrays[arr] = arr_creator

    if len(arrays) != len(to_find):
        missing = to_find - set(arrays.keys())
        logger = logging.getLogger(__name__)
        logger.debug('Input/output arrays for queue_driver kernel {} not '
                     'found.'.format(stringify_args(missing)))
        raise InvalidInputSpecificationException(missing)

    def arr_non_ic(array_input):
        return len(array_input.shape) > 1

    # ensure the inputs and outputs are all identically sized (among those
    # that have a non-initial-condition dimension)
    def __check(check_input):
        shape = ()

        def _raise(desc, inp, nameref, shape):
            logger = logging.getLogger(__name__)
            logger.debug('{} array for driver kernel {} does not '
                         'match expected shape (from array {}). '
                         'Expected: ({}), got: ({})'.format(
                             desc, inp.name, nameref,
                             stringify_args(shape),
                             stringify_args(inp.shape)))
            raise InvalidInputSpecificationException(inp.name)

        nameref = None
        desc = 'Input' if check_input else 'Output'
        for inp in [arrays[x] for x in (inputs if check_input else outputs)]:
            if not arr_non_ic(inp):
                # only the initial condition dimension, fine
                continue
            if shape:
                if inp.shape != shape and len(inp.shape) == len(shape):
                    # allow different shapes in the last index
                    if not all(x == y for x, y in zip(*(
                            inp.shape[:-1], shape[:-1]))):
                        _raise(desc, inp, nameref, shape)
                    # otherwise, take the maximum of the shape entry
                    shape = shape[:-1] + (max(shape[-1], inp.shape[-1]),)
                elif inp.shape != shape:
                    _raise(desc, inp, nameref, shape)
            else:
                nameref = inp.name
                shape = inp.shape[:]
        if not shape:
            logger = logging.getLogger(__name__)
            logger.debug('No {} arrays supplied to driver that require '
                         'copying to working buffer!'.format(desc))
            raise InvalidInputSpecificationException(
                'Driver ' + desc + ' arrays')
        return shape

    def create_interior_kernel(for_input):
        shape = __check(for_input)
        name = 'copy_{}'.format('in' if for_input else 'out')
        # get arrays
        arrs = [arrays[x] for x in (inputs if for_input else outputs)]

        # create a dummy map and store
        map_shape = np.arange(shape[1], dtype=arc.kint_type)
        mapper = arc.creator(name, arc.kint_type, map_shape.shape, 'C',
                             initializer=map_shape)
        mapstore = arc.MapStore(loopy_opts, mapper, test_size)

        # determine what other inames we need, if any
        namer = UniqueNameGenerator(set([mapstore.iname]))
        extra_inames = []
        for i in six.moves.range(2, len(shape)):
            iname = namer(mapstore.iname)
            extra_inames.append((iname, '0 <= {} < {}'.format(
                iname, shape[i])))

        indicies = [arc.global_ind, mapstore.iname] + [
            ex[0] for ex in extra_inames]
        global_indicies = indicies[:]
        global_indicies[0] += ' + ' + driver_offset.name

        # bake in SIMD pre-split
        vec_spec = None
        split_spec = None
        conditional_index = get_problem_index(loopy_opts)

        def __build(arr, local, **kwargs):
            inds = global_indicies if not local else indicies
            if isinstance(arr, arc.jac_creator) and arr.is_sparse:
                # this is a sparse Jacobian, hence we have to override the
                # default indexing (as we're doing a straight copy)
                kwargs['ignore_lookups'] = True
            if arr_non_ic(arr):
                return mapstore.apply_maps(arr, *inds, **kwargs)
            else:
                return mapstore.apply_maps(arr, inds[0], **kwargs)

        # create working buffer version of arrays
        working_buffers = []
        working_strs = []
        for arr in arrs:
            arr_lp, arr_str = __build(arr, True, use_local_name=True)
            working_buffers.append(arr_lp)
            working_strs.append(arr_str)

        # create global versions of arrays
        buffers = []
        strs = []
        for arr in arrs:
            arr_lp, arr_str = __build(arr, False,
                                      reshape_to_working_buffer=False)
            buffers.append(arr_lp)
            strs.append(arr_str)

        # now create the instructions
        instruction_template = Template("""
            if ${ind} < ${problem_size} ${shape_check}
                ${local_buffer} = ${global_buffer} {id=copy_${name}}
            end
        """) if for_input else Template("""
            if ${ind} < ${problem_size} ${shape_check}
                ${global_buffer} = ${local_buffer} {id=copy_${name}}
            end
        """)

        warnings = []
        instructions = []
        for i, arr in enumerate(arrs):
            # get shape check
            shape_check = ''
            if arr.shape[-1] != shape[-1] and len(arr.shape) == len(shape):
                shape_check = ' and {} < {}'.format(
                    indicies[-1], arr.shape[-1])
            instructions.append(instruction_template.substitute(
                local_buffer=working_strs[i],
                global_buffer=strs[i],
                ind=conditional_index,
                problem_size=arc.problem_size.name,
                name=arr.name,
                shape_check=shape_check))
            warnings.append('write_race(copy_{})'.format(arr.name))
        if loopy_opts.is_simd:
            warnings.append('vectorize_failed')
            warnings.append('unrolled_vector_iname_conditional')
        instructions = '\n'.join(instructions)

        kwargs = {}
        if loopy_opts.lang == 'c':
            # override the number of copies in this function to 1
            # (i.e., 1 per-thread)
            kwargs['iname_domain_override'] = [(
                arc.global_ind, '0 <= {} < 1'.format(arc.global_ind))]

        priorities = ([arc.global_ind + '_outer'] if loopy_opts.pre_split
                      else [arc.global_ind]) + [arc.var_name]
        # and return the kernel info
        return knl_info(name=name,
                        instructions=instructions,
                        mapstore=mapstore,
                        var_name=arc.var_name,
                        extra_inames=extra_inames,
                        kernel_data=buffers + working_buffers + [
                            arc.work_size, arc.problem_size, driver_offset],
                        silenced_warnings=warnings,
                        vectorization_specializer=vec_spec,
                        split_specializer=split_spec,
                        unrolled_vector=True,
                        loop_priority=set([tuple(priorities + [
                            iname[0] for iname in extra_inames])]),
                        **kwargs)

    copy_in = create_interior_kernel(True)
    # create a dummy kernel info that simply calls our internal function
    instructions = driven.name + '()'
    # create mapstore
    call_name = driven.name
    repeats = 1
    if loopy_opts.depth:
        # we need 'var_name' to have a non-unity size
        repeats = loopy_opts.vector_width

    map_shape = np.arange(repeats, dtype=arc.kint_type)
    mapper = arc.creator(call_name, arc.kint_type, map_shape.shape, 'C',
                         initializer=map_shape)
    mapstore = arc.MapStore(loopy_opts, mapper, test_size)
    mangler = lp_pregen.MangleGen(call_name, tuple(), tuple())
    kwargs = {}
    if loopy_opts.lang == 'c':
        # override the number of calls to the driven function in the driver,
        # this is currently fixed to 1 (i.e., 1 per-thread)
        kwargs['iname_domain_override'] = [(
            arc.global_ind, '0 <= {} < 1'.format(arc.global_ind))]

    func_call = knl_info(name='driver',
                         instructions=instructions,
                         mapstore=mapstore,
                         kernel_data=[arc.work_size, arc.problem_size],
                         var_name=arc.var_name,
                         extra_inames=copy_in.extra_inames[:],
                         manglers=[mangler],
                         **kwargs)

    copy_out = create_interior_kernel(False)
    # and return
    return [copy_in, func_call, copy_out]
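# A hedged usage sketch (argument values are hypothetical): the three
# returned infos wrap the driven kernel with working-buffer copies, and are
# fed back into a kernel generator:
#
#   copy_in, func_call, copy_out = get_driver(
#       loopy_opts, namestore, inputs=['P_arr', 'phi'], outputs=['jac'],
#       driven=inner_generator)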