def __test_cases():
    """Yield one ``param`` for every state of the lang / jac_type option loop.

    The loop is built over an ordered mapping with two keys: ``lang``
    (``'opencl'``, ``'c'``) and ``jac_type`` (``'exact'``, ``'approximate'``,
    ``'finite_difference'``).  Each state produced by :class:`OptionLoop` is
    passed to ``param`` unchanged.
    """
    option_space = OrderedDict()
    option_space['lang'] = ['opencl', 'c']
    option_space['jac_type'] = ['exact', 'approximate', 'finite_difference']

    for combo in OptionLoop(option_space):
        yield param(combo)
def loopy_opts(langs=['opencl'],
               width=[4, None],
               depth=[4, None],
               order=['C', 'F']):
    """Generate dummy option classes for each allowed option combination.

    Parameters
    ----------
    langs : list
        Values for the ``lang`` option
    width : list
        Values for the ``width`` option
    depth : list
        Values for the ``depth`` option
    order : list
        Values for the ``order`` option

    Yields
    ------
    type
        A freshly created, unnamed class (not an instance) whose class
        attributes are the entries of the current option-loop state.  States
        in which both ``depth`` and ``width`` are truthy are skipped.
    """
    option_space = OrderedDict((
        ('lang', langs),
        ('width', width),
        ('depth', depth),
        ('order', order),
    ))

    for combo in OptionLoop(option_space):
        both_vectorized = combo['depth'] and combo['width']
        if not both_vectorized:
            yield type('', (object, ), combo)
def __get_objs(self, lang='opencl', depth=None, width=None, order='C'):
    """Build the loopy options and the conp/shared option loop for a test.

    Parameters
    ----------
    lang : str
        Forwarded to ``loopy_options`` as ``lang``
    depth, width :
        Forwarded to ``loopy_options`` unchanged
    order : str
        Forwarded to ``loopy_options`` as ``order``

    Returns
    -------
    opts :
        Result of ``loopy_options`` called with the arguments above plus
        ``ilp=False``, ``unr=None`` and ``platform='CPU'``
    oploop : OptionLoop
        Loop over ``conp`` (only ``True``) and ``shared`` (``True``/``False``)
    """
    settings = dict(lang=lang,
                    width=width,
                    depth=depth,
                    ilp=False,
                    unr=None,
                    order=order,
                    platform='CPU')
    options = loopy_options(**settings)

    state_space = OrderedDict()
    state_space['conp'] = [True]
    state_space['shared'] = [True, False]
    return options, OptionLoop(state_space)
def __test_cases():
    """Yield a ``param`` for each permitted lang/order/vector/device state.

    A state is dropped when:

    * both ``depth`` and ``width`` are truthy, or
    * ``lang`` is ``'c'`` and any of ``depth``, ``width`` or ``device_type``
      is not ``None``.
    """
    option_space = OrderedDict()
    option_space['lang'] = ['opencl', 'c']
    option_space['order'] = ['C', 'F']
    option_space['width'] = [4, None]
    option_space['depth'] = [4, None]
    option_space['device_type'] = (cl.device_type.CPU, cl.device_type.GPU,
                                   None)

    for combo in OptionLoop(option_space):
        vec_depth = combo['depth']
        vec_width = combo['width']

        if vec_depth and vec_width:
            continue

        # the 'c' language is only kept for unvectorized, device-less states
        if combo['lang'] == 'c' and (vec_depth is not None
                                     or vec_width is not None
                                     or combo['device_type'] is not None):
            continue

        yield param(combo)
def performance_tester(home, work_dir, use_old_opt, num_threads):
    """Runs performance testing for pyJac, TChem, and finite differences.

    For every sub-directory of ``work_dir`` that contains a ``.cti`` file,
    the ``.npy`` state files found there are stacked into ``data.bin``, and
    for each language / option combination a ``speedtest`` executable is
    generated, compiled, linked and run, appending its stdout to a
    ``*_output.txt`` file in the mechanism directory.

    Parameters
    ----------
    home : str
        Directory of source code files (templates, ``timer.h``, readers)
    work_dir : str
        Working directory with mechanisms and for data
    use_old_opt : bool
        If ``True``, use old optimization files found.  If ``False`` and an
        ``optimized.pickle`` exists in the build directory, a cache-optimized
        run that still has work to do raises an exception.
    num_threads : int
        Number of OpenMP threads to parallelize performance testing; passed
        as a command-line argument to the ``speedtest`` executable

    Returns
    -------
    None

    Raises
    ------
    SystemError
        If the ``TCHEM_HOME`` environment variable is unset or empty
    Exception
        If a previous optimization file is found and ``use_old_opt`` is False

    Notes
    -----
    Calls ``sys.exit(-1)`` if ``work_dir`` does not exist, if no mechanisms
    are found, or if any compilation returns ``-1``.  Changes the process
    working directory (``os.chdir``) to each mechanism directory and does not
    restore it.
    """
    build_dir = 'out'
    test_dir = 'test'

    work_dir = os.path.abspath(work_dir)

    #find the mechanisms to test
    mechanism_list = {}
    if not os.path.exists(work_dir):
        print('Error: work directory {} for '.format(work_dir) +
              'performance testing not found, exiting...')
        sys.exit(-1)
    for name in os.listdir(work_dir):
        if os.path.isdir(os.path.join(work_dir, name)):
            #check for cti
            files = [
                f for f in os.listdir(os.path.join(work_dir, name))
                if os.path.isfile(os.path.join(work_dir, name, f))
            ]
            for f in files:
                if f.endswith('.cti'):
                    # if several .cti files exist, the last one listed wins
                    mechanism_list[name] = {}
                    mechanism_list[name]['mech'] = f
                    # chemkin file name is derived, its existence is not
                    # checked here
                    mechanism_list[name]['chemkin'] = f.replace('.cti', '.dat')
                    gas = ct.Solution(os.path.join(work_dir, name, f))
                    mechanism_list[name]['ns'] = gas.n_species

                    # first file whose name contains 'therm', if any
                    thermo = next((tf for tf in files if 'therm' in tf), None)
                    if thermo is not None:
                        mechanism_list[name]['thermo'] = thermo

    if len(mechanism_list) == 0:
        print('No mechanisms found for performance testing in '
              '{}, exiting...'.format(work_dir))
        sys.exit(-1)

    if os.getenv('TCHEM_HOME'):
        tchem_home = os.getenv('TCHEM_HOME')
    else:
        raise SystemError('TCHEM_HOME environment variable not set.')

    # number of timing runs wanted per output file (CPU) / per step (GPU)
    cpu_repeats = 10
    gpu_repeats = 10

    # default factory handed to OptionLoop below
    # (presumably supplies the value for keys a given loop does not define,
    # e.g. 'shared' for the C loop -- TODO confirm against OptionLoop)
    def false_factory():
        return False

    c_params = {
        'lang': 'c',
        'cache_opt': [False, True],
        'finite_diffs': [False, True]
    }
    cuda_params = {
        'lang': 'cuda',
        'cache_opt': [False, True],
        'shared': [False, True],
        'finite_diffs': [False, True]
    }
    tchem_params = {'lang': 'tchem'}

    #set up testing environment
    # the library-level thread counts are pinned to 1 in the child
    # environment; num_threads is passed on the command line instead
    env = os.environ.copy()
    env['OMP_NUM_THREADS'] = str(1)
    env['MKL_NUM_THREADS'] = str(1)

    # process mechanisms in order of increasing species count
    for mech_name, mech_info in sorted(mechanism_list.items(),
                                       key=lambda x: x[1]['ns']):
        #get the cantera object
        gas = ct.Solution(os.path.join(work_dir, mech_name, mech_info['mech']))
        # NOTE(review): pmod is computed but never read in this function
        pmod = any([is_pdep(rxn) for rxn in gas.reactions()])

        #ensure directory structure is valid
        # all relative paths below (build_dir, test_dir, data.bin, *.npy)
        # are resolved against this mechanism directory
        os.chdir(os.path.join(work_dir, mech_name))
        subprocess.check_call(['mkdir', '-p', build_dir])
        subprocess.check_call(['mkdir', '-p', test_dir])

        num_conditions = 0
        npy_files = [
            f for f in os.listdir(os.path.join(work_dir, mech_name))
            if f.endswith('.npy') and os.path.isfile(f)
        ]
        data = None
        with open('data.bin', 'wb') as file:
            #load PaSR data for different pressures/conditions,
            # and save to binary C file
            for npy in sorted(npy_files):
                state_data = np.load(npy)
                # collapse the first two axes of the 3-D array into one
                state_data = state_data.reshape(
                    state_data.shape[0] * state_data.shape[1],
                    state_data.shape[2])
                if data is None:
                    data = state_data
                else:
                    data = np.vstack((data, state_data))
                num_conditions += state_data.shape[0]
                print(num_conditions, data.shape)
            if num_conditions == 0:
                # an empty data.bin is left behind in this case
                print('No data found in folder {}, continuing...'.format(
                    mech_name))
                continue
            data.tofile(file)

        #figure out gpu steps
        # powers of two strictly below num_conditions ...
        step_size = 1
        steplist = []
        while step_size < num_conditions:
            steplist.append(step_size)
            step_size *= 2
        # ... followed by num_conditions itself (step_size / 2 is the last
        # power appended, which is always below num_conditions)
        if step_size / 2 != num_conditions:
            steplist.append(num_conditions)

        the_path = os.getcwd()
        # NOTE(review): first_run is assigned but never read
        first_run = True
        # concatenate the C, CUDA and TChem option loops
        op = OptionLoop(c_params, false_factory)
        op = op + OptionLoop(cuda_params, false_factory)
        op = op + OptionLoop(tchem_params, false_factory)

        # remember whether a cache-optimization pickle already exists
        haveOpt = False
        if os.path.isfile(
                os.path.join(os.getcwd(), build_dir, 'optimized.pickle')):
            haveOpt = True

        for state in op:
            lang = state['lang']
            # TChem is compiled with the C toolchain / file extension
            temp_lang = 'c' if lang != 'cuda' else 'cuda'
            FD = state['finite_diffs']
            if FD:
                # finite-difference Jacobian source is copied, not generated
                filename = 'fd_jacob{}'.format(utils.file_ext[temp_lang])
                shutil.copy(os.path.join(home, filename),
                            os.path.join(build_dir, filename))

            opt = state['cache_opt']
            smem = state['shared']

            if any([
                    isinstance(rxn, ct.PlogReaction)
                    or isinstance(rxn, ct.ChebyshevReaction)
                    for rxn in gas.reactions()
            ]) and lang == 'tchem':
                print('TChem performance evaluation disabled; '
                      'not compatible with Plog or Chebyshev reactions.')
                continue

            # output file name encodes language and option flags
            data_output = ('{}_{}_{}_{}'.format(
                lang, 'co' if opt else 'nco', 'smem' if smem else 'nosmem',
                'fd' if FD else 'ajac') + '_output.txt')

            data_output = os.path.join(the_path, data_output)
            # work out how many runs are still missing from the output file:
            # todo maps "number of conditions" -> "runs still to do"
            if lang != 'cuda':
                repeats = cpu_repeats
                num_completed = check_file(data_output)
                todo = {num_conditions: repeats - num_completed}
            else:
                repeats = gpu_repeats
                todo = check_step_file(data_output, steplist)
                for x in todo:
                    todo[x] = repeats - todo[x]
            # nothing left to run for this configuration
            if not any(todo[x] > 0 for x in todo):
                continue

            if opt and haveOpt and not use_old_opt:
                raise Exception('Previous optimization file found... exiting')

            if lang != 'tchem':
                create_jacobian(lang,
                                mech_info['mech'],
                                optimize_cache=opt,
                                build_path=build_dir,
                                no_shared=not smem,
                                num_blocks=8,
                                num_threads=64,
                                multi_thread=multiprocessing.cpu_count())

            #now we need to write the reader
            filename = ('read_initial_conditions'
                        '{}'.format(utils.file_ext[temp_lang]))
            shutil.copy(os.path.join(home, filename),
                        os.path.join(os.getcwd(), build_dir, filename))

            #write the tester
            # template substitutions for the tester source
            file_data = {'datafile': os.path.join(the_path, 'data.bin')}
            if lang == 'c' or lang == 'cuda':
                filename = 'tester{}.in'.format(utils.file_ext[temp_lang])
                with open(os.path.join(home, filename), 'r') as file:
                    src = Template(file.read())
                src = src.substitute(file_data)
            else:
                # TChem tester additionally needs the chemkin/thermo files
                file_data['mechfile'] = mech_info['chemkin']
                if 'thermo' in mech_info:
                    file_data['thermofile'] = mech_info['thermo']
                else:
                    #it's the same file
                    file_data['thermofile'] = mech_info['chemkin']
                with open(os.path.join(home, 'tc_tester.c.in'), 'r') as file:
                    src = Template(file.read())
                src = src.substitute(file_data)
            filename = 'test{}'.format(utils.file_ext[temp_lang])
            with open(os.path.join(build_dir, filename), 'w') as file:
                file.write(src)

            #copy timer
            shutil.copy(os.path.join(home, 'timer.h'),
                        os.path.join(os.getcwd(), build_dir, 'timer.h'))

            #get file lists
            i_dirs = [build_dir]
            files = ['test', 'read_initial_conditions']

            lib = None
            #now build the library
            if lang != 'tchem':
                lib = generate_library(lang,
                                       build_dir,
                                       test_dir,
                                       finite_difference=FD,
                                       shared=not STATIC)
                lib = os.path.normpath(lib)
                # keep only the text between the first 'lib' and the
                # library extension of the normalized path
                lib = (
                    lib[lib.index('lib') +
                        len('lib'):lib.index('.so' if not STATIC else '.a')])
            else:
                files += ['mechanism', 'mass_mole']

            # Compile generated source code
            structs = [
                file_struct(lang, temp_lang, f, i_dirs,
                            (['-DFINITE_DIFF'] if FD else []), build_dir,
                            test_dir, not STATIC) for f in files
            ]
            if lang != 'cuda':
                for s in structs:
                    s.args.append('-fopenmp')

            # compile all translation units in parallel
            pool = multiprocessing.Pool()
            results = pool.map(compiler, structs)
            pool.close()
            pool.join()
            # a -1 result marks a failed compilation
            if any(r == -1 for r in results):
                sys.exit(-1)

            linker(lang, temp_lang, test_dir, files, lib)

            if lang == 'tchem':
                #copy periodic table and mechanisms in
                shutil.copy(
                    os.path.join(tchem_home, 'data', 'periodictable.dat'),
                    'periodictable.dat')

            # append timing output of every outstanding run to the data file
            with open(data_output, 'a+') as file:
                for stepsize in todo:
                    for i in range(todo[stepsize]):
                        print(i, "/", todo[stepsize])
                        subprocess.check_call([
                            os.path.join(the_path, test_dir, 'speedtest'),
                            str(stepsize),
                            str(num_threads)
                        ],
                                              stdout=file,
                                              env=env)
(Pr + 1)) * ci elif fall_type == 'chem': dci = (-Pr * Theta_Pr / (Pr + 1) + Theta_Fi - theta_Pr / (Pr + 1)) * ci elif var == 'P': if fall_type == 'fall': dci = Fi * theta_Pr / (Pr + 1) + \ (Theta_Fi - theta_Pr / (Pr + 1)) * ci elif fall_type == 'chem': dci = (Theta_Fi - theta_Pr / (Pr + 1)) * ci return Xi, dci oploop = OptionLoop({ 'fall_type': ['chem', 'fall'], 'blend_type': ['lind', 'troe', 'sri'], 'pr_type': ['mix', 'unity', 'spec'], 'var': ['T', 'nj', 'V', 'P'] }) for i, state in enumerate(oploop): term_dict = {} Xi, dci = __get_dci(**state) def __rec_lims(term): def __separate(args): has = [] hasnt = [] for a in args: (has if a.has(Xi) else hasnt).append(a) return has, hasnt
def get_test_matrix(work_dir, test_type, test_matrix, for_validation,
                    raise_on_missing=True, langs=get_test_langs()):
    """Loads a test matrix file and converts it into the mechanisms and
    option-loop parameters used for performance and functional testing

    Parameters
    ----------
    work_dir : str
        Working directory with mechanisms and for data
    test_type: :class:`build_type`
        Controls some testing options (e.g., whether to do a sparse matrix or
        not)
    test_matrix: str
        The test matrix file to load
    for_validation: bool
        Determines which test type to load from the test matrix,
        validation (``True``) or performance (``False``)
    raise_on_missing: bool
        Forwarded to ``load_platforms`` as ``raise_on_empty``
    langs: list of str
        The allowed languages, modifiable by the :envvar:`TEST_LANGS` or
        test_langs in :file:`test_setup.py`.  Note that the default is
        evaluated once, when this function is defined.

    Returns
    -------
    mechanisms : dict
        The result of ``load_models``.  As documented by the original
        authors, a dictionary indicating which mechanisms are available for
        testing, structured as follows:
            mech_name : {'mech' : file path to the Cantera mechanism
                         'ns' : number of species in the mechanism
                         'limits' : {'full': XXX, 'sparse': XXX}}: a
                            dictionary of limits on the number of conditions
                            that can be evaluated for this mechanism (full &
                            sparse jacobian respectively) due to memory
                            constraints
    params :
        The result of ``reduce_oploop`` applied to the collected parameter
        lists (one list of ``(key, values)`` pairs per test / platform /
        option state)
    max_vec_width : int
        The maximum vector width to test (at least 1)

    Raises
    ------
    Exception
        If ``work_dir`` does not exist, or no test in the matrix matches the
        requested test and evaluation type
    InvalidTestEnivironmentException
        If a ``num_cores`` override is given but the core count cannot be
        overridden
    InvalidOverrideException
        If a ``models`` override names a model that was not loaded
    """
    work_dir = abspath(work_dir)

    # validate the test matrix
    # (keep the original file name for messages; test_matrix is rebound to
    # the validated contents)
    matrix_name = test_matrix
    test_matrix = build_and_validate('test_matrix_schema.yaml', test_matrix)

    # check that we have the working directory
    if not exists(work_dir):
        raise Exception('Work directory {} for '.format(work_dir) +
                        'testing not found, exiting...')

    # load the models
    models = load_models(work_dir, test_matrix)
    assert isinstance(test_type, build_type)

    # load tests
    tests = load_tests(test_matrix, matrix_name)
    # filter those that match the test type
    valid_str = 'validation' if for_validation else 'performance'
    tests = [test for test in tests if test['test-type'] == valid_str]
    # ... and the evaluation type ('both' matches any test_type)
    tests = [
        test for test in tests
        if test['eval-type'] == enum_to_string(test_type)
        or test['eval-type'] == 'both'
    ]
    # and dictify
    tests = [OrderedDict(test) for test in tests]
    if not tests:
        raise Exception('No tests found in matrix {} for {} test of {}, '
                        'exiting...'.format(matrix_name, valid_str,
                                            enum_to_string(test_type)))

    # get defaults we haven't migrated to schema yet
    rate_spec = ['fixed', 'hybrid'] if test_type != build_type.jacobian \
        else ['fixed']
    # sparse & full formats only for jacobian tests, otherwise full only
    sparse = ([
        enum_to_string(JacobianFormat.sparse),
        enum_to_string(JacobianFormat.full)
    ] if test_type == build_type.jacobian else
              [enum_to_string(JacobianFormat.full)])
    # finite-difference jacobians only for jacobian performance tests
    jac_types = [
        enum_to_string(JacobianType.exact),
        enum_to_string(JacobianType.finite_difference)
    ] if (test_type == build_type.jacobian
          and not for_validation) else [enum_to_string(JacobianType.exact)]
    split_kernels = [False]

    # and default # of cores, this may be overridden
    default_num_cores, can_override_cores = num_cores_default()

    # load platforms
    platforms = load_platforms(test_matrix,
                               langs=langs,
                               raise_on_empty=raise_on_missing)
    platforms = [OrderedDict(platform) for platform in platforms]

    # one entry (list of (key, values) pairs) per test/platform/option state
    out_params = []
    logger = logging.getLogger(__name__)
    for test in tests:
        # filter platforms
        plats = [p.copy() for p in platforms]
        if 'platforms' in test:
            plats = [
                plat for plat in plats
                if plat['platform'] in test['platforms']
            ]
            if len(plats) < len(platforms):
                logger.info(
                    'Platforms ({}) filtered out for test type: {}'.format(
                        ', '.join([
                            p['platform'] for p in platforms
                            if p not in plats
                        ]), ' - '.join([test['test-type'],
                                        test['eval-type']])))
        if not len(plats):
            # NOTE(review): Logger.warn is a deprecated alias of
            # Logger.warning
            logger.warn('No platforms found for test {}, skipping...'.format(
                ' - '.join([test['test-type'], test['eval-type']])))
            continue

        for plookup in plats:
            # untouched copy, still containing 'width' / 'depth', used when
            # a vectype override has to be re-applied from scratch
            clean = plookup.copy()
            # get default number of cores
            cores = default_num_cores[:]
            # get default vector widths
            widths = plookup['width']
            is_wide = widths is not None
            depths = plookup['depth']
            is_deep = depths is not None
            # deep-only platforms take their vector sizes from 'depth'
            if is_deep and not is_wide:
                widths = depths[:]
            # sanity check
            if is_wide or is_deep:
                assert widths is not None
            # special gpu handling for cores
            is_gpu = False
            # test platform type
            if platform_is_gpu(plookup['platform']):
                # set cores to 1
                is_gpu = True
                cores = [1]

            # Replaces the 'width' / 'depth' entries of `lookup` (in place)
            # with 'vecsize', 'wide' and 'deep' option lists.  The keyword
            # defaults capture this platform's is_wide / is_deep values at
            # definition time.
            def apply_vectypes(lookup, widths, is_wide=is_wide,
                               is_deep=is_deep):
                if is_wide or is_deep:
                    # set vec widths
                    # an unvectorized ("parallel") variant is included if
                    # None is listed as a width or both types are enabled
                    use_par = None in widths or (is_wide and is_deep)
                    lookup['vecsize'] = [x for x in widths[:] if x is not None]
                    base = [True] if not use_par else [True, False]
                    if is_wide:
                        lookup['wide'] = base[:]
                        # drop the last entry before (possibly) assigning
                        # the deep options
                        base.pop()
                    if is_deep:
                        lookup['deep'] = base[:]
                else:
                    lookup['vecsize'] = [None]
                    lookup['wide'] = [False]
                    lookup['deep'] = [False]
                del lookup['width']
                del lookup['depth']

            apply_vectypes(plookup, widths)

            # default is both conp / conv
            conp = [True, False]
            order = ['C', 'F']

            # loop over possible overrides
            oploop = OptionLoop(
                OrderedDict([('ttype', [enum_to_string(test_type)]),
                             ('jtype', jac_types), ('stype', sparse)]))
            for i, state in enumerate(oploop):
                ttype = state['ttype']
                jtype = state['jtype']
                stype = state['stype']

                # logs a replaced default; note this goes through the root
                # `logging` module rather than the module-level `logger`
                def override_log(key, old, new):
                    logging.info(
                        'Replacing {} for test type: {}. Old value:'
                        ' ({}), New value: ({})'.format(
                            key,
                            stringify_args(
                                [ttype, test['eval-type'], jtype, stype],
                                joiner='.'), stringify_args(listify(old)),
                            stringify_args(listify(new))))

                # copy defaults
                icores = cores[:]
                iorder = order[:]
                iconp = conp[:]
                # only used for logging the old value of vecsize overrides
                ivecsizes = widths[:] if widths is not None else [None]
                imodels = tuple(models.keys())
                # load overrides
                overrides = get_overrides(test, ttype, jtype, stype)

                # check that we can apply
                if 'num_cores' in overrides and not can_override_cores:
                    raise InvalidTestEnivironmentException(
                        ttype, 'num_cores', matrix_name, 'num_threads')
                elif 'num_cores' in overrides and is_gpu:
                    logger = logging.getLogger(__name__)
                    logger.info(
                        'Discarding unused "num_cores" override for GPU '
                        'platform {}'.format(plookup['platform']))
                    del overrides['num_cores']

                # 'num_cores', 'order', 'conp', 'vecsize', 'vectype'
                # now apply overrides
                outplat = plookup.copy()
                # NOTE(review): the outer loop variable `current` is never
                # used, so the inner loop over all overrides is repeated
                # once per override; the nesting below was reconstructed
                # and should be confirmed against the upstream source.
                for current in overrides:
                    ivectypes_override = None
                    for override in overrides:
                        if override == 'num_cores':
                            override_log('num_cores', icores,
                                         overrides[override])
                            icores = overrides[override]
                        elif override == 'order' and not is_gpu:
                            override_log('order', iorder, overrides[override])
                            iorder = overrides[override]
                        elif override == 'gpuorder' and is_gpu:
                            override_log('order', iorder, overrides[override])
                            iorder = overrides[override]
                        elif override == 'conp':
                            # translate 'conp' / 'conv' strings to booleans
                            iconp_save = iconp[:]
                            iconp = []
                            if 'conp' in overrides[override]:
                                iconp.append(True)
                            if 'conv' in overrides[override]:
                                iconp.append(False)
                            override_log('conp', iconp_save, iconp)
                        elif override == 'vecsize' and not is_gpu:
                            override_log('vecsize', ivecsizes,
                                         overrides[override])
                            outplat['vecsize'] = listify(overrides[override])
                        elif override == 'gpuvecsize' and is_gpu:
                            override_log('gpuvecsize', ivecsizes,
                                         overrides[override])
                            outplat['vecsize'] = listify(overrides[override])
                        elif override == 'vectype' and not is_gpu:
                            # we have to do this at the end
                            ivectypes_override = overrides[override]
                        elif override == 'gpuvectype' and is_gpu:
                            ivectypes_override = overrides[override]
                        elif override == 'models':
                            # check that all models are valid
                            for model in overrides[override]:
                                if model not in imodels:
                                    raise InvalidOverrideException(
                                        override, model, imodels)
                            # and replace
                            override_log('models', stringify_args(imodels),
                                         stringify_args(overrides[override]))
                            imodels = tuple(overrides[override])

                    if ivectypes_override is not None:
                        # re-derive wide / deep / vecsize from the clean
                        # platform, using the (possibly overridden) vector
                        # sizes and the requested vector types
                        c = clean.copy()
                        apply_vectypes(c,
                                       outplat['vecsize'],
                                       is_wide='wide' in ivectypes_override,
                                       is_deep='deep' in ivectypes_override)
                        # and copy into working
                        outplat['wide'] = c['wide'] if 'wide' in c else [False]
                        outplat['deep'] = c['deep'] if 'deep' in c else [False]
                        outplat['vecsize'] = c['vecsize']
                        # describe the previous vector types for the log
                        old = ['']
                        if is_wide:
                            old += ['wide']
                        if is_deep:
                            old += ['deep']
                        elif not is_wide:
                            old += ['par']
                        override_log('vecsize', old, ivectypes_override)

                # and finally, convert back to an option loop format
                out_params.append(
                    [('num_cores', icores), ('order', iorder),
                     ('rate_spec', rate_spec),
                     ('split_kernels', split_kernels), ('conp', iconp),
                     ('sparse', [stype]), ('jac_type', [jtype]),
                     ('models', [imodels])] +
                    [(key, value) for key, value in six.iteritems(outplat)])

    # largest vector size over all parameter sets that are vectorized
    max_vec_width = 1
    vector_params = [
        dict(p)['vecsize'] for p in out_params
        if 'vecsize' in dict(p) and dict(p)['vecsize'] != [None]
    ]
    if vector_params:
        max_vec_width = max(max_vec_width,
                            max([max(x) for x in vector_params]))
    # imported at call time rather than at module import
    from . import reduce_oploop
    loop = reduce_oploop(out_params)
    return models, loop, max_vec_width
def __run_and_check(mech, thermo, initial_conditions, build_path, num_threads,
                    num_conditions, test_data, skip_c, skip_cuda, atol, rtol,
                    small_atol, small_rtol, finite_difference, end_time,
                    use_old_validation, small_time_step, minor_species):
    """Build a reference solution, then build, run and check each enabled
    language over a range of time steps.

    Parameters
    ----------
    mech :
        Mechanism passed to ``create_jacobian`` (as ``mech_name``) and to
        ``ct.Solution``
    thermo :
        Passed to ``create_jacobian`` as ``therm_name``
    initial_conditions :
        Passed to ``create_jacobian`` as ``initial_state``; if truthy, the
        build uses ``SAME_IC=TRUE`` and ``num_conditions`` is forced to 1
    build_path : str
        Directory containing the generated ``mechanism.h``; also passed to
        the build as ``mechanism_dir``
    num_threads : int
        Used for the ``-j`` build flag, jacobian creation and execution
    num_conditions : int or None
        Number of conditions; if ``None`` and ``test_data`` is not ``None``
        it is derived from the size of ``ign_data.bin``
    test_data :
        Only compared against ``None`` in this function
    skip_c, skip_cuda : bool
        Disable the C / CUDA runs respectively
    atol, rtol : float
        Tolerances used for the tested builds and for the error check
    small_atol, small_rtol : float
        Tolerances used to build the reference (validation) solution
    finite_difference : bool
        Sets the ``FINITE_DIFFERENCE`` build flag
    end_time : float
        Integration end time (``t_end``); also sets the largest tested step
    use_old_validation : bool
        If true, reuse the existing ``log/valid.bin`` instead of rebuilding
    small_time_step : float
        Time step used for the reference solution
    minor_species : str
        Comma-separated species names; each is looked up with
        ``gas.species_index``

    Raises
    ------
    Exception
        If both ``skip_c`` and ``skip_cuda`` are set
    """
    #first compile and run cvodes to get the baseline
    __check_exit(
        create_jacobian(lang='c',
                        mech_name=mech,
                        therm_name=thermo,
                        initial_state=initial_conditions,
                        optimize_cache=False,
                        build_path=build_path))

    #verify that the minor species exist and get indices
    # (empty names produced by the split are dropped)
    gas = ct.Solution(mech)
    minor_species = [(x, gas.species_index(x))
                     for x in minor_species.split(',') if x]

    #get num vars
    # taken from the first line of mechanism.h that contains 'NN' and ends
    # in an integer
    nvar = None
    with open(pjoin(build_path, 'mechanism.h'), 'r') as file:
        for line in file.readlines():
            if 'NN' in line:
                match = re.search(r'\b(\d+)$', line.strip())
                if match:
                    nvar = int(match.group(1))
                    break

    # NOTE(review): nvar is used here before the assert below; if it was
    # not found this fails with a TypeError rather than the assertion
    if num_conditions is None and test_data is not None:
        num_conditions = np.fromfile('ign_data.bin').reshape(
            (-1, nvar + 2)).shape[0]

    assert nvar is not None
    # build arguments shared by all builds
    arg_list = [
        '-j{}'.format(num_threads), 'DEBUG=FALSE', 'FAST_MATH=FALSE',
        'LOG_OUTPUT=TRUE', 'LOG_END_ONLY=TRUE', 'SHUFFLE=FALSE',
        'PRINT=FALSE', 'CV_HMAX=0', 'CV_MAX_STEPS=-1',
        'FINITE_DIFFERENCE={}'.format(
            'FALSE' if not finite_difference else 'TRUE'),
        'mechanism_dir={}'.format(build_path)
    ]
    if initial_conditions:
        arg_list.append('SAME_IC=TRUE')
        num_conditions = 1  #they're all the same
    else:
        arg_list.append('SAME_IC=FALSE')

    # assemble the option loop for the enabled languages
    oploop = None
    if not skip_c:
        oploop = OptionLoop({
            'lang': ['c'],
            'cache_opt': [False],  # True is currently disabled
            'smem': [False]
        })
    if not skip_cuda:
        if oploop is not None:
            # NOTE(review): when appended to the C loop only smem=False is
            # tested, whereas the CUDA-only loop below tests both values
            oploop += OptionLoop({
                'lang': ['cuda'],
                'cache_opt': [False],  # True is currently disabled
                'smem': [False]  # True is currently disabled
            })
        else:
            oploop = OptionLoop({
                'lang': ['cuda'],
                'cache_opt': [False],  # True is currently disabled
                'smem': [False, True]
            })
    if oploop is None:
        raise Exception('No languages to test specified')

    # maps the language to the build target name
    builder = {'c': 'cpu', 'cuda': 'gpu'}
    small_tol = [
        'ATOL={:.0e}'.format(small_atol), 'RTOL={:.0e}'.format(small_rtol)
    ]
    large_tol = ['ATOL={:.0e}'.format(atol), 'RTOL={:.0e}'.format(rtol)]

    #build the validation set for this timestep
    extra_args = [
        't_step={:.0e}'.format(small_time_step),
        't_end={:.0e}'.format(end_time)
    ]
    if not use_old_validation:
        with open('logerr', 'a') as errfile:
            subprocess.check_call([scons, 'cpu'] + arg_list + extra_args +
                                  small_tol,
                                  stdout=errfile)
            #run
            subprocess.check_call([
                pjoin(cwd(), valid_int),
                str(num_threads),
                str(num_conditions)
            ],
                                  stdout=errfile)
            #copy to saved data
            shutil.copy(pjoin(cwd(), 'log', keyfile),
                        pjoin(cwd(), 'log', 'valid.bin'))

    # force cache optimization until one optimized build has succeeded
    force_opt = True
    validator = np.fromfile(pjoin('log', 'valid.bin'), dtype='float64')
    # each row holds 1 + num_conditions * nvar values
    # (presumably the time followed by every condition's state -- TODO
    # confirm against the integrator's log format)
    validator = validator.reshape((-1, 1 + num_conditions * nvar))

    #force constant time steps
    arg_list += ['CONST_TIME_STEP=TRUE']
    with open('logfile', 'a') as file:
        with open('logerr', 'a') as errfile:
            for op in oploop:
                lang = op['lang']
                cache_opt = op['cache_opt']
                shared_mem = op['smem']

                __check_exit(
                    create_jacobian(lang=lang,
                                    mech_name=mech,
                                    therm_name=thermo,
                                    initial_state=initial_conditions,
                                    optimize_cache=cache_opt,
                                    multi_thread=num_threads,
                                    no_shared=not shared_mem,
                                    force_optimize=force_opt,
                                    build_path=build_path))
                if cache_opt:
                    #successful force optimize
                    force_opt = False

                file.write(
                    '\nlang:{}\n'
                    'cache_opt: {}\n'
                    'shared_mem: {}\n'.format(lang, cache_opt,
                                              (shared_mem
                                               and lang == 'cuda')))

                # test six time steps, one per decade, starting at the
                # decade of end_time and decreasing
                start = int(np.log10(end_time))
                for j in range(start, start - 6, -1):
                    t_step = np.power(10.0, j)
                    extra_args = [
                        't_step={:.0e}'.format(t_step),
                        't_end={:.0e}'.format(end_time)
                    ]
                    file.write('t_step={:.0e}\n'.format(t_step))
                    # flush so the header precedes any output written by
                    # the helpers called below
                    file.flush()
                    subprocess.check_call([scons, builder[lang]] + arg_list +
                                          extra_args + large_tol,
                                          stdout=errfile,
                                          stderr=errfile)
                    __execute(builder[lang], num_threads, num_conditions,
                              t_step)
                    __check_error(builder[lang], num_conditions, nvar,
                                  validator, atol, rtol, minor_species)
def test_read_initial_conditions(self):
    """Build the initial-condition reader as a Python extension and run the
    ``ric_tester.py`` script against it for every order / vectorization
    state of the ``c`` language.

    For each state the C reader template is filled in and written to the
    build directory together with the auxiliary headers, a ``setup.py`` and
    the Cython wrapper; the extension is built into the library directory,
    the reference arrays and ``data.bin`` are written there, and the tester
    script is executed in a subprocess.
    """
    store = self.store
    build_dir = store.build_dir
    obj_dir = store.obj_dir
    lib_dir = store.lib_dir
    setup = test_utils.get_read_ics_source()

    for directory in (build_dir, obj_dir, lib_dir):
        utils.create_dir(directory)

    # source locations (pure path arithmetic, no filesystem access)
    common_dir = os.path.join(store.script_dir, os.pardir, 'kernel_utils',
                              'common')
    helper_dir = os.path.join(store.script_dir, 'test_utils')
    python_str = 'python{}.{}'.format(sys.version_info[0],
                                      sys.version_info[1])

    option_space = OrderedDict()
    # no need to test conv
    option_space['conp'] = [True]
    option_space['order'] = ['C', 'F']
    option_space['depth'] = [4, None]
    option_space['width'] = [4, None]
    option_space['lang'] = ['c']

    for state in OptionLoop(option_space):
        # skip states that request both a depth and a width
        if state['depth'] and state['width']:
            continue
        self.__cleanup(False)

        # create dummy loopy opts
        opts = type('', (object, ), state)()
        asplit = array_splitter(opts)

        # get source & substitute the header names
        template_path = os.path.realpath(
            os.path.join(common_dir, 'read_initial_conditions.c.in'))
        with open(template_path, 'r') as file:
            reader_template = Template(file.read())
        reader_src = reader_template.safe_substitute(
            mechanism='mechanism.h', vectorization='vectorization.h')

        # write reader source
        reader_path = os.path.join(build_dir, 'read_initial_conditions.c')
        with open(reader_path, 'w') as file:
            file.write(reader_src)

        # write header
        write_aux(build_dir, opts, self.store.specs, self.store.reacs)

        # write setup
        setup_path = os.path.join(build_dir, 'setup.py')
        with open(setup_path, 'w') as file:
            file.write(setup.safe_substitute(buildpath=build_dir))

        # copy read ics header to final dest
        shutil.copyfile(
            os.path.join(common_dir, 'read_initial_conditions.h'),
            os.path.join(build_dir, 'read_initial_conditions.h'))

        # copy wrapper
        shutil.copyfile(os.path.join(helper_dir, 'read_ic_wrapper.pyx'),
                        os.path.join(build_dir, 'read_ic_wrapper.pyx'))

        # build the extension
        subprocess.check_call(
            [python_str, setup_path, 'build_ext', '--build-lib', lib_dir])

        # copy in tester
        tester_path = os.path.join(lib_dir, 'ric_tester.py')
        shutil.copyfile(os.path.join(helper_dir, 'ric_tester.py'),
                        tester_path)

        # For simplicity (and really, lack of need) we test CONP only
        # hence, the extra variable is the volume, while the fixed parameter
        # is the pressure

        # save phi, param in correct order
        if opts.conp:
            phi = self.store.phi_cp
        else:
            phi = self.store.phi_cv
        (save_phi, ) = asplit.split_numpy_arrays(phi)
        save_phi = save_phi.flatten(opts.order)
        if opts.conp:
            param = self.store.P
        else:
            param = self.store.V
        save_phi.tofile(os.path.join(lib_dir, 'phi_test.npy'))
        param.tofile(os.path.join(lib_dir, 'param_test.npy'))

        # save bin file: temperature column, parameter column, species
        temperature = np.reshape(phi[:, 0], (-1, 1))
        parameter = np.reshape(param, (-1, 1))
        species = phi[:, 1:]
        out_file = np.concatenate((temperature, parameter, species), axis=1)
        out_file = out_file.flatten('K')
        with open(os.path.join(lib_dir, 'data.bin'), 'wb') as file:
            out_file.tofile(file)

        # and run
        subprocess.check_call(
            [python_str, tester_path, opts.order,
             str(self.store.test_size)])
def run(thedir, blacklist=[], force=False, repeats=5, num_cond=131072,
        threads=[6, 12], langs=['c', 'cuda'], atol=1e-10, rtol=1e-7):
    """Generate, build and time the integrators for the mechanism in
    ``thedir``.

    For each option state (language, time step, shared memory, finite
    difference) the Jacobian source is generated, the integrators are built
    with ``scons`` and every selected executable is run until ``repeats``
    results exist in its output file under ``<thedir>/output``.

    Parameters
    ----------
    thedir : str
        Directory containing the mechanism (``*.cti``) and the data file
    blacklist : list of str
        Passed to ``get_executables``; never modified here
    force : bool
        Currently unused
    repeats : int
        Number of runs wanted per output file; runs already recorded
        (as counted by ``check_file``) are not repeated
    num_cond : int
        Number of conditions used to build the condition list for
        same-initial-condition runs
    threads : list of int
        Thread counts to test for the CPU executables
    langs : list of str
        Languages to test; ``'c'`` and / or ``'cuda'``
    atol, rtol : float
        Tolerances passed to the build as ``ATOL`` / ``RTOL``

    Returns
    -------
    None
        Returns early, after printing a message, if no ``.cti`` file is
        found in ``thedir``.
    """
    jthread = str(multiprocessing.cpu_count())

    make_sure_path_exists(os.path.join(thedir, 'output'))

    try:
        # glob already returns paths that include ``thedir``; joining
        # ``thedir`` on again would double the prefix for relative paths
        mechanism = glob.glob(os.path.join(thedir, '*.cti'))[0]
    except IndexError:
        # only "no match" is expected here; anything else should propagate
        print("Mechanism file not found in {}, skipping...".format(thedir))
        return

    home = os.getcwd()
    same_powers = get_powers(num_cond)
    diff_powers = get_powers(get_diff_ics_cond(thedir, mechanism))

    #copy the datafile
    shutil.copy(os.path.join(thedir, datafile),
                os.path.join(home, 'ign_data.bin'))

    opt_list = [False]
    smem_list = [True, False]
    t_list = [1e-6, 1e-4]
    ics_list = [False]
    fd_list = [True, False]

    # the second OptionLoop argument is the default factory, as in the
    # original (the C loop defines no 'smem' key but state['smem'] is read)
    c_params = OptionLoop(
        {
            'lang': 'c',
            'opt': opt_list,
            't_step': t_list,
            'same_ics': ics_list,
            'FD': fd_list
        }, lambda: False)
    cuda_params = OptionLoop(
        {
            'lang': 'cuda',
            'opt': opt_list,
            't_step': t_list,
            'smem': smem_list,
            'same_ics': ics_list,
            'FD': fd_list
        }, lambda: False)
    op = c_params + cuda_params

    # path of the first cache-optimization pickle produced, shared between
    # build directories so later optimized builds reuse it
    pickley = None
    for state in op:
        opt = state['opt']
        smem = state['smem']
        t_step = state['t_step']
        same = state['same_ics']
        FD = state['FD']

        thepow = same_powers if same else diff_powers

        #custom rules so evaluation doesn't take so long
        if opt:
            continue
        #turn on FD for long timestep with H2 for direct comparison
        if FD and t_step == 1e-4 and 'H2' not in thedir:
            continue

        #generate mechanisms
        if 'c' in langs:
            mech_dir = 'cpu_{}'.format('co' if opt else 'nco')
            mech_dir = os.path.join(thedir, mech_dir) + os.path.sep
            make_sure_path_exists(mech_dir)
            if opt and pickley is not None:
                if pickley != os.path.join(mech_dir, 'optimized.pickle'):
                    shutil.copy(pickley,
                                os.path.join(mech_dir, 'optimized.pickle'))
            create_jacobian(lang='c',
                            mech_name=mechanism,
                            optimize_cache=opt,
                            build_path=mech_dir,
                            multi_thread=int(jthread))
            if opt and pickley is None:
                pickley = os.path.join(mech_dir, 'optimized.pickle')

        if 'cuda' in langs:
            gpu_mech_dir = 'gpu_{}_{}'.format('co' if opt else 'nco',
                                              'smem' if smem else 'nosmem')
            gpu_mech_dir = os.path.join(thedir, gpu_mech_dir)
            make_sure_path_exists(gpu_mech_dir)
            if opt and pickley is not None:
                if pickley != os.path.join(gpu_mech_dir, 'optimized.pickle'):
                    shutil.copy(
                        pickley,
                        os.path.join(gpu_mech_dir, 'optimized.pickle'))
            create_jacobian(lang='cuda',
                            mech_name=mechanism,
                            optimize_cache=opt,
                            build_path=gpu_mech_dir,
                            no_shared=not smem,
                            multi_thread=int(jthread))
            if opt and pickley is None:
                pickley = os.path.join(gpu_mech_dir, 'optimized.pickle')

        #now build and run
        # t_end equals t_step, i.e. a single step is timed
        args = [
            '-j', jthread, 'DEBUG=False', 'FAST_MATH=False',
            'LOG_OUTPUT=False', 'SHUFFLE=False', 'LOG_END_ONLY=False',
            'PRINT=False', 't_step={:e}'.format(t_step),
            't_end={:e}'.format(t_step), 'DIVERGENCE_WARPS=0', 'CV_HMAX=0',
            'CV_MAX_STEPS=-1', 'ATOL={:e}'.format(atol),
            'RTOL={:e}'.format(rtol), 'FINITE_DIFFERENCE={}'.format(FD)
        ]
        args.append('SAME_IC={}'.format(same))

        #run with repeats
        if 'c' in langs:
            run_me = get_executables(blacklist + ['gpu'], inverse=['int'])
            subprocess.check_call([scons, 'cpu'] + args +
                                  ['mechanism_dir={}'.format(mech_dir)])
            for exe in run_me:
                for thread in threads:
                    for cond in thepow:
                        if 'exp' in exe and FD:
                            #the exponential integrators are not formulated
                            #for FD Jacobians, thus we don't evaluate them
                            continue
                        filename = os.path.join(
                            thedir, 'output',
                            exe + '_{}_{}_{}_{}_{}_{:e}.txt'.format(
                                cond, thread, 'co' if opt else 'nco',
                                'sameic' if same else 'psric',
                                'FD' if FD else 'AJ', t_step))
                        # only run what is still missing from the file
                        my_repeats = repeats - check_file(filename)
                        with open(filename, 'a') as file:
                            for _ in range(my_repeats):
                                subprocess.check_call([
                                    os.path.join(home, exe),
                                    str(thread),
                                    str(cond)
                                ],
                                                      stdout=file)

        if 'cuda' in langs:
            #run with repeats
            subprocess.check_call([scons, 'gpu'] + args +
                                  ['mechanism_dir={}'.format(gpu_mech_dir)])
            run_me = get_executables(blacklist, inverse=['int-gpu'])
            for exe in run_me:
                for cond in thepow:
                    if 'exp' in exe and FD:
                        #the exponential integrators are not formulated
                        #for FD Jacobians, thus we don't evaluate them
                        continue
                    filename = os.path.join(
                        thedir, 'output',
                        exe + '_{}_{}_{}_{}_{}_{:e}.txt'.format(
                            cond, 'co' if opt else 'nco',
                            'smem' if smem else 'nosmem',
                            'sameic' if same else 'psric',
                            'FD' if FD else 'AJ', t_step))
                    # only run what is still missing from the file
                    my_repeats = repeats - check_file(filename)
                    with open(filename, 'a') as file:
                        for _ in range(my_repeats):
                            subprocess.check_call(
                                [os.path.join(home, exe),
                                 str(cond)],
                                stdout=file)