Example 1
 def __test_cases():
     for state in OptionLoop(
             OrderedDict([('lang', ['opencl', 'c']),
                          (('jac_type'),
                           ['exact', 'approximate',
                            'finite_difference'])])):
         yield param(state)
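
Note: as the optionloop package is understood here, OptionLoop expands the supplied (ordered) dictionary into the Cartesian product of its value lists and yields one dict-like state per combination. A minimal, self-contained sketch of that behaviour (the keys and values are illustrative only, and the package is assumed to be installed):

from collections import OrderedDict
from optionloop import OptionLoop

# One state is yielded per combination of the value lists, e.g.
# {'lang': 'opencl', 'order': 'C'}, {'lang': 'opencl', 'order': 'F'}, ...
oploop = OptionLoop(OrderedDict([('lang', ['opencl', 'c']),
                                 ('order', ['C', 'F'])]))
for state in oploop:
    print(state['lang'], state['order'])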
Example 2
def loopy_opts(langs=['opencl'],
               width=[4, None],
               depth=[4, None],
               order=['C', 'F']):

    oploop = OptionLoop(
        OrderedDict([('lang', langs), ('width', width), ('depth', depth),
                     ('order', order)]))
    for state in oploop:
        if state['depth'] and state['width']:
            continue
        yield type('', (object, ), state)
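
The type('', (object, ), state) call above creates an anonymous class whose class attributes mirror the keys of the current state, so downstream code can use attribute access (e.g. opts.width, opts.order) instead of dictionary lookups; Example 9 below instantiates the same construct. A small illustration of the idiom with a hypothetical state dict:

# Hypothetical state; any dict of option values works the same way.
state = {'lang': 'opencl', 'width': 4, 'depth': None, 'order': 'C'}
opts = type('', (object, ), state)
print(opts.lang, opts.width, opts.order)  # -> opencl 4 C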
Example 3
    def __get_objs(self, lang='opencl', depth=None, width=None, order='C'):
        opts = loopy_options(lang=lang,
                             width=width,
                             depth=depth,
                             ilp=False,
                             unr=None,
                             order=order,
                             platform='CPU')

        oploop = OptionLoop(
            OrderedDict([('conp', [True]), ('shared', [True, False])]))
        return opts, oploop
Example 4
def __test_cases():
    for state in OptionLoop(
            OrderedDict([('lang', ['opencl', 'c']), ('order', ['C', 'F']),
                         ('width', [4, None]), ('depth', [4, None]),
                         ('device_type', (cl.device_type.CPU,
                                          cl.device_type.GPU, None))])):
        if state['depth'] and state['width']:
            continue
        elif (state['depth'] is not None or state['width'] is not None) \
                and state['lang'] == 'c':
            continue
        elif (state['lang'] == 'c' and state['device_type'] is not None):
            continue
        yield param(state)
Example 5
def performance_tester(home, work_dir, use_old_opt, num_threads):
    """Runs performance testing for pyJac, TChem, and finite differences.

    Parameters
    ----------
    home : str
        Directory of source code files
    work_dir : str
        Working directory with mechanisms and for data
    use_old_opt : bool
        If ``True``, use old optimization files found
    num_threads : int
        Number of OpenMP threads to parallelize performance testing

    Returns
    -------
    None

    """
    build_dir = 'out'
    test_dir = 'test'

    work_dir = os.path.abspath(work_dir)

    #find the mechanisms to test
    mechanism_list = {}
    if not os.path.exists(work_dir):
        print('Error: work directory {} for '.format(work_dir) +
              'performance testing not found, exiting...')
        sys.exit(-1)
    for name in os.listdir(work_dir):
        if os.path.isdir(os.path.join(work_dir, name)):
            #check for cti
            files = [
                f for f in os.listdir(os.path.join(work_dir, name))
                if os.path.isfile(os.path.join(work_dir, name, f))
            ]
            for f in files:
                if f.endswith('.cti'):
                    mechanism_list[name] = {}
                    mechanism_list[name]['mech'] = f
                    mechanism_list[name]['chemkin'] = f.replace('.cti', '.dat')
                    gas = ct.Solution(os.path.join(work_dir, name, f))
                    mechanism_list[name]['ns'] = gas.n_species

                    thermo = next((tf for tf in files if 'therm' in tf), None)
                    if thermo is not None:
                        mechanism_list[name]['thermo'] = thermo

    if len(mechanism_list) == 0:
        print('No mechanisms found for performance testing in '
              '{}, exiting...'.format(work_dir))
        sys.exit(-1)

    if os.getenv('TCHEM_HOME'):
        tchem_home = os.getenv('TCHEM_HOME')
    else:
        raise SystemError('TCHEM_HOME environment variable not set.')

    cpu_repeats = 10
    gpu_repeats = 10

    def false_factory():
        return False

    c_params = {
        'lang': 'c',
        'cache_opt': [False, True],
        'finite_diffs': [False, True]
    }
    cuda_params = {
        'lang': 'cuda',
        'cache_opt': [False, True],
        'shared': [False, True],
        'finite_diffs': [False, True]
    }
    tchem_params = {'lang': 'tchem'}

    #set up testing environment
    env = os.environ.copy()
    env['OMP_NUM_THREADS'] = str(1)
    env['MKL_NUM_THREADS'] = str(1)

    for mech_name, mech_info in sorted(mechanism_list.items(),
                                       key=lambda x: x[1]['ns']):
        #get the cantera object
        gas = ct.Solution(os.path.join(work_dir, mech_name, mech_info['mech']))
        pmod = any([is_pdep(rxn) for rxn in gas.reactions()])

        #ensure directory structure is valid
        os.chdir(os.path.join(work_dir, mech_name))
        subprocess.check_call(['mkdir', '-p', build_dir])
        subprocess.check_call(['mkdir', '-p', test_dir])

        num_conditions = 0
        npy_files = [
            f for f in os.listdir(os.path.join(work_dir, mech_name))
            if f.endswith('.npy') and os.path.isfile(f)
        ]
        data = None
        with open('data.bin', 'wb') as file:
            #load PaSR data for different pressures/conditions,
            # and save to binary C file
            for npy in sorted(npy_files):
                state_data = np.load(npy)
                state_data = state_data.reshape(
                    state_data.shape[0] * state_data.shape[1],
                    state_data.shape[2])
                if data is None:
                    data = state_data
                else:
                    data = np.vstack((data, state_data))
                num_conditions += state_data.shape[0]
                print(num_conditions, data.shape)
            if num_conditions == 0:
                print('No data found in folder {}, continuing...'.format(
                    mech_name))
                continue
            data.tofile(file)

        #figure out gpu steps
        step_size = 1
        steplist = []
        while step_size < num_conditions:
            steplist.append(step_size)
            step_size *= 2
        if step_size / 2 != num_conditions:
            steplist.append(num_conditions)

        the_path = os.getcwd()
        first_run = True
        op = OptionLoop(c_params, false_factory)
        op = op + OptionLoop(cuda_params, false_factory)
        op = op + OptionLoop(tchem_params, false_factory)

        haveOpt = False
        if os.path.isfile(
                os.path.join(os.getcwd(), build_dir, 'optimized.pickle')):
            haveOpt = True

        for state in op:
            lang = state['lang']
            temp_lang = 'c' if lang != 'cuda' else 'cuda'
            FD = state['finite_diffs']
            if FD:
                filename = 'fd_jacob{}'.format(utils.file_ext[temp_lang])
                shutil.copy(os.path.join(home, filename),
                            os.path.join(build_dir, filename))

            opt = state['cache_opt']
            smem = state['shared']

            if any([
                    isinstance(rxn, ct.PlogReaction)
                    or isinstance(rxn, ct.ChebyshevReaction)
                    for rxn in gas.reactions()
            ]) and lang == 'tchem':
                print('TChem performance evaluation disabled; '
                      'not compatible with Plog or Chebyshev reactions.')
                continue

            data_output = ('{}_{}_{}_{}'.format(
                lang, 'co' if opt else 'nco', 'smem' if smem else 'nosmem',
                'fd' if FD else 'ajac') + '_output.txt')

            data_output = os.path.join(the_path, data_output)
            if lang != 'cuda':
                repeats = cpu_repeats
                num_completed = check_file(data_output)
                todo = {num_conditions: repeats - num_completed}
            else:
                repeats = gpu_repeats
                todo = check_step_file(data_output, steplist)
                for x in todo:
                    todo[x] = repeats - todo[x]
            if not any(todo[x] > 0 for x in todo):
                continue

            if opt and haveOpt and not use_old_opt:
                raise Exception('Previous optimization file found... exiting')

            if lang != 'tchem':
                create_jacobian(lang,
                                mech_info['mech'],
                                optimize_cache=opt,
                                build_path=build_dir,
                                no_shared=not smem,
                                num_blocks=8,
                                num_threads=64,
                                multi_thread=multiprocessing.cpu_count())

            #now we need to write the reader
            filename = ('read_initial_conditions'
                        '{}'.format(utils.file_ext[temp_lang]))
            shutil.copy(os.path.join(home, filename),
                        os.path.join(os.getcwd(), build_dir, filename))

            #write the tester
            file_data = {'datafile': os.path.join(the_path, 'data.bin')}
            if lang == 'c' or lang == 'cuda':
                filename = 'tester{}.in'.format(utils.file_ext[temp_lang])
                with open(os.path.join(home, filename), 'r') as file:
                    src = Template(file.read())
                src = src.substitute(file_data)
            else:
                file_data['mechfile'] = mech_info['chemkin']
                if 'thermo' in mech_info:
                    file_data['thermofile'] = mech_info['thermo']
                else:
                    #it's the same file
                    file_data['thermofile'] = mech_info['chemkin']
                with open(os.path.join(home, 'tc_tester.c.in'), 'r') as file:
                    src = Template(file.read())
                src = src.substitute(file_data)
            filename = 'test{}'.format(utils.file_ext[temp_lang])
            with open(os.path.join(build_dir, filename), 'w') as file:
                file.write(src)

            #copy timer
            shutil.copy(os.path.join(home, 'timer.h'),
                        os.path.join(os.getcwd(), build_dir, 'timer.h'))

            #get file lists
            i_dirs = [build_dir]
            files = ['test', 'read_initial_conditions']

            lib = None
            #now build the library
            if lang != 'tchem':
                lib = generate_library(lang,
                                       build_dir,
                                       test_dir,
                                       finite_difference=FD,
                                       shared=not STATIC)

                lib = os.path.normpath(lib)
                lib = (
                    lib[lib.index('lib') +
                        len('lib'):lib.index('.so' if not STATIC else '.a')])
            else:
                files += ['mechanism', 'mass_mole']

            # Compile generated source code
            structs = [
                file_struct(lang, temp_lang, f, i_dirs,
                            (['-DFINITE_DIFF'] if FD else []), build_dir,
                            test_dir, not STATIC) for f in files
            ]
            if lang != 'cuda':
                for s in structs:
                    s.args.append('-fopenmp')

            pool = multiprocessing.Pool()
            results = pool.map(compiler, structs)
            pool.close()
            pool.join()
            if any(r == -1 for r in results):
                sys.exit(-1)

            linker(lang, temp_lang, test_dir, files, lib)

            if lang == 'tchem':
                #copy periodic table and mechanisms in
                shutil.copy(
                    os.path.join(tchem_home, 'data', 'periodictable.dat'),
                    'periodictable.dat')

            with open(data_output, 'a+') as file:
                for stepsize in todo:
                    for i in range(todo[stepsize]):
                        print(i, "/", todo[stepsize])
                        subprocess.check_call([
                            os.path.join(the_path, test_dir, 'speedtest'),
                            str(stepsize),
                            str(num_threads)
                        ],
                                              stdout=file,
                                              env=env)
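
Example 5 relies on two further OptionLoop features: a default-value factory (false_factory) passed as the second constructor argument, so that keys missing from one parameter dictionary (e.g. 'shared' for the C and TChem loops) still resolve when a state is read, and the + operator, which chains several loops into a single iteration. A minimal sketch of that pattern, assuming this defaultdict-style behaviour and using hypothetical keys:

from optionloop import OptionLoop

def false_factory():
    return False

# 'shared' is only defined in the CUDA loop; states from the C loop fall
# back to false_factory() when it is read.  Keys here are hypothetical.
c_loop = OptionLoop({'lang': 'c',
                     'finite_diffs': [False, True]}, false_factory)
cuda_loop = OptionLoop({'lang': 'cuda',
                        'shared': [False, True],
                        'finite_diffs': [False, True]}, false_factory)

for state in c_loop + cuda_loop:
    print(state['lang'], state['shared'], state['finite_diffs'])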
Example 6
                                              (Pr + 1)) * ci
        elif fall_type == 'chem':
            dci = (-Pr * Theta_Pr / (Pr + 1) + Theta_Fi - theta_Pr /
                   (Pr + 1)) * ci
    elif var == 'P':
        if fall_type == 'fall':
            dci = Fi * theta_Pr / (Pr + 1) + \
                (Theta_Fi - theta_Pr / (Pr + 1)) * ci
        elif fall_type == 'chem':
            dci = (Theta_Fi - theta_Pr / (Pr + 1)) * ci
    return Xi, dci


oploop = OptionLoop({
    'fall_type': ['chem', 'fall'],
    'blend_type': ['lind', 'troe', 'sri'],
    'pr_type': ['mix', 'unity', 'spec'],
    'var': ['T', 'nj', 'V', 'P']
})

for i, state in enumerate(oploop):
    term_dict = {}
    Xi, dci = __get_dci(**state)

    def __rec_lims(term):
        def __separate(args):
            has = []
            hasnt = []
            for a in args:
                (has if a.has(Xi) else hasnt).append(a)
            return has, hasnt
Example 7
def get_test_matrix(work_dir,
                    test_type,
                    test_matrix,
                    for_validation,
                    raise_on_missing=True,
                    langs=get_test_langs()):
    """Runs a set of mechanisms and an ordered dictionary for
    performance and functional testing

    Parameters
    ----------
    work_dir : str
        Working directory with mechanisms and for data
    test_type: :class:`build_type.jacobian`
        Controls some testing options (e.g., whether to do a sparse matrix or not)
    test_matrix: str
        The test matrix file to load
    for_validation: bool
        Determines which test type to load from the test matrix:
        validation or performance
    raise_on_missing: bool
        Raise an exception if the specified :param:`test_matrix` file is not found
    langs: list of str
        The allowed languages, modifiable via the :envvar:`TEST_LANGS`
        environment variable or test_langs in :file:`test_setup.py`
    Returns
    -------
    mechanisms : dict
        A dictionary indicating which mechanisms are available for testing.
        The structure is as follows:
            mech_name : {'mech' : file path to the Cantera mechanism
                         'ns' : number of species in the mechanism
                         'limits' : {'full': XXX, 'sparse': XXX}}: a dictionary of
                            limits on the number of conditions that can be evaluated
                            for this mechanism (full & sparse jacobian respectively)
                            due to memory constraints
    params  : OrderedDict
        The parameters to put in an oploop
    max_vec_width : int
        The maximum vector width to test

    """
    work_dir = abspath(work_dir)

    # validate the test matrix
    matrix_name = test_matrix
    test_matrix = build_and_validate('test_matrix_schema.yaml', test_matrix)

    # check that we have the working directory
    if not exists(work_dir):
        raise Exception('Work directory {} for '.format(work_dir) +
                        'testing not found, exiting...')

    # load the models
    models = load_models(work_dir, test_matrix)
    assert isinstance(test_type, build_type)

    # load tests
    tests = load_tests(test_matrix, matrix_name)
    # filter those that match the test type
    valid_str = 'validation' if for_validation else 'performance'
    tests = [test for test in tests if test['test-type'] == valid_str]
    tests = [
        test for test in tests
        if test['eval-type'] == enum_to_string(test_type)
        or test['eval-type'] == 'both'
    ]
    # and dictify
    tests = [OrderedDict(test) for test in tests]
    if not tests:
        raise Exception('No tests found in matrix {} for {} test of {}, '
                        'exiting...'.format(matrix_name, valid_str,
                                            enum_to_string(test_type)))

    # get defaults we haven't migrated to schema yet
    rate_spec = ['fixed', 'hybrid'] if test_type != build_type.jacobian \
        else ['fixed']
    sparse = ([
        enum_to_string(JacobianFormat.sparse),
        enum_to_string(JacobianFormat.full)
    ] if test_type == build_type.jacobian else
              [enum_to_string(JacobianFormat.full)])
    jac_types = [
        enum_to_string(JacobianType.exact),
        enum_to_string(JacobianType.finite_difference)
    ] if (test_type == build_type.jacobian
          and not for_validation) else [enum_to_string(JacobianType.exact)]
    split_kernels = [False]

    # and the default number of cores; this may be overridden
    default_num_cores, can_override_cores = num_cores_default()

    # load platforms
    platforms = load_platforms(test_matrix,
                               langs=langs,
                               raise_on_empty=raise_on_missing)
    platforms = [OrderedDict(platform) for platform in platforms]
    out_params = []
    logger = logging.getLogger(__name__)
    for test in tests:
        # filter platforms
        plats = [p.copy() for p in platforms]
        if 'platforms' in test:
            plats = [
                plat for plat in plats if plat['platform'] in test['platforms']
            ]
            if len(plats) < len(platforms):
                logger.info(
                    'Platforms ({}) filtered out for test type: {}'.format(
                        ', '.join([
                            p['platform'] for p in platforms if p not in plats
                        ]), ' - '.join([test['test-type'],
                                        test['eval-type']])))
        if not len(plats):
            logger.warn('No platforms found for test {}, skipping...'.format(
                ' - '.join([test['test-type'], test['eval-type']])))
            continue

        for plookup in plats:
            clean = plookup.copy()
            # get default number of cores
            cores = default_num_cores[:]
            # get default vector widths
            widths = plookup['width']
            is_wide = widths is not None
            depths = plookup['depth']
            is_deep = depths is not None
            if is_deep and not is_wide:
                widths = depths[:]
            # sanity check
            if is_wide or is_deep:
                assert widths is not None
            # special gpu handling for cores
            is_gpu = False
            # test platform type
            if platform_is_gpu(plookup['platform']):
                # set cores to 1
                is_gpu = True
                cores = [1]

            def apply_vectypes(lookup,
                               widths,
                               is_wide=is_wide,
                               is_deep=is_deep):
                if is_wide or is_deep:
                    # set vec widths
                    use_par = None in widths or (is_wide and is_deep)
                    lookup['vecsize'] = [x for x in widths[:] if x is not None]
                    base = [True] if not use_par else [True, False]
                    if is_wide:
                        lookup['wide'] = base[:]
                        base.pop()
                    if is_deep:
                        lookup['deep'] = base[:]
                else:
                    lookup['vecsize'] = [None]
                    lookup['wide'] = [False]
                    lookup['deep'] = [False]
                del lookup['width']
                del lookup['depth']

            apply_vectypes(plookup, widths)

            # default is both conp / conv
            conp = [True, False]
            order = ['C', 'F']

            # loop over possible overrides
            oploop = OptionLoop(
                OrderedDict([('ttype', [enum_to_string(test_type)]),
                             ('jtype', jac_types), ('stype', sparse)]))
            for i, state in enumerate(oploop):
                ttype = state['ttype']
                jtype = state['jtype']
                stype = state['stype']

                def override_log(key, old, new):
                    logging.info(
                        'Replacing {} for test type: {}. Old value:'
                        ' ({}), New value: ({})'.format(
                            key,
                            stringify_args(
                                [ttype, test['eval-type'], jtype, stype],
                                joiner='.'), stringify_args(listify(old)),
                            stringify_args(listify(new))))

                # copy defaults
                icores = cores[:]
                iorder = order[:]
                iconp = conp[:]
                ivecsizes = widths[:] if widths is not None else [None]
                imodels = tuple(models.keys())
                # load overides
                overrides = get_overrides(test, ttype, jtype, stype)

                # check that we can apply
                if 'num_cores' in overrides and not can_override_cores:
                    raise InvalidTestEnivironmentException(
                        ttype, 'num_cores', matrix_name, 'num_threads')
                elif 'num_cores' in overrides and is_gpu:
                    logger = logging.getLogger(__name__)
                    logger.info(
                        'Discarding unused "num_cores" override for GPU '
                        'platform {}'.format(plookup['platform']))
                    del overrides['num_cores']

                # 'num_cores', 'order', 'conp', 'vecsize', 'vectype'
                # now apply overrides
                outplat = plookup.copy()
                for current in overrides:
                    ivectypes_override = None
                    for override in overrides:
                        if override == 'num_cores':
                            override_log('num_cores', icores,
                                         overrides[override])
                            icores = overrides[override]
                        elif override == 'order' and not is_gpu:
                            override_log('order', iorder, overrides[override])
                            iorder = overrides[override]
                        elif override == 'gpuorder' and is_gpu:
                            override_log('order', iorder, overrides[override])
                            iorder = overrides[override]
                        elif override == 'conp':
                            iconp_save = iconp[:]
                            iconp = []
                            if 'conp' in overrides[override]:
                                iconp.append(True)
                            if 'conv' in overrides[override]:
                                iconp.append(False)
                            override_log('conp', iconp_save, iconp)
                        elif override == 'vecsize' and not is_gpu:
                            override_log('vecsize', ivecsizes,
                                         overrides[override])
                            outplat['vecsize'] = listify(overrides[override])
                        elif override == 'gpuvecsize' and is_gpu:
                            override_log('gpuvecsize', ivecsizes,
                                         overrides[override])
                            outplat['vecsize'] = listify(overrides[override])
                        elif override == 'vectype' and not is_gpu:
                            # we have to do this at the end
                            ivectypes_override = overrides[override]
                        elif override == 'gpuvectype' and is_gpu:
                            ivectypes_override = overrides[override]
                        elif override == 'models':
                            # check that all models are valid
                            for model in overrides[override]:
                                if model not in imodels:
                                    raise InvalidOverrideException(
                                        override, model, imodels)
                            # and replace
                            override_log('models', stringify_args(imodels),
                                         stringify_args(overrides[override]))
                            imodels = tuple(overrides[override])

                    if ivectypes_override is not None:
                        c = clean.copy()
                        apply_vectypes(c,
                                       outplat['vecsize'],
                                       is_wide='wide' in ivectypes_override,
                                       is_deep='deep' in ivectypes_override)
                        # and copy into working
                        outplat['wide'] = c['wide'] if 'wide' in c else [False]
                        outplat['deep'] = c['deep'] if 'deep' in c else [False]
                        outplat['vecsize'] = c['vecsize']
                        old = ['']
                        if is_wide:
                            old += ['wide']
                        if is_deep:
                            old += ['deep']
                        elif not is_wide:
                            old += ['par']
                        override_log('vecsize', old, ivectypes_override)

                # and finally, convert back to an option loop format
                out_params.append(
                    [('num_cores', icores), ('order',
                                             iorder), ('rate_spec', rate_spec),
                     ('split_kernels', split_kernels), ('conp', iconp),
                     ('sparse', [stype]), ('jac_type',
                                           [jtype]), ('models', [imodels])] +
                    [(key, value) for key, value in six.iteritems(outplat)])

    max_vec_width = 1
    vector_params = [
        dict(p)['vecsize'] for p in out_params
        if 'vecsize' in dict(p) and dict(p)['vecsize'] != [None]
    ]
    if vector_params:
        max_vec_width = max(max_vec_width,
                            max([max(x) for x in vector_params]))
    from . import reduce_oploop
    loop = reduce_oploop(out_params)
    return models, loop, max_vec_width
Example 8
def __run_and_check(mech, thermo, initial_conditions, build_path, num_threads,
                    num_conditions, test_data, skip_c, skip_cuda, atol, rtol,
                    small_atol, small_rtol, finite_difference, end_time,
                    use_old_validation, small_time_step, minor_species):
    #first compile and run cvodes to get the baseline
    __check_exit(
        create_jacobian(lang='c',
                        mech_name=mech,
                        therm_name=thermo,
                        initial_state=initial_conditions,
                        optimize_cache=False,
                        build_path=build_path))
    #verify that the minor species exist and get indicies
    gas = ct.Solution(mech)
    minor_species = [(x, gas.species_index(x))
                     for x in minor_species.split(',') if x]
    #get num vars
    nvar = None
    with open(pjoin(build_path, 'mechanism.h'), 'r') as file:
        for line in file.readlines():
            if 'NN' in line:
                match = re.search(r'\b(\d+)$', line.strip())
                if match:
                    nvar = int(match.group(1))
                    break
    if num_conditions is None and test_data is not None:
        num_conditions = np.fromfile('ign_data.bin').reshape(
            (-1, nvar + 2)).shape[0]
    assert nvar is not None
    arg_list = [
        '-j{}'.format(num_threads), 'DEBUG=FALSE', 'FAST_MATH=FALSE',
        'LOG_OUTPUT=TRUE', 'LOG_END_ONLY=TRUE', 'SHUFFLE=FALSE', 'PRINT=FALSE',
        'CV_HMAX=0', 'CV_MAX_STEPS=-1', 'FINITE_DIFFERENCE={}'.format(
            'FALSE' if not finite_difference else 'TRUE'),
        'mechanism_dir={}'.format(build_path)
    ]

    if initial_conditions:
        arg_list.append('SAME_IC=TRUE')
        num_conditions = 1  #they're all the same
    else:
        arg_list.append('SAME_IC=FALSE')

    oploop = None
    if not skip_c:
        oploop = OptionLoop({
            'lang': ['c'],
            'cache_opt': [False],  #, True],
            'smem': [False]
        })
    if not skip_cuda:
        if oploop is not None:
            oploop += OptionLoop({
                'lang': ['cuda'],
                'cache_opt': [False],  #, True],
                'smem': [False]
            })  #, True]})
        else:
            oploop = OptionLoop({
                'lang': ['cuda'],
                'cache_opt': [False],  #, True],
                'smem': [False, True]
            })
    if oploop is None:
        raise Exception('No languages to test specified')
    builder = {'c': 'cpu', 'cuda': 'gpu'}
    small_tol = [
        'ATOL={:.0e}'.format(small_atol), 'RTOL={:.0e}'.format(small_rtol)
    ]
    large_tol = ['ATOL={:.0e}'.format(atol), 'RTOL={:.0e}'.format(rtol)]
    #build the validation set for this timestep
    extra_args = [
        't_step={:.0e}'.format(small_time_step),
        't_end={:.0e}'.format(end_time)
    ]

    if not use_old_validation:
        with open('logerr', 'a') as errfile:
            subprocess.check_call([scons, 'cpu'] + arg_list + extra_args +
                                  small_tol,
                                  stdout=errfile)
            #run
            subprocess.check_call([
                pjoin(cwd(), valid_int),
                str(num_threads),
                str(num_conditions)
            ],
                                  stdout=errfile)
        #copy to saved data
        shutil.copy(pjoin(cwd(), 'log', keyfile),
                    pjoin(cwd(), 'log', 'valid.bin'))

    force_opt = True
    validator = np.fromfile(pjoin('log', 'valid.bin'), dtype='float64')
    validator = validator.reshape((-1, 1 + num_conditions * nvar))

    #force constant time steps
    arg_list += ['CONST_TIME_STEP=TRUE']
    with open('logfile', 'a') as file:
        with open('logerr', 'a') as errfile:
            for op in oploop:
                lang = op['lang']
                cache_opt = op['cache_opt']
                shared_mem = op['smem']
                __check_exit(
                    create_jacobian(lang=lang,
                                    mech_name=mech,
                                    therm_name=thermo,
                                    initial_state=initial_conditions,
                                    optimize_cache=cache_opt,
                                    multi_thread=num_threads,
                                    no_shared=not shared_mem,
                                    force_optimize=force_opt,
                                    build_path=build_path))
                if cache_opt:
                    #successful force optimize
                    force_opt = False
                file.write(
                    '\nlang:{}\n'
                    'cache_opt: {}\n'
                    'shared_mem: {}\n'.format(lang, cache_opt,
                                              (shared_mem and lang == 'cuda')))
                start = int(np.log10(end_time))
                for j in range(start, start - 6, -1):
                    t_step = np.power(10.0, j)
                    extra_args = [
                        't_step={:.0e}'.format(t_step),
                        't_end={:.0e}'.format(end_time)
                    ]
                    file.write('t_step={:.0e}\n'.format(t_step))
                    file.flush()
                    subprocess.check_call([scons, builder[lang]] + arg_list +
                                          extra_args + large_tol,
                                          stdout=errfile,
                                          stderr=errfile)
                    __execute(builder[lang], num_threads, num_conditions,
                              t_step)
                    __check_error(builder[lang], num_conditions, nvar,
                                  validator, atol, rtol, minor_species)
Example 9
    def test_read_initial_conditions(self):
        build_dir = self.store.build_dir
        obj_dir = self.store.obj_dir
        lib_dir = self.store.lib_dir
        setup = test_utils.get_read_ics_source()
        utils.create_dir(build_dir)
        utils.create_dir(obj_dir)
        utils.create_dir(lib_dir)
        oploop = OptionLoop(
            OrderedDict([
                # no need to test conv
                ('conp', [True]),
                ('order', ['C', 'F']),
                ('depth', [4, None]),
                ('width', [4, None]),
                ('lang', ['c'])
            ]))
        for state in oploop:
            if state['depth'] and state['width']:
                continue
            self.__cleanup(False)
            # create dummy loopy opts
            opts = type('', (object, ), state)()
            asplit = array_splitter(opts)

            # get source
            path = os.path.realpath(
                os.path.join(self.store.script_dir, os.pardir, 'kernel_utils',
                             'common', 'read_initial_conditions.c.in'))

            with open(path, 'r') as file:
                ric = Template(file.read())
            # subs
            ric = ric.safe_substitute(mechanism='mechanism.h',
                                      vectorization='vectorization.h')
            # write
            with open(os.path.join(build_dir, 'read_initial_conditions.c'),
                      'w') as file:
                file.write(ric)
            # write header
            write_aux(build_dir, opts, self.store.specs, self.store.reacs)
            # write setup
            with open(os.path.join(build_dir, 'setup.py'), 'w') as file:
                file.write(setup.safe_substitute(buildpath=build_dir))
            # copy read ics header to final dest
            shutil.copyfile(
                os.path.join(self.store.script_dir, os.pardir, 'kernel_utils',
                             'common', 'read_initial_conditions.h'),
                os.path.join(build_dir, 'read_initial_conditions.h'))
            # copy wrapper
            shutil.copyfile(
                os.path.join(self.store.script_dir, 'test_utils',
                             'read_ic_wrapper.pyx'),
                os.path.join(build_dir, 'read_ic_wrapper.pyx'))
            # setup
            python_str = 'python{}.{}'.format(sys.version_info[0],
                                              sys.version_info[1])
            call = [
                python_str,
                os.path.join(build_dir, 'setup.py'), 'build_ext',
                '--build-lib', lib_dir
            ]
            subprocess.check_call(call)
            # copy in tester
            shutil.copyfile(
                os.path.join(self.store.script_dir, 'test_utils',
                             'ric_tester.py'),
                os.path.join(lib_dir, 'ric_tester.py'))

            # For simplicity (and really, lack of need) we test CONP only
            # hence, the extra variable is the volume, while the fixed parameter
            # is the pressure

            # save phi, param in correct order
            phi = (self.store.phi_cp if opts.conp else self.store.phi_cv)
            save_phi, = asplit.split_numpy_arrays(phi)
            save_phi = save_phi.flatten(opts.order)
            param = self.store.P if opts.conp else self.store.V
            save_phi.tofile(os.path.join(lib_dir, 'phi_test.npy'))
            param.tofile(os.path.join(lib_dir, 'param_test.npy'))

            # save bin file
            out_file = np.concatenate(
                (
                    np.reshape(phi[:, 0], (-1, 1)),  # temperature
                    np.reshape(param, (-1, 1)),  # param
                    phi[:, 1:]),
                axis=1  # species
            )
            out_file = out_file.flatten('K')
            with open(os.path.join(lib_dir, 'data.bin'), 'wb') as file:
                out_file.tofile(file)

            # and run
            subprocess.check_call([
                python_str,
                os.path.join(lib_dir, 'ric_tester.py'), opts.order,
                str(self.store.test_size)
            ])
Example 10
def run(thedir,
        blacklist=[],
        force=False,
        repeats=5,
        num_cond=131072,
        threads=[6, 12],
        langs=['c', 'cuda'],
        atol=1e-10,
        rtol=1e-7):
    jthread = str(multiprocessing.cpu_count())

    make_sure_path_exists(os.path.join(thedir, 'output'))

    try:
        mechanism = os.path.join(thedir,
                                 glob.glob(os.path.join(thedir, '*.cti'))[0])
    except IndexError:
        print("Mechanism file not found in {}, skipping...".format(thedir))
        return

    home = os.getcwd()
    same_powers = get_powers(num_cond)
    diff_powers = get_powers(get_diff_ics_cond(thedir, mechanism))

    #copy the datafile
    shutil.copy(os.path.join(thedir, datafile),
                os.path.join(home, 'ign_data.bin'))

    opt_list = [False]
    smem_list = [True, False]
    t_list = [1e-6, 1e-4]
    ics_list = [False]
    fd_list = [True, False]

    c_params = OptionLoop(
        {
            'lang': 'c',
            'opt': opt_list,
            't_step': t_list,
            'same_ics': ics_list,
            'FD': fd_list
        }, lambda: False)
    cuda_params = OptionLoop(
        {
            'lang': 'cuda',
            'opt': opt_list,
            't_step': t_list,
            'smem': smem_list,
            'same_ics': ics_list,
            'FD': fd_list
        }, lambda: False)
    op = c_params + cuda_params
    pickley = None
    for state in op:
        opt = state['opt']
        smem = state['smem']
        t_step = state['t_step']
        same = state['same_ics']
        FD = state['FD']
        thepow = same_powers if same else diff_powers

        #custom rules so evaluation doesn't take so damn long
        if opt:
            continue
        #turn on FD for long timestep with H2 for direct comparison
        if FD and t_step == 1e-4 and 'H2' not in thedir:
            continue

        #generate mechanisms
        if 'c' in langs:
            mech_dir = 'cpu_{}'.format('co' if opt else 'nco')
            mech_dir = os.path.join(thedir, mech_dir) + os.path.sep
            make_sure_path_exists(mech_dir)
            if opt and pickley is not None:
                if pickley != os.path.join(mech_dir, 'optimized.pickle'):
                    shutil.copy(pickley,
                                os.path.join(mech_dir, 'optimized.pickle'))
            create_jacobian(lang='c',
                            mech_name=mechanism,
                            optimize_cache=opt,
                            build_path=mech_dir,
                            multi_thread=int(jthread))
            if opt and pickley is None:
                pickley = os.path.join(mech_dir, 'optimized.pickle')

        if 'cuda' in langs:
            gpu_mech_dir = 'gpu_{}_{}'.format('co' if opt else 'nco',
                                              'smem' if smem else 'nosmem')
            gpu_mech_dir = os.path.join(thedir, gpu_mech_dir)
            make_sure_path_exists(gpu_mech_dir)
            if opt and pickley is not None:
                if pickley != os.path.join(gpu_mech_dir, 'optimized.pickle'):
                    shutil.copy(pickley,
                                os.path.join(gpu_mech_dir, 'optimized.pickle'))
            create_jacobian(lang='cuda',
                            mech_name=mechanism,
                            optimize_cache=opt,
                            build_path=gpu_mech_dir,
                            no_shared=not smem,
                            multi_thread=int(jthread))
            if opt and pickley is None:
                pickley = os.path.join(gpu_mech_dir, 'optimized.pickle')

        #now build and run
        args = [
            '-j', jthread, 'DEBUG=False', 'FAST_MATH=False',
            'LOG_OUTPUT=False', 'SHUFFLE=False', 'LOG_END_ONLY=False',
            'PRINT=False', 't_step={:e}'.format(t_step),
            't_end={:e}'.format(t_step), 'DIVERGENCE_WARPS=0', 'CV_HMAX=0',
            'CV_MAX_STEPS=-1', 'ATOL={:e}'.format(atol),
            'RTOL={:e}'.format(rtol), 'FINITE_DIFFERENCE={}'.format(FD)
        ]
        args.append('SAME_IC={}'.format(same))

        #run with repeats
        if 'c' in langs:
            run_me = get_executables(blacklist + ['gpu'], inverse=['int'])
            subprocess.check_call([scons, 'cpu'] + args +
                                  ['mechanism_dir={}'.format(mech_dir)])
            for exe in run_me:
                for thread in threads:
                    for cond in thepow:
                        if 'exp' in exe and FD:
                            #the exponential integrators are not formulated for FD Jacobians
                            #thus we don't evaluate them
                            continue
                        filename = os.path.join(
                            thedir, 'output',
                            exe + '_{}_{}_{}_{}_{}_{:e}.txt'.format(
                                cond, thread, 'co' if opt else 'nco',
                                'sameic' if same else 'psric',
                                'FD' if FD else 'AJ', t_step))
                        my_repeats = repeats - check_file(filename)
                        with open(filename, 'a') as file:
                            for repeat in range(my_repeats):
                                subprocess.check_call([
                                    os.path.join(home, exe),
                                    str(thread),
                                    str(cond)
                                ],
                                                      stdout=file)

        if 'cuda' in langs:
            #run with repeats
            subprocess.check_call([scons, 'gpu'] + args +
                                  ['mechanism_dir={}'.format(gpu_mech_dir)])
            run_me = get_executables(blacklist, inverse=['int-gpu'])
            for exe in run_me:
                for cond in thepow:
                    if 'exp' in exe and FD:
                        #the exponential integrators are not formulated for FD Jacobians
                        #thus we don't evaluate them
                        continue
                    filename = os.path.join(
                        thedir, 'output',
                        exe + '_{}_{}_{}_{}_{}_{:e}.txt'.format(
                            cond, 'co' if opt else 'nco', 'smem' if smem else
                            'nosmem', 'sameic' if same else 'psric',
                            'FD' if FD else 'AJ', t_step))
                    my_repeats = repeats - check_file(filename)
                    with open(filename, 'a') as file:
                        for repeat in range(my_repeats):
                            subprocess.check_call(
                                [os.path.join(home, exe),
                                 str(cond)],
                                stdout=file)