Example #1
0
def main(srcFilesAndDirs=sys.argv[1:]):

    # declare constants

    ninit = 0
    nexit = 0
    npart = 0
    nhdf5 = 0
    nconsts = 0
    nkernels = 0
    consts = []
    kernels = []
    sets = []
    kernels_in_files = []
    macro_defs = {}

    OP_ID = 1
    OP_GBL = 2
    OP_MAP = 3

    OP_READ = 1
    OP_WRITE = 2
    OP_RW = 3
    OP_INC = 4
    OP_MAX = 5
    OP_MIN = 6

    auto_soa = os.getenv('OP_AUTO_SOA', '0')

    OP_accs_labels = [
        'OP_READ', 'OP_WRITE', 'OP_RW', 'OP_INC', 'OP_MAX', 'OP_MIN'
    ]

    src_files = [s for s in srcFilesAndDirs if os.path.isfile(s)]
    src_dirs = [d for d in srcFilesAndDirs if os.path.isdir(d)]

    ## Extract macro definitions:
    for src_file in src_files:
        print("Parsing file '" + src_file + "' for macro definitions.")
        with open(src_file, 'r') as f:
            text = f.read()

        local_defs = op_parse_macro_defs(text)
        for k in local_defs.keys():
            if (k in macro_defs) and (local_defs[k] != macro_defs[k]):
                msg = "WARNING: Have found two different definitions for macro '{}': '{}' and '{}'. Using the first definition.".format(
                    k, macro_defs[k], local_defs[k])
                print(msg)
                continue
            else:
                macro_defs[k] = local_defs[k]
    self_evaluate_macro_defs(macro_defs)

    ## Identify global variables already declared as 'extern':
    declared_globals = []
    for src_file in src_files:
        with open(src_file, 'r') as f:
            text = f.read()
        declared_globals += extract_declared_globals(text)

    ## Loop over all input source files to search for op_par_loop calls
    kernels_in_files = [[] for _ in range(len(srcFilesAndDirs))]
    src_file_num = -1
    for src_file in src_files:
        src_file_num = src_file_num + 1
        print("Processing file " + str(src_file_num+1) + " of " + str(len(src_files)) + \
              ": " + src_file)
        with open(src_file, 'r') as f:
            text = f.read()

        any_soa = 0

        # check for op_init/op_exit/op_partition/op_hdf5 calls
        inits, exits, parts, hdf5s = op_parse_calls(text)

        if inits + exits + parts + hdf5s > 0:
            print ' '
        if inits > 0:
            print 'contains op_init call'
            if auto_soa <> '0':
                text = append_init_soa(text)
        if exits > 0:
            print 'contains op_exit call'
        if parts > 0:
            print 'contains op_partition call'
        if hdf5s > 0:
            print 'contains op_hdf5 calls'

        ninit = ninit + inits
        nexit = nexit + exits
        npart = npart + parts
        nhdf5 = nhdf5 + hdf5s

        # parse and process constants

        const_args = op_decl_const_parse(text)
        set_list = op_decl_set_parse(text)
        for i in range(0, len(set_list)):
            sets.append(set_list[i])

        # cleanup '&' symbols from name and convert dim to integer
        for i in range(0, len(const_args)):
            const_args[i]['dim'] = evaluate_macro_defs_in_string(
                macro_defs, const_args[i]['dim'])

            if const_args[i]['name'][0] == '&':
                const_args[i]['name'] = const_args[i]['name'][1:]
                const_args[i]['dim'] = int(const_args[i]['dim'])

        # check for repeats
        nconsts = 0
        for i in range(0, len(const_args)):
            repeat = 0
            name = const_args[i]['name']
            for c in range(0, nconsts):
                if const_args[i]['name'] == consts[c]['name']:
                    repeat = 1
                    if const_args[i]['type'] != consts[c]['type']:
                        print 'type mismatch in repeated op_decl_const'
                    if const_args[i]['dim'] != consts[c]['dim']:
                        print 'size mismatch in repeated op_decl_const'

            if repeat > 0:
                print 'repeated global constant ' + const_args[i]['name']
            else:
                print '\nglobal constant (' + const_args[i]['name'].strip() \
                    + ') of size ' + str(const_args[i]['dim'])

            # store away in master list
            if repeat == 0:
                nconsts = nconsts + 1
                temp = {
                    'dim': const_args[i]['dim'],
                    'type': const_args[i]['type'].strip(),
                    'name': const_args[i]['name'].strip()
                }
                temp["user_declared"] = temp["name"] in declared_globals
                consts.append(temp)

        # parse and process op_par_loop calls

        loop_args = op_par_loop_parse(text)
        for i in range(0, len(loop_args)):
            name = loop_args[i]['name1']
            nargs = loop_args[i]['nargs']
            print '\nprocessing kernel ' + name + ' with ' + str(
                nargs) + ' arguments',

            # process arguments

            var = [''] * nargs
            idxs = [0] * nargs
            dims = [''] * nargs
            maps = [0] * nargs
            mapnames = [''] * nargs
            typs = [''] * nargs
            accs = [0] * nargs
            soaflags = [0] * nargs
            optflags = [0] * nargs
            any_opt = 0

            for m in range(0, nargs):
                argm = loop_args[i]['args'][m]
                argm['dim'] = evaluate_macro_defs_in_string(
                    macro_defs, argm['dim'])

                arg_type = loop_args[i]['args'][m]['type']
                args = loop_args[i]['args'][m]

                if arg_type.strip() == 'op_arg_dat' or arg_type.strip(
                ) == 'op_opt_arg_dat':
                    argm['idx'] = evaluate_macro_defs_in_string(
                        macro_defs, argm['idx'])

                if arg_type.strip() == 'op_arg_dat' or arg_type.strip(
                ) == 'op_opt_arg_dat':
                    var[m] = args['dat']
                    idxs[m] = args['idx']
                    if arg_type.strip() == 'op_opt_arg_dat':
                        any_opt = 1

                    if str(args['map']).strip() == 'OP_ID':
                        maps[m] = OP_ID
                        if int(idxs[m]) != -1:
                            print 'invalid index for argument' + str(m)
                    else:
                        maps[m] = OP_MAP
                        mapnames[m] = str(args['map']).strip()

                    dims[m] = args['dim']
                    soa_loc = args['typ'].find(':soa')
                    if ((auto_soa == '1')
                            and (((not dims[m].isdigit()) or int(dims[m]) > 1))
                            and (soa_loc < 0)):
                        soa_loc = len(args['typ']) - 1

                    if soa_loc > 0:
                        soaflags[m] = 1
                        any_soa = 1
                        typs[m] = args['typ'][1:soa_loc]
                    else:
                        typs[m] = args['typ'][1:-1]

                    l = -1
                    for l in range(0, len(OP_accs_labels)):
                        if args['acc'].strip() == OP_accs_labels[l].strip():
                            break

                    if l == -1:
                        print 'unknown access type for argument ' + str(m)
                    else:
                        accs[m] = l + 1

                if arg_type.strip() == 'op_opt_arg_dat':
                    optflags[m] = 1
                else:
                    optflags[m] = 0

                if arg_type.strip() == 'op_arg_gbl':
                    maps[m] = OP_GBL
                    var[m] = args['data']
                    dims[m] = args['dim']
                    typs[m] = args['typ'][1:-1]
                    optflags[m] = 0

                    l = -1
                    for l in range(0, len(OP_accs_labels)):
                        if args['acc'].strip() == OP_accs_labels[l].strip():
                            break

                    if l == -1:
                        print 'unknown access type for argument ' + str(m)
                    else:
                        accs[m] = l + 1

                if (maps[m] == OP_GBL) and (accs[m] == OP_WRITE
                                            or accs[m] == OP_RW):
                    print 'invalid access type for argument ' + str(m)

                if (maps[m] != OP_GBL) and (accs[m] == OP_MIN
                                            or accs[m] == OP_MAX):
                    print 'invalid access type for argument ' + str(m)

            print ' '

            # identify indirect datasets

            ninds = 0
            inds = [0] * nargs
            invinds = [0] * nargs
            indtyps = [''] * nargs
            inddims = [''] * nargs
            indaccs = [0] * nargs
            invmapinds = [0] * nargs
            mapinds = [0] * nargs

            j = [i for i, x in enumerate(maps) if x == OP_MAP]

            while len(j) > 0:

                indtyps[ninds] = typs[j[0]]
                inddims[ninds] = dims[j[0]]
                indaccs[ninds] = accs[j[0]]
                invinds[ninds] = j[0]  # inverse mapping
                ninds = ninds + 1
                for i in range(0, len(j)):
                    if var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]] \
                        and accs[j[0]] == accs[j[i]] and mapnames[j[0]] == mapnames[j[i]]:  # same variable
                        inds[j[i]] = ninds

                k = []
                for i in range(0, len(j)):
                    if not (var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]]
                            and accs[j[0]] == accs[j[i]] and mapnames[j[0]]
                            == mapnames[j[i]]):  # same variable
                        k = k + [j[i]]
                j = k

            if ninds > 0:
                invmapinds = invinds[:]
                for i in range(0, ninds):
                    for j in range(0, i):
                        if (mapnames[invinds[i]] == mapnames[invinds[j]]):
                            invmapinds[i] = invmapinds[j]

                for i in range(0, nargs):
                    mapinds[i] = i
                    for j in range(0, i):
                        if (maps[i] == OP_MAP) and (mapnames[i] == mapnames[j]
                                                    ) and (idxs[i] == idxs[j]):
                            mapinds[i] = mapinds[j]

            # check for repeats

            repeat = False
            rep1 = False
            rep2 = False
            which_file = -1
            for nk in range(0, nkernels):
                rep1 = kernels[nk]['name'] == name and \
                  kernels[nk]['nargs'] == nargs and \
                  kernels[nk]['ninds'] == ninds
                if rep1:
                    rep2 = True
                    for arg in range(0, nargs):
                        rep2 = rep2 and \
                          kernels[nk]['dims'][arg] == dims[arg] and \
                          kernels[nk]['maps'][arg] == maps[arg] and \
                          kernels[nk]['typs'][arg] == typs[arg] and \
                          kernels[nk]['accs'][arg] == accs[arg] and \
                          kernels[nk]['idxs'][arg] == idxs[arg] and \
                          kernels[nk]['soaflags'][arg] == soaflags[arg] and \
                          kernels[nk]['optflags'][arg] == optflags[arg] and \
                          kernels[nk]['inds'][arg] == inds[arg]

                    for arg in range(0, ninds):
                        rep2 = rep2 and \
                          kernels[nk]['inddims'][arg] == inddims[arg] and \
                          kernels[nk]['indaccs'][arg] == indaccs[arg] and \
                          kernels[nk]['indtyps'][arg] == indtyps[arg] and \
                          kernels[nk]['invinds'][arg] == invinds[arg]
                    if rep2:
                        print 'repeated kernel with compatible arguments: ' + \
                            kernels[nk]['name'],
                        repeat = True
                        which_file = nk
                    else:
                        print 'repeated kernel with incompatible arguments: ERROR'
                        break

            # output various diagnostics

            if not repeat:
                print '  local constants:',
                for arg in range(0, nargs):
                    if maps[arg] == OP_GBL and accs[arg] == OP_READ:
                        print str(arg),
                print '\n  global reductions:',
                for arg in range(0, nargs):
                    if maps[arg] == OP_GBL and accs[arg] != OP_READ:
                        print str(arg),
                print '\n  direct arguments:',
                for arg in range(0, nargs):
                    if maps[arg] == OP_ID:
                        print str(arg),
                print '\n  indirect arguments:',
                for arg in range(0, nargs):
                    if maps[arg] == OP_MAP:
                        print str(arg),
                if ninds > 0:
                    print '\n  number of indirect datasets: ' + str(ninds),
                if any_opt:
                    print '\n  optional arguments:',
                    for arg in range(0, nargs):
                        if optflags[arg] == 1:
                            print str(arg),

                print '\n'

            # store away in master list

            if not repeat:
                nkernels = nkernels + 1
                temp = {
                    'name': name,
                    'nargs': nargs,
                    'dims': dims,
                    'maps': maps,
                    'var': var,
                    'typs': typs,
                    'accs': accs,
                    'idxs': idxs,
                    'inds': inds,
                    'soaflags': soaflags,
                    'optflags': optflags,
                    'ninds': ninds,
                    'inddims': inddims,
                    'indaccs': indaccs,
                    'indtyps': indtyps,
                    'invinds': invinds,
                    'mapnames': mapnames,
                    'mapinds': mapinds,
                    'invmapinds': invmapinds
                }
                kernels.append(temp)
                (kernels_in_files[src_file_num]).append(nkernels - 1)
            else:
                append = 1
                for in_file in range(0, len(kernels_in_files[src_file_num])):
                    if kernels_in_files[src_file_num][in_file] == which_file:
                        append = 0
                if append == 1:
                    (kernels_in_files[src_file_num]).append(which_file)

        # output new source file
        src_filename = os.path.basename(src_file)
        src_dirpath = os.path.dirname(src_file)
        if src_dirpath[0:2] == "./":
            src_dirpath = src_dirpath[2:]

        op_extension = "_op"
        if '.' in src_filename:
            src_filename_pieces = src_filename.split('.')
            n = len(src_filename_pieces)
            src_filename_extension = src_filename_pieces[n - 1]
            op_src_filename = '.'.join(src_filename_pieces[0:(
                n - 1)]) + op_extension + '.' + src_filename_extension
        else:
            op_src_filename = src_filename + op_extension
        op_src_filepath = op_src_filename
        op_src_dirpath = ""
        if src_dirpath != "":
            src_dirpath_pieces = src_dirpath.split('/')
            root_dir = src_dirpath_pieces[0]
            if len(src_dirpath_pieces) == 0:
                rem_dirpath = ''
            else:
                rem_dirpath = '/'.join(src_dirpath_pieces[1:])
            op_src_dirpath = os.path.join(root_dir + "_op", rem_dirpath)
            op_src_filepath = os.path.join(op_src_dirpath, op_src_filename)

        if op_src_dirpath != "" and not os.path.exists(op_src_dirpath):
            os.makedirs(op_src_dirpath)
        fid = open(op_src_filepath, 'w')
        date = datetime.datetime.now()
        #fid.write('//\n// auto-generated by op2.py on ' +
        #          date.strftime("%Y-%m-%d %H:%M") + '\n//\n\n')
        fid.write('//\n// auto-generated by op2.py\n//\n\n')

        loc_old = 0

        # read original file and locate header location
        header_len = 11
        loc_header = [text.find("op_seq.h")]
        if loc_header[0] == -1:
            header_len = 13
            loc_header = [text.find("op_lib_cpp.h")]

        # get locations of all op_decl_consts
        n_consts = len(const_args)
        loc_consts = [0] * n_consts
        for n in range(0, n_consts):
            loc_consts[n] = const_args[n]['loc']

        # get locations of all op_par_loops
        n_loops = len(loop_args)
        loc_loops = [0] * n_loops
        for n in range(0, n_loops):
            loc_loops[n] = loop_args[n]['loc']

        locs = sorted(loc_header + loc_consts + loc_loops)

        # process header, loops and constants
        for loc in range(0, len(locs)):
            if locs[loc] != -1:
                fid.write(text[loc_old:locs[loc] - 1])
                loc_old = locs[loc] - 1

            indent = ''
            ind = 0
            while 1:
                if text[locs[loc] - ind] == '\n':
                    break
                indent = indent + ' '
                ind = ind + 1

            if (locs[loc] in loc_header) and (locs[loc] != -1):
                fid.write(' "op_lib_cpp.h"\n\n')
                fid.write('//\n// op_par_loop declarations\n//\n')
                fid.write(
                    '#ifdef OPENACC\n#ifdef __cplusplus\nextern "C" {\n#endif\n#endif\n'
                )
                for k_iter in range(0, len(kernels_in_files[src_file_num])):
                    k = kernels_in_files[src_file_num][k_iter]
                    line = '\nvoid op_par_loop_' + \
                      kernels[k]['name'] + '(char const *, op_set,\n'
                    for n in range(1, kernels[k]['nargs']):
                        line = line + '  op_arg,\n'
                    line = line + '  op_arg );\n'
                    fid.write(line)

                fid.write(
                    '#ifdef OPENACC\n#ifdef __cplusplus\n}\n#endif\n#endif\n')
                fid.write('\n')
                loc_old = locs[loc] + header_len - 1
                continue

            if locs[loc] in loc_loops:
                indent = indent + ' ' * len('op_par_loop')
                endofcall = text.find(';', locs[loc])
                curr_loop = loc_loops.index(locs[loc])
                name = loop_args[curr_loop]['name1']
                line = str(' op_par_loop_' + name + '(' +
                           loop_args[curr_loop]['name2'] + ',' +
                           loop_args[curr_loop]['set'] + ',\n' + indent)

                for arguments in range(0, loop_args[curr_loop]['nargs']):
                    elem = loop_args[curr_loop]['args'][arguments]
                    if elem['type'] == 'op_arg_dat':
                        line = line + elem['type'] + '(' + elem['dat'] + \
                          ',' + elem['idx'] + ',' + elem['map'] + \
                          ',' + elem['dim'] + ',' + elem['typ'] + \
                          ',' + elem['acc'] + '),\n' + indent
                    elif elem['type'] == 'op_opt_arg_dat':
                        line = line + elem['type'] + '(' \
                              + elem['opt'] + ',' + elem['dat'] + \
                          ',' + elem['idx'] + ',' + elem['map'] + \
                          ',' + elem['dim'] + ',' + elem['typ'] + \
                          ',' + elem['acc'] + '),\n' + indent

                    elif elem['type'] == 'op_arg_gbl':
                        line = line + elem['type'] + '(' + elem['data'] + \
                          ',' + elem['dim'] + ',' + elem['typ'] + \
                          ',' + elem['acc'] + '),\n' + indent

                fid.write(line[0:-len(indent) - 2] + ');')

                loc_old = endofcall + 1
                continue

            if locs[loc] in loc_consts:
                curr_const = loc_consts.index(locs[loc])
                endofcall = text.find(';', locs[loc])
                name = const_args[curr_const]['name']
                fid.write(indent[0:-2] + 'op_decl_const2("' + name.strip() +
                          '",' + str(const_args[curr_const]['dim']) + ',' +
                          const_args[curr_const]['type'] + ',' +
                          const_args[curr_const]['name2'].strip() + ');')
                loc_old = endofcall + 1
                continue

        fid.write(text[loc_old:])
        fid.close()
    # end of loop over input source files

    ## Loop over kernels, looking for a header file named after each
    ## kernel in either working directory or one of the input-supplied
    ## directories:
    for nk in xrange(0, len(kernels)):
        k_data = kernels[nk]
        k_name = k_data["name"]
        if not "decl_filepath" in k_data.keys():
            src_file = k_name + ".h"
            if os.path.isfile(src_file):
                with open(src_file, 'r') as f:
                    text = f.read()
                if op_check_kernel_in_text(text, k_name):
                    k_data["decl_filepath"] = src_file
                    continue

            for dirname in src_dirs:
                filepath = os.path.join(dirname, src_file)
                if os.path.isfile(filepath):
                    with open(filepath, 'r') as f:
                        text = f.read()
                    if op_check_kernel_in_text(text, k_name):
                        k_data["decl_filepath"] = filepath
                        break

    ## Any kernel declarations still not found must exist in files
    ## not named after the kernel. Search through content of all
    ## input-supplied files, and through all files of input-supplied
    ## directories:
    for nk in xrange(0, len(kernels)):
        if not "decl_filepath" in kernels[nk].keys():
            k_data = kernels[nk]
            k_name = k_data["name"]

            for src_file in src_files:
                with open(src_file, 'r') as f:
                    text = f.read()
                if op_check_kernel_in_text(text, k_name):
                    k_data["decl_filepath"] = src_file
                    break

            if not "decl_filepath" in k_data.keys():
                for src_dir in src_dirs:
                    for src_dir_subfile in [
                            s for s in os.listdir(src_dir)
                            if os.path.isfile(os.path.join(src_dir, s))
                    ]:
                        src_dir_subfilepath = os.path.join(
                            src_dir, src_dir_subfile)
                        with open(src_dir_subfilepath, 'r') as f:
                            text = f.read()
                        if op_check_kernel_in_text(text, k_name):
                            k_data["decl_filepath"] = src_dir_subfilepath
                            break
                    if "decl_filepath" in k_data.keys():
                        break

    fail = False
    for nk in xrange(0, len(kernels)):
        if not "decl_filepath" in kernels[nk].keys():
            fail = True
            print("Declaration not found for kernel " + kernels[nk]["name"])
    if fail:
        exit(2)

    #  errors and warnings

    if ninit == 0:
        print ' '
        print '-----------------------------'
        print '  WARNING: no call to op_init'
        if auto_soa == 1:
            print '  WARNING: code generated with OP_AUTO_SOA,\n but couldn\'t modify op_init to pass\n an additional parameter of 1.\n Please make sure OP_AUTO_SOA is set when executing'
        print '-----------------------------'

    if nexit == 0:
        print ' '
        print '-------------------------------'
        print '  WARNING: no call to op_exit  '
        print '-------------------------------'

    if npart == 0 and nhdf5 > 0:
        print ' '
        print '---------------------------------------------------'
        print '  WARNING: hdf5 calls without call to op_partition '
        print '---------------------------------------------------'

    #
    #  finally, generate target-specific kernel files
    #
    masterFile = str(srcFilesAndDirs[0])

    op2_gen_seq(
        masterFile, date, consts,
        kernels)  # MPI+GENSEQ version - initial version, no vectorisation
    # Vec translator is not yet ready for release, eg it cannot translate the 'aero' app.
    op2_gen_mpi_vec(
        masterFile, date, consts, kernels
    )  # MPI+GENSEQ with code that gets auto vectorised with intel compiler (version 15.0 and above)

    #code generators for OpenMP parallelisation with MPI
    #op2_gen_openmp(masterFile, date, consts, kernels) # Initial OpenMP code generator
    op2_gen_openmp_simple(
        masterFile, date, consts,
        kernels)  # Simplified and Optimized OpenMP code generator
    op2_gen_openacc(masterFile, date, consts,
                    kernels)  # Simplified and Optimized OpenMP code generator

    #code generators for NVIDIA GPUs with CUDA
    #op2_gen_cuda(masterFile, date, consts, kernels,sets) # Optimized for Fermi GPUs
    op2_gen_cuda_simple(masterFile, date, consts, kernels, sets,
                        macro_defs)  # Optimized for Kepler GPUs

    # generates openmp code as well as cuda code into the same file
    op2_gen_cuda_simple_hyb(
        masterFile, date, consts, kernels,
        sets)  # CPU and GPU will then do comutations as a hybrid application

    #code generator for GPUs with OpenMP4.5
    op2_gen_openmp4(masterFile, date, consts, kernels)
Example #2
0
def main():

    # declare constants

    ninit = 0
    nexit = 0
    npart = 0
    nhdf5 = 0
    nconsts = 0
    nkernels = 0
    consts = []
    kernels = []
    sets = []
    kernels_in_files = []

    OP_ID = 1
    OP_GBL = 2
    OP_MAP = 3

    OP_READ = 1
    OP_WRITE = 2
    OP_RW = 3
    OP_INC = 4
    OP_MAX = 5
    OP_MIN = 6

    auto_soa=os.getenv('OP_AUTO_SOA','0')

    OP_accs_labels = ['OP_READ', 'OP_WRITE', 'OP_RW', 'OP_INC',
                      'OP_MAX', 'OP_MIN']

    #  loop over all input source files

    kernels_in_files = [[] for _ in range(len(sys.argv) - 1)]
    for a in range(1, len(sys.argv)):
        print 'processing file ' + str(a) + ' of ' + str(len(sys.argv) - 1) + \
              ' ' + str(sys.argv[a])

        src_file = str(sys.argv[a])
        f = open(src_file, 'r')
        text = f.read()
        any_soa = 0

        # check for op_init/op_exit/op_partition/op_hdf5 calls

        inits, exits, parts, hdf5s = op_parse_calls(text)

        if inits + exits + parts + hdf5s > 0:
            print ' '
        if inits > 0:
            print'contains op_init call'
            if auto_soa<>'0':
              text = append_init_soa(text)
        if exits > 0:
            print'contains op_exit call'
        if parts > 0:
            print'contains op_partition call'
        if hdf5s > 0:
            print'contains op_hdf5 calls'

        ninit = ninit + inits
        nexit = nexit + exits
        npart = npart + parts
        nhdf5 = nhdf5 + hdf5s

        # parse and process constants

        const_args = op_decl_const_parse(text)
        set_list = op_decl_set_parse(text)
        for i in range(0,len(set_list)):
          sets.append(set_list[i])

        # cleanup '&' symbols from name and convert dim to integer
        for i in range(0, len(const_args)):
            if const_args[i]['name'][0] == '&':
                const_args[i]['name'] = const_args[i]['name'][1:]
                const_args[i]['dim'] = int(const_args[i]['dim'])

        # check for repeats
        nconsts = 0
        for i in range(0, len(const_args)):
            repeat = 0
            name = const_args[i]['name']
            for c in range(0, nconsts):
                if const_args[i]['name'] == consts[c]['name']:
                    repeat = 1
                    if const_args[i]['type'] != consts[c]['type']:
                        print 'type mismatch in repeated op_decl_const'
                    if const_args[i]['dim'] != consts[c]['dim']:
                        print 'size mismatch in repeated op_decl_const'

            if repeat > 0:
                print 'repeated global constant ' + const_args[i]['name']
            else:
                print '\nglobal constant (' + const_args[i]['name'].strip() \
                      + ') of size ' + str(const_args[i]['dim'])

            # store away in master list
            if repeat == 0:
                nconsts = nconsts + 1
                temp = {'dim': const_args[i]['dim'],
                        'type': const_args[i]['type'].strip(),
                        'name': const_args[i]['name'].strip()}
                consts.append(temp)

        # parse and process op_par_loop calls

        loop_args = op_par_loop_parse(text)
        for i in range(0, len(loop_args)):
            name = loop_args[i]['name1']
            nargs = loop_args[i]['nargs']
            print '\nprocessing kernel ' + name + ' with ' + str(nargs) + ' arguments',

            # process arguments

            var = [''] * nargs
            idxs = [0] * nargs
            dims = [''] * nargs
            maps = [0] * nargs
            mapnames = ['']*nargs
            typs = [''] * nargs
            accs = [0] * nargs
            soaflags = [0] * nargs

            for m in range(0, nargs):
                arg_type = loop_args[i]['args'][m]['type']
                args = loop_args[i]['args'][m]

                if arg_type.strip() == 'op_arg_dat':
                    var[m] = args['dat']
                    idxs[m] = args['idx']

                    if str(args['map']).strip() == 'OP_ID':
                        maps[m] = OP_ID
                        if int(idxs[m]) != -1:
                            print 'invalid index for argument' + str(m)
                    else:
                        maps[m] = OP_MAP
                        mapnames[m] = str(args['map']).strip()

                    dims[m] = args['dim']
                    soa_loc = args['typ'].find(':soa')
                    if ((auto_soa=='1') and (((not dims[m].isdigit()) or int(dims[m])>1)) and (soa_loc < 0)):
                        soa_loc = len(args['typ'])-1

                    if soa_loc > 0:
                        soaflags[m] = 1
                        any_soa = 1
                        typs[m] = args['typ'][1:soa_loc]
                    else:
                        typs[m] = args['typ'][1:-1]


                    l = -1
                    for l in range(0, len(OP_accs_labels)):
                        if args['acc'].strip() == OP_accs_labels[l].strip():
                            break

                    if l == -1:
                        print 'unknown access type for argument ' + str(m)
                    else:
                        accs[m] = l + 1

                if arg_type.strip() == 'op_arg_gbl':
                    maps[m] = OP_GBL
                    var[m] = args['data']
                    dims[m] = args['dim']
                    typs[m] = args['typ'][1:-1]

                    l = -1
                    for l in range(0, len(OP_accs_labels)):
                        if args['acc'].strip() == OP_accs_labels[l].strip():
                            break

                    if l == -1:
                        print 'unknown access type for argument ' + str(m)
                    else:
                        accs[m] = l + 1

                if (maps[m] == OP_GBL) and (accs[m] == OP_WRITE or accs[m] == OP_RW):
                    print 'invalid access type for argument ' + str(m)

                if (maps[m] != OP_GBL) and (accs[m] == OP_MIN or accs[m] == OP_MAX):
                    print 'invalid access type for argument ' + str(m)


            print ' '

            # identify indirect datasets

            ninds = 0
            inds = [0] * nargs
            invinds = [0] * nargs
            indtyps = [''] * nargs
            inddims = [''] * nargs
            indaccs = [0] * nargs
            invmapinds = [0]*nargs
            mapinds = [0]*nargs

            j = [i for i, x in enumerate(maps) if x == OP_MAP]

            while len(j) > 0:

                indtyps[ninds] = typs[j[0]]
                inddims[ninds] = dims[j[0]]
                indaccs[ninds] = accs[j[0]]
                invinds[ninds] = j[0]  # inverse mapping
                ninds = ninds + 1
                for i in range(0, len(j)):
                    if var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]] \
                            and accs[j[0]] == accs[j[i]] and mapnames[j[0]] == mapnames[j[i]]:  # same variable
                        inds[j[i]] = ninds

                k = []
                for i in range(0, len(j)):
                    if not (var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]]
                            and accs[j[0]] == accs[j[i]] and mapnames[j[0]] == mapnames[j[i]]):  # same variable
                        k = k + [j[i]]
                j = k

            if ninds > 0:
              invmapinds = invinds[:]
              for i in range(0,ninds):
                for j in range(0,i):
                  if (mapnames[invinds[i]] == mapnames[invinds[j]]):
                    invmapinds[i] = invmapinds[j]

              for i in range(0,nargs):
                mapinds[i] = i
                for j in range(0,i):
                  if (maps[i] == OP_MAP) and (mapnames[i] == mapnames[j]) and (idxs[i] == idxs[j]):
                    mapinds[i] = mapinds[j]

            # check for repeats

            repeat = False
            rep1 = False
            rep2 = False
            which_file = -1
            for nk in range(0, nkernels):
                rep1 = kernels[nk]['name'] == name and \
                    kernels[nk]['nargs'] == nargs and \
                    kernels[nk]['ninds'] == ninds
                if rep1:
                    rep2 = True
                    for arg in range(0, nargs):
                        rep2 = rep2 and \
                            kernels[nk]['dims'][arg] == dims[arg] and \
                            kernels[nk]['maps'][arg] == maps[arg] and \
                            kernels[nk]['typs'][arg] == typs[arg] and \
                            kernels[nk]['accs'][arg] == accs[arg] and \
                            kernels[nk]['idxs'][arg] == idxs[arg] and \
                            kernels[nk]['soaflags'][arg] == soaflags[arg] and \
                            kernels[nk]['inds'][arg] == inds[arg]

                    for arg in range(0, ninds):
                        rep2 = rep2 and \
                            kernels[nk]['inddims'][arg] == inddims[arg] and \
                            kernels[nk]['indaccs'][arg] == indaccs[arg] and \
                            kernels[nk]['indtyps'][arg] == indtyps[arg] and \
                            kernels[nk]['invinds'][arg] == invinds[arg]
                    if rep2:
                        print 'repeated kernel with compatible arguments: ' + \
                              kernels[nk]['name'],
                        repeat = True
                        which_file = nk
                    else:
                        print 'repeated kernel with incompatible arguments: ERROR'
                        break

            # output various diagnostics

            if not repeat:
                print '  local constants:',
                for arg in range(0, nargs):
                    if maps[arg] == OP_GBL and accs[arg] == OP_READ:
                        print str(arg),
                print '\n  global reductions:',
                for arg in range(0, nargs):
                    if maps[arg] == OP_GBL and accs[arg] != OP_READ:
                        print str(arg),
                print '\n  direct arguments:',
                for arg in range(0, nargs):
                    if maps[arg] == OP_ID:
                        print str(arg),
                print '\n  indirect arguments:',
                for arg in range(0, nargs):
                    if maps[arg] == OP_MAP:
                        print str(arg),
                if ninds > 0:
                    print '\n  number of indirect datasets: ' + str(ninds),

                print '\n'

            # store away in master list

            if not repeat:
                nkernels = nkernels + 1
                temp = {'name': name,
                        'nargs': nargs,
                        'dims': dims,
                        'maps': maps,
                        'var': var,
                        'typs': typs,
                        'accs': accs,
                        'idxs': idxs,
                        'inds': inds,
                        'soaflags': soaflags,

                        'ninds': ninds,
                        'inddims': inddims,
                        'indaccs': indaccs,
                        'indtyps': indtyps,
                        'invinds': invinds,
                        'mapnames' : mapnames,
                        'mapinds': mapinds,
                        'invmapinds' : invmapinds}
                kernels.append(temp)
                (kernels_in_files[a - 1]).append(nkernels - 1)
            else:
                append = 1
                for in_file in range(0, len(kernels_in_files[a - 1])):
                    if kernels_in_files[a - 1][in_file] == which_file:
                        append = 0
                if append == 1:
                    (kernels_in_files[a - 1]).append(which_file)

        # output new source file

        fid = open(src_file.split('.')[0] + '_op.cpp', 'w')
        date = datetime.datetime.now()
        #fid.write('//\n// auto-generated by op2.py on ' +
        #          date.strftime("%Y-%m-%d %H:%M") + '\n//\n\n')
        fid.write('//\n// auto-generated by op2.py\n//\n\n')

        loc_old = 0

        # read original file and locate header location
        header_len = 11
        loc_header = [text.find("op_seq.h")]
        if loc_header[0] == -1:
          header_len = 13
          loc_header = [text.find("op_lib_cpp.h")]

        # get locations of all op_decl_consts
        n_consts = len(const_args)
        loc_consts = [0] * n_consts
        for n in range(0, n_consts):
            loc_consts[n] = const_args[n]['loc']

        # get locations of all op_par_loops
        n_loops = len(loop_args)
        loc_loops = [0] * n_loops
        for n in range(0, n_loops):
            loc_loops[n] = loop_args[n]['loc']

        locs = sorted(loc_header + loc_consts + loc_loops)

        # process header, loops and constants
        for loc in range(0, len(locs)):
            if locs[loc] != -1:
                fid.write(text[loc_old:locs[loc] - 1])
                loc_old = locs[loc] - 1

            indent = ''
            ind = 0
            while 1:
                if text[locs[loc] - ind] == '\n':
                    break
                indent = indent + ' '
                ind = ind + 1

            if (locs[loc] in loc_header) and (locs[loc] != -1):
                fid.write(' "op_lib_cpp.h"\n\n')
                fid.write('//\n// op_par_loop declarations\n//\n')
                fid.write('#ifdef OPENACC\n#ifdef __cplusplus\nextern "C" {\n#endif\n#endif\n')
                for k_iter in range(0, len(kernels_in_files[a - 1])):
                    k = kernels_in_files[a - 1][k_iter]
                    line = '\nvoid op_par_loop_' + \
                        kernels[k]['name'] + '(char const *, op_set,\n'
                    for n in range(1, kernels[k]['nargs']):
                        line = line + '  op_arg,\n'
                    line = line + '  op_arg );\n'
                    fid.write(line)

                fid.write('#ifdef OPENACC\n#ifdef __cplusplus\n}\n#endif\n#endif\n')
                fid.write('\n')
                loc_old = locs[loc] + header_len-1
                continue

            if locs[loc] in loc_loops:
                indent = indent + ' ' * len('op_par_loop')
                endofcall = text.find(';', locs[loc])
                curr_loop = loc_loops.index(locs[loc])
                name = loop_args[curr_loop]['name1']
                line = str(' op_par_loop_' + name + '(' +
                           loop_args[curr_loop]['name2'] + ',' +
                           loop_args[curr_loop]['set'] + ',\n' + indent)

                for arguments in range(0, loop_args[curr_loop]['nargs']):
                    elem = loop_args[curr_loop]['args'][arguments]
                    if elem['type'] == 'op_arg_dat':
                        line = line + elem['type'] + '(' + elem['dat'] + \
                            ',' + elem['idx'] + ',' + elem['map'] + \
                            ',' + elem['dim'] + ',' + elem['typ'] + \
                            ',' + elem['acc'] + '),\n' + indent
                    elif elem['type'] == 'op_arg_gbl':
                        line = line + elem['type'] + '(' + elem['data'] + \
                            ',' + elem['dim'] + ',' + elem['typ'] + \
                            ',' + elem['acc'] + '),\n' + indent

                fid.write(line[0:-len(indent) - 2] + ');')

                loc_old = endofcall + 1
                continue

            if locs[loc] in loc_consts:
                curr_const = loc_consts.index(locs[loc])
                endofcall = text.find(';', locs[loc])
                name = const_args[curr_const]['name']
                fid.write(indent[0:-2] + 'op_decl_const2("' + name.strip() +
                          '",' + str(const_args[curr_const]['dim']) + ',' +
                          const_args[curr_const]['type'] + ',' +
                          const_args[curr_const]['name2'].strip() + ');')
                loc_old = endofcall + 1
                continue

        fid.write(text[loc_old:])
        fid.close()

        f.close()
    # end of loop over input source files

    #  errors and warnings

    if ninit == 0:
        print' '
        print'-----------------------------'
        print'  WARNING: no call to op_init'
        if auto_soa==1:
          print'  WARNING: code generated with OP_AUTO_SOA,\n but couldn\'t modify op_init to pass\n an additional parameter of 1.\n Please make sure OP_AUTO_SOA is set when executing'
        print'-----------------------------'

    if nexit == 0:
        print' '
        print'-------------------------------'
        print'  WARNING: no call to op_exit  '
        print'-------------------------------'

    if npart == 0 and nhdf5 > 0:
        print' '
        print'---------------------------------------------------'
        print'  WARNING: hdf5 calls without call to op_partition '
        print'---------------------------------------------------'

    #
    #  finally, generate target-specific kernel files
    #


    op2_gen_seq(str(sys.argv[1]), date, consts, kernels) # MPI+GENSEQ version - initial version, no vectorisation
    op2_gen_mpi_vec(str(sys.argv[1]), date, consts, kernels) # MPI+GENSEQ with code that gets auto vectorised with intel compiler (version 15.0 and above)

    #code generators for OpenMP parallelisation with MPI
    #op2_gen_openmp(str(sys.argv[1]), date, consts, kernels) # Initial OpenMP code generator
    op2_gen_openmp_simple(str(sys.argv[1]), date, consts, kernels) # Simplified and Optimized OpenMP code generator
    op2_gen_openacc(str(sys.argv[1]), date, consts, kernels) # Simplified and Optimized OpenMP code generator

    #code generators for NVIDIA GPUs with CUDA
    #op2_gen_cuda(str(sys.argv[1]), date, consts, kernels,sets) # Optimized for Fermi GPUs
    op2_gen_cuda_simple(str(sys.argv[1]), date, consts, kernels,sets) # Optimized for Kepler GPUs

    # generates openmp code as well as cuda code into the same file
    #op2_gen_cuda_simple_hyb(str(sys.argv[1]), date, consts, kernels,sets) # CPU and GPU will then do comutations as a hybrid application

    import subprocess
    retcode = subprocess.call("which clang-format > /dev/null", shell=True)
    if retcode == 0:
      retcode = subprocess.call("$OP2_INSTALL_PATH/../translator/c/python/format.sh", shell=True)
    else:
      print 'Cannot find clang-format in PATH'
      print 'Install and add clang-format to PATH to format generated code to conform to code formatting guidelines'
Example #3
0
def main():

    # declare constants

    ninit = 0
    nexit = 0
    npart = 0
    nhdf5 = 0
    nconsts = 0
    nkernels = 0
    consts = []
    kernels = []
    sets = []
    kernels_in_files = []

    OP_ID = 1
    OP_GBL = 2
    OP_MAP = 3

    OP_READ = 1
    OP_WRITE = 2
    OP_RW = 3
    OP_INC = 4
    OP_MAX = 5
    OP_MIN = 6

    OP_accs_labels = ['OP_READ', 'OP_WRITE', 'OP_RW', 'OP_INC',
                      'OP_MAX', 'OP_MIN']

    #  loop over all input source files

    kernels_in_files = [[] for _ in range(len(sys.argv) - 1)]
    for a in range(1, len(sys.argv)):
        print 'processing file ' + str(a) + ' of ' + str(len(sys.argv) - 1) + \
              ' ' + str(sys.argv[a])

        src_file = str(sys.argv[a])
        f = open(src_file, 'r')
        text = f.read()

        # check for op_init/op_exit/op_partition/op_hdf5 calls

        inits, exits, parts, hdf5s = op_parse_calls(text)

        if inits + exits + parts + hdf5s > 0:
            print ' '
        if inits > 0:
            print'contains op_init call'
        if exits > 0:
            print'contains op_exit call'
        if parts > 0:
            print'contains op_partition call'
        if hdf5s > 0:
            print'contains op_hdf5 calls'

        ninit = ninit + inits
        nexit = nexit + exits
        npart = npart + parts
        nhdf5 = nhdf5 + hdf5s

        # parse and process constants

        const_args = op_decl_const_parse(text)
        set_list = op_decl_set_parse(text)
        for i in range(0,len(set_list)):
          sets.append(set_list[i])

        # cleanup '&' symbols from name and convert dim to integer
        for i in range(0, len(const_args)):
            if const_args[i]['name'][0] == '&':
                const_args[i]['name'] = const_args[i]['name'][1:]
                const_args[i]['dim'] = int(const_args[i]['dim'])

        # check for repeats
        nconsts = 0
        for i in range(0, len(const_args)):
            repeat = 0
            name = const_args[i]['name']
            for c in range(0, nconsts):
                if const_args[i]['name'] == consts[c]['name']:
                    repeat = 1
                    if const_args[i]['type'] != consts[c]['type']:
                        print 'type mismatch in repeated op_decl_const'
                    if const_args[i]['dim'] != consts[c]['dim']:
                        print 'size mismatch in repeated op_decl_const'

            if repeat > 0:
                print 'repeated global constant ' + const_args[i]['name']
            else:
                print '\nglobal constant (' + const_args[i]['name'].strip() \
                      + ') of size ' + str(const_args[i]['dim'])

            # store away in master list
            if repeat == 0:
                nconsts = nconsts + 1
                temp = {'dim': const_args[i]['dim'],
                        'type': const_args[i]['type'].strip(),
                        'name': const_args[i]['name'].strip()}
                consts.append(temp)

        # parse and process op_par_loop calls

        loop_args = op_par_loop_parse(text)
        for i in range(0, len(loop_args)):
            name = loop_args[i]['name1']
            nargs = loop_args[i]['nargs']
            print '\nprocessing kernel ' + name + ' with ' + str(nargs) + ' arguments',

            # process arguments

            var = [''] * nargs
            idxs = [0] * nargs
            dims = [''] * nargs
            maps = [0] * nargs
            mapnames = ['']*nargs
            typs = [''] * nargs
            accs = [0] * nargs
            soaflags = [0] * nargs

            for m in range(0, nargs):
                arg_type = loop_args[i]['args'][m]['type']
                args = loop_args[i]['args'][m]

                if arg_type.strip() == 'op_arg_dat':
                    var[m] = args['dat']
                    idxs[m] = args['idx']

                    if str(args['map']).strip() == 'OP_ID':
                        maps[m] = OP_ID
                        if int(idxs[m]) != -1:
                            print 'invalid index for argument' + str(m)
                    else:
                        maps[m] = OP_MAP
                        mapnames[m] = str(args['map']).strip()

                    dims[m] = args['dim']
                    soa_loc = args['typ'].find(':soa')

                    if soa_loc > 0:
                        soaflags[m] = 1
                        typs[m] = args['typ'][1:soa_loc]
                    else:
                        typs[m] = args['typ'][1:-1]


                    l = -1
                    for l in range(0, len(OP_accs_labels)):
                        if args['acc'].strip() == OP_accs_labels[l].strip():
                            break

                    if l == -1:
                        print 'unknown access type for argument ' + str(m)
                    else:
                        accs[m] = l + 1

                if arg_type.strip() == 'op_arg_gbl':
                    maps[m] = OP_GBL
                    var[m] = args['data']
                    dims[m] = args['dim']
                    typs[m] = args['typ'][1:-1]

                    l = -1
                    for l in range(0, len(OP_accs_labels)):
                        if args['acc'].strip() == OP_accs_labels[l].strip():
                            break

                    if l == -1:
                        print 'unknown access type for argument ' + str(m)
                    else:
                        accs[m] = l + 1

                if (maps[m] == OP_GBL) and (accs[m] == OP_WRITE or accs[m] == OP_RW):
                    print 'invalid access type for argument ' + str(m)

                if (maps[m] != OP_GBL) and (accs[m] == OP_MIN or accs[m] == OP_MAX):
                    print 'invalid access type for argument ' + str(m)

                # print var[m]+' '+str(idxs[m])+' '+str(maps[m])+' '+\
                # str(dims[m])+' '+typs[m]+' '+str(accs[m])

            print ' '

            # identify indirect datasets

            ninds = 0
            inds = [0] * nargs
            invinds = [0] * nargs
            indtyps = [''] * nargs
            inddims = [''] * nargs
            indaccs = [0] * nargs
            invmapinds = [0]*nargs
            mapinds = [0]*nargs

            j = [i for i, x in enumerate(maps) if x == OP_MAP]

            while len(j) > 0:

                indtyps[ninds] = typs[j[0]]
                inddims[ninds] = dims[j[0]]
                indaccs[ninds] = accs[j[0]]
                invinds[ninds] = j[0]  # inverse mapping
                ninds = ninds + 1
                for i in range(0, len(j)):
                    if var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]] \
                            and accs[j[0]] == accs[j[i]]:  # same variable
                        inds[j[i]] = ninds

                k = []
                for i in range(0, len(j)):
                    if not (var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]]
                            and accs[j[0]] == accs[j[i]]):  # same variable
                        k = k + [j[i]]
                j = k

            if ninds > 0:
              invmapinds = invinds[:]
              for i in range(0,ninds):
                for j in range(0,i):
                  if (mapnames[invinds[i]] == mapnames[invinds[j]]):
                    invmapinds[i] = invmapinds[j]
              for i in range(0,nargs):
                mapinds[i] = i
                for j in range(0,i):
                  if (maps[i] == OP_MAP) and (mapnames[i] == mapnames[j]) and (idxs[i] == idxs[j]):
                    mapinds[i] = mapinds[j]

            # check for repeats

            repeat = False
            rep1 = False
            rep2 = False
            which_file = -1
            for nk in range(0, nkernels):
                rep1 = kernels[nk]['name'] == name and \
                    kernels[nk]['nargs'] == nargs and \
                    kernels[nk]['ninds'] == ninds
                if rep1:
                    rep2 = True
                    for arg in range(0, nargs):
                        rep2 = rep2 and \
                            kernels[nk]['dims'][arg] == dims[arg] and \
                            kernels[nk]['maps'][arg] == maps[arg] and \
                            kernels[nk]['typs'][arg] == typs[arg] and \
                            kernels[nk]['accs'][arg] == accs[arg] and \
                            kernels[nk]['idxs'][arg] == idxs[arg] and \
                            kernels[nk]['soaflags'][arg] == soaflags[arg] and \
                            kernels[nk]['inds'][arg] == inds[arg]

                    for arg in range(0, ninds):
                        rep2 = rep2 and \
                            kernels[nk]['inddims'][arg] == inddims[arg] and \
                            kernels[nk]['indaccs'][arg] == indaccs[arg] and \
                            kernels[nk]['indtyps'][arg] == indtyps[arg] and \
                            kernels[nk]['invinds'][arg] == invinds[arg]
                    if rep2:
                        print 'repeated kernel with compatible arguments: ' + \
                              kernels[nk]['name'],
                        repeat = True
                        which_file = nk
                    else:
                        print 'repeated kernel with incompatible arguments: ERROR'
                        break

            # output various diagnostics

            if not repeat:
                print '  local constants:',
                for arg in range(0, nargs):
                    if maps[arg] == OP_GBL and accs[arg] == OP_READ:
                        print str(arg),
                print '\n  global reductions:',
                for arg in range(0, nargs):
                    if maps[arg] == OP_GBL and accs[arg] != OP_READ:
                        print str(arg),
                print '\n  direct arguments:',
                for arg in range(0, nargs):
                    if maps[arg] == OP_ID:
                        print str(arg),
                print '\n  indirect arguments:',
                for arg in range(0, nargs):
                    if maps[arg] == OP_MAP:
                        print str(arg),
                if ninds > 0:
                    print '\n  number of indirect datasets: ' + str(ninds),

                print '\n'

            # store away in master list

            if not repeat:
                nkernels = nkernels + 1
                temp = {'name': name,
                        'nargs': nargs,
                        'dims': dims,
                        'maps': maps,
                        'var': var,
                        'typs': typs,
                        'accs': accs,
                        'idxs': idxs,
                        'inds': inds,
                        'soaflags': soaflags,

                        'ninds': ninds,
                        'inddims': inddims,
                        'indaccs': indaccs,
                        'indtyps': indtyps,
                        'invinds': invinds,
                        'mapnames' : mapnames,
                        'mapinds': mapinds,
                        'invmapinds' : invmapinds}
                kernels.append(temp)
                (kernels_in_files[a - 1]).append(nkernels - 1)
            else:
                append = 1
                for in_file in range(0, len(kernels_in_files[a - 1])):
                    if kernels_in_files[a - 1][in_file] == which_file:
                        append = 0
                if append == 1:
                    (kernels_in_files[a - 1]).append(which_file)

        # output new source file

        fid = open(src_file.split('.')[0] + '_op.cpp', 'w')
        date = datetime.datetime.now()
        fid.write('//\n// auto-generated by op2.py on ' +
                  date.strftime("%Y-%m-%d %H:%M") + '\n//\n\n')

        loc_old = 0

        # read original file and locate header location
        header_len = 11
        loc_header = [text.find("op_seq.h")]
        if loc_header[0] == -1:
          header_len = 13
          loc_header = [text.find("op_lib_cpp.h")]

        # get locations of all op_decl_consts
        n_consts = len(const_args)
        loc_consts = [0] * n_consts
        for n in range(0, n_consts):
            loc_consts[n] = const_args[n]['loc']

        # get locations of all op_par_loops
        n_loops = len(loop_args)
        loc_loops = [0] * n_loops
        for n in range(0, n_loops):
            loc_loops[n] = loop_args[n]['loc']

        locs = sorted(loc_header + loc_consts + loc_loops)

        # process header, loops and constants
        for loc in range(0, len(locs)):
            if locs[loc] != -1:
                fid.write(text[loc_old:locs[loc] - 1])
                loc_old = locs[loc] - 1

            indent = ''
            ind = 0
            while 1:
                if text[locs[loc] - ind] == '\n':
                    break
                indent = indent + ' '
                ind = ind + 1

            if (locs[loc] in loc_header) and (locs[loc] != -1):
                fid.write(' "op_lib_cpp.h"\n\n')
                line = '\n#define STRIDE(x,y) x\n'
                for ns in range (0,len(sets)):
                  line += 'int '+sets[ns]['name'].replace('"','')+'_stride = 1;\n'
                fid.write(line)
                fid.write('//\n// op_par_loop declarations\n//\n')
                for k_iter in range(0, len(kernels_in_files[a - 1])):
                    k = kernels_in_files[a - 1][k_iter]
                    line = '\nvoid op_par_loop_' + \
                        kernels[k]['name'] + '(char const *, op_set,\n'
                    for n in range(1, kernels[k]['nargs']):
                        line = line + '  op_arg,\n'
                    line = line + '  op_arg );\n'
                    fid.write(line)

                fid.write('\n')
                loc_old = locs[loc] + header_len
                continue

            if locs[loc] in loc_loops:
                indent = indent + ' ' * len('op_par_loop')
                endofcall = text.find(';', locs[loc])
                curr_loop = loc_loops.index(locs[loc])
                name = loop_args[curr_loop]['name1']
                line = str(' op_par_loop_' + name + '(' +
                           loop_args[curr_loop]['name2'] + ',' +
                           loop_args[curr_loop]['set'] + ',\n' + indent)

                for arguments in range(0, loop_args[curr_loop]['nargs']):
                    elem = loop_args[curr_loop]['args'][arguments]
                    if elem['type'] == 'op_arg_dat':
                        line = line + elem['type'] + '(' + elem['dat'] + \
                            ',' + elem['idx'] + ',' + elem['map'] + \
                            ',' + elem['dim'] + ',' + elem['typ'] + \
                            ',' + elem['acc'] + '),\n' + indent
                    elif elem['type'] == 'op_arg_gbl':
                        line = line + elem['type'] + '(' + elem['data'] + \
                            ',' + elem['dim'] + ',' + elem['typ'] + \
                            ',' + elem['acc'] + '),\n' + indent

                fid.write(line[0:-len(indent) - 2] + ');')

                loc_old = endofcall + 1
                continue

            if locs[loc] in loc_consts:
                curr_const = loc_consts.index(locs[loc])
                endofcall = text.find(';', locs[loc])
                name = const_args[curr_const]['name']
                fid.write(indent[0:-2] + 'op_decl_const2("' + name.strip() +
                          '",' + str(const_args[curr_const]['dim']) + ',' +
                          const_args[curr_const]['type'] + ',' +
                          const_args[curr_const]['name2'].strip() + ');')
                loc_old = endofcall + 1
                continue
        print loc_old, len(text)
        fid.write(text[loc_old:])
        fid.close()

        f.close()
    # end of loop over input source files

    #  errors and warnings

    if ninit == 0:
        print' '
        print'-----------------------------'
        print'  ERROR: no call to op_init  '
        print'-----------------------------'

    if nexit == 0:
        print' '
        print'-------------------------------'
        print'  WARNING: no call to op_exit  '
        print'-------------------------------'

    if npart == 0 and nhdf5 > 0:
        print' '
        print'---------------------------------------------------'
        print'  WARNING: hdf5 calls without call to op_partition '
        print'---------------------------------------------------'

    #  finally, generate target-specific kernel files
    op2_gen_seq(str(sys.argv[1]), date, consts, kernels)
    op2_gen_openmp_simple(str(sys.argv[1]), date, consts, kernels)
    #op2_gen_cuda_simple_hyb(str(sys.argv[1]), date, consts, kernels,sets)
    op2_gen_cuda_simple(str(sys.argv[1]), date, consts, kernels,sets)
Example #4
0
def main(srcFilesAndDirs=sys.argv[1:]):

  # declare constants

  ninit = 0
  nexit = 0
  npart = 0
  nhdf5 = 0
  nconsts = 0
  nkernels = 0
  consts = []
  kernels = []
  sets = []
  kernels_in_files = []
  macro_defs = {}

  OP_ID = 1
  OP_GBL = 2
  OP_MAP = 3

  OP_READ = 1
  OP_WRITE = 2
  OP_RW = 3
  OP_INC = 4
  OP_MAX = 5
  OP_MIN = 6

  auto_soa=os.getenv('OP_AUTO_SOA','0')

  OP_accs_labels = ['OP_READ', 'OP_WRITE', 'OP_RW', 'OP_INC',
            'OP_MAX', 'OP_MIN']

  src_files = [s for s in srcFilesAndDirs if os.path.isfile(s)]
  src_dirs  = [d for d in srcFilesAndDirs if os.path.isdir(d)]

  ## Extract macro definitions:
  for src_file in src_files:
    print("Parsing file '" + src_file + "' for macro definitions.")
    with open(src_file, 'r') as f:
      text = f.read()

    local_defs = op_parse_macro_defs(text)
    for k in local_defs.keys():
      if (k in macro_defs) and (local_defs[k] != macro_defs[k]):
        msg = "WARNING: Have found two different definitions for macro '{}': '{}' and '{}'. Using the first definition.".format(k, macro_defs[k], local_defs[k])
        print(msg)
        continue
      else:
        macro_defs[k] = local_defs[k]
  self_evaluate_macro_defs(macro_defs)

  ## Loop over all input source files to search for op_par_loop calls
  kernels_in_files = [[] for _ in range(len(srcFilesAndDirs))]
  src_file_num = -1
  for src_file in src_files:
    src_file_num = src_file_num + 1
    print("Processing file " + str(src_file_num+1) + " of " + str(len(src_files)) + \
          ": " + src_file)
    with open(src_file, 'r') as f:
      text = f.read()

    any_soa = 0

    # check for op_init/op_exit/op_partition/op_hdf5 calls
    inits, exits, parts, hdf5s = op_parse_calls(text)

    if inits + exits + parts + hdf5s > 0:
      print ' '
    if inits > 0:
      print'contains op_init call'
      if auto_soa<>'0':
        text = append_init_soa(text)
    if exits > 0:
      print'contains op_exit call'
    if parts > 0:
      print'contains op_partition call'
    if hdf5s > 0:
      print'contains op_hdf5 calls'

    ninit = ninit + inits
    nexit = nexit + exits
    npart = npart + parts
    nhdf5 = nhdf5 + hdf5s

    # parse and process constants

    const_args = op_decl_const_parse(text)
    set_list = op_decl_set_parse(text)
    for i in range(0, len(set_list)):
      sets.append(set_list[i])

    # cleanup '&' symbols from name and convert dim to integer
    for i in range(0, len(const_args)):
      const_args[i]['dim'] = evaluate_macro_defs_in_string(macro_defs, const_args[i]['dim'])

      if const_args[i]['name'][0] == '&':
        const_args[i]['name'] = const_args[i]['name'][1:]
        const_args[i]['dim'] = int(const_args[i]['dim'])

    # check for repeats
    nconsts = 0
    for i in range(0, len(const_args)):
      repeat = 0
      name = const_args[i]['name']
      for c in range(0, nconsts):
        if const_args[i]['name'] == consts[c]['name']:
          repeat = 1
          if const_args[i]['type'] != consts[c]['type']:
            print 'type mismatch in repeated op_decl_const'
          if const_args[i]['dim'] != consts[c]['dim']:
            print 'size mismatch in repeated op_decl_const'

      if repeat > 0:
        print 'repeated global constant ' + const_args[i]['name']
      else:
        print '\nglobal constant (' + const_args[i]['name'].strip() \
            + ') of size ' + str(const_args[i]['dim'])

      # store away in master list
      if repeat == 0:
        nconsts = nconsts + 1
        temp = {'dim': const_args[i]['dim'],
            'type': const_args[i]['type'].strip(),
            'name': const_args[i]['name'].strip()}
        consts.append(temp)

    # parse and process op_par_loop calls

    loop_args = op_par_loop_parse(text)
    for i in range(0, len(loop_args)):
      name = loop_args[i]['name1']
      nargs = loop_args[i]['nargs']
      print '\nprocessing kernel ' + name + ' with ' + str(nargs) + ' arguments',

      # process arguments

      var = [''] * nargs
      idxs = [0] * nargs
      dims = [''] * nargs
      maps = [0] * nargs
      mapnames = ['']*nargs
      typs = [''] * nargs
      accs = [0] * nargs
      soaflags = [0] * nargs
      optflags = [0] * nargs
      any_opt = 0

      for m in range(0, nargs):
        argm = loop_args[i]['args'][m]
        argm['dim'] = evaluate_macro_defs_in_string(macro_defs, argm['dim'])

        arg_type = loop_args[i]['args'][m]['type']
        args = loop_args[i]['args'][m]

        if arg_type.strip() == 'op_arg_dat' or arg_type.strip() == 'op_opt_arg_dat':
          argm['idx'] = evaluate_macro_defs_in_string(macro_defs, argm['idx'])

        if arg_type.strip() == 'op_arg_dat' or arg_type.strip() == 'op_opt_arg_dat':
          var[m] = args['dat']
          idxs[m] = args['idx']
          if arg_type.strip() == 'op_opt_arg_dat':
            any_opt = 1

          if str(args['map']).strip() == 'OP_ID':
            maps[m] = OP_ID
            if int(idxs[m]) != -1:
              print 'invalid index for argument' + str(m)
          else:
            maps[m] = OP_MAP
            mapnames[m] = str(args['map']).strip()

          dims[m] = args['dim']
          soa_loc = args['typ'].find(':soa')
          if ((auto_soa=='1') and (((not dims[m].isdigit()) or int(dims[m])>1)) and (soa_loc < 0)):
            soa_loc = len(args['typ'])-1

          if soa_loc > 0:
            soaflags[m] = 1
            any_soa = 1
            typs[m] = args['typ'][1:soa_loc]
          else:
            typs[m] = args['typ'][1:-1]


          l = -1
          for l in range(0, len(OP_accs_labels)):
            if args['acc'].strip() == OP_accs_labels[l].strip():
              break

          if l == -1:
            print 'unknown access type for argument ' + str(m)
          else:
            accs[m] = l + 1

        if arg_type.strip() == 'op_opt_arg_dat':
          optflags[m] = 1
        else:
          optflags[m] = 0

        if arg_type.strip() == 'op_arg_gbl':
          maps[m] = OP_GBL
          var[m] = args['data']
          dims[m] = args['dim']
          typs[m] = args['typ'][1:-1]
          optflags[m] = 0

          l = -1
          for l in range(0, len(OP_accs_labels)):
            if args['acc'].strip() == OP_accs_labels[l].strip():
              break

          if l == -1:
            print 'unknown access type for argument ' + str(m)
          else:
            accs[m] = l + 1

        if (maps[m] == OP_GBL) and (accs[m] == OP_WRITE or accs[m] == OP_RW):
          print 'invalid access type for argument ' + str(m)

        if (maps[m] != OP_GBL) and (accs[m] == OP_MIN or accs[m] == OP_MAX):
          print 'invalid access type for argument ' + str(m)


      print ' '

      # identify indirect datasets

      ninds = 0
      inds = [0] * nargs
      invinds = [0] * nargs
      indtyps = [''] * nargs
      inddims = [''] * nargs
      indaccs = [0] * nargs
      invmapinds = [0]*nargs
      mapinds = [0]*nargs

      j = [i for i, x in enumerate(maps) if x == OP_MAP]

      while len(j) > 0:

        indtyps[ninds] = typs[j[0]]
        inddims[ninds] = dims[j[0]]
        indaccs[ninds] = accs[j[0]]
        invinds[ninds] = j[0]  # inverse mapping
        ninds = ninds + 1
        for i in range(0, len(j)):
          if var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]] \
              and accs[j[0]] == accs[j[i]] and mapnames[j[0]] == mapnames[j[i]]:  # same variable
            inds[j[i]] = ninds

        k = []
        for i in range(0, len(j)):
          if not (var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]]
              and accs[j[0]] == accs[j[i]] and mapnames[j[0]] == mapnames[j[i]]):  # same variable
            k = k + [j[i]]
        j = k

      if ninds > 0:
        invmapinds = invinds[:]
        for i in range(0, ninds):
          for j in range(0, i):
            if (mapnames[invinds[i]] == mapnames[invinds[j]]):
              invmapinds[i] = invmapinds[j]

        for i in range(0, nargs):
          mapinds[i] = i
          for j in range(0, i):
            if (maps[i] == OP_MAP) and (mapnames[i] == mapnames[j]) and (idxs[i] == idxs[j]):
              mapinds[i] = mapinds[j]

      # check for repeats

      repeat = False
      rep1 = False
      rep2 = False
      which_file = -1
      for nk in range(0, nkernels):
        rep1 = kernels[nk]['name'] == name and \
          kernels[nk]['nargs'] == nargs and \
          kernels[nk]['ninds'] == ninds
        if rep1:
          rep2 = True
          for arg in range(0, nargs):
            rep2 = rep2 and \
              kernels[nk]['dims'][arg] == dims[arg] and \
              kernels[nk]['maps'][arg] == maps[arg] and \
              kernels[nk]['typs'][arg] == typs[arg] and \
              kernels[nk]['accs'][arg] == accs[arg] and \
              kernels[nk]['idxs'][arg] == idxs[arg] and \
              kernels[nk]['soaflags'][arg] == soaflags[arg] and \
              kernels[nk]['optflags'][arg] == optflags[arg] and \
              kernels[nk]['inds'][arg] == inds[arg]

          for arg in range(0, ninds):
            rep2 = rep2 and \
              kernels[nk]['inddims'][arg] == inddims[arg] and \
              kernels[nk]['indaccs'][arg] == indaccs[arg] and \
              kernels[nk]['indtyps'][arg] == indtyps[arg] and \
              kernels[nk]['invinds'][arg] == invinds[arg]
          if rep2:
            print 'repeated kernel with compatible arguments: ' + \
                kernels[nk]['name'],
            repeat = True
            which_file = nk
          else:
            print 'repeated kernel with incompatible arguments: ERROR'
            break

      # output various diagnostics

      if not repeat:
        print '  local constants:',
        for arg in range(0, nargs):
          if maps[arg] == OP_GBL and accs[arg] == OP_READ:
            print str(arg),
        print '\n  global reductions:',
        for arg in range(0, nargs):
          if maps[arg] == OP_GBL and accs[arg] != OP_READ:
            print str(arg),
        print '\n  direct arguments:',
        for arg in range(0, nargs):
          if maps[arg] == OP_ID:
            print str(arg),
        print '\n  indirect arguments:',
        for arg in range(0, nargs):
          if maps[arg] == OP_MAP:
            print str(arg),
        if ninds > 0:
          print '\n  number of indirect datasets: ' + str(ninds),
        if any_opt:
          print '\n  optional arguments:',
          for arg in range(0, nargs):
            if optflags[arg] == 1:
              print str(arg),

        print '\n'

      # store away in master list

      if not repeat:
        nkernels = nkernels + 1
        temp = {'name': name,
            'nargs': nargs,
            'dims': dims,
            'maps': maps,
            'var': var,
            'typs': typs,
            'accs': accs,
            'idxs': idxs,
            'inds': inds,
            'soaflags': soaflags,
            'optflags': optflags,

            'ninds': ninds,
            'inddims': inddims,
            'indaccs': indaccs,
            'indtyps': indtyps,
            'invinds': invinds,
            'mapnames' : mapnames,
            'mapinds': mapinds,
            'invmapinds' : invmapinds}
        kernels.append(temp)
        (kernels_in_files[src_file_num]).append(nkernels - 1)
      else:
        append = 1
        for in_file in range(0, len(kernels_in_files[src_file_num])):
          if kernels_in_files[src_file_num][in_file] == which_file:
            append = 0
        if append == 1:
          (kernels_in_files[src_file_num]).append(which_file)

    # output new source file
    src_filename = os.path.basename(src_file)
    src_dirpath  = os.path.dirname(src_file)
    if src_dirpath[0:2] == "./":
      src_dirpath = src_dirpath[2:]

    op_extension = "_op"
    if '.' in src_filename:
      src_filename_pieces = src_filename.split('.')
      n = len(src_filename_pieces)
      src_filename_extension = src_filename_pieces[n-1]
      op_src_filename = '.'.join(src_filename_pieces[0:(n-1)]) + op_extension + '.' + src_filename_extension
    else:
      op_src_filename = src_filename + op_extension
    op_src_filepath = op_src_filename
    op_src_dirpath = ""
    if src_dirpath != "":
      src_dirpath_pieces = src_dirpath.split('/')
      root_dir = src_dirpath_pieces[0]
      if len(src_dirpath_pieces) == 0:
        rem_dirpath = ''
      else:
        rem_dirpath = '/'.join(src_dirpath_pieces[1:])
      op_src_dirpath = os.path.join(root_dir+"_op", rem_dirpath)
      op_src_filepath = os.path.join(op_src_dirpath, op_src_filename)

    if op_src_dirpath != "" and not os.path.exists(op_src_dirpath):
      os.makedirs(op_src_dirpath)
    fid = open(op_src_filepath, 'w')
    date = datetime.datetime.now()
    #fid.write('//\n// auto-generated by op2.py on ' +
    #          date.strftime("%Y-%m-%d %H:%M") + '\n//\n\n')
    fid.write('//\n// auto-generated by op2.py\n//\n\n')

    loc_old = 0

    # read original file and locate header location
    header_len = 11
    loc_header = [text.find("op_seq.h")]
    if loc_header[0] == -1:
      header_len = 13
      loc_header = [text.find("op_lib_cpp.h")]

    # get locations of all op_decl_consts
    n_consts = len(const_args)
    loc_consts = [0] * n_consts
    for n in range(0, n_consts):
      loc_consts[n] = const_args[n]['loc']

    # get locations of all op_par_loops
    n_loops = len(loop_args)
    loc_loops = [0] * n_loops
    for n in range(0, n_loops):
      loc_loops[n] = loop_args[n]['loc']

    locs = sorted(loc_header + loc_consts + loc_loops)

    # process header, loops and constants
    for loc in range(0, len(locs)):
      if locs[loc] != -1:
        fid.write(text[loc_old:locs[loc] - 1])
        loc_old = locs[loc] - 1

      indent = ''
      ind = 0
      while 1:
        if text[locs[loc] - ind] == '\n':
          break
        indent = indent + ' '
        ind = ind + 1

      if (locs[loc] in loc_header) and (locs[loc] != -1):
        fid.write(' "op_lib_cpp.h"\n\n')
        fid.write('//\n// op_par_loop declarations\n//\n')
        fid.write('#ifdef OPENACC\n#ifdef __cplusplus\nextern "C" {\n#endif\n#endif\n')
        for k_iter in range(0, len(kernels_in_files[src_file_num])):
          k = kernels_in_files[src_file_num][k_iter]
          line = '\nvoid op_par_loop_' + \
            kernels[k]['name'] + '(char const *, op_set,\n'
          for n in range(1, kernels[k]['nargs']):
            line = line + '  op_arg,\n'
          line = line + '  op_arg );\n'
          fid.write(line)

        fid.write('#ifdef OPENACC\n#ifdef __cplusplus\n}\n#endif\n#endif\n')
        fid.write('\n')
        loc_old = locs[loc] + header_len-1
        continue

      if locs[loc] in loc_loops:
        indent = indent + ' ' * len('op_par_loop')
        endofcall = text.find(';', locs[loc])
        curr_loop = loc_loops.index(locs[loc])
        name = loop_args[curr_loop]['name1']
        line = str(' op_par_loop_' + name + '(' +
               loop_args[curr_loop]['name2'] + ',' +
               loop_args[curr_loop]['set'] + ',\n' + indent)

        for arguments in range(0, loop_args[curr_loop]['nargs']):
          elem = loop_args[curr_loop]['args'][arguments]
          if elem['type'] == 'op_arg_dat':
            line = line + elem['type'] + '(' + elem['dat'] + \
              ',' + elem['idx'] + ',' + elem['map'] + \
              ',' + elem['dim'] + ',' + elem['typ'] + \
              ',' + elem['acc'] + '),\n' + indent
          elif elem['type'] == 'op_opt_arg_dat':
            line = line + elem['type'] + '(' \
                  + elem['opt'] + ',' + elem['dat'] + \
              ',' + elem['idx'] + ',' + elem['map'] + \
              ',' + elem['dim'] + ',' + elem['typ'] + \
              ',' + elem['acc'] + '),\n' + indent

          elif elem['type'] == 'op_arg_gbl':
            line = line + elem['type'] + '(' + elem['data'] + \
              ',' + elem['dim'] + ',' + elem['typ'] + \
              ',' + elem['acc'] + '),\n' + indent

        fid.write(line[0:-len(indent) - 2] + ');')

        loc_old = endofcall + 1
        continue

      if locs[loc] in loc_consts:
        curr_const = loc_consts.index(locs[loc])
        endofcall = text.find(';', locs[loc])
        name = const_args[curr_const]['name']
        fid.write(indent[0:-2] + 'op_decl_const2("' + name.strip() +
              '",' + str(const_args[curr_const]['dim']) + ',' +
              const_args[curr_const]['type'] + ',' +
              const_args[curr_const]['name2'].strip() + ');')
        loc_old = endofcall + 1
        continue

    fid.write(text[loc_old:])
    fid.close()
  # end of loop over input source files

  ## Loop over kernels, looking for a header file named after each 
  ## kernel in either working directory or one of the input-supplied 
  ## directories:
  for nk in xrange(0, len(kernels)):
    k_data = kernels[nk]
    k_name = k_data["name"]
    if not "decl_filepath" in k_data.keys():
      src_file = k_name + ".h"
      if os.path.isfile(src_file):
        with open(src_file, 'r') as f:
          text = f.read()
        if op_check_kernel_in_text(text, k_name):
          k_data["decl_filepath"] = src_file
          continue

      for dirname in src_dirs:
        filepath = os.path.join(dirname, src_file)
        if os.path.isfile(filepath):
          with open(filepath, 'r') as f:
            text = f.read()
          if op_check_kernel_in_text(text, k_name):
            k_data["decl_filepath"] = filepath
            break

  ## Any kernel declarations still not found must exist in files 
  ## not named after the kernel. Search through content of all 
  ## input-supplied files, and through all files of input-supplied 
  ## directories:
  for nk in xrange(0, len(kernels)):
    if not "decl_filepath" in kernels[nk].keys():
      k_data = kernels[nk]
      k_name = k_data["name"]

      for src_file in src_files:
        with open(src_file, 'r') as f:
          text = f.read()
        if op_check_kernel_in_text(text, k_name):
          k_data["decl_filepath"] = src_file
          break

      if not "decl_filepath" in k_data.keys():
        for src_dir in src_dirs:
          for src_dir_subfile in [s for s in os.listdir(src_dir) if os.path.isfile(os.path.join(src_dir, s))]:
            src_dir_subfilepath = os.path.join(src_dir, src_dir_subfile)
            with open(src_dir_subfilepath, 'r') as f:
              text = f.read()
            if op_check_kernel_in_text(text, k_name):
              k_data["decl_filepath"] = src_dir_subfilepath
              break
          if "decl_filepath" in k_data.keys():
            break

  fail = False
  for nk in xrange(0, len(kernels)):
    if not "decl_filepath" in kernels[nk].keys():
      fail = True
      print("Declaration not found for kernel " + kernels[nk]["name"])
  if fail:
    exit(2)

  #  errors and warnings

  if ninit == 0:
    print' '
    print'-----------------------------'
    print'  WARNING: no call to op_init'
    if auto_soa==1:
      print'  WARNING: code generated with OP_AUTO_SOA,\n but couldn\'t modify op_init to pass\n an additional parameter of 1.\n Please make sure OP_AUTO_SOA is set when executing'
    print'-----------------------------'

  if nexit == 0:
    print' '
    print'-------------------------------'
    print'  WARNING: no call to op_exit  '
    print'-------------------------------'

  if npart == 0 and nhdf5 > 0:
    print' '
    print'---------------------------------------------------'
    print'  WARNING: hdf5 calls without call to op_partition '
    print'---------------------------------------------------'

  #
  #  finally, generate target-specific kernel files
  #
  masterFile = str(srcFilesAndDirs[0])

  op2_gen_seq(masterFile, date, consts, kernels) # MPI+GENSEQ version - initial version, no vectorisation
  #op2_gen_mpi_vec(masterFile, date, consts, kernels) # MPI+GENSEQ with code that gets auto vectorised with intel compiler (version 15.0 and above)

  #code generators for OpenMP parallelisation with MPI
  #op2_gen_openmp(masterFile, date, consts, kernels) # Initial OpenMP code generator
  op2_gen_openmp_simple(masterFile, date, consts, kernels) # Simplified and Optimized OpenMP code generator
  op2_gen_openacc(masterFile, date, consts, kernels) # Simplified and Optimized OpenMP code generator

  #code generators for NVIDIA GPUs with CUDA
  #op2_gen_cuda(masterFile, date, consts, kernels,sets) # Optimized for Fermi GPUs
  op2_gen_cuda_simple(masterFile, date, consts, kernels, sets, macro_defs) # Optimized for Kepler GPUs

  # generates openmp code as well as cuda code into the same file
  op2_gen_cuda_simple_hyb(masterFile, date, consts, kernels, sets) # CPU and GPU will then do comutations as a hybrid application

  #code generator for GPUs with OpenMP4.5
  op2_gen_openmp4(masterFile, date, consts, kernels)