def main(srcFilesAndDirs=sys.argv[1:]): # declare constants ninit = 0 nexit = 0 npart = 0 nhdf5 = 0 nconsts = 0 nkernels = 0 consts = [] kernels = [] sets = [] kernels_in_files = [] macro_defs = {} OP_ID = 1 OP_GBL = 2 OP_MAP = 3 OP_READ = 1 OP_WRITE = 2 OP_RW = 3 OP_INC = 4 OP_MAX = 5 OP_MIN = 6 auto_soa = os.getenv('OP_AUTO_SOA', '0') OP_accs_labels = [ 'OP_READ', 'OP_WRITE', 'OP_RW', 'OP_INC', 'OP_MAX', 'OP_MIN' ] src_files = [s for s in srcFilesAndDirs if os.path.isfile(s)] src_dirs = [d for d in srcFilesAndDirs if os.path.isdir(d)] ## Extract macro definitions: for src_file in src_files: print("Parsing file '" + src_file + "' for macro definitions.") with open(src_file, 'r') as f: text = f.read() local_defs = op_parse_macro_defs(text) for k in local_defs.keys(): if (k in macro_defs) and (local_defs[k] != macro_defs[k]): msg = "WARNING: Have found two different definitions for macro '{}': '{}' and '{}'. Using the first definition.".format( k, macro_defs[k], local_defs[k]) print(msg) continue else: macro_defs[k] = local_defs[k] self_evaluate_macro_defs(macro_defs) ## Identify global variables already declared as 'extern': declared_globals = [] for src_file in src_files: with open(src_file, 'r') as f: text = f.read() declared_globals += extract_declared_globals(text) ## Loop over all input source files to search for op_par_loop calls kernels_in_files = [[] for _ in range(len(srcFilesAndDirs))] src_file_num = -1 for src_file in src_files: src_file_num = src_file_num + 1 print("Processing file " + str(src_file_num+1) + " of " + str(len(src_files)) + \ ": " + src_file) with open(src_file, 'r') as f: text = f.read() any_soa = 0 # check for op_init/op_exit/op_partition/op_hdf5 calls inits, exits, parts, hdf5s = op_parse_calls(text) if inits + exits + parts + hdf5s > 0: print ' ' if inits > 0: print 'contains op_init call' if auto_soa <> '0': text = append_init_soa(text) if exits > 0: print 'contains op_exit call' if parts > 0: print 'contains op_partition call' if hdf5s > 0: print 'contains op_hdf5 calls' ninit = ninit + inits nexit = nexit + exits npart = npart + parts nhdf5 = nhdf5 + hdf5s # parse and process constants const_args = op_decl_const_parse(text) set_list = op_decl_set_parse(text) for i in range(0, len(set_list)): sets.append(set_list[i]) # cleanup '&' symbols from name and convert dim to integer for i in range(0, len(const_args)): const_args[i]['dim'] = evaluate_macro_defs_in_string( macro_defs, const_args[i]['dim']) if const_args[i]['name'][0] == '&': const_args[i]['name'] = const_args[i]['name'][1:] const_args[i]['dim'] = int(const_args[i]['dim']) # check for repeats nconsts = 0 for i in range(0, len(const_args)): repeat = 0 name = const_args[i]['name'] for c in range(0, nconsts): if const_args[i]['name'] == consts[c]['name']: repeat = 1 if const_args[i]['type'] != consts[c]['type']: print 'type mismatch in repeated op_decl_const' if const_args[i]['dim'] != consts[c]['dim']: print 'size mismatch in repeated op_decl_const' if repeat > 0: print 'repeated global constant ' + const_args[i]['name'] else: print '\nglobal constant (' + const_args[i]['name'].strip() \ + ') of size ' + str(const_args[i]['dim']) # store away in master list if repeat == 0: nconsts = nconsts + 1 temp = { 'dim': const_args[i]['dim'], 'type': const_args[i]['type'].strip(), 'name': const_args[i]['name'].strip() } temp["user_declared"] = temp["name"] in declared_globals consts.append(temp) # parse and process op_par_loop calls loop_args = op_par_loop_parse(text) for i in range(0, len(loop_args)): name = loop_args[i]['name1'] nargs = loop_args[i]['nargs'] print '\nprocessing kernel ' + name + ' with ' + str( nargs) + ' arguments', # process arguments var = [''] * nargs idxs = [0] * nargs dims = [''] * nargs maps = [0] * nargs mapnames = [''] * nargs typs = [''] * nargs accs = [0] * nargs soaflags = [0] * nargs optflags = [0] * nargs any_opt = 0 for m in range(0, nargs): argm = loop_args[i]['args'][m] argm['dim'] = evaluate_macro_defs_in_string( macro_defs, argm['dim']) arg_type = loop_args[i]['args'][m]['type'] args = loop_args[i]['args'][m] if arg_type.strip() == 'op_arg_dat' or arg_type.strip( ) == 'op_opt_arg_dat': argm['idx'] = evaluate_macro_defs_in_string( macro_defs, argm['idx']) if arg_type.strip() == 'op_arg_dat' or arg_type.strip( ) == 'op_opt_arg_dat': var[m] = args['dat'] idxs[m] = args['idx'] if arg_type.strip() == 'op_opt_arg_dat': any_opt = 1 if str(args['map']).strip() == 'OP_ID': maps[m] = OP_ID if int(idxs[m]) != -1: print 'invalid index for argument' + str(m) else: maps[m] = OP_MAP mapnames[m] = str(args['map']).strip() dims[m] = args['dim'] soa_loc = args['typ'].find(':soa') if ((auto_soa == '1') and (((not dims[m].isdigit()) or int(dims[m]) > 1)) and (soa_loc < 0)): soa_loc = len(args['typ']) - 1 if soa_loc > 0: soaflags[m] = 1 any_soa = 1 typs[m] = args['typ'][1:soa_loc] else: typs[m] = args['typ'][1:-1] l = -1 for l in range(0, len(OP_accs_labels)): if args['acc'].strip() == OP_accs_labels[l].strip(): break if l == -1: print 'unknown access type for argument ' + str(m) else: accs[m] = l + 1 if arg_type.strip() == 'op_opt_arg_dat': optflags[m] = 1 else: optflags[m] = 0 if arg_type.strip() == 'op_arg_gbl': maps[m] = OP_GBL var[m] = args['data'] dims[m] = args['dim'] typs[m] = args['typ'][1:-1] optflags[m] = 0 l = -1 for l in range(0, len(OP_accs_labels)): if args['acc'].strip() == OP_accs_labels[l].strip(): break if l == -1: print 'unknown access type for argument ' + str(m) else: accs[m] = l + 1 if (maps[m] == OP_GBL) and (accs[m] == OP_WRITE or accs[m] == OP_RW): print 'invalid access type for argument ' + str(m) if (maps[m] != OP_GBL) and (accs[m] == OP_MIN or accs[m] == OP_MAX): print 'invalid access type for argument ' + str(m) print ' ' # identify indirect datasets ninds = 0 inds = [0] * nargs invinds = [0] * nargs indtyps = [''] * nargs inddims = [''] * nargs indaccs = [0] * nargs invmapinds = [0] * nargs mapinds = [0] * nargs j = [i for i, x in enumerate(maps) if x == OP_MAP] while len(j) > 0: indtyps[ninds] = typs[j[0]] inddims[ninds] = dims[j[0]] indaccs[ninds] = accs[j[0]] invinds[ninds] = j[0] # inverse mapping ninds = ninds + 1 for i in range(0, len(j)): if var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]] \ and accs[j[0]] == accs[j[i]] and mapnames[j[0]] == mapnames[j[i]]: # same variable inds[j[i]] = ninds k = [] for i in range(0, len(j)): if not (var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]] and accs[j[0]] == accs[j[i]] and mapnames[j[0]] == mapnames[j[i]]): # same variable k = k + [j[i]] j = k if ninds > 0: invmapinds = invinds[:] for i in range(0, ninds): for j in range(0, i): if (mapnames[invinds[i]] == mapnames[invinds[j]]): invmapinds[i] = invmapinds[j] for i in range(0, nargs): mapinds[i] = i for j in range(0, i): if (maps[i] == OP_MAP) and (mapnames[i] == mapnames[j] ) and (idxs[i] == idxs[j]): mapinds[i] = mapinds[j] # check for repeats repeat = False rep1 = False rep2 = False which_file = -1 for nk in range(0, nkernels): rep1 = kernels[nk]['name'] == name and \ kernels[nk]['nargs'] == nargs and \ kernels[nk]['ninds'] == ninds if rep1: rep2 = True for arg in range(0, nargs): rep2 = rep2 and \ kernels[nk]['dims'][arg] == dims[arg] and \ kernels[nk]['maps'][arg] == maps[arg] and \ kernels[nk]['typs'][arg] == typs[arg] and \ kernels[nk]['accs'][arg] == accs[arg] and \ kernels[nk]['idxs'][arg] == idxs[arg] and \ kernels[nk]['soaflags'][arg] == soaflags[arg] and \ kernels[nk]['optflags'][arg] == optflags[arg] and \ kernels[nk]['inds'][arg] == inds[arg] for arg in range(0, ninds): rep2 = rep2 and \ kernels[nk]['inddims'][arg] == inddims[arg] and \ kernels[nk]['indaccs'][arg] == indaccs[arg] and \ kernels[nk]['indtyps'][arg] == indtyps[arg] and \ kernels[nk]['invinds'][arg] == invinds[arg] if rep2: print 'repeated kernel with compatible arguments: ' + \ kernels[nk]['name'], repeat = True which_file = nk else: print 'repeated kernel with incompatible arguments: ERROR' break # output various diagnostics if not repeat: print ' local constants:', for arg in range(0, nargs): if maps[arg] == OP_GBL and accs[arg] == OP_READ: print str(arg), print '\n global reductions:', for arg in range(0, nargs): if maps[arg] == OP_GBL and accs[arg] != OP_READ: print str(arg), print '\n direct arguments:', for arg in range(0, nargs): if maps[arg] == OP_ID: print str(arg), print '\n indirect arguments:', for arg in range(0, nargs): if maps[arg] == OP_MAP: print str(arg), if ninds > 0: print '\n number of indirect datasets: ' + str(ninds), if any_opt: print '\n optional arguments:', for arg in range(0, nargs): if optflags[arg] == 1: print str(arg), print '\n' # store away in master list if not repeat: nkernels = nkernels + 1 temp = { 'name': name, 'nargs': nargs, 'dims': dims, 'maps': maps, 'var': var, 'typs': typs, 'accs': accs, 'idxs': idxs, 'inds': inds, 'soaflags': soaflags, 'optflags': optflags, 'ninds': ninds, 'inddims': inddims, 'indaccs': indaccs, 'indtyps': indtyps, 'invinds': invinds, 'mapnames': mapnames, 'mapinds': mapinds, 'invmapinds': invmapinds } kernels.append(temp) (kernels_in_files[src_file_num]).append(nkernels - 1) else: append = 1 for in_file in range(0, len(kernels_in_files[src_file_num])): if kernels_in_files[src_file_num][in_file] == which_file: append = 0 if append == 1: (kernels_in_files[src_file_num]).append(which_file) # output new source file src_filename = os.path.basename(src_file) src_dirpath = os.path.dirname(src_file) if src_dirpath[0:2] == "./": src_dirpath = src_dirpath[2:] op_extension = "_op" if '.' in src_filename: src_filename_pieces = src_filename.split('.') n = len(src_filename_pieces) src_filename_extension = src_filename_pieces[n - 1] op_src_filename = '.'.join(src_filename_pieces[0:( n - 1)]) + op_extension + '.' + src_filename_extension else: op_src_filename = src_filename + op_extension op_src_filepath = op_src_filename op_src_dirpath = "" if src_dirpath != "": src_dirpath_pieces = src_dirpath.split('/') root_dir = src_dirpath_pieces[0] if len(src_dirpath_pieces) == 0: rem_dirpath = '' else: rem_dirpath = '/'.join(src_dirpath_pieces[1:]) op_src_dirpath = os.path.join(root_dir + "_op", rem_dirpath) op_src_filepath = os.path.join(op_src_dirpath, op_src_filename) if op_src_dirpath != "" and not os.path.exists(op_src_dirpath): os.makedirs(op_src_dirpath) fid = open(op_src_filepath, 'w') date = datetime.datetime.now() #fid.write('//\n// auto-generated by op2.py on ' + # date.strftime("%Y-%m-%d %H:%M") + '\n//\n\n') fid.write('//\n// auto-generated by op2.py\n//\n\n') loc_old = 0 # read original file and locate header location header_len = 11 loc_header = [text.find("op_seq.h")] if loc_header[0] == -1: header_len = 13 loc_header = [text.find("op_lib_cpp.h")] # get locations of all op_decl_consts n_consts = len(const_args) loc_consts = [0] * n_consts for n in range(0, n_consts): loc_consts[n] = const_args[n]['loc'] # get locations of all op_par_loops n_loops = len(loop_args) loc_loops = [0] * n_loops for n in range(0, n_loops): loc_loops[n] = loop_args[n]['loc'] locs = sorted(loc_header + loc_consts + loc_loops) # process header, loops and constants for loc in range(0, len(locs)): if locs[loc] != -1: fid.write(text[loc_old:locs[loc] - 1]) loc_old = locs[loc] - 1 indent = '' ind = 0 while 1: if text[locs[loc] - ind] == '\n': break indent = indent + ' ' ind = ind + 1 if (locs[loc] in loc_header) and (locs[loc] != -1): fid.write(' "op_lib_cpp.h"\n\n') fid.write('//\n// op_par_loop declarations\n//\n') fid.write( '#ifdef OPENACC\n#ifdef __cplusplus\nextern "C" {\n#endif\n#endif\n' ) for k_iter in range(0, len(kernels_in_files[src_file_num])): k = kernels_in_files[src_file_num][k_iter] line = '\nvoid op_par_loop_' + \ kernels[k]['name'] + '(char const *, op_set,\n' for n in range(1, kernels[k]['nargs']): line = line + ' op_arg,\n' line = line + ' op_arg );\n' fid.write(line) fid.write( '#ifdef OPENACC\n#ifdef __cplusplus\n}\n#endif\n#endif\n') fid.write('\n') loc_old = locs[loc] + header_len - 1 continue if locs[loc] in loc_loops: indent = indent + ' ' * len('op_par_loop') endofcall = text.find(';', locs[loc]) curr_loop = loc_loops.index(locs[loc]) name = loop_args[curr_loop]['name1'] line = str(' op_par_loop_' + name + '(' + loop_args[curr_loop]['name2'] + ',' + loop_args[curr_loop]['set'] + ',\n' + indent) for arguments in range(0, loop_args[curr_loop]['nargs']): elem = loop_args[curr_loop]['args'][arguments] if elem['type'] == 'op_arg_dat': line = line + elem['type'] + '(' + elem['dat'] + \ ',' + elem['idx'] + ',' + elem['map'] + \ ',' + elem['dim'] + ',' + elem['typ'] + \ ',' + elem['acc'] + '),\n' + indent elif elem['type'] == 'op_opt_arg_dat': line = line + elem['type'] + '(' \ + elem['opt'] + ',' + elem['dat'] + \ ',' + elem['idx'] + ',' + elem['map'] + \ ',' + elem['dim'] + ',' + elem['typ'] + \ ',' + elem['acc'] + '),\n' + indent elif elem['type'] == 'op_arg_gbl': line = line + elem['type'] + '(' + elem['data'] + \ ',' + elem['dim'] + ',' + elem['typ'] + \ ',' + elem['acc'] + '),\n' + indent fid.write(line[0:-len(indent) - 2] + ');') loc_old = endofcall + 1 continue if locs[loc] in loc_consts: curr_const = loc_consts.index(locs[loc]) endofcall = text.find(';', locs[loc]) name = const_args[curr_const]['name'] fid.write(indent[0:-2] + 'op_decl_const2("' + name.strip() + '",' + str(const_args[curr_const]['dim']) + ',' + const_args[curr_const]['type'] + ',' + const_args[curr_const]['name2'].strip() + ');') loc_old = endofcall + 1 continue fid.write(text[loc_old:]) fid.close() # end of loop over input source files ## Loop over kernels, looking for a header file named after each ## kernel in either working directory or one of the input-supplied ## directories: for nk in xrange(0, len(kernels)): k_data = kernels[nk] k_name = k_data["name"] if not "decl_filepath" in k_data.keys(): src_file = k_name + ".h" if os.path.isfile(src_file): with open(src_file, 'r') as f: text = f.read() if op_check_kernel_in_text(text, k_name): k_data["decl_filepath"] = src_file continue for dirname in src_dirs: filepath = os.path.join(dirname, src_file) if os.path.isfile(filepath): with open(filepath, 'r') as f: text = f.read() if op_check_kernel_in_text(text, k_name): k_data["decl_filepath"] = filepath break ## Any kernel declarations still not found must exist in files ## not named after the kernel. Search through content of all ## input-supplied files, and through all files of input-supplied ## directories: for nk in xrange(0, len(kernels)): if not "decl_filepath" in kernels[nk].keys(): k_data = kernels[nk] k_name = k_data["name"] for src_file in src_files: with open(src_file, 'r') as f: text = f.read() if op_check_kernel_in_text(text, k_name): k_data["decl_filepath"] = src_file break if not "decl_filepath" in k_data.keys(): for src_dir in src_dirs: for src_dir_subfile in [ s for s in os.listdir(src_dir) if os.path.isfile(os.path.join(src_dir, s)) ]: src_dir_subfilepath = os.path.join( src_dir, src_dir_subfile) with open(src_dir_subfilepath, 'r') as f: text = f.read() if op_check_kernel_in_text(text, k_name): k_data["decl_filepath"] = src_dir_subfilepath break if "decl_filepath" in k_data.keys(): break fail = False for nk in xrange(0, len(kernels)): if not "decl_filepath" in kernels[nk].keys(): fail = True print("Declaration not found for kernel " + kernels[nk]["name"]) if fail: exit(2) # errors and warnings if ninit == 0: print ' ' print '-----------------------------' print ' WARNING: no call to op_init' if auto_soa == 1: print ' WARNING: code generated with OP_AUTO_SOA,\n but couldn\'t modify op_init to pass\n an additional parameter of 1.\n Please make sure OP_AUTO_SOA is set when executing' print '-----------------------------' if nexit == 0: print ' ' print '-------------------------------' print ' WARNING: no call to op_exit ' print '-------------------------------' if npart == 0 and nhdf5 > 0: print ' ' print '---------------------------------------------------' print ' WARNING: hdf5 calls without call to op_partition ' print '---------------------------------------------------' # # finally, generate target-specific kernel files # masterFile = str(srcFilesAndDirs[0]) op2_gen_seq( masterFile, date, consts, kernels) # MPI+GENSEQ version - initial version, no vectorisation # Vec translator is not yet ready for release, eg it cannot translate the 'aero' app. op2_gen_mpi_vec( masterFile, date, consts, kernels ) # MPI+GENSEQ with code that gets auto vectorised with intel compiler (version 15.0 and above) #code generators for OpenMP parallelisation with MPI #op2_gen_openmp(masterFile, date, consts, kernels) # Initial OpenMP code generator op2_gen_openmp_simple( masterFile, date, consts, kernels) # Simplified and Optimized OpenMP code generator op2_gen_openacc(masterFile, date, consts, kernels) # Simplified and Optimized OpenMP code generator #code generators for NVIDIA GPUs with CUDA #op2_gen_cuda(masterFile, date, consts, kernels,sets) # Optimized for Fermi GPUs op2_gen_cuda_simple(masterFile, date, consts, kernels, sets, macro_defs) # Optimized for Kepler GPUs # generates openmp code as well as cuda code into the same file op2_gen_cuda_simple_hyb( masterFile, date, consts, kernels, sets) # CPU and GPU will then do comutations as a hybrid application #code generator for GPUs with OpenMP4.5 op2_gen_openmp4(masterFile, date, consts, kernels)
def main(): # declare constants ninit = 0 nexit = 0 npart = 0 nhdf5 = 0 nconsts = 0 nkernels = 0 consts = [] kernels = [] sets = [] kernels_in_files = [] OP_ID = 1 OP_GBL = 2 OP_MAP = 3 OP_READ = 1 OP_WRITE = 2 OP_RW = 3 OP_INC = 4 OP_MAX = 5 OP_MIN = 6 auto_soa=os.getenv('OP_AUTO_SOA','0') OP_accs_labels = ['OP_READ', 'OP_WRITE', 'OP_RW', 'OP_INC', 'OP_MAX', 'OP_MIN'] # loop over all input source files kernels_in_files = [[] for _ in range(len(sys.argv) - 1)] for a in range(1, len(sys.argv)): print 'processing file ' + str(a) + ' of ' + str(len(sys.argv) - 1) + \ ' ' + str(sys.argv[a]) src_file = str(sys.argv[a]) f = open(src_file, 'r') text = f.read() any_soa = 0 # check for op_init/op_exit/op_partition/op_hdf5 calls inits, exits, parts, hdf5s = op_parse_calls(text) if inits + exits + parts + hdf5s > 0: print ' ' if inits > 0: print'contains op_init call' if auto_soa<>'0': text = append_init_soa(text) if exits > 0: print'contains op_exit call' if parts > 0: print'contains op_partition call' if hdf5s > 0: print'contains op_hdf5 calls' ninit = ninit + inits nexit = nexit + exits npart = npart + parts nhdf5 = nhdf5 + hdf5s # parse and process constants const_args = op_decl_const_parse(text) set_list = op_decl_set_parse(text) for i in range(0,len(set_list)): sets.append(set_list[i]) # cleanup '&' symbols from name and convert dim to integer for i in range(0, len(const_args)): if const_args[i]['name'][0] == '&': const_args[i]['name'] = const_args[i]['name'][1:] const_args[i]['dim'] = int(const_args[i]['dim']) # check for repeats nconsts = 0 for i in range(0, len(const_args)): repeat = 0 name = const_args[i]['name'] for c in range(0, nconsts): if const_args[i]['name'] == consts[c]['name']: repeat = 1 if const_args[i]['type'] != consts[c]['type']: print 'type mismatch in repeated op_decl_const' if const_args[i]['dim'] != consts[c]['dim']: print 'size mismatch in repeated op_decl_const' if repeat > 0: print 'repeated global constant ' + const_args[i]['name'] else: print '\nglobal constant (' + const_args[i]['name'].strip() \ + ') of size ' + str(const_args[i]['dim']) # store away in master list if repeat == 0: nconsts = nconsts + 1 temp = {'dim': const_args[i]['dim'], 'type': const_args[i]['type'].strip(), 'name': const_args[i]['name'].strip()} consts.append(temp) # parse and process op_par_loop calls loop_args = op_par_loop_parse(text) for i in range(0, len(loop_args)): name = loop_args[i]['name1'] nargs = loop_args[i]['nargs'] print '\nprocessing kernel ' + name + ' with ' + str(nargs) + ' arguments', # process arguments var = [''] * nargs idxs = [0] * nargs dims = [''] * nargs maps = [0] * nargs mapnames = ['']*nargs typs = [''] * nargs accs = [0] * nargs soaflags = [0] * nargs for m in range(0, nargs): arg_type = loop_args[i]['args'][m]['type'] args = loop_args[i]['args'][m] if arg_type.strip() == 'op_arg_dat': var[m] = args['dat'] idxs[m] = args['idx'] if str(args['map']).strip() == 'OP_ID': maps[m] = OP_ID if int(idxs[m]) != -1: print 'invalid index for argument' + str(m) else: maps[m] = OP_MAP mapnames[m] = str(args['map']).strip() dims[m] = args['dim'] soa_loc = args['typ'].find(':soa') if ((auto_soa=='1') and (((not dims[m].isdigit()) or int(dims[m])>1)) and (soa_loc < 0)): soa_loc = len(args['typ'])-1 if soa_loc > 0: soaflags[m] = 1 any_soa = 1 typs[m] = args['typ'][1:soa_loc] else: typs[m] = args['typ'][1:-1] l = -1 for l in range(0, len(OP_accs_labels)): if args['acc'].strip() == OP_accs_labels[l].strip(): break if l == -1: print 'unknown access type for argument ' + str(m) else: accs[m] = l + 1 if arg_type.strip() == 'op_arg_gbl': maps[m] = OP_GBL var[m] = args['data'] dims[m] = args['dim'] typs[m] = args['typ'][1:-1] l = -1 for l in range(0, len(OP_accs_labels)): if args['acc'].strip() == OP_accs_labels[l].strip(): break if l == -1: print 'unknown access type for argument ' + str(m) else: accs[m] = l + 1 if (maps[m] == OP_GBL) and (accs[m] == OP_WRITE or accs[m] == OP_RW): print 'invalid access type for argument ' + str(m) if (maps[m] != OP_GBL) and (accs[m] == OP_MIN or accs[m] == OP_MAX): print 'invalid access type for argument ' + str(m) print ' ' # identify indirect datasets ninds = 0 inds = [0] * nargs invinds = [0] * nargs indtyps = [''] * nargs inddims = [''] * nargs indaccs = [0] * nargs invmapinds = [0]*nargs mapinds = [0]*nargs j = [i for i, x in enumerate(maps) if x == OP_MAP] while len(j) > 0: indtyps[ninds] = typs[j[0]] inddims[ninds] = dims[j[0]] indaccs[ninds] = accs[j[0]] invinds[ninds] = j[0] # inverse mapping ninds = ninds + 1 for i in range(0, len(j)): if var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]] \ and accs[j[0]] == accs[j[i]] and mapnames[j[0]] == mapnames[j[i]]: # same variable inds[j[i]] = ninds k = [] for i in range(0, len(j)): if not (var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]] and accs[j[0]] == accs[j[i]] and mapnames[j[0]] == mapnames[j[i]]): # same variable k = k + [j[i]] j = k if ninds > 0: invmapinds = invinds[:] for i in range(0,ninds): for j in range(0,i): if (mapnames[invinds[i]] == mapnames[invinds[j]]): invmapinds[i] = invmapinds[j] for i in range(0,nargs): mapinds[i] = i for j in range(0,i): if (maps[i] == OP_MAP) and (mapnames[i] == mapnames[j]) and (idxs[i] == idxs[j]): mapinds[i] = mapinds[j] # check for repeats repeat = False rep1 = False rep2 = False which_file = -1 for nk in range(0, nkernels): rep1 = kernels[nk]['name'] == name and \ kernels[nk]['nargs'] == nargs and \ kernels[nk]['ninds'] == ninds if rep1: rep2 = True for arg in range(0, nargs): rep2 = rep2 and \ kernels[nk]['dims'][arg] == dims[arg] and \ kernels[nk]['maps'][arg] == maps[arg] and \ kernels[nk]['typs'][arg] == typs[arg] and \ kernels[nk]['accs'][arg] == accs[arg] and \ kernels[nk]['idxs'][arg] == idxs[arg] and \ kernels[nk]['soaflags'][arg] == soaflags[arg] and \ kernels[nk]['inds'][arg] == inds[arg] for arg in range(0, ninds): rep2 = rep2 and \ kernels[nk]['inddims'][arg] == inddims[arg] and \ kernels[nk]['indaccs'][arg] == indaccs[arg] and \ kernels[nk]['indtyps'][arg] == indtyps[arg] and \ kernels[nk]['invinds'][arg] == invinds[arg] if rep2: print 'repeated kernel with compatible arguments: ' + \ kernels[nk]['name'], repeat = True which_file = nk else: print 'repeated kernel with incompatible arguments: ERROR' break # output various diagnostics if not repeat: print ' local constants:', for arg in range(0, nargs): if maps[arg] == OP_GBL and accs[arg] == OP_READ: print str(arg), print '\n global reductions:', for arg in range(0, nargs): if maps[arg] == OP_GBL and accs[arg] != OP_READ: print str(arg), print '\n direct arguments:', for arg in range(0, nargs): if maps[arg] == OP_ID: print str(arg), print '\n indirect arguments:', for arg in range(0, nargs): if maps[arg] == OP_MAP: print str(arg), if ninds > 0: print '\n number of indirect datasets: ' + str(ninds), print '\n' # store away in master list if not repeat: nkernels = nkernels + 1 temp = {'name': name, 'nargs': nargs, 'dims': dims, 'maps': maps, 'var': var, 'typs': typs, 'accs': accs, 'idxs': idxs, 'inds': inds, 'soaflags': soaflags, 'ninds': ninds, 'inddims': inddims, 'indaccs': indaccs, 'indtyps': indtyps, 'invinds': invinds, 'mapnames' : mapnames, 'mapinds': mapinds, 'invmapinds' : invmapinds} kernels.append(temp) (kernels_in_files[a - 1]).append(nkernels - 1) else: append = 1 for in_file in range(0, len(kernels_in_files[a - 1])): if kernels_in_files[a - 1][in_file] == which_file: append = 0 if append == 1: (kernels_in_files[a - 1]).append(which_file) # output new source file fid = open(src_file.split('.')[0] + '_op.cpp', 'w') date = datetime.datetime.now() #fid.write('//\n// auto-generated by op2.py on ' + # date.strftime("%Y-%m-%d %H:%M") + '\n//\n\n') fid.write('//\n// auto-generated by op2.py\n//\n\n') loc_old = 0 # read original file and locate header location header_len = 11 loc_header = [text.find("op_seq.h")] if loc_header[0] == -1: header_len = 13 loc_header = [text.find("op_lib_cpp.h")] # get locations of all op_decl_consts n_consts = len(const_args) loc_consts = [0] * n_consts for n in range(0, n_consts): loc_consts[n] = const_args[n]['loc'] # get locations of all op_par_loops n_loops = len(loop_args) loc_loops = [0] * n_loops for n in range(0, n_loops): loc_loops[n] = loop_args[n]['loc'] locs = sorted(loc_header + loc_consts + loc_loops) # process header, loops and constants for loc in range(0, len(locs)): if locs[loc] != -1: fid.write(text[loc_old:locs[loc] - 1]) loc_old = locs[loc] - 1 indent = '' ind = 0 while 1: if text[locs[loc] - ind] == '\n': break indent = indent + ' ' ind = ind + 1 if (locs[loc] in loc_header) and (locs[loc] != -1): fid.write(' "op_lib_cpp.h"\n\n') fid.write('//\n// op_par_loop declarations\n//\n') fid.write('#ifdef OPENACC\n#ifdef __cplusplus\nextern "C" {\n#endif\n#endif\n') for k_iter in range(0, len(kernels_in_files[a - 1])): k = kernels_in_files[a - 1][k_iter] line = '\nvoid op_par_loop_' + \ kernels[k]['name'] + '(char const *, op_set,\n' for n in range(1, kernels[k]['nargs']): line = line + ' op_arg,\n' line = line + ' op_arg );\n' fid.write(line) fid.write('#ifdef OPENACC\n#ifdef __cplusplus\n}\n#endif\n#endif\n') fid.write('\n') loc_old = locs[loc] + header_len-1 continue if locs[loc] in loc_loops: indent = indent + ' ' * len('op_par_loop') endofcall = text.find(';', locs[loc]) curr_loop = loc_loops.index(locs[loc]) name = loop_args[curr_loop]['name1'] line = str(' op_par_loop_' + name + '(' + loop_args[curr_loop]['name2'] + ',' + loop_args[curr_loop]['set'] + ',\n' + indent) for arguments in range(0, loop_args[curr_loop]['nargs']): elem = loop_args[curr_loop]['args'][arguments] if elem['type'] == 'op_arg_dat': line = line + elem['type'] + '(' + elem['dat'] + \ ',' + elem['idx'] + ',' + elem['map'] + \ ',' + elem['dim'] + ',' + elem['typ'] + \ ',' + elem['acc'] + '),\n' + indent elif elem['type'] == 'op_arg_gbl': line = line + elem['type'] + '(' + elem['data'] + \ ',' + elem['dim'] + ',' + elem['typ'] + \ ',' + elem['acc'] + '),\n' + indent fid.write(line[0:-len(indent) - 2] + ');') loc_old = endofcall + 1 continue if locs[loc] in loc_consts: curr_const = loc_consts.index(locs[loc]) endofcall = text.find(';', locs[loc]) name = const_args[curr_const]['name'] fid.write(indent[0:-2] + 'op_decl_const2("' + name.strip() + '",' + str(const_args[curr_const]['dim']) + ',' + const_args[curr_const]['type'] + ',' + const_args[curr_const]['name2'].strip() + ');') loc_old = endofcall + 1 continue fid.write(text[loc_old:]) fid.close() f.close() # end of loop over input source files # errors and warnings if ninit == 0: print' ' print'-----------------------------' print' WARNING: no call to op_init' if auto_soa==1: print' WARNING: code generated with OP_AUTO_SOA,\n but couldn\'t modify op_init to pass\n an additional parameter of 1.\n Please make sure OP_AUTO_SOA is set when executing' print'-----------------------------' if nexit == 0: print' ' print'-------------------------------' print' WARNING: no call to op_exit ' print'-------------------------------' if npart == 0 and nhdf5 > 0: print' ' print'---------------------------------------------------' print' WARNING: hdf5 calls without call to op_partition ' print'---------------------------------------------------' # # finally, generate target-specific kernel files # op2_gen_seq(str(sys.argv[1]), date, consts, kernels) # MPI+GENSEQ version - initial version, no vectorisation op2_gen_mpi_vec(str(sys.argv[1]), date, consts, kernels) # MPI+GENSEQ with code that gets auto vectorised with intel compiler (version 15.0 and above) #code generators for OpenMP parallelisation with MPI #op2_gen_openmp(str(sys.argv[1]), date, consts, kernels) # Initial OpenMP code generator op2_gen_openmp_simple(str(sys.argv[1]), date, consts, kernels) # Simplified and Optimized OpenMP code generator op2_gen_openacc(str(sys.argv[1]), date, consts, kernels) # Simplified and Optimized OpenMP code generator #code generators for NVIDIA GPUs with CUDA #op2_gen_cuda(str(sys.argv[1]), date, consts, kernels,sets) # Optimized for Fermi GPUs op2_gen_cuda_simple(str(sys.argv[1]), date, consts, kernels,sets) # Optimized for Kepler GPUs # generates openmp code as well as cuda code into the same file #op2_gen_cuda_simple_hyb(str(sys.argv[1]), date, consts, kernels,sets) # CPU and GPU will then do comutations as a hybrid application import subprocess retcode = subprocess.call("which clang-format > /dev/null", shell=True) if retcode == 0: retcode = subprocess.call("$OP2_INSTALL_PATH/../translator/c/python/format.sh", shell=True) else: print 'Cannot find clang-format in PATH' print 'Install and add clang-format to PATH to format generated code to conform to code formatting guidelines'
def main(): # declare constants ninit = 0 nexit = 0 npart = 0 nhdf5 = 0 nconsts = 0 nkernels = 0 consts = [] kernels = [] sets = [] kernels_in_files = [] OP_ID = 1 OP_GBL = 2 OP_MAP = 3 OP_READ = 1 OP_WRITE = 2 OP_RW = 3 OP_INC = 4 OP_MAX = 5 OP_MIN = 6 OP_accs_labels = ['OP_READ', 'OP_WRITE', 'OP_RW', 'OP_INC', 'OP_MAX', 'OP_MIN'] # loop over all input source files kernels_in_files = [[] for _ in range(len(sys.argv) - 1)] for a in range(1, len(sys.argv)): print 'processing file ' + str(a) + ' of ' + str(len(sys.argv) - 1) + \ ' ' + str(sys.argv[a]) src_file = str(sys.argv[a]) f = open(src_file, 'r') text = f.read() # check for op_init/op_exit/op_partition/op_hdf5 calls inits, exits, parts, hdf5s = op_parse_calls(text) if inits + exits + parts + hdf5s > 0: print ' ' if inits > 0: print'contains op_init call' if exits > 0: print'contains op_exit call' if parts > 0: print'contains op_partition call' if hdf5s > 0: print'contains op_hdf5 calls' ninit = ninit + inits nexit = nexit + exits npart = npart + parts nhdf5 = nhdf5 + hdf5s # parse and process constants const_args = op_decl_const_parse(text) set_list = op_decl_set_parse(text) for i in range(0,len(set_list)): sets.append(set_list[i]) # cleanup '&' symbols from name and convert dim to integer for i in range(0, len(const_args)): if const_args[i]['name'][0] == '&': const_args[i]['name'] = const_args[i]['name'][1:] const_args[i]['dim'] = int(const_args[i]['dim']) # check for repeats nconsts = 0 for i in range(0, len(const_args)): repeat = 0 name = const_args[i]['name'] for c in range(0, nconsts): if const_args[i]['name'] == consts[c]['name']: repeat = 1 if const_args[i]['type'] != consts[c]['type']: print 'type mismatch in repeated op_decl_const' if const_args[i]['dim'] != consts[c]['dim']: print 'size mismatch in repeated op_decl_const' if repeat > 0: print 'repeated global constant ' + const_args[i]['name'] else: print '\nglobal constant (' + const_args[i]['name'].strip() \ + ') of size ' + str(const_args[i]['dim']) # store away in master list if repeat == 0: nconsts = nconsts + 1 temp = {'dim': const_args[i]['dim'], 'type': const_args[i]['type'].strip(), 'name': const_args[i]['name'].strip()} consts.append(temp) # parse and process op_par_loop calls loop_args = op_par_loop_parse(text) for i in range(0, len(loop_args)): name = loop_args[i]['name1'] nargs = loop_args[i]['nargs'] print '\nprocessing kernel ' + name + ' with ' + str(nargs) + ' arguments', # process arguments var = [''] * nargs idxs = [0] * nargs dims = [''] * nargs maps = [0] * nargs mapnames = ['']*nargs typs = [''] * nargs accs = [0] * nargs soaflags = [0] * nargs for m in range(0, nargs): arg_type = loop_args[i]['args'][m]['type'] args = loop_args[i]['args'][m] if arg_type.strip() == 'op_arg_dat': var[m] = args['dat'] idxs[m] = args['idx'] if str(args['map']).strip() == 'OP_ID': maps[m] = OP_ID if int(idxs[m]) != -1: print 'invalid index for argument' + str(m) else: maps[m] = OP_MAP mapnames[m] = str(args['map']).strip() dims[m] = args['dim'] soa_loc = args['typ'].find(':soa') if soa_loc > 0: soaflags[m] = 1 typs[m] = args['typ'][1:soa_loc] else: typs[m] = args['typ'][1:-1] l = -1 for l in range(0, len(OP_accs_labels)): if args['acc'].strip() == OP_accs_labels[l].strip(): break if l == -1: print 'unknown access type for argument ' + str(m) else: accs[m] = l + 1 if arg_type.strip() == 'op_arg_gbl': maps[m] = OP_GBL var[m] = args['data'] dims[m] = args['dim'] typs[m] = args['typ'][1:-1] l = -1 for l in range(0, len(OP_accs_labels)): if args['acc'].strip() == OP_accs_labels[l].strip(): break if l == -1: print 'unknown access type for argument ' + str(m) else: accs[m] = l + 1 if (maps[m] == OP_GBL) and (accs[m] == OP_WRITE or accs[m] == OP_RW): print 'invalid access type for argument ' + str(m) if (maps[m] != OP_GBL) and (accs[m] == OP_MIN or accs[m] == OP_MAX): print 'invalid access type for argument ' + str(m) # print var[m]+' '+str(idxs[m])+' '+str(maps[m])+' '+\ # str(dims[m])+' '+typs[m]+' '+str(accs[m]) print ' ' # identify indirect datasets ninds = 0 inds = [0] * nargs invinds = [0] * nargs indtyps = [''] * nargs inddims = [''] * nargs indaccs = [0] * nargs invmapinds = [0]*nargs mapinds = [0]*nargs j = [i for i, x in enumerate(maps) if x == OP_MAP] while len(j) > 0: indtyps[ninds] = typs[j[0]] inddims[ninds] = dims[j[0]] indaccs[ninds] = accs[j[0]] invinds[ninds] = j[0] # inverse mapping ninds = ninds + 1 for i in range(0, len(j)): if var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]] \ and accs[j[0]] == accs[j[i]]: # same variable inds[j[i]] = ninds k = [] for i in range(0, len(j)): if not (var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]] and accs[j[0]] == accs[j[i]]): # same variable k = k + [j[i]] j = k if ninds > 0: invmapinds = invinds[:] for i in range(0,ninds): for j in range(0,i): if (mapnames[invinds[i]] == mapnames[invinds[j]]): invmapinds[i] = invmapinds[j] for i in range(0,nargs): mapinds[i] = i for j in range(0,i): if (maps[i] == OP_MAP) and (mapnames[i] == mapnames[j]) and (idxs[i] == idxs[j]): mapinds[i] = mapinds[j] # check for repeats repeat = False rep1 = False rep2 = False which_file = -1 for nk in range(0, nkernels): rep1 = kernels[nk]['name'] == name and \ kernels[nk]['nargs'] == nargs and \ kernels[nk]['ninds'] == ninds if rep1: rep2 = True for arg in range(0, nargs): rep2 = rep2 and \ kernels[nk]['dims'][arg] == dims[arg] and \ kernels[nk]['maps'][arg] == maps[arg] and \ kernels[nk]['typs'][arg] == typs[arg] and \ kernels[nk]['accs'][arg] == accs[arg] and \ kernels[nk]['idxs'][arg] == idxs[arg] and \ kernels[nk]['soaflags'][arg] == soaflags[arg] and \ kernels[nk]['inds'][arg] == inds[arg] for arg in range(0, ninds): rep2 = rep2 and \ kernels[nk]['inddims'][arg] == inddims[arg] and \ kernels[nk]['indaccs'][arg] == indaccs[arg] and \ kernels[nk]['indtyps'][arg] == indtyps[arg] and \ kernels[nk]['invinds'][arg] == invinds[arg] if rep2: print 'repeated kernel with compatible arguments: ' + \ kernels[nk]['name'], repeat = True which_file = nk else: print 'repeated kernel with incompatible arguments: ERROR' break # output various diagnostics if not repeat: print ' local constants:', for arg in range(0, nargs): if maps[arg] == OP_GBL and accs[arg] == OP_READ: print str(arg), print '\n global reductions:', for arg in range(0, nargs): if maps[arg] == OP_GBL and accs[arg] != OP_READ: print str(arg), print '\n direct arguments:', for arg in range(0, nargs): if maps[arg] == OP_ID: print str(arg), print '\n indirect arguments:', for arg in range(0, nargs): if maps[arg] == OP_MAP: print str(arg), if ninds > 0: print '\n number of indirect datasets: ' + str(ninds), print '\n' # store away in master list if not repeat: nkernels = nkernels + 1 temp = {'name': name, 'nargs': nargs, 'dims': dims, 'maps': maps, 'var': var, 'typs': typs, 'accs': accs, 'idxs': idxs, 'inds': inds, 'soaflags': soaflags, 'ninds': ninds, 'inddims': inddims, 'indaccs': indaccs, 'indtyps': indtyps, 'invinds': invinds, 'mapnames' : mapnames, 'mapinds': mapinds, 'invmapinds' : invmapinds} kernels.append(temp) (kernels_in_files[a - 1]).append(nkernels - 1) else: append = 1 for in_file in range(0, len(kernels_in_files[a - 1])): if kernels_in_files[a - 1][in_file] == which_file: append = 0 if append == 1: (kernels_in_files[a - 1]).append(which_file) # output new source file fid = open(src_file.split('.')[0] + '_op.cpp', 'w') date = datetime.datetime.now() fid.write('//\n// auto-generated by op2.py on ' + date.strftime("%Y-%m-%d %H:%M") + '\n//\n\n') loc_old = 0 # read original file and locate header location header_len = 11 loc_header = [text.find("op_seq.h")] if loc_header[0] == -1: header_len = 13 loc_header = [text.find("op_lib_cpp.h")] # get locations of all op_decl_consts n_consts = len(const_args) loc_consts = [0] * n_consts for n in range(0, n_consts): loc_consts[n] = const_args[n]['loc'] # get locations of all op_par_loops n_loops = len(loop_args) loc_loops = [0] * n_loops for n in range(0, n_loops): loc_loops[n] = loop_args[n]['loc'] locs = sorted(loc_header + loc_consts + loc_loops) # process header, loops and constants for loc in range(0, len(locs)): if locs[loc] != -1: fid.write(text[loc_old:locs[loc] - 1]) loc_old = locs[loc] - 1 indent = '' ind = 0 while 1: if text[locs[loc] - ind] == '\n': break indent = indent + ' ' ind = ind + 1 if (locs[loc] in loc_header) and (locs[loc] != -1): fid.write(' "op_lib_cpp.h"\n\n') line = '\n#define STRIDE(x,y) x\n' for ns in range (0,len(sets)): line += 'int '+sets[ns]['name'].replace('"','')+'_stride = 1;\n' fid.write(line) fid.write('//\n// op_par_loop declarations\n//\n') for k_iter in range(0, len(kernels_in_files[a - 1])): k = kernels_in_files[a - 1][k_iter] line = '\nvoid op_par_loop_' + \ kernels[k]['name'] + '(char const *, op_set,\n' for n in range(1, kernels[k]['nargs']): line = line + ' op_arg,\n' line = line + ' op_arg );\n' fid.write(line) fid.write('\n') loc_old = locs[loc] + header_len continue if locs[loc] in loc_loops: indent = indent + ' ' * len('op_par_loop') endofcall = text.find(';', locs[loc]) curr_loop = loc_loops.index(locs[loc]) name = loop_args[curr_loop]['name1'] line = str(' op_par_loop_' + name + '(' + loop_args[curr_loop]['name2'] + ',' + loop_args[curr_loop]['set'] + ',\n' + indent) for arguments in range(0, loop_args[curr_loop]['nargs']): elem = loop_args[curr_loop]['args'][arguments] if elem['type'] == 'op_arg_dat': line = line + elem['type'] + '(' + elem['dat'] + \ ',' + elem['idx'] + ',' + elem['map'] + \ ',' + elem['dim'] + ',' + elem['typ'] + \ ',' + elem['acc'] + '),\n' + indent elif elem['type'] == 'op_arg_gbl': line = line + elem['type'] + '(' + elem['data'] + \ ',' + elem['dim'] + ',' + elem['typ'] + \ ',' + elem['acc'] + '),\n' + indent fid.write(line[0:-len(indent) - 2] + ');') loc_old = endofcall + 1 continue if locs[loc] in loc_consts: curr_const = loc_consts.index(locs[loc]) endofcall = text.find(';', locs[loc]) name = const_args[curr_const]['name'] fid.write(indent[0:-2] + 'op_decl_const2("' + name.strip() + '",' + str(const_args[curr_const]['dim']) + ',' + const_args[curr_const]['type'] + ',' + const_args[curr_const]['name2'].strip() + ');') loc_old = endofcall + 1 continue print loc_old, len(text) fid.write(text[loc_old:]) fid.close() f.close() # end of loop over input source files # errors and warnings if ninit == 0: print' ' print'-----------------------------' print' ERROR: no call to op_init ' print'-----------------------------' if nexit == 0: print' ' print'-------------------------------' print' WARNING: no call to op_exit ' print'-------------------------------' if npart == 0 and nhdf5 > 0: print' ' print'---------------------------------------------------' print' WARNING: hdf5 calls without call to op_partition ' print'---------------------------------------------------' # finally, generate target-specific kernel files op2_gen_seq(str(sys.argv[1]), date, consts, kernels) op2_gen_openmp_simple(str(sys.argv[1]), date, consts, kernels) #op2_gen_cuda_simple_hyb(str(sys.argv[1]), date, consts, kernels,sets) op2_gen_cuda_simple(str(sys.argv[1]), date, consts, kernels,sets)
def main(srcFilesAndDirs=sys.argv[1:]): # declare constants ninit = 0 nexit = 0 npart = 0 nhdf5 = 0 nconsts = 0 nkernels = 0 consts = [] kernels = [] sets = [] kernels_in_files = [] macro_defs = {} OP_ID = 1 OP_GBL = 2 OP_MAP = 3 OP_READ = 1 OP_WRITE = 2 OP_RW = 3 OP_INC = 4 OP_MAX = 5 OP_MIN = 6 auto_soa=os.getenv('OP_AUTO_SOA','0') OP_accs_labels = ['OP_READ', 'OP_WRITE', 'OP_RW', 'OP_INC', 'OP_MAX', 'OP_MIN'] src_files = [s for s in srcFilesAndDirs if os.path.isfile(s)] src_dirs = [d for d in srcFilesAndDirs if os.path.isdir(d)] ## Extract macro definitions: for src_file in src_files: print("Parsing file '" + src_file + "' for macro definitions.") with open(src_file, 'r') as f: text = f.read() local_defs = op_parse_macro_defs(text) for k in local_defs.keys(): if (k in macro_defs) and (local_defs[k] != macro_defs[k]): msg = "WARNING: Have found two different definitions for macro '{}': '{}' and '{}'. Using the first definition.".format(k, macro_defs[k], local_defs[k]) print(msg) continue else: macro_defs[k] = local_defs[k] self_evaluate_macro_defs(macro_defs) ## Loop over all input source files to search for op_par_loop calls kernels_in_files = [[] for _ in range(len(srcFilesAndDirs))] src_file_num = -1 for src_file in src_files: src_file_num = src_file_num + 1 print("Processing file " + str(src_file_num+1) + " of " + str(len(src_files)) + \ ": " + src_file) with open(src_file, 'r') as f: text = f.read() any_soa = 0 # check for op_init/op_exit/op_partition/op_hdf5 calls inits, exits, parts, hdf5s = op_parse_calls(text) if inits + exits + parts + hdf5s > 0: print ' ' if inits > 0: print'contains op_init call' if auto_soa<>'0': text = append_init_soa(text) if exits > 0: print'contains op_exit call' if parts > 0: print'contains op_partition call' if hdf5s > 0: print'contains op_hdf5 calls' ninit = ninit + inits nexit = nexit + exits npart = npart + parts nhdf5 = nhdf5 + hdf5s # parse and process constants const_args = op_decl_const_parse(text) set_list = op_decl_set_parse(text) for i in range(0, len(set_list)): sets.append(set_list[i]) # cleanup '&' symbols from name and convert dim to integer for i in range(0, len(const_args)): const_args[i]['dim'] = evaluate_macro_defs_in_string(macro_defs, const_args[i]['dim']) if const_args[i]['name'][0] == '&': const_args[i]['name'] = const_args[i]['name'][1:] const_args[i]['dim'] = int(const_args[i]['dim']) # check for repeats nconsts = 0 for i in range(0, len(const_args)): repeat = 0 name = const_args[i]['name'] for c in range(0, nconsts): if const_args[i]['name'] == consts[c]['name']: repeat = 1 if const_args[i]['type'] != consts[c]['type']: print 'type mismatch in repeated op_decl_const' if const_args[i]['dim'] != consts[c]['dim']: print 'size mismatch in repeated op_decl_const' if repeat > 0: print 'repeated global constant ' + const_args[i]['name'] else: print '\nglobal constant (' + const_args[i]['name'].strip() \ + ') of size ' + str(const_args[i]['dim']) # store away in master list if repeat == 0: nconsts = nconsts + 1 temp = {'dim': const_args[i]['dim'], 'type': const_args[i]['type'].strip(), 'name': const_args[i]['name'].strip()} consts.append(temp) # parse and process op_par_loop calls loop_args = op_par_loop_parse(text) for i in range(0, len(loop_args)): name = loop_args[i]['name1'] nargs = loop_args[i]['nargs'] print '\nprocessing kernel ' + name + ' with ' + str(nargs) + ' arguments', # process arguments var = [''] * nargs idxs = [0] * nargs dims = [''] * nargs maps = [0] * nargs mapnames = ['']*nargs typs = [''] * nargs accs = [0] * nargs soaflags = [0] * nargs optflags = [0] * nargs any_opt = 0 for m in range(0, nargs): argm = loop_args[i]['args'][m] argm['dim'] = evaluate_macro_defs_in_string(macro_defs, argm['dim']) arg_type = loop_args[i]['args'][m]['type'] args = loop_args[i]['args'][m] if arg_type.strip() == 'op_arg_dat' or arg_type.strip() == 'op_opt_arg_dat': argm['idx'] = evaluate_macro_defs_in_string(macro_defs, argm['idx']) if arg_type.strip() == 'op_arg_dat' or arg_type.strip() == 'op_opt_arg_dat': var[m] = args['dat'] idxs[m] = args['idx'] if arg_type.strip() == 'op_opt_arg_dat': any_opt = 1 if str(args['map']).strip() == 'OP_ID': maps[m] = OP_ID if int(idxs[m]) != -1: print 'invalid index for argument' + str(m) else: maps[m] = OP_MAP mapnames[m] = str(args['map']).strip() dims[m] = args['dim'] soa_loc = args['typ'].find(':soa') if ((auto_soa=='1') and (((not dims[m].isdigit()) or int(dims[m])>1)) and (soa_loc < 0)): soa_loc = len(args['typ'])-1 if soa_loc > 0: soaflags[m] = 1 any_soa = 1 typs[m] = args['typ'][1:soa_loc] else: typs[m] = args['typ'][1:-1] l = -1 for l in range(0, len(OP_accs_labels)): if args['acc'].strip() == OP_accs_labels[l].strip(): break if l == -1: print 'unknown access type for argument ' + str(m) else: accs[m] = l + 1 if arg_type.strip() == 'op_opt_arg_dat': optflags[m] = 1 else: optflags[m] = 0 if arg_type.strip() == 'op_arg_gbl': maps[m] = OP_GBL var[m] = args['data'] dims[m] = args['dim'] typs[m] = args['typ'][1:-1] optflags[m] = 0 l = -1 for l in range(0, len(OP_accs_labels)): if args['acc'].strip() == OP_accs_labels[l].strip(): break if l == -1: print 'unknown access type for argument ' + str(m) else: accs[m] = l + 1 if (maps[m] == OP_GBL) and (accs[m] == OP_WRITE or accs[m] == OP_RW): print 'invalid access type for argument ' + str(m) if (maps[m] != OP_GBL) and (accs[m] == OP_MIN or accs[m] == OP_MAX): print 'invalid access type for argument ' + str(m) print ' ' # identify indirect datasets ninds = 0 inds = [0] * nargs invinds = [0] * nargs indtyps = [''] * nargs inddims = [''] * nargs indaccs = [0] * nargs invmapinds = [0]*nargs mapinds = [0]*nargs j = [i for i, x in enumerate(maps) if x == OP_MAP] while len(j) > 0: indtyps[ninds] = typs[j[0]] inddims[ninds] = dims[j[0]] indaccs[ninds] = accs[j[0]] invinds[ninds] = j[0] # inverse mapping ninds = ninds + 1 for i in range(0, len(j)): if var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]] \ and accs[j[0]] == accs[j[i]] and mapnames[j[0]] == mapnames[j[i]]: # same variable inds[j[i]] = ninds k = [] for i in range(0, len(j)): if not (var[j[0]] == var[j[i]] and typs[j[0]] == typs[j[i]] and accs[j[0]] == accs[j[i]] and mapnames[j[0]] == mapnames[j[i]]): # same variable k = k + [j[i]] j = k if ninds > 0: invmapinds = invinds[:] for i in range(0, ninds): for j in range(0, i): if (mapnames[invinds[i]] == mapnames[invinds[j]]): invmapinds[i] = invmapinds[j] for i in range(0, nargs): mapinds[i] = i for j in range(0, i): if (maps[i] == OP_MAP) and (mapnames[i] == mapnames[j]) and (idxs[i] == idxs[j]): mapinds[i] = mapinds[j] # check for repeats repeat = False rep1 = False rep2 = False which_file = -1 for nk in range(0, nkernels): rep1 = kernels[nk]['name'] == name and \ kernels[nk]['nargs'] == nargs and \ kernels[nk]['ninds'] == ninds if rep1: rep2 = True for arg in range(0, nargs): rep2 = rep2 and \ kernels[nk]['dims'][arg] == dims[arg] and \ kernels[nk]['maps'][arg] == maps[arg] and \ kernels[nk]['typs'][arg] == typs[arg] and \ kernels[nk]['accs'][arg] == accs[arg] and \ kernels[nk]['idxs'][arg] == idxs[arg] and \ kernels[nk]['soaflags'][arg] == soaflags[arg] and \ kernels[nk]['optflags'][arg] == optflags[arg] and \ kernels[nk]['inds'][arg] == inds[arg] for arg in range(0, ninds): rep2 = rep2 and \ kernels[nk]['inddims'][arg] == inddims[arg] and \ kernels[nk]['indaccs'][arg] == indaccs[arg] and \ kernels[nk]['indtyps'][arg] == indtyps[arg] and \ kernels[nk]['invinds'][arg] == invinds[arg] if rep2: print 'repeated kernel with compatible arguments: ' + \ kernels[nk]['name'], repeat = True which_file = nk else: print 'repeated kernel with incompatible arguments: ERROR' break # output various diagnostics if not repeat: print ' local constants:', for arg in range(0, nargs): if maps[arg] == OP_GBL and accs[arg] == OP_READ: print str(arg), print '\n global reductions:', for arg in range(0, nargs): if maps[arg] == OP_GBL and accs[arg] != OP_READ: print str(arg), print '\n direct arguments:', for arg in range(0, nargs): if maps[arg] == OP_ID: print str(arg), print '\n indirect arguments:', for arg in range(0, nargs): if maps[arg] == OP_MAP: print str(arg), if ninds > 0: print '\n number of indirect datasets: ' + str(ninds), if any_opt: print '\n optional arguments:', for arg in range(0, nargs): if optflags[arg] == 1: print str(arg), print '\n' # store away in master list if not repeat: nkernels = nkernels + 1 temp = {'name': name, 'nargs': nargs, 'dims': dims, 'maps': maps, 'var': var, 'typs': typs, 'accs': accs, 'idxs': idxs, 'inds': inds, 'soaflags': soaflags, 'optflags': optflags, 'ninds': ninds, 'inddims': inddims, 'indaccs': indaccs, 'indtyps': indtyps, 'invinds': invinds, 'mapnames' : mapnames, 'mapinds': mapinds, 'invmapinds' : invmapinds} kernels.append(temp) (kernels_in_files[src_file_num]).append(nkernels - 1) else: append = 1 for in_file in range(0, len(kernels_in_files[src_file_num])): if kernels_in_files[src_file_num][in_file] == which_file: append = 0 if append == 1: (kernels_in_files[src_file_num]).append(which_file) # output new source file src_filename = os.path.basename(src_file) src_dirpath = os.path.dirname(src_file) if src_dirpath[0:2] == "./": src_dirpath = src_dirpath[2:] op_extension = "_op" if '.' in src_filename: src_filename_pieces = src_filename.split('.') n = len(src_filename_pieces) src_filename_extension = src_filename_pieces[n-1] op_src_filename = '.'.join(src_filename_pieces[0:(n-1)]) + op_extension + '.' + src_filename_extension else: op_src_filename = src_filename + op_extension op_src_filepath = op_src_filename op_src_dirpath = "" if src_dirpath != "": src_dirpath_pieces = src_dirpath.split('/') root_dir = src_dirpath_pieces[0] if len(src_dirpath_pieces) == 0: rem_dirpath = '' else: rem_dirpath = '/'.join(src_dirpath_pieces[1:]) op_src_dirpath = os.path.join(root_dir+"_op", rem_dirpath) op_src_filepath = os.path.join(op_src_dirpath, op_src_filename) if op_src_dirpath != "" and not os.path.exists(op_src_dirpath): os.makedirs(op_src_dirpath) fid = open(op_src_filepath, 'w') date = datetime.datetime.now() #fid.write('//\n// auto-generated by op2.py on ' + # date.strftime("%Y-%m-%d %H:%M") + '\n//\n\n') fid.write('//\n// auto-generated by op2.py\n//\n\n') loc_old = 0 # read original file and locate header location header_len = 11 loc_header = [text.find("op_seq.h")] if loc_header[0] == -1: header_len = 13 loc_header = [text.find("op_lib_cpp.h")] # get locations of all op_decl_consts n_consts = len(const_args) loc_consts = [0] * n_consts for n in range(0, n_consts): loc_consts[n] = const_args[n]['loc'] # get locations of all op_par_loops n_loops = len(loop_args) loc_loops = [0] * n_loops for n in range(0, n_loops): loc_loops[n] = loop_args[n]['loc'] locs = sorted(loc_header + loc_consts + loc_loops) # process header, loops and constants for loc in range(0, len(locs)): if locs[loc] != -1: fid.write(text[loc_old:locs[loc] - 1]) loc_old = locs[loc] - 1 indent = '' ind = 0 while 1: if text[locs[loc] - ind] == '\n': break indent = indent + ' ' ind = ind + 1 if (locs[loc] in loc_header) and (locs[loc] != -1): fid.write(' "op_lib_cpp.h"\n\n') fid.write('//\n// op_par_loop declarations\n//\n') fid.write('#ifdef OPENACC\n#ifdef __cplusplus\nextern "C" {\n#endif\n#endif\n') for k_iter in range(0, len(kernels_in_files[src_file_num])): k = kernels_in_files[src_file_num][k_iter] line = '\nvoid op_par_loop_' + \ kernels[k]['name'] + '(char const *, op_set,\n' for n in range(1, kernels[k]['nargs']): line = line + ' op_arg,\n' line = line + ' op_arg );\n' fid.write(line) fid.write('#ifdef OPENACC\n#ifdef __cplusplus\n}\n#endif\n#endif\n') fid.write('\n') loc_old = locs[loc] + header_len-1 continue if locs[loc] in loc_loops: indent = indent + ' ' * len('op_par_loop') endofcall = text.find(';', locs[loc]) curr_loop = loc_loops.index(locs[loc]) name = loop_args[curr_loop]['name1'] line = str(' op_par_loop_' + name + '(' + loop_args[curr_loop]['name2'] + ',' + loop_args[curr_loop]['set'] + ',\n' + indent) for arguments in range(0, loop_args[curr_loop]['nargs']): elem = loop_args[curr_loop]['args'][arguments] if elem['type'] == 'op_arg_dat': line = line + elem['type'] + '(' + elem['dat'] + \ ',' + elem['idx'] + ',' + elem['map'] + \ ',' + elem['dim'] + ',' + elem['typ'] + \ ',' + elem['acc'] + '),\n' + indent elif elem['type'] == 'op_opt_arg_dat': line = line + elem['type'] + '(' \ + elem['opt'] + ',' + elem['dat'] + \ ',' + elem['idx'] + ',' + elem['map'] + \ ',' + elem['dim'] + ',' + elem['typ'] + \ ',' + elem['acc'] + '),\n' + indent elif elem['type'] == 'op_arg_gbl': line = line + elem['type'] + '(' + elem['data'] + \ ',' + elem['dim'] + ',' + elem['typ'] + \ ',' + elem['acc'] + '),\n' + indent fid.write(line[0:-len(indent) - 2] + ');') loc_old = endofcall + 1 continue if locs[loc] in loc_consts: curr_const = loc_consts.index(locs[loc]) endofcall = text.find(';', locs[loc]) name = const_args[curr_const]['name'] fid.write(indent[0:-2] + 'op_decl_const2("' + name.strip() + '",' + str(const_args[curr_const]['dim']) + ',' + const_args[curr_const]['type'] + ',' + const_args[curr_const]['name2'].strip() + ');') loc_old = endofcall + 1 continue fid.write(text[loc_old:]) fid.close() # end of loop over input source files ## Loop over kernels, looking for a header file named after each ## kernel in either working directory or one of the input-supplied ## directories: for nk in xrange(0, len(kernels)): k_data = kernels[nk] k_name = k_data["name"] if not "decl_filepath" in k_data.keys(): src_file = k_name + ".h" if os.path.isfile(src_file): with open(src_file, 'r') as f: text = f.read() if op_check_kernel_in_text(text, k_name): k_data["decl_filepath"] = src_file continue for dirname in src_dirs: filepath = os.path.join(dirname, src_file) if os.path.isfile(filepath): with open(filepath, 'r') as f: text = f.read() if op_check_kernel_in_text(text, k_name): k_data["decl_filepath"] = filepath break ## Any kernel declarations still not found must exist in files ## not named after the kernel. Search through content of all ## input-supplied files, and through all files of input-supplied ## directories: for nk in xrange(0, len(kernels)): if not "decl_filepath" in kernels[nk].keys(): k_data = kernels[nk] k_name = k_data["name"] for src_file in src_files: with open(src_file, 'r') as f: text = f.read() if op_check_kernel_in_text(text, k_name): k_data["decl_filepath"] = src_file break if not "decl_filepath" in k_data.keys(): for src_dir in src_dirs: for src_dir_subfile in [s for s in os.listdir(src_dir) if os.path.isfile(os.path.join(src_dir, s))]: src_dir_subfilepath = os.path.join(src_dir, src_dir_subfile) with open(src_dir_subfilepath, 'r') as f: text = f.read() if op_check_kernel_in_text(text, k_name): k_data["decl_filepath"] = src_dir_subfilepath break if "decl_filepath" in k_data.keys(): break fail = False for nk in xrange(0, len(kernels)): if not "decl_filepath" in kernels[nk].keys(): fail = True print("Declaration not found for kernel " + kernels[nk]["name"]) if fail: exit(2) # errors and warnings if ninit == 0: print' ' print'-----------------------------' print' WARNING: no call to op_init' if auto_soa==1: print' WARNING: code generated with OP_AUTO_SOA,\n but couldn\'t modify op_init to pass\n an additional parameter of 1.\n Please make sure OP_AUTO_SOA is set when executing' print'-----------------------------' if nexit == 0: print' ' print'-------------------------------' print' WARNING: no call to op_exit ' print'-------------------------------' if npart == 0 and nhdf5 > 0: print' ' print'---------------------------------------------------' print' WARNING: hdf5 calls without call to op_partition ' print'---------------------------------------------------' # # finally, generate target-specific kernel files # masterFile = str(srcFilesAndDirs[0]) op2_gen_seq(masterFile, date, consts, kernels) # MPI+GENSEQ version - initial version, no vectorisation #op2_gen_mpi_vec(masterFile, date, consts, kernels) # MPI+GENSEQ with code that gets auto vectorised with intel compiler (version 15.0 and above) #code generators for OpenMP parallelisation with MPI #op2_gen_openmp(masterFile, date, consts, kernels) # Initial OpenMP code generator op2_gen_openmp_simple(masterFile, date, consts, kernels) # Simplified and Optimized OpenMP code generator op2_gen_openacc(masterFile, date, consts, kernels) # Simplified and Optimized OpenMP code generator #code generators for NVIDIA GPUs with CUDA #op2_gen_cuda(masterFile, date, consts, kernels,sets) # Optimized for Fermi GPUs op2_gen_cuda_simple(masterFile, date, consts, kernels, sets, macro_defs) # Optimized for Kepler GPUs # generates openmp code as well as cuda code into the same file op2_gen_cuda_simple_hyb(masterFile, date, consts, kernels, sets) # CPU and GPU will then do comutations as a hybrid application #code generator for GPUs with OpenMP4.5 op2_gen_openmp4(masterFile, date, consts, kernels)