def time_concat(var, years, hist_dict, ave_info, file_dict, ave_type, simplecomm,
                all_files_vars, serial, timer, collapse_dim=''):
    '''
    Concatenates files together in the time dimension.

    @param var            The name of the variable to concatenate.
    @param years          A list of the years that are in this average.
    @param hist_dict      A dictionary that holds file references for all years/months.
    @param ave_info       A dictionary describing the type of average that is to be done.
                          Includes: type, months_to_average, fn, and weights
                          (weights are not used in this function/average).
    @param file_dict      A dictionary that holds file pointers to the input files
                          needed by this average calculation.
    @param ave_type       The average type key that indicates which type of average will be done.
    @param simplecomm     The simple comm object used for MPI communication.
    @param all_files_vars All of the file's variables with ncids attached.
    @param serial         Boolean indicating whether we are running in serial mode.
    @param timer          The timer object used to time the individual steps.
    @param collapse_dim   Used to collapse/average over one dimension.
    '''
    if (not simplecomm.is_manager() or serial):
        print('Concatenating ', ave_info['type'], ' for ', var)

    time_index = 0
    CONCAT_TAG = 60
    CONCAT_VAL_TAG = 67

    # Loop over years, months, and variables to cat them all together into one file
    first = True
    for yr in years:
        for m in ave_info['months_to_average']:
            if ('__meta' in var):
                parts = var.split('__')
                var = parts[0]
            # If slave, get slice and pass it to the master
            if (not simplecomm.is_manager() or serial):
                if 'zonalavg' in ave_type:
                    if collapse_dim is not None:
                        timer.start("Time to compute Average")
                        var_val = zonal_average(var, yr, m, hist_dict, file_dict, timer, collapse_dim)
                        timer.stop("Time to compute Average")
                else:
                    timer.start("Time to compute Average")
                    timer.start("Variable fetch time")
                    var_val = rover.fetch_slice(hist_dict, yr, m, var, file_dict)
                    timer.stop("Variable fetch time")
                    timer.stop("Time to compute Average")
                if not serial:
                    timer.start("Send Average Time")
                    var_shape = var_val.shape
                    var_dtype = var_val.dtype
                    md_message = {'name': var, 'shape': var_shape, 'dtype': var_dtype, 'index': time_index}
                    simplecomm.collect(data=md_message, tag=CONCAT_TAG)
                    simplecomm.collect(data=var_val, tag=CONCAT_VAL_TAG)
                    timer.stop("Send Average Time")
            if (simplecomm.is_manager() or serial):
                # If master, recv the slice and write it to the file
                if not serial:
                    timer.start("Recv Average Time")
                    r_rank, results = simplecomm.collect(tag=CONCAT_TAG)
                    r_rank, var_val = simplecomm.collect(tag=CONCAT_VAL_TAG)
                    if results['dtype'] == 'S1' or results['dtype'] == 'c':
                        var_val = var_val[0]
                    ti = results['index']
                    var_n = results['name']
                    timer.stop("Recv Average Time")
                else:
                    var_n = var
                    ti = time_index
                    if var_val.dtype == 'S1' or var_val.dtype == 'c':
                        var_val = var_val[0]
                timer.start("Write Netcdf Averages")
                climFileIO.write_averages(all_files_vars, var_val, var_n, index=ti)
                timer.stop("Write Netcdf Averages")
            time_index = time_index + 1
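# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the averager): a minimal example of the
# ave_info dictionary shape that time_concat expects, based only on the
# docstring above.  The concrete values ('mavg', 12 month indices) are
# assumptions for illustration; 'weights' is unused by this function.
# ---------------------------------------------------------------------------
_EXAMPLE_AVE_INFO = {
    'type': 'mavg',                        # label printed while concatenating
    'months_to_average': list(range(12)),  # month indices iterated per year
    'fn': 'mavg',                          # output file-name tag (used elsewhere)
    'weights': [],                         # not used by time_concat
}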
def compute_averages(self, spec):
    '''
    Sets up the run information and computes the averages.

    @param spec   An instance of the Specification class which holds the user
                  settings that define which averages to compute, directories,
                  file prefixes, etc.
    '''
    import os, sys
    import rover
    import climAverager
    import climFileIO
    import average_types as ave_t
    import regionOpts
    import string
    import collections
    from asaptools import timekeeper
    from asaptools import partition

    #==============================================================================
    #
    # Initialize
    #
    #==============================================================================

    # Initialize the timekeeper class and start the 'total' timer
    timer = timekeeper.TimeKeeper()
    timer.start("Total Time")
    # Initialize some timers that are not used by all tasks
    timer.reset("Send Average Time")
    timer.reset("Variable fetch time")
    timer.reset("Recv Average Time")
    timer.reset("Write Netcdf Averages")
    timer.reset("Time to compute Average")

    # Check the average list to make sure it complies with the standards
    ave_t.average_compliance(spec.avg_list)

    # Check if I'm the global master
    g_master = spec.main_comm.is_manager()

    for tag in spec.m_id:
        file_pattern = list(spec.file_pattern)
        if ('-999' not in tag):
            prefix = spec.prefix + '_' + tag
            p_index = file_pattern.index('$prefix')
            t_index = file_pattern.index('$m_id')
            for i in range(p_index + 1, t_index + 1):
                del file_pattern[p_index + 1]
        else:
            prefix = spec.prefix

        # Sort through the average list, figure out dependencies, and do the
        # averages in steps if need be.
        avg_dict = {0: spec.avg_list}
        for i in range(1, 20):
            avg_dict[i] = []
        avg_dict = ave_t.sort_depend(avg_dict, 0, spec.out_directory, prefix, spec.regions)
        print(avg_dict)

        # Initialize the tags for the average send/recv
        AVE_TAG = 40
        VNAME_TAG = 41

        start_level = min(avg_dict.keys())
        ordered_avg_dict = collections.OrderedDict(avg_dict)
        for i, value in ordered_avg_dict.items():
            # Initialize some containers
            var_list = []
            full_hist_dict = {}
            hist_dict = {}

            #==============================================================================
            #
            # Set the hist_dict up with file references for all years/months.
            # Create a list of all variables and meta variables within the file
            # and set the final variable list based on user preferences.
            #
            #==============================================================================

            # Set var_list and the file info dictionary
            timer.start("Define history dictionary")
            if (spec.hist_type == 'series'):
                full_hist_dict, full_var_list, meta_list, key = rover.set_slices_and_vars_time_series(
                    spec.in_directory, file_pattern, spec.date_pattern, prefix, spec.suffix,
                    spec.year0, spec.year1, spec.split, spec.split_files)
            else:
                full_hist_dict, full_var_list, meta_list, key = rover.set_slices_and_vars_time_slice(
                    spec.in_directory, file_pattern, prefix, spec.suffix, spec.year0, spec.year1)
            timer.stop("Define history dictionary")

            # Set the variable list.  If a variable list was passed to the averager,
            # use it.  Otherwise, use all variables within the file.
            if (len(spec.varlist) > 0):
                var_list = spec.varlist
                for v in full_var_list:
                    if '__meta' in v:
                        var_list.append(v)
            else:
                var_list = full_var_list
            meta_list = list(set(meta_list))
            var_list = list(set(var_list))

            #==============================================================================
            #
            # Workload Distribution
            #
            #==============================================================================

            # Each intercommunicator receives a list of averages it is responsible for.
            # Each MPI task within that intercommunicator gets a portion of the variable list.
            num_of_avg = len(avg_dict[i])
            min_procs_per_ave = min(4, spec.main_comm.get_size())

            # Override the user selection if they picked less than 2 or
            # the variable list is less than the min procs per sub-communicator
            if (min_procs_per_ave < 2 or len(var_list) <= (min_procs_per_ave - 1)):
                min_procs_per_ave = 2

            # If running in parallel mode, split the communicator and partition the averages
            if (spec.serial == False):
                size = spec.main_comm.get_size()
                rank = spec.main_comm.get_rank()

                # Split MPI comm world
                temp_color = (rank // min_procs_per_ave) % num_of_avg
                num_of_groups = size // min_procs_per_ave
                if (temp_color == num_of_groups):
                    temp_color = temp_color - 1
                groups = []
                for g in range(0, num_of_groups):
                    groups.append(g)
                group = groups[temp_color]
                inter_comm, multi_comm = spec.main_comm.divide(group)
                color = inter_comm.get_color()
                lsize = inter_comm.get_size()
                lrank = inter_comm.get_rank()
                l_master = inter_comm.is_manager()

                laverages = []
                AVE_LIST_TAG = 50
                # Partition the average task list among the inter/split communicators
                if (l_master):
                    laverages = multi_comm.partition(avg_dict[i], func=partition.EqualStride(), involved=True)
                    for b in range(1, lsize):
                        laverages_send = inter_comm.ration(data=laverages, tag=AVE_LIST_TAG)
                else:
                    laverages = inter_comm.ration(tag=AVE_LIST_TAG)
            else:
                # Running in serial mode.  Just copy the average list.
                laverages = avg_dict[i]
                inter_comm = spec.main_comm
                lsize = inter_comm.get_size()
                l_master = inter_comm.is_manager()

            # Partition the variable list between the tasks of each communicator
            if (lsize > 1 and spec.serial == False):
                lvar_list = inter_comm.partition(var_list, func=partition.EqualStride(), involved=False)
                if (l_master):
                    lvar_list = var_list
            else:
                lvar_list = var_list

            #==============================================================================
            #
            # Create the output directory if it doesn't exist
            #
            #==============================================================================
            if spec.serial or g_master:
                if not os.path.exists(spec.out_directory):
                    os.makedirs(spec.out_directory)
            spec.main_comm.sync()

            #==============================================================================
            #
            # Main Averaging Loop
            #
            #==============================================================================
            # Files are only split for the first loop.  When the dependent averages start,
            # they will operate on files that are already stitched together.
            if (i != 0):
                spec.split_name = 'null'
                spec.split = False
                spec.split_files = 'null'

            # Toggle to indicate that extra variables were added to the local
            # variable list (only do this once per average level)
            added_extra_vars = False

            for ave in laverages:
                for split_name in spec.split_files.split(","):
                    # Split apart the average info to get the type of average and year(s)
                    ave_descr = ave.split(':')
                    if ('hor.meanyr' in ave_descr[0] or 'hor.meanConcat' in ave_descr[0]):
                        ave_name_split = ave_descr[0].split('_')
                        region_num = ave_name_split[len(ave_name_split) - 1]
                        region_name = spec.regions[int(region_num)]
                        # Remove the region number from the average name
                        ave_descr[0] = ave_name_split[0]
                    else:
                        region_name = 'null'
                        region_num = -99

                    # If the average depends on other averages that have to be computed,
                    # create a new temporary dictionary
                    if '__d' in ave_descr:
                        yr0 = ave_descr[1]
                        if (len(ave_descr) > 2 and '_d' not in ave_descr[2]):
                            yr1 = ave_descr[2]
                        else:
                            yr1 = ave_descr[1]
                        hist_dict = rover.set_slices_and_vars_depend(
                            spec.out_directory, file_pattern, prefix, yr0, yr1,
                            ave_t.average_types[ave_descr[0]], ave_descr[0], region_name)
                    else:
                        hist_dict = dict(full_hist_dict)

                    # If concatenating mean_diff_rms files, also add the _DIFF and _RMS
                    # variables for each variable.
                    if ('hor.meanConcat' in ave_descr and added_extra_vars == False):
                        new_vars = []
                        for v in lvar_list:
                            if '__meta' not in v:
                                new_vars.append(v + '_DIFF')
                                new_vars.append(v + '_RMS')
                        lvar_list = lvar_list + new_vars
                        added_extra_vars = True

                    # Create and define the average file
                    timer.start("Create/Define Netcdf File")
                    if (len(ave_descr) < 3 or 'hor.meanyr' in ave_descr):
                        ave_date = string.zfill(ave_descr[1], 4)
                        ave_date2 = str(ave_descr[1])
                    else:
                        date1 = string.zfill(ave_descr[1], 4)
                        date2 = string.zfill(ave_descr[2], 4)
                        ave_date = date1 + '-' + date2
                        ave_date2 = str(ave_descr[1]) + '-' + str(ave_descr[2])
                    outfile_name = climFileIO.get_out_fn(ave_descr[0], prefix, ave_date,
                                                         ave_t.average_types[ave_descr[0]]['fn'], region_name)
                    if 'zonalavg' in ave_descr:
                        l_collapse_dim = spec.collapse_dim
                    else:
                        l_collapse_dim = ''
                    all_files_vars, new_file = climFileIO.define_ave_file(
                        l_master, spec.serial, var_list, lvar_list, meta_list, hist_dict,
                        spec.hist_type, ave_descr, prefix, outfile_name, spec.split, split_name,
                        spec.out_directory, inter_comm, spec.ncformat,
                        ave_t.average_types[ave_descr[0]]['months_to_average'][0], key,
                        spec.clobber, spec.year0, spec.year1, ave_date2,
                        collapse_dim=l_collapse_dim)
                    timer.stop("Create/Define Netcdf File")

                    # Start the loops to compute averages.
                    # Create a list of the years that are needed for this average.
                    years = []
                    if '__d' in ave_descr:
                        if (ave_t.average_types[ave_descr[0]]['depend_type'] == 'month'
                                or '_d' in ave_descr[2]):
                            years.append(int(ave_descr[1]))
                        else:
                            years = list(range(int(ave_descr[1]), int(ave_descr[2]) + 1))
                        depend = True
                    else:
                        if (len(ave_descr) == 2):
                            years.append(int(ave_descr[1]))
                        else:
                            years = list(range(int(ave_descr[1]), int(ave_descr[2]) + 1))
                        depend = False

                    # Get the first year.  If part of a sig average, this will be the sig
                    # first year, not the year of the individual average.
                    fyr = years[0]
                    if i + 1 in avg_dict.keys():
                        for a in avg_dict[i + 1]:
                            if (ave_descr[0] + '_sig') in a:
                                spl = a.split(':')
                                fyr = int(spl[1])

                    file_dict = []
                    open_list = []
                    # Open all of the files that this rank will need for this average (for time slice files)
                    if ((spec.hist_type == 'slice' or '__d' in ave_descr)
                            and (spec.serial or not l_master) and len(lvar_list) > 0):
                        file_dict = []
                        open_list = []
                        file_dict, open_list = climFileIO.open_all_files(
                            hist_dict, ave_t.average_types[ave_descr[0]]['months_to_average'],
                            years, lvar_list[0], 'null', ave_descr[0], depend, fyr)
                    # If concatenating files instead of averaging, piece the file together here.
                    # If not, enter the averaging loop.
                    if (('mavg' in ave_descr or 'moc' in ave_descr or 'annall' in ave_descr
                         or 'mons' in ave_descr or '_mean' in ave_descr[0]) and len(lvar_list) > 0):
                        file_dict = []
                        open_list = []
                        if (spec.serial or not l_master):
                            # Open files
                            file_dict, open_list = climFileIO.open_all_files(
                                hist_dict, ave_t.average_types[ave_descr[0]]['months_to_average'],
                                years, lvar_list[0], 'null', ave_descr[0], depend, fyr)
                    # Loop through the variables and compute the averages
                    for orig_var in lvar_list:
                        # Some variable names were suffixed with a meta label indicating that the
                        # variable exists in all files, but there isn't a dedicated time-series
                        # file to open.  Pick the first variable off the list and get values from there.
                        if ('__meta' in orig_var):
                            var = key
                        else:
                            var = orig_var
                        # Open all of the files that this rank will need for this average (for time series files)
                        if ((spec.hist_type == 'series' and '__d' not in ave_descr)
                                and (spec.serial or not l_master)):
                            if ('mavg' not in ave_descr or 'moc' not in ave_descr
                                    or 'annall' not in ave_descr or 'mons' not in ave_descr
                                    or '_mean' not in ave_descr[0]):
                                file_dict = []
                                open_list = []
                                file_dict, open_list = climFileIO.open_all_files(
                                    hist_dict, ave_t.average_types[ave_descr[0]]['months_to_average'],
                                    years, var, split_name, ave_descr[0], depend, fyr)
                        # We now have open files to pull values from.  Now reset the variable name.
                        if ('__meta' in orig_var):
                            parts = orig_var.split('__')
                            var = parts[0]
                        # If concatenating, all of the procs will participate in this call
                        if ('mavg' in ave_descr or 'moc' in ave_descr or 'mocm' in ave_descr
                                or 'hor.meanConcat' in ave_descr or 'annall' in ave_descr
                                or 'mons' in ave_descr or '_mean' in ave_descr[0]
                                or 'zonalavg' in ave_descr):
                            if 'zonalavg' in ave_descr:
                                l_collapse_dim = spec.collapse_dim
                            else:
                                l_collapse_dim = ''
                            # Concat
                            var_avg_results = climAverager.time_concat(
                                var, years, hist_dict, ave_t.average_types[ave_descr[0]],
                                file_dict, ave_descr[0], inter_comm, all_files_vars,
                                spec.serial, timer, collapse_dim=spec.collapse_dim)
                        # Else (not concat), each slave computes averages and each master collects and writes
                        else:
                            if spec.serial or not l_master:
                                # mean_diff_rms file
                                if ('hor.meanyr' in ave_descr and '__meta' not in orig_var):
                                    obs_file = spec.obs_dir + "/" + spec.obs_file
                                    reg_obs_file = spec.obs_dir + "/" + region_name + spec.reg_obs_file_suffix
                                    # The mean_diff_rms function will send the variables once they are created
                                    var_avg_results, var_DIFF_results, var_RMS_results = climAverager.mean_diff_rms(
                                        var, region_name, region_num, spec.region_nc_var,
                                        spec.region_wgt_var, years, hist_dict,
                                        ave_t.average_types[ave_descr[0]], file_dict, obs_file,
                                        reg_obs_file, inter_comm, spec.serial, VNAME_TAG, AVE_TAG)
                                else:
                                    if ('__metaChar' in orig_var):
                                        # Handle special meta
                                        var_avg_results = climAverager.get_metaCharValue(
                                            var, years, hist_dict, ave_t.average_types[ave_descr[0]],
                                            file_dict, timer)
                                    else:
                                        # Average
                                        if (spec.weighted == True and 'weights' in ave_t.average_types[ave_descr[0]]):
                                            var_avg_results = climAverager.weighted_avg_var(
                                                var, years, hist_dict, ave_t.average_types[ave_descr[0]],
                                                file_dict, ave_descr[0], timer, depend, fyr)
                                        else:
                                            var_avg_results = climAverager.avg_var(
                                                var, years, hist_dict, ave_t.average_types[ave_descr[0]],
                                                file_dict, ave_descr[0], timer, depend, fyr)
                                    # Close all open files (for time series files)
                                    if ((spec.hist_type == 'series' and '__d' not in ave_descr)
                                            and (spec.serial or not l_master)):
                                        climFileIO.close_all_files(open_list)
                                    # Pass the average results to the master rank for writing
                                    var_shape = var_avg_results.shape
                                    var_dtype = var_avg_results.dtype
                                    var_type = type(var_avg_results)
                                    md_message = {'name': var, 'shape': var_shape, 'dtype': var_dtype,
                                                  'average': var_avg_results, 'type': var_type}
                                    if not spec.serial:
                                        timer.start("Send Average Time")
                                        inter_comm.collect(data=var, tag=VNAME_TAG)
                                        inter_comm.collect(data=var_avg_results, tag=AVE_TAG)
                                        timer.stop("Send Average Time")
                            if spec.serial or l_master:
                                # If ave_descr is hor.meanyr, there will be three variables to
                                # write for each variable.  Otherwise, there will only be one.
                                if ('hor.meanyr' in ave_descr and '__meta' not in orig_var):
                                    var_cnt = 3
                                else:
                                    var_cnt = 1
                                for r in range(0, var_cnt):
                                    if not spec.serial:
                                        timer.start("Recv Average Time")
                                        r_rank, var_name = inter_comm.collect(tag=VNAME_TAG)
                                        r_rank, r_var_avg_results = inter_comm.collect(tag=AVE_TAG)
                                        timer.stop("Recv Average Time")
                                    else:
                                        var_name = var
                                        r_var_avg_results = var_avg_results
                                    timer.start("Write Netcdf Averages")
                                    climFileIO.write_averages(all_files_vars, r_var_avg_results, var_name)
                                    if ('hor.meanyr' in ave_descr and spec.serial) and '__meta' not in orig_var:
                                        climFileIO.write_averages(all_files_vars, var_DIFF_results, var_name + '_DIFF')
                                        climFileIO.write_averages(all_files_vars, var_RMS_results, var_name + '_RMS')
                                    timer.stop("Write Netcdf Averages")

                    # Close all open files (for time slice files)
                    if (('mavg' in ave_descr or 'moc__d' == ave_descr[0] or 'annall' in ave_descr
                         or 'mons' in ave_descr or '_mean' in ave_descr[0]) and len(lvar_list) > 0):
                        if (spec.serial or not l_master):
                            climFileIO.close_all_files(open_list)
                    elif ((spec.hist_type == 'slice' or '__d' in ave_descr)
                          and (spec.serial or not l_master) and len(lvar_list) > 0):
                        climFileIO.close_all_files(open_list)

                    # Sync the local communicator before closing the averaged netcdf file
                    # and moving on to the next average
                    inter_comm.sync()

                    # Close the newly created average file
                    if spec.serial or l_master:
                        new_file.close()

                # If needed, stitch the spatially split files together.
                if spec.serial or l_master:
                    if (len(spec.split_files.split(",")) > 1):
                        fn1 = spec.out_directory + '/nh_' + outfile_name
                        fn2 = spec.out_directory + '/sh_' + outfile_name
                        out_fn = spec.out_directory + '/' + outfile_name
                        dim_info = spec.split_orig_size.split(",")
                        dim1 = dim_info[0].split("=")
                        dim2 = dim_info[1].split("=")
                        regionOpts.combine_regions(fn1, fn2, out_fn, dim1[0], int(dim1[1]),
                                                   dim2[0], int(dim2[1]), "nj", spec.clobber)

            if not spec.serial:
                # Sync all MPI tasks; all averages should have been computed at this point
                spec.main_comm.sync()

    #==============================================================================
    #
    # Collect and print timing information
    #
    #==============================================================================

    timer.stop("Total Time")
    my_times = spec.main_comm.allreduce(timer.get_all_times(), 'max')

    if g_master:
        print("==============================================")
        print("COMPLETED SUCCESSFULLY")
        print(my_times)
        print("==============================================")
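# ---------------------------------------------------------------------------
# Illustrative sketch (not called by the averager): the helper below reproduces
# the communicator-splitting arithmetic used in compute_averages so the mapping
# from a global MPI rank to a sub-communicator group can be checked in
# isolation.  The function name and the default of four procs per average are
# assumptions for illustration only.
# ---------------------------------------------------------------------------
def _example_group_color(rank, size, num_of_avg, min_procs_per_ave=4):
    '''Return the group color a rank would be assigned by compute_averages.'''
    temp_color = (rank // min_procs_per_ave) % num_of_avg
    num_of_groups = size // min_procs_per_ave
    # Guard against the last partial group indexing past the end of the group list
    if temp_color == num_of_groups:
        temp_color = temp_color - 1
    return temp_color

# Example: with 8 ranks, 3 averages, and 4 procs per average,
# _example_group_color(rank=5, size=8, num_of_avg=3) returns group 1.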
def create_pre_proc(self, spec):
    '''
    Creates the CICE pre_proc file.

    @param spec   An instance of the Specification class which holds the user
                  settings that define which averages to compute, directories,
                  file prefixes, etc.
    '''
    variables = {'hi':  {'factor': 1.0e-13, 'units': '1.E+13 m3'},
                 'ai':  {'factor': 1.0e-14, 'units': '1.E+13 m3'},
                 'ext': {'factor': 1.0e-12, 'units': '1.E+12 m2'},
                 'hs':  {'factor': 1.0e-13, 'units': '1.E+12 m2'},
                 }

    # All of the region names, with 0=Northern Hemisphere and 1=Southern Hemisphere
    regions = {'nh': 0, 'sh': 1, 'Lab': 0, 'GIN': 0, 'Bar': 0, 'ArcOc': 0, 'Sib': 0, 'Beau': 0,
               'CArc': 0, 'Bering': 0, 'Okhotsk': 0, 'Hudson': 0, 'CAArch': 0,
               'Wed': 1, 'Ross': 1, 'Ind': 1, 'Pac': 1, 'BAm': 1}

    split_hem = spec.split_files.split(',')

    attributes = {'missing_value': 1.e+30, 'coordinates': 'time',
                  'cell_methods': 'time:mean', '_FillValue': 1.e+30}

    poly_masks = {}

    ave_descr = ['preproc', str(spec.year0), str(spec.year1)]

    AVE_TAG = 40

    time_dim = 'time'

    years = list(range(int(spec.year0), int(spec.year1) + 1))
    months = ave_t.average_types[ave_descr[0]]['months_to_average']

    # Initialize simplecomm (MPI wrappers)
    main_comm = spec.main_comm

    # If the region mask file doesn't exist, have root call ncl to create it
    if (not os.path.isfile(spec.reg_file) and (main_comm.is_manager() or spec.serial)):
        import subprocess
        os.environ['GRIDFILE'] = spec.ice_obs_file
        os.environ['REGIONFILE'] = spec.reg_file
        ncl_command = 'ncl < ' + spec.ncl_location + '/ice_pre_proc_mask.ncl'
        subprocess.call(ncl_command, shell=True)
    # Make sure all ranks sync here to prevent ranks other than root from
    # continuing on without a region mask file
    main_comm.sync()

    # Get the history dictionary that lists where files are located for each time slice,
    # a variable list, a meta list, and a key lookup variable
    if (spec.hist_type == 'series'):
        hist_dict, file_var_list, meta_list, key = rover.set_slices_and_vars_time_series(
            spec.in_directory, spec.file_pattern, spec.date_pattern, spec.prefix, spec.suffix,
            int(spec.year0), int(spec.year1), spec.split, spec.split_files)
    else:
        hist_dict, file_var_list, meta_list, key = rover.set_slices_and_vars_time_slice(
            spec.in_directory, spec.file_pattern, spec.prefix, spec.suffix,
            int(spec.year0), int(spec.year1))

    # Loop over the regions and variable names to get the full list of variables
    global_var_list = []
    for reg in regions:
        for var in variables:
            if ('ext' in var):
                global_var_list.append(var + '_mo_' + reg)
            else:
                global_var_list.append('v' + var + '_mo_' + reg)
    global_var_list.append('time')

    # Partition the global variable list between the MPI ranks
    local_var_list = main_comm.partition(global_var_list, func=partition.EqualLength(), involved=False)
    # If master/root, give it the full variable list
    if main_comm.is_manager() or spec.serial:
        local_var_list = global_var_list

    meta_list = []

    # Define the netcdf file
    outfile = 'ice_vol_' + spec.prefix[:-7] + '_' + str(spec.year0) + '-' + str(spec.year1) + '.nc'
    ave_date = str(spec.year0) + '-' + str(spec.year1)
    all_files_vars, new_file = climFileIO.define_ave_file(
        main_comm.is_manager(), spec.serial, global_var_list, local_var_list, meta_list,
        hist_dict, spec.hist_type, ave_descr, spec.prefix, outfile, spec.split,
        split_hem[regions['GIN']], spec.out_directory, main_comm, spec.ncformat,
        ave_t.average_types[ave_descr[0]]['months_to_average'][0], key, spec.clobber,
        int(spec.year0), int(spec.year1), ave_date, attributes, variables)

    # If using time slice files, open all files now
    if (len(local_var_list) > 0):
        if (spec.hist_type == 'slice' and (spec.serial or not main_comm.is_manager())):
            file_dict, open_list = climFileIO.open_all_files(
                hist_dict, ave_t.average_types[ave_descr[0]]['months_to_average'],
                years, local_var_list[0], 'null', ave_descr[0], False, int(spec.year0))

    # Loop over each variable in the local list and read/operate on/write
    for nc_var in local_var_list:
        if not main_comm.is_manager() or spec.serial:  # Slave
            print('Computing ice_pre_proc for', nc_var)
            # Get the variable/region names
            if ('time' in nc_var):
                get_var_name = 'aice'
                var_name = 'time'
            else:
                var_name, reg = nc_var.split('_mo_')
                if ('ext' in var_name):
                    var_name = var_name
                else:
                    var_name = var_name[1:]
                if ('ext' in var_name or 'ai' in var_name):
                    get_var_name = 'aice'
                else:
                    get_var_name = var_name
            # Get the observation lat, lon, and area
            obs_file = spec.ice_obs_file
            tarea = 'TAREA'
            tlong = 'TLONG'
            tlat = 'TLAT'
            o_lat, o_lon, o_area = self.read_obs(obs_file, tarea, tlong, tlat)

            # If using time series files, open the variable's file now
            if (spec.hist_type == 'series'):
                if spec.split:
                    split_name = split_hem[regions[reg]]
                else:
                    split_name = ''
                file_dict, open_list = climFileIO.open_all_files(
                    hist_dict, ave_t.average_types[ave_descr[0]]['months_to_average'],
                    years, get_var_name, split_name, ave_descr[0], False, int(spec.year0))

        time_slice = 0
        for year in years:
            for m in months:
                if not main_comm.is_manager() or spec.serial:  # Slave
                    if ('time' in nc_var):
                        var_sum = rover.fetch_slice(hist_dict, year, m, var_name, file_dict)
                    else:
                        # Get the month slice
                        var_slice = rover.fetch_slice(hist_dict, year, m, get_var_name, file_dict)
                        lat, lon = var_slice.shape
                        full_lat, full_lon = o_lat.shape
                        if spec.split:
                            fill = full_lat - lat
                            missing_vals = np.zeros((fill, lon))
                            var_slice = np.array(var_slice)
                            var_slice[var_slice >= 1e+20] = 0
                            if regions[reg] == 1:
                                var_slice = np.concatenate((var_slice, missing_vals), axis=0)
                            else:
                                var_slice = np.concatenate((missing_vals, var_slice), axis=0)
                        # Get the ai factor
                        if ('ext' in var_name or 'ai' in var_name):
                            aimax = np.amax(var_slice)
                            if (aimax < 2):
                                aifac = 100
                            else:
                                aifac = 1
                            var_slice = var_slice * aifac
                        # The ext variable is true/false based on the ai variable.  Set accordingly.
                        if ('ext' in var_name):
                            var_slice = np.array(var_slice)
                            var_slice[var_slice >= 1e+20] = 0
                            var_slice[var_slice < 15] = 0
                            var_slice[var_slice >= 15] = 1
                        # Multiply by the weight
                        var_slice = var_slice * o_area
                        # Mask the variable to get just this region
                        mask_to_apply = self.read_reg_mask(spec.reg_file, reg)
                        masked_var = MA.masked_where(mask_to_apply == 0, var_slice)
                        # Sum the variable
                        var_sum = self.get_sum(masked_var, variables[var_name], var_name)
                    # Pass the results to the master rank for writing
                    var_shape = var_sum.shape
                    var_dtype = var_sum.dtype
                    md_message = {'name': nc_var, 'shape': var_shape, 'dtype': var_dtype,
                                  'average': var_sum, 'index': time_slice}
                    if not spec.serial:
                        main_comm.collect(data=md_message, tag=AVE_TAG)
                if main_comm.is_manager() or spec.serial:  # Master
                    # Recv the variable to write
                    if not spec.serial:
                        r_rank, results = main_comm.collect(tag=AVE_TAG)
                        var_sum_results = results['average']
                        v_name = results['name']
                        index = results['index']
                    else:
                        v_name = nc_var
                        var_sum_results = var_sum
                        index = time_slice
                    # Write the variable
                    climFileIO.write_averages(all_files_vars, var_sum_results, v_name, index)
                time_slice = time_slice + 1
        # Close the time series files that are open
        if (spec.hist_type == 'series' and (not main_comm.is_manager() or spec.serial)):
            climFileIO.close_all_files(open_list)

    # Close the time slice files that are open
    if (len(local_var_list) > 0):
        if (spec.hist_type == 'slice' and (spec.serial or not main_comm.is_manager())):
            climFileIO.close_all_files(open_list)

    # Make sure everyone gets synced up
    main_comm.sync()

    # Close the file that was just created
    if spec.serial or main_comm.is_manager():
        new_file.close()
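# ---------------------------------------------------------------------------
# Illustrative sketch (not called by create_pre_proc): the masked, area-weighted
# regional sum used above, written out with plain numpy for clarity.  The scale
# factor mirrors the 'factor' entries in the variables dictionary (e.g. 1.0e-13
# for hi), and the inputs are assumed to be 2-D arrays on the same grid; the
# function name is an assumption for illustration only.
# ---------------------------------------------------------------------------
def _example_region_sum(var_slice, region_mask, cell_area, factor=1.0e-13):
    '''Return the scaled, area-weighted sum of var_slice over one region.'''
    import numpy as np
    import numpy.ma as MA

    weighted = np.asarray(var_slice) * np.asarray(cell_area)            # weight by cell area
    masked = MA.masked_where(np.asarray(region_mask) == 0, weighted)    # keep only this region
    return masked.sum() * factor                                        # scaled regional total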
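# ---------------------------------------------------------------------------
# Illustrative driver sketch (assumption, not part of this module): how the
# averaging code above is normally invoked through the package-level entry
# points.  The specification.create_specifier() keyword names are assumed to
# mirror the spec attributes used above (in_directory, out_directory, prefix,
# suffix, date_pattern, hist_type, avg_list, weighted, ncformat, serial), and
# all paths/values below are placeholders to be replaced with real settings.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from pyaverager import PyAverager, specification

    pyAveSpecifier = specification.create_specifier(
        in_directory='/path/to/history/files',      # placeholder input directory
        out_directory='/path/to/climo/output',      # placeholder output directory
        prefix='case.cam.h0',                       # placeholder file prefix
        suffix='nc',
        date_pattern='yyyymm-yyyymm',
        hist_type='slice',
        avg_list=['ya:1850', 'mavg:1850:1859'],     # example average requests
        weighted=False,
        ncformat='netcdf4c',
        serial=True)

    # Compute all requested averages with the specification built above.
    PyAverager.run_pyAverager(pyAveSpecifier)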