def setUp(self):
    """Create the global communicator and a colored division of it."""
    # Global communicator plus the world size and this process's world rank
    self.gcomm = simplecomm.create_comm()
    self.gsize = MPI_COMM_WORLD.Get_size()
    self.grank = MPI_COMM_WORLD.Get_rank()

    # Names of the groups used when dividing the communicator
    self.groups = ['a', 'b', 'c']
    ngroups = len(self.groups)

    # This process's rank, color, and group after the division
    rank, color = divmod(self.grank, ngroups)
    self.rank = int(rank)
    self.color = int(color)
    self.group = self.groups[self.color]

    # The monocolor and multicolor communicators from the division
    self.monocomm, self.multicomm = self.gcomm.divide(self.group)

    # Expected color, group, and divided rank for every world rank
    self.all_colors = [i % ngroups for i in range(self.gsize)]
    self.all_groups = [self.groups[c] for c in self.all_colors]
    self.all_ranks = [int(i // ngroups) for i in range(self.gsize)]
def setUp(self):
    # Global (COMM_WORLD) communicator plus its size and this process's rank
    self.gcomm = simplecomm.create_comm()
    self.gsize = MPI_COMM_WORLD.Get_size()
    self.grank = MPI_COMM_WORLD.Get_rank()
    # The group names to assume when dividing COMM_WORLD
    self.groups = ['a', 'b', 'c']
    # This MPI process's rank, color, and group after division
    # (floor division keeps the rank an int on Python 3)
    self.rank = int(self.grank // len(self.groups))
    self.color = int(self.grank % len(self.groups))
    self.group = self.groups[self.color]
    # The divided communicators (monocolor and multicolor)
    self.monocomm, self.multicomm = self.gcomm.divide(self.group)
    # Every MPI process's color, group, and divided rank
    self.all_colors = [i % len(self.groups) for i in range(self.gsize)]
    self.all_groups = [self.groups[i] for i in self.all_colors]
    self.all_ranks = [
        int(i // len(self.groups)) for i in range(self.gsize)
    ]
def setUp(self):
    """Create the global communicator and a colored division of it."""
    # COMM_WORLD Communicator and its size and
    # this MPI process's world rank
    self.gcomm = simplecomm.create_comm()
    self.gsize = MPI_COMM_WORLD.Get_size()
    self.grank = MPI_COMM_WORLD.Get_rank()

    # The group names to assume when dividing COMM_WORLD
    self.groups = ['a', 'b', 'c']

    # This MPI process's rank, color, and group after division.
    # Use floor division (//) so the result stays an int under Python 3
    # (plain '/' would produce a float, which cannot index self.groups);
    # this matches the other setUp variants in this codebase.
    self.rank = self.grank // len(self.groups)
    self.color = self.grank % len(self.groups)
    self.group = self.groups[self.color]

    # The divided communicators (monocolor and multicolor)
    self.monocomm, self.multicomm = self.gcomm.divide(self.group)

    # Every MPI process's color, group, and grank after division
    # (xrange replaced by range for Python 3 compatibility)
    self.all_colors = [i % len(self.groups) for i in range(self.gsize)]
    self.all_groups = [self.groups[i] for i in self.all_colors]
    self.all_ranks = [i // len(self.groups) for i in range(self.gsize)]
def __init__(self, specifier, serial=False, verbosity=1, wmode='w',
             once=False, simplecomm=None):
    """
    Constructor

    Parameters:
        specifier (Specifier): An instance of the Specifier class,
            defining the input specification for this reshaper operation.
        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel (False).
            The default is to assume parallel operation (but serial will
            be chosen if the mpi4py cannot be found when trying to
            initialize decomposition.
        verbosity(int): Level of printed output (stdout).  A value of 0
            means no output, and a higher value means more output.  The
            default value is 1.
        wmode (str): The mode to use for writing output.  Can be 'w' for
            normal write operation, 's' to skip the output generation for
            existing time-series files, 'o' to overwrite existing
            time-series files, 'a' to append to existing time-series
            files.
        once (bool): True or False, indicating whether the Reshaper
            should write all metadata to a 'once' file (separately).
        simplecomm (SimpleComm): A SimpleComm object to handle the
            parallel communication, if necessary

    Raises:
        TypeError: If any argument has the wrong type.
        ValueError: If wmode is not one of 'w', 's', 'o', 'a'.
    """

    # Type checking (or double-checking)
    if not isinstance(specifier, Specifier):
        err_msg = "Input must be given in the form of a Specifier object"
        raise TypeError(err_msg)
    if type(serial) is not bool:
        err_msg = "Serial indicator must be True or False."
        raise TypeError(err_msg)
    if type(verbosity) is not int:
        err_msg = "Verbosity level must be an integer."
        raise TypeError(err_msg)
    if type(wmode) is not str:
        err_msg = "Write mode flag must be a str."
        raise TypeError(err_msg)
    if type(once) is not bool:
        err_msg = "Once-file indicator must be True or False."
        raise TypeError(err_msg)
    if simplecomm is not None:
        if not isinstance(simplecomm, SimpleComm):
            err_msg = "Simple communicator object is not a SimpleComm"
            raise TypeError(err_msg)
    if wmode not in ['w', 's', 'o', 'a']:
        err_msg = "Write mode '{}' not recognized".format(wmode)
        raise ValueError(err_msg)

    # Whether to write a once file
    self._use_once_file = once

    # The output write mode to use
    self._write_mode = wmode

    # Internal timer data
    self._timer = TimeKeeper()

    # Dictionary storing read/write data amounts
    self.assumed_block_size = float(4 * 1024 * 1024)
    self._byte_counts = {}

    self._timer.start('Initializing Simple Communicator')
    if simplecomm is None:
        simplecomm = create_comm(serial=serial)
    # Reference to the simple communicator
    self._simplecomm = simplecomm
    self._timer.stop('Initializing Simple Communicator')

    # Construct the print header ("[rank/size] ")
    header = ''.join(['[', str(self._simplecomm.get_rank()), '/',
                      str(self._simplecomm.get_size()), '] '])

    # Reference to the verbose printer tool
    self._vprint = VPrinter(header=header, verbosity=verbosity)

    # Debug output starting
    if self._simplecomm.is_manager():
        self._vprint('Initializing Reshaper...', verbosity=0)

    # Validate the user input data
    self._timer.start('Specifier Validation')
    specifier.validate()
    self._timer.stop('Specifier Validation')
    if self._simplecomm.is_manager():
        self._vprint(' Specifier validated', verbosity=1)

    # Store the input file names
    self._input_filenames = specifier.input_file_list

    # Store the list of metadata names
    self._metadata_names = specifier.time_variant_metadata

    # Store the output file prefix and suffix
    self._output_prefix = specifier.output_file_prefix
    self._output_suffix = specifier.output_file_suffix

    # Setup PyNIO options (including disabling the default PreFill option)
    opt = nio_options()
    opt.PreFill = False

    # Determine the Format and CompressionLevel options
    # from the NetCDF format string in the Specifier
    if specifier.netcdf_format == 'netcdf':
        opt.Format = 'Classic'
    elif specifier.netcdf_format in ['netcdf4', 'netcdf4c']:
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = specifier.compression_level
    self._nio_options = opt
    if self._simplecomm.is_manager():
        self._vprint(' PyNIO options set', verbosity=1)

    # Helpful debugging message
    if self._simplecomm.is_manager():
        self._vprint('Reshaper initialized.', verbosity=0)

    # Sync before continuing..
    self._simplecomm.sync()
def __init__(self, specifier, serial=False, verbosity=1, wmode='w',
             once=False, simplecomm=None):
    """
    Constructor

    Parameters:
        specifier (Specifier): An instance of the Specifier class,
            defining the input specification for this reshaper operation.
        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel (False).
            The default is to assume parallel operation (but serial will
            be chosen if the mpi4py cannot be found when trying to
            initialize decomposition.
        verbosity(int): Level of printed output (stdout).  A value of 0
            means no output, and a higher value means more output.  The
            default value is 1.
        wmode (str): The mode to use for writing output.  Can be 'w' for
            normal write operation, 's' to skip the output generation for
            existing time-series files, 'o' to overwrite existing
            time-series files, 'a' to append to existing time-series
            files.
        once (bool): True or False, indicating whether the Reshaper
            should write all metadata to a 'once' file (separately).
        simplecomm (SimpleComm): A SimpleComm object to handle the
            parallel communication, if necessary

    Raises:
        TypeError: If any argument has the wrong type.
        ValueError: If wmode is not one of 'w', 's', 'o', 'a'.
    """

    # Type checking (or double-checking)
    if not isinstance(specifier, Specifier):
        err_msg = "Input must be given in the form of a Specifier object"
        raise TypeError(err_msg)
    if type(serial) is not bool:
        err_msg = "Serial indicator must be True or False."
        raise TypeError(err_msg)
    if type(verbosity) is not int:
        err_msg = "Verbosity level must be an integer."
        raise TypeError(err_msg)
    if type(wmode) is not str:
        err_msg = "Write mode flag must be a str."
        raise TypeError(err_msg)
    if type(once) is not bool:
        err_msg = "Once-file indicator must be True or False."
        raise TypeError(err_msg)
    if simplecomm is not None:
        if not isinstance(simplecomm, SimpleComm):
            err_msg = "Simple communicator object is not a SimpleComm"
            raise TypeError(err_msg)
    if wmode not in ['w', 's', 'o', 'a']:
        err_msg = "Write mode '{}' not recognized".format(wmode)
        raise ValueError(err_msg)

    # Whether to write a once file
    self._use_once_file = once

    # The output write mode to use
    self._write_mode = wmode

    # Internal timer data
    self._timer = TimeKeeper()

    # Dictionary storing read/write data amounts
    self.assumed_block_size = float(4 * 1024 * 1024)
    self._byte_counts = {}

    self._timer.start('Initializing Simple Communicator')
    if simplecomm is None:
        simplecomm = create_comm(serial=serial)
    # Reference to the simple communicator
    self._simplecomm = simplecomm
    self._timer.stop('Initializing Simple Communicator')

    # Construct the print header ("[rank/size] ")
    header = ''.join(['[', str(self._simplecomm.get_rank()), '/',
                      str(self._simplecomm.get_size()), '] '])

    # Reference to the verbose printer tool
    self._vprint = VPrinter(header=header, verbosity=verbosity)

    # Debug output starting
    if self._simplecomm.is_manager():
        self._vprint('Initializing Reshaper...', verbosity=0)

    # Validate the user input data
    self._timer.start('Specifier Validation')
    specifier.validate()
    self._timer.stop('Specifier Validation')
    if self._simplecomm.is_manager():
        self._vprint(' Specifier validated', verbosity=1)

    # Store the input file names
    self._input_filenames = specifier.input_file_list

    # Store the list of metadata names
    self._metadata_names = specifier.time_variant_metadata

    # Store the output file prefix and suffix
    self._output_prefix = specifier.output_file_prefix
    self._output_suffix = specifier.output_file_suffix

    # Setup PyNIO options (including disabling the default PreFill option)
    opt = nio_options()
    opt.PreFill = False

    # Determine the Format and CompressionLevel options
    # from the NetCDF format string in the Specifier
    if specifier.netcdf_format == 'netcdf':
        opt.Format = 'Classic'
    elif specifier.netcdf_format in ['netcdf4', 'netcdf4c']:
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = specifier.compression_level
    self._nio_options = opt
    if self._simplecomm.is_manager():
        self._vprint(' PyNIO options set', verbosity=1)

    # Helpful debugging message
    if self._simplecomm.is_manager():
        self._vprint('Reshaper initialized.', verbosity=0)

    # Sync before continuing..
    self._simplecomm.sync()
def main(argv):
    # Driver for pyEnsSumPop: builds a POP ensemble summary NetCDF file
    # (means, standard deviations, RMSZ scores) from a directory of
    # ensemble member history files, optionally in parallel via MPI.
    # NOTE: this is Python 2 code (print statements).
    print 'Running pyEnsSumPop!'

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable zscoreonly nrand= rand seq= jsondir='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm1_2_0'
    opts_dict['compset'] = 'FC5'
    opts_dict['mach'] = 'yellowstone'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 3
    opts_dict['nmonth'] = 12
    opts_dict['npert'] = 40
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'ne30_ne30'
    opts_dict['sumfile'] = 'ens.pop.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['zscoreonly'] = False
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = '/glade/scratch/haiyingx/'

    # This creates the dictionary of input arguments
    print "before parseconfig"
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if verbose:
        print opts_dict

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if opts_dict['jsonfile']:
        # Read in the included var list; str_size is the longest
        # variable-name length (used for the NetCDF char dimension).
        # NOTE(review): Var2d/Var3d/str_size are only defined when
        # --jsonfile is given, but are used unconditionally below —
        # running without --jsonfile would raise NameError; confirm
        # whether the option is effectively mandatory.
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        str_size = 0
        # NOTE(review): loop variable shadows the builtin 'str' here
        for str in Var3d:
            if str_size < len(str):
                str_size = len(str)
        for str in Var2d:
            if str_size < len(str):
                str_size = len(str)

    in_files = []
    if (os.path.exists(input_dir)):
        # Pick up the 'nrand' random number of input files to generate
        # summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict,
                                                  opts_dict['nrand'])
        else:
            # Get the list of files
            in_files_temp = os.listdir(input_dir)
            in_files = sorted(in_files_temp)
        # Make sure we have enough
        num_files = len(in_files)
    else:
        print 'Input directory: ', input_dir, ' not found'
        sys.exit(2)

    # Create a mpi simplecomm object
    # NOTE(review): 'me' was already created above; this second creation
    # looks redundant — confirm it is intentional.
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Partition the input file list across ranks
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Open the files in the input directory
    o_files = []
    for onefile in in_file_list:
        if (os.path.isfile(input_dir + '/' + onefile)):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...."
            sys.exit()
    print in_file_list

    # Store dimensions of the input fields
    if (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "z_t":
            nlev = input_dims["z_t"]
        elif key == "nlon":
            nlon = input_dims["nlon"]
        elif key == "nlat":
            nlat = input_dims["nlat"]

    # Make sure all files have the same dimensions
    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (nlev != int(input_dims["z_t"]) or (nlat != int(input_dims["nlat"]))
                or (nlon != int(input_dims["nlon"]))):
            print "Dimension mismatch between ", in_file_list[0], 'and', in_file_list[count], '!!!'
            sys.exit()

    # Create new summary ensemble file (rank 0 only)
    this_sumfile = opts_dict["sumfile"]

    if verbose:
        print "Creating ", this_sumfile, " ..."
    if (me.get_rank() == 0):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if (verbose == True):
            print "Setting dimensions ....."
        nc_sumfile.create_dimension('nlat', nlat)
        nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('time', None)
        nc_sumfile.create_dimension('ens_size', opts_dict['npert'])
        nc_sumfile.create_dimension('nbin', opts_dict['nbin'])
        nc_sumfile.create_dimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.create_dimension('nvars3d', len(Var3d))
        nc_sumfile.create_dimension('nvars2d', len(Var2d))
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'POP verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        v_time = nc_sumfile.create_variable("time", 'd', ('time',))
        v_ens_avg3d = nc_sumfile.create_variable(
            "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.create_variable(
            "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.create_variable(
            "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.create_variable(
            "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_RMSZ = nc_sumfile.create_variable(
            "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))
        if not opts_dict['zscoreonly']:
            v_gm = nc_sumfile.create_variable("global_mean", 'f',
                                              ('time', 'nvars', 'ens_size'))

        # Assign vars, var3d and var2d
        # (names are space-padded to str_size characters each)
        if (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        all_var_names = list(Var3d)
        all_var_names += Var2d
        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(Var3d)
        for i in range(l_eq):
            tt = list(Var3d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(Var2d)
        for i in range(l_eq):
            tt = list(Var2d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if (verbose == True):
            print "Assigning time invariant metadata ....."
        vars_dict = o_files[0].variables
        lev_data = vars_dict["z_t"]
        # NOTE(review): this rebinds the local name rather than writing
        # into the NetCDF variable (v_lev[:] = lev_data) — confirm
        # whether "lev" is ever populated in the output file.
        v_lev = lev_data

    # Time-varient metadata: gather each rank's time value to rank 0
    if verbose:
        print "Assigning time variant metadata ....."
    vars_dict = o_files[0].variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(),))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    # Calculate global mean, average, standard deviation
    if verbose:
        print "Calculating global means ....."
    is_SE = False
    tslice = 0
    if not opts_dict['zscoreonly']:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
            o_files, Var3d, Var2d, is_SE, False, opts_dict)
    if verbose:
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    if (verbose == True):
        print "Calculating RMSZ scores ....."
    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
        o_files, Var3d, Var2d, is_SE, opts_dict)

    # Collect from all processors
    if opts_dict['mpi_enable']:
        # Gather the 3d variable results from all processors to the
        # master processor
        # Gather global means 3d results
        if not opts_dict['zscoreonly']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            #print "before gather, gmall.shape=",gmall.shape
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(Var3d) + len(Var2d), len(o_files)))
        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(
            zmall, me,
            (me.get_size(), len(Var3d) + len(Var2d), len(o_files), nbin))
        #print 'zmall=',zmall
        #print "after gather, gmall.shape=",gmall.shape
        ens_avg3d = pyEnsLib.gather_npArray_pop(
            ens_avg3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(
            ens_avg2d, me, (me.get_size(), len(Var2d), (nlat), nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(
            ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(
            ens_stddev2d, me, (me.get_size(), len(Var2d), (nlat), nlon))

    # Assign to file:
    if me.get_rank() == 0:
        #Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0)
        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]
        if not opts_dict['zscoreonly']:
            v_gm[:, :, :] = gmall[:, :, :]
        print "All done"
if rank == 0: # Update system log with the dates that were just converted debugMsg('before chunking.write_log', header=True, verbosity=1) chunking.write_log('{0}/logs/ts_status.log'.format(caseroot), log) debugMsg('after chunking.write_log', header=True, verbosity=1) scomm.sync() return 0 #=================================== if __name__ == "__main__": # initialize simplecomm object scomm = simplecomm.create_comm(serial=False) # setup an overall timer timer = timekeeper.TimeKeeper() timer.start("Total Time") # get commandline options options = commandline_options() debug = options.debug[0] # initialize global vprinter object for printing debug messages debugMsg = vprinter.VPrinter(header='', verbosity=0) if options.debug: header = 'cesm_tseries_generator: DEBUG... ' debugMsg = vprinter.VPrinter(header=header, verbosity=options.debug[0])
def main(argv):
    # Driver for pyCECT: compares a small set of new test runs against a
    # previously generated ensemble summary file, either via POP-ECT
    # Z-scores (popens) or CAM-ECT PCA scores, and reports PASS/FAIL.

    # Get command line stuff and store in a dictionary
    s = """verbose sumfile= indir= input_globs= tslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVars popens jsonfile= mpi_enable nbin= minrange= maxrange= outfile= casejson= npick= pepsi_gm pop_tol= web_enabled pop_threshold= printStdMean fIndex= lev= eet= saveResults json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVars'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['printStdMean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False
    opts_dict['saveResults'] = False

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']

    # some mods for POP-ECT
    if popens == True:
        opts_dict['tslice'] = 0
        opts_dict['numRunFile'] = 1
        opts_dict['eet'] = 0
        opts_dict['mpi_enable'] = False

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']

    if me.get_rank() == 0:
        print(' ')
        print('--------pyCECT--------')
        print(' ')
        print(dt.strftime("%A, %d. %B %Y %I:%M%p"))
        print(' ')
        if not opts_dict['web_enabled']:
            print('Ensemble summary file = ' + opts_dict['sumfile'])
        print(' ')
        print('Testcase file directory = ' + opts_dict['indir'])
        print(' ')
        print(' ')

    # make sure these are valid
    if opts_dict['web_enabled'] == False and os.path.isfile(
            opts_dict['sumfile']) == False:
        print("ERROR: Summary file name is not valid.")
        sys.exit()
    if os.path.exists(opts_dict['indir']) == False:
        print("ERROR: --indir path is not valid.")
        sys.exit()

    # Ensure sensible EET value
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    ifiles = []
    in_files = []
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
            in_files_first = result['not_pick_files']
            in_files = random.sample(in_files_first, opts_dict['npick'])
            print('Testcase files:')
            print('\n'.join(in_files))
    elif opts_dict['json_case']:
        # Collect files matching each case name listed in the json file
        json_file = opts_dict['json_case']
        if (os.path.exists(json_file)):
            fd = open(json_file)
            metainfo = json.load(fd)
            if 'CaseName' in metainfo:
                casename = metainfo['CaseName']
                if (os.path.exists(opts_dict['indir'])):
                    for name in casename:
                        wildname = '*.' + name + '.*'
                        full_glob_str = os.path.join(opts_dict['indir'],
                                                     wildname)
                        glob_file = glob.glob(full_glob_str)
                        in_files.extend(glob_file)
        else:
            print("ERROR: " + opts_dict['json_case'] + " does not exist.")
            sys.exit()
        print("in_files=", in_files)
    else:
        wildname = '*' + str(opts_dict['input_globs']) + '*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            glob_files = glob.glob(full_glob_str)
            in_files.extend(glob_files)
            num_file = len(in_files)
            if num_file == 0:
                print("ERROR: no matching files for wildcard=" + wildname +
                      " found in specified --indir")
                sys.exit()
            else:
                print("Found " + str(num_file) +
                      " matching files in specified --indir")
            if opts_dict['numRunFile'] > num_file:
                print("ERROR: more files needed (" +
                      str(opts_dict['numRunFile']) +
                      ") than available in the indir (" + str(num_file) + ").")
                sys.exit()

    in_files.sort()
    #print in_files

    if popens:
        # Partition the input file list
        in_files_list = me.partition(in_files, func=EqualStride(),
                                     involved=True)
    else:
        # Random pick cam files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)

    # Resolve each selected file to a full path and verify it exists
    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(frun_temp)
        else:
            print("ERROR: COULD NOT LOCATE FILE " + frun_temp)
            sys.exit()

    if opts_dict['web_enabled']:
        if len(opts_dict['sumfile']) == 0:
            opts_dict['sumfile'] = '/glade/p/cesmdata/cseg/inputdata/validation/'
        # need to open ifiles
        opts_dict['sumfile'], machineid, compiler = pyEnsLib.search_sumfile(
            opts_dict, ifiles)
        if len(machineid) != 0 and len(compiler) != 0:
            print(' ')
            print('Validation file : machineid = ' + machineid +
                  ', compiler = ' + compiler)
            print('Found summary file : ' + opts_dict['sumfile'])
            print(' ')
        else:
            print('Warning: machine and compiler are unknown')

    if popens:
        # POP-ECT branch: compare raw Z-scores against the tolerance
        # Read in the included var list
        if not os.path.exists(opts_dict['jsonfile']):
            print(
                "ERROR: POP-ECT requires the specification of a valid json file via --jsonfile."
            )
            sys.exit()
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print(' ')
        print('Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol']))
        print('ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold']))
        zmall, n_timeslice = pyEnsLib.pop_compare_raw_score(
            opts_dict, ifiles, me.get_rank(), Var3d, Var2d)
        np.set_printoptions(threshold=sys.maxsize)

        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(
                zmall, me, (me.get_size(), len(Var3d) + len(Var2d),
                            len(ifiles), opts_dict['nbin']))
            if me.get_rank() == 0:
                fout = open(opts_dict['outfile'], "w")
                for i in range(me.get_size()):
                    for j in zmall[i]:
                        np.savetxt(fout, j, fmt='%-7.2e')
    #cam
    else:
        # CAM-ECT branch: PCA-score comparison against the summary file
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum, std_gm, std_gm_array, str_size = pyEnsLib.read_ensemble_summary(
            opts_dict['sumfile'])

        #Only doing gm
        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}
        for k, v in ens_gm.items():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)

        # Get 3d variable name list and 2d variable name list separately
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

        # XOR: warn when exactly one of test/summary is spectral-element
        if (is_SE ^ is_SE_sum):
            print(
                'Warning: please note the ensemble summary file is different from the testing files: they use different grids'
            )

        # Compare the new run and the ensemble summary file
        results = {}
        countgm = np.zeros(len(ifiles), dtype=np.int32)

        # Calculate the new run global mean
        mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary(
            ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'],
            opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i],
                                    ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the
        # ensemble summary global mean range
        for fcount, fid in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange',
                                                      variables, 'gm',
                                                      results,
                                                      'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores, var_list, comp_std_gm = pyEnsLib.standardized(
            means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict,
            ens_avg, me)
        run_index, decision = pyEnsLib.comparePCAscores(
            ifiles, new_scores, sigma_scores_gm, opts_dict, me)

        # If there is failure, plot out standardized mean and compared
        # standardized mean in box plots
        # if opts_dict['printStdMean'] and decision == 'FAILED':
        if opts_dict['printStdMean']:

            # Plotting dependencies imported lazily — only needed here
            import seaborn as sns
            import matplotlib
            matplotlib.use('Agg')  # don't display figures
            import matplotlib.pyplot as plt

            print(" ")
            print(
                '***************************************************************************** '
            )
            print(
                'Test run variable standardized means (for reference only - not used to determine pass/fail)'
            )
            print(
                '***************************************************************************** '
            )
            print(" ")

            # Bucket each variable by how many of its test-run values
            # fall outside the ensemble percentile bounds
            category = {
                "all_outside99": [],
                "two_outside99": [],
                "one_outside99": [],
                "all_oneside_outside1QR": []
            }
            b = list(pyEnsLib.chunk(ens_var_name, 10))
            for f, alist in enumerate(b):
                for fc, avar in enumerate(alist):
                    dist_995 = np.percentile(std_gm[avar], 99.5)
                    dist_75 = np.percentile(std_gm[avar], 75)
                    dist_25 = np.percentile(std_gm[avar], 25)
                    dist_05 = np.percentile(std_gm[avar], 0.5)
                    c = 0
                    d = 0
                    p = 0
                    q = 0
                    for i in range(comp_std_gm[f + fc].size):
                        if comp_std_gm[f + fc][i] > dist_995:
                            c = c + 1
                        elif comp_std_gm[f + fc][i] < dist_05:
                            d = d + 1
                        elif (comp_std_gm[f + fc][i] < dist_995
                              and comp_std_gm[f + fc][i] > dist_75):
                            p = p + 1
                        elif (comp_std_gm[f + fc][i] > dist_05
                              and comp_std_gm[f + fc][i] < dist_25):
                            q = q + 1
                    if c == 3 or d == 3:
                        category["all_outside99"].append((avar, f + fc))
                    elif c == 2 or d == 2:
                        category["two_outside99"].append((avar, f + fc))
                    elif c == 1 or d == 1:
                        category["one_outside99"].append((avar, f + fc))
                    if p == 3 or q == 3:
                        category["all_oneside_outside1QR"].append(
                            (avar, f + fc))

            # Derive a plot-name prefix from the last path component
            part_name = opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name = opts_dict['indir'].split('/')[-2]

            for key in sorted(category):
                list_array = []
                list_array2 = []
                list_var = []
                value = category[key]

                if key == "all_outside99":
                    print(
                        "*** ", len(value),
                        " variables have 3 test run global means outside of the 99th percentile."
                    )
                elif key == "two_outside99":
                    print(
                        "*** ", len(value),
                        " variables have 2 test run global means outside of the 99th percentile."
                    )
                elif key == "one_outside99":
                    print(
                        "*** ", len(value),
                        " variables have 1 test run global mean outside of the 99th percentile."
                    )
                elif key == "all_oneside_outside1QR":
                    print(
                        "*** ", len(value),
                        " variables have all test run global means outside of the first quartile (but not outside the 99th percentile)."
                    )

                if len(value) > 0:
                    print(" => generating plot ...")
                    if len(value) > 20:
                        print(
                            " NOTE: truncating to only plot the first 20 variables."
                        )
                        value = value[0:20]
                    for each_var in value:
                        list_array.append(std_gm[each_var[0]])
                        list_array2.append(comp_std_gm[each_var[1]])
                        name = each_var[0]
                        if isinstance(name, str) == False:
                            name = name.decode("utf-8")
                        list_var.append(name)

                if len(value) != 0:
                    ax = sns.boxplot(data=list_array, whis=[0.5, 99.5],
                                     fliersize=0.0)
                    sns.stripplot(data=list_array2, jitter=True, color="r")
                    plt.xticks(list(range(len(list_array))), list_var,
                               fontsize=8, rotation=-45)
                    if decision == 'FAILED':
                        plt.savefig(part_name + "_" + key + "_fail.png")
                    else:
                        plt.savefig(part_name + "_" + key + "_pass.png")
                    plt.close()

        ##
        # Print file with info about new test runs....to a netcdf file
        ##
        if opts_dict['saveResults']:

            num_vars = comp_std_gm.shape[0]
            tsize = comp_std_gm.shape[1]
            esize = std_gm_array.shape[1]
            this_savefile = 'savefile.nc'
            if (verbose == True):
                print("VERBOSE: Creating ", this_savefile, " ...")

            if os.path.exists(this_savefile):
                os.unlink(this_savefile)
            nc_savefile = nc.Dataset(this_savefile, "w",
                                     format="NETCDF4_CLASSIC")
            nc_savefile.createDimension('ens_size', esize)
            nc_savefile.createDimension('test_size', tsize)
            nc_savefile.createDimension('nvars', num_vars)
            nc_savefile.createDimension('str_size', str_size)

            # Set global attributes
            now = time.strftime("%c")
            nc_savefile.creation_date = now
            nc_savefile.title = 'PyCECT compare results file'
            nc_savefile.summaryfile = opts_dict['sumfile']
            #nc_savefile.testfiles = in_files

            #variables
            v_vars = nc_savefile.createVariable("vars", 'S1',
                                                ('nvars', 'str_size'))
            v_std_gm = nc_savefile.createVariable("std_gm", 'f8',
                                                  ('nvars', 'test_size'))
            v_scores = nc_savefile.createVariable("scores", 'f8',
                                                  ('nvars', 'test_size'))
            v_ens_sigma_scores = nc_savefile.createVariable(
                'ens_sigma_scores', 'f8', ('nvars',))
            v_ens_std_gm = nc_savefile.createVariable("ens_std_gm", 'f8',
                                                      ('nvars', 'ens_size'))

            #hard-coded size
            str_out = nc.stringtochar(np.array(ens_var_name, 'S10'))

            v_vars[:] = str_out
            v_std_gm[:, :] = comp_std_gm[:, :]
            v_scores[:, :] = new_scores[:, :]
            v_ens_sigma_scores[:] = sigma_scores_gm[:]
            v_ens_std_gm[:, :] = std_gm_array[:, :]

            nc_savefile.close()

        # Print variables (optional)
        if opts_dict['printVars']:
            print(" ")
            print(
                '***************************************************************************** '
            )
            print(
                'Variable global mean information (for reference only - not used to determine pass/fail)'
            )
            print(
                '***************************************************************************** '
            )
            for fcount, fid in enumerate(ifiles):
                print(' ')
                print('Run ' + str(fcount + 1) + ":")
                print(' ')
                print(
                    '***' + str(countgm[fcount]), " of " +
                    str(len(ens_var_name)) +
                    ' variables are outside of ensemble global mean distribution***'
                )
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange',
                                      fcount, variables, 'global mean')
                print(' ')
                print(
                    '----------------------------------------------------------------------------'
                )

    if me.get_rank() == 0:
        print(' ')
        print("Testing complete.")
        print(' ')
oldfile = os.path.join(olddir, filename) if oldfile in oldfiles: item_dict['old'] = oldfile oldfiles.remove(oldfile) items_to_check.append(item_dict) else: item_dict['old'] = None unchecked_new_items.append(item_dict) for oldfile in oldfiles: item_dict = {'test': test_name} item_dict['new'] = None item_dict['old'] = oldfile unchecked_old_items.append(item_dict) # Get a basic MPI comm comm = create_comm(serial=(opts.serial or opts.list_tests)) # Print tests that will be checked if comm.is_manager(): print 'Checking test results.' for test_name in tests_to_check: print 'Test {0!s}:'.format(test_name) num_chk = sum(1 for i in items_to_check if i['test'] == test_name) num_new = num_chk + sum(1 for i in unchecked_new_items if i['test'] == test_name) num_old = num_chk + sum(1 for i in unchecked_old_items if i['test'] == test_name) print ' Checking {0!s} of {1!s}'.format(num_chk, num_new), print 'new files generated against {0!s}'.format(num_old), print 'old files found.'
def main(argv):
    """pyEnsSum driver: build a CAM ensemble summary NetCDF file.

    Parses command-line options, opens the ensemble of history files found
    in --indir, classifies variables as 2D/3D, computes global means (and
    optionally RMSZ scores and max norms) across the ensemble, and writes
    the results to the summary file named by --sumfile.

    NOTE: Python 2 code (print statements, iteritems).  Relies on module
    globals: getopt, sys, os, time, np, Nio, pyEnsLib, simplecomm,
    Duplicate, EqualLength, EqualStride, get_cumul_filelist,
    get_stride_list, get_shape, gather_npArray, gather_list.
    """
    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict={}

    # Defaults
    opts_dict['tag'] = 'cesm2_0_beta08'
    opts_dict['compset'] = 'F2000'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,'ES',opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)

    # NOTE(review): mixes 'and'/'or' without parentheses -- 'res' alone
    # satisfies the check; presumably 'and' was intended throughout.
    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']):
        print 'Please specify --tag, --compset, --mach and --res options'
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist=[]
    inc_varlist=[]

    # Create a mpi simplecomm object (serial comm when MPI is disabled)
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        print 'Running pyEnsSum!'

    if me.get_rank() ==0 and (verbose == True):
        print opts_dict
        print 'Ensemble size for summary = ', esize

    exclude=False
    # Only the master rank reads the json variable list; the result is
    # broadcast to the other ranks below.
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist=[]
            # Read in the excluded or included var list
            ex_varlist,exclude=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')
            if exclude == False:
                # json file held an *include* list, not an exclude list
                inc_varlist=ex_varlist
                ex_varlist=[]
            # Read in the included var list
            #inc_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')

    # Broadcast the excluded var list to each processor
    #if opts_dict['mpi_enable']:
    #    ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)
    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude=me.partition(exclude,func=Duplicate(),involved=True)
        if exclude:
            ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)
        else:
            inc_varlist=me.partition(inc_varlist,func=Duplicate(),involved=True)

    in_files=[]
    if(os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files=sorted(in_files_temp)

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank()==0 and (verbose == True):
            print 'Number of files in input directory = ', num_files
        if (num_files < esize):
            if me.get_rank()==0 and (verbose == True):
                print 'Number of files in input directory (',num_files,\
                    ') is less than specified ensemble size of ', esize
            sys.exit(2)
        if (num_files > esize):
            if me.get_rank()==0 and (verbose == True):
                print 'NOTE: Number of files in ', input_dir, \
                    'is greater than specified ensemble size of ', esize ,\
                    '\nwill just use the first ', esize, 'files'
    else:
        if me.get_rank()==0:
            print 'Input directory: ',input_dir,' not found'
        sys.exit(2)

    if opts_dict['cumul']:
        # NOTE(review): in_files_list is only assigned when 'regx' is set;
        # cumul without regx would raise NameError on the next line.
        if opts_dict['regx']:
            in_files_list=get_cumul_filelist(opts_dict,opts_dict['indir'],opts_dict['regx'])
        in_files=me.partition(in_files_list,func=EqualLength(),involved=True)
        if me.get_rank()==0 and (verbose == True):
            print 'in_files=',in_files

    # Open the files in the input directory
    o_files=[]
    if me.get_rank() == 0 and opts_dict['verbose']:
        print 'Input files are: '
        print "\n".join(in_files)
        #for i in in_files:
        #    print "in_files =",i

    for onefile in in_files[0:esize]:
        if (os.path.isfile(input_dir+'/' + onefile)):
            o_files.append(Nio.open_file(input_dir+'/' + onefile,"r"))
        else:
            if me.get_rank()==0:
                print "COULD NOT LOCATE FILE "+ input_dir + onefile + "! EXITING...."
            sys.exit()

    # Store dimensions of the input fields
    if me.get_rank()==0 and (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey=''
    latkey=''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ilev":
            nilev = input_dims["ilev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key =="lon"):
            nlon = input_dims[key]
            lonkey=key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey=key

    if (nlev == -1) :
        if me.get_rank()==0:
            print "COULD NOT LOCATE valid dimension lev => EXITING...."
        sys.exit()

    if (( ncol == -1) and ((nlat == -1) or (nlon == -1))):
        if me.get_rank()==0:
            print "Need either lat/lon or ncol => EXITING...."
        sys.exit()

    # Check if this is SE or FV data (spectral element grids have 'ncol')
    if (ncol != -1):
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if me.get_rank()==0 and (verbose == True):
        print "Checking dimensions across files...."
        print 'lev = ', nlev
        if (is_SE == True):
            print 'ncol = ', ncol
        else:
            print 'nlat = ', nlat
            print 'nlon = ', nlon

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (is_SE == True):
            if ( nlev != int(input_dims["lev"]) or ( ncol != int(input_dims["ncol"]))):
                if me.get_rank() == 0:
                    # NOTE(review): prints in_files[0] twice; the second was
                    # presumably meant to be in_files[count].
                    print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
                sys.exit()
        else:
            if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\
                or ( nlon != int(input_dims[lonkey]))):
                if me.get_rank() == 0:
                    # NOTE(review): same in_files[0]/in_files[count] issue as above.
                    print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    #print len(vars_dict_all)
    if exclude:
        # NOTE(review): no .copy() here (unlike the else branch), so the
        # deletes below mutate the Nio file's variable dictionary directly.
        vars_dict=vars_dict_all
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    #Given an included var list, remove all float var that are not on the list
    else:
        vars_dict=vars_dict_all.copy()
        for k,v in vars_dict_all.iteritems():
            if (k not in inc_varlist) and (vars_dict_all[k].typecode()=='f'):
                #print vars_dict_all[k].typecode()
                #print k
                del vars_dict[k]

    num_vars = len(vars_dict)
    #print num_vars
    #if me.get_rank() == 0:
    #    for k,v in vars_dict.iteritems():
    #        print 'vars_dict',k,vars_dict[k].typecode()

    str_size = 0
    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k,v in vars_dict.iteritems():
        var = k
        vd = v.dimensions # all the variable's dimensions (names)
        vr = v.rank # num dimension
        vs = v.shape # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True): # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):
                is_2d = True
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev )):
                is_3d = True
                num_3d += 1
        else: # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):
                is_2d = True
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and (vs[1] == nlev or vs[1]==nilev ))):
                is_3d = True
                num_3d += 1
        if (is_3d == True) :
            # str_size tracks the longest variable name (for the NetCDF
            # char-array dimension below)
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif (is_2d == True):
            str_size = max(str_size, len(k))
            d2_var_names.append(k)
        #else:
        #    print 'var=',k

    if me.get_rank() == 0 and (verbose == True):
        print 'Number of variables found: ', num_3d+num_2d
        print '3D variables: '+str(num_3d)+', 2D variables: '+str(num_2d)

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    if esize<num_2d+num_3d:
        if me.get_rank()==0:
            print "************************************************************************************************************************************"
            print " Error: the total number of 3D and 2D variables "+str(num_2d+num_3d)+" is larger than the number of ensemble files "+str(esize)
            print " Cannot generate ensemble summary file, please remove more variables from your included variable list,"
            print " or add more varaibles in your excluded variable list!!!"
            print "************************************************************************************************************************************"
        sys.exit()
    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    #if me.get_rank() == 0 and (verbose == True):
    #    print 'num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")"

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if me.get_rank() == 0 and (verbose == True):
        print "Creating ", this_sumfile, " ..."
    # NOTE(review): '|' binds tighter than '==', so this parses as
    # me.get_rank() == (0 | opts_dict["popens"]), i.e. rank == popens-as-int.
    # The intent was almost certainly (rank == 0) or popens.
    if(me.get_rank() ==0 | opts_dict["popens"]):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if me.get_rank() == 0 and (verbose == True):
            print "Setting dimensions ....."
        if (is_SE == True):
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if me.get_rank() == 0 and (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date',now)
        setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if me.get_rank() == 0 and (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            # Per-ensemble-member average/stddev fields are only written
            # when RMSZ scores are requested.
            if (is_SE == True):
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol'))
            else:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))
            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size'))
        v_standardized_gm=nc_sumfile.create_variable("standardized_gm",'f',('nvars','ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm','f',('nvars','nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm','f',('nvars',))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm','f',('nvars',))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm','f',('nvars',))

        # Assign vars, var3d and var2d
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        # Pad every variable name with spaces to str_size so they fit the
        # fixed-width (nvars, str_size) char arrays.
        eq_all_var_names =[]
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ')*(str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ')*(str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ')*(str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        # NOTE(review): this rebinds the local name v_lev instead of writing
        # into the NetCDF variable (v_lev[:] = ...); 'lev' is likely never
        # written to the summary file -- confirm against pyEnsSum upstream.
        v_lev = lev_data

    # Form ensembles, each missing one member; compute RMSZs and global means
    #for each variable, we also do max norm also (currently done in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list across ranks (each rank computes a stride
        # of the variables)
        var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True)
        var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True)
    else:
        var3_list_loc=d3_var_names
        var2_list_loc=d2_var_names

    # Calculate global means #
    if me.get_rank() == 0 and (verbose == True):
        print "Calculating global means ....."
    if not opts_dict['cumul']:
        gm3d,gm2d,var_list = pyEnsLib.generate_global_mean_for_summary(o_files,var3_list_loc,var2_list_loc , is_SE, False,opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    # NOTE(review): bitwise '|' on bools -- works (bool subclasses int) but
    # 'or' is the idiomatic operator here.
    if (not opts_dict['gmonly']) | (opts_dict['cumul']):
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating RMSZ scores ....."
        zscore3d,zscore2d,ens_avg3d,ens_stddev3d,ens_avg2d,ens_stddev2d,temp1,temp2=pyEnsLib.calc_rmsz(o_files,var3_list_loc,var2_list_loc,is_SE,opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict,var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict,var2_list_loc)

    # Gather per-rank partial results back onto the master rank
    if opts_dict['mpi_enable'] & ( not opts_dict['popens']):

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index=get_stride_list(len(d3_var_names),me)

            # Gather global means 3d results
            gm3d=gather_npArray(gm3d,me,slice_index,(len(d3_var_names),len(o_files)))

            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d=gather_npArray(zscore3d,me,slice_index,(len(d3_var_names),len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d=get_shape(ens_avg3d.shape,len(d3_var_names),me.get_rank())
                ens_avg3d=gather_npArray(ens_avg3d,me,slice_index,shape_tuple3d)
                ens_stddev3d=gather_npArray(ens_stddev3d,me,slice_index,shape_tuple3d)

            # Gather 2d variable results from all processors to the master processor
            slice_index=get_stride_list(len(d2_var_names),me)

            # Gather global means 2d results
            gm2d=gather_npArray(gm2d,me,slice_index,(len(d2_var_names),len(o_files)))

            var_list=gather_list(var_list,me)

            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d=gather_npArray(zscore2d,me,slice_index,(len(d2_var_names),len(o_files)))

                # Gather ens_avg3d and ens_stddev2d results
                shape_tuple2d=get_shape(ens_avg2d.shape,len(d2_var_names),me.get_rank())
                ens_avg2d=gather_npArray(ens_avg2d,me,slice_index,shape_tuple2d)
                ens_stddev2d=gather_npArray(ens_stddev2d,me,slice_index,shape_tuple2d)
        else:
            gmall=np.concatenate((temp1,temp2),axis=0)
            gmall=pyEnsLib.gather_npArray_pop(gmall,me,(me.get_size(),len(d3_var_names)+len(d2_var_names)))

    # Assign to file:
    # NOTE(review): same precedence bug as above -- parses as
    # me.get_rank() == (0 | opts_dict['popens']).
    if me.get_rank() == 0 | opts_dict['popens'] :
        if not opts_dict['cumul']:
            gmall=np.concatenate((gm3d,gm2d),axis=0)
            if not opts_dict['gmonly']:
                Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0)
                v_RMSZ[:,:]=Zscoreall[:,:]
            if not opts_dict['gmonly']:
                if (is_SE == True):
                    v_ens_avg3d[:,:,:]=ens_avg3d[:,:,:]
                    v_ens_stddev3d[:,:,:]=ens_stddev3d[:,:,:]
                    v_ens_avg2d[:,:]=ens_avg2d[:,:]
                    v_ens_stddev2d[:,:]=ens_stddev2d[:,:]
                else:
                    v_ens_avg3d[:,:,:,:]=ens_avg3d[:,:,:,:]
                    v_ens_stddev3d[:,:,:,:]=ens_stddev3d[:,:,:,:]
                    v_ens_avg2d[:,:,:]=ens_avg2d[:,:,:]
                    v_ens_stddev2d[:,:,:]=ens_stddev2d[:,:,:]
        else:
            gmall_temp=np.transpose(gmall[:,:])
            gmall=gmall_temp
        # PCA on the global means produces the statistics CECT tests against
        mu_gm,sigma_gm,standardized_global_mean,loadings_gm,scores_gm=pyEnsLib.pre_PCA(gmall,all_var_names,var_list,me)
        v_gm[:,:]=gmall[:,:]
        v_standardized_gm[:,:]=standardized_global_mean[:,:]
        v_mu_gm[:]=mu_gm[:]
        v_sigma_gm[:]=sigma_gm[:].astype(np.float32)
        v_loadings_gm[:,:]=loadings_gm[:,:]
        v_sigma_scores_gm[:]=scores_gm[:]

    if me.get_rank() == 0:
        print "All Done"
def main(argv):
    """pyCECT driver: test new CESM runs against an ensemble summary file.

    Parses command-line options, locates the testcase history files, and
    either runs POP-ECT (Z-score comparison, --popens) or CAM-ECT (global
    mean + PCA score comparison against the summary file's statistics).
    Prints PASS/FAIL information and optional per-variable diagnostics.

    NOTE: Python 2 code.  Relies on module globals: getopt, sys, os, glob,
    json, random, re, np, Nio, datetime, pyEnsLib, simplecomm, EqualStride.
    """
    # Get command line stuff and store in a dictionary
    s = """verbose sumfile= indir= input_globs= tslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVarTest popens jsonfile= mpi_enable nbin= minrange= maxrange= outfile= casejson= npick= pepsi_gm pop_tol= web_enabled pop_threshold= prn_std_mean fIndex= lev= eet= json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['prn_std_mean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']

    #some mods for POP-ECT
    if popens == True:
        opts_dict['tslice'] = 0
        opts_dict['numRunFile'] = 1
        opts_dict['eet'] = 0
        opts_dict['mpi_enable'] = False

    #print opts_dict

    # Create a mpi simplecomm object (serial comm when MPI is disabled)
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']
    if me.get_rank() == 0:
        print '--------pyCECT--------'
        print ' '
        print dt.strftime("%A, %d. %B %Y %I:%M%p")
        print ' '
        if not opts_dict['web_enabled']:
            print 'Ensemble summary file = ' + opts_dict['sumfile']
        print ' '
        print 'Testcase file directory = ' + opts_dict['indir']
        print ' '
        print ' '

    # Ensure sensible EET value
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    ifiles = []
    in_files = []
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
            in_files_first = result['not_pick_files']
            in_files = random.sample(in_files_first, opts_dict['npick'])
            print 'Testcase files:'
            print '\n'.join(in_files)
    elif opts_dict['json_case']:
        # Case names come from a json file; glob matching files in --indir
        json_file = opts_dict['json_case']
        if (os.path.exists(json_file)):
            fd = open(json_file)
            metainfo = json.load(fd)
            if 'CaseName' in metainfo:
                casename = metainfo['CaseName']
                if (os.path.exists(opts_dict['indir'])):
                    for name in casename:
                        wildname = '*.' + name + '.*'
                        full_glob_str = os.path.join(opts_dict['indir'], wildname)
                        glob_file = glob.glob(full_glob_str)
                        in_files.extend(glob_file)
        else:
            print "ERROR: " + opts_dict['json_case'] + " does not exist."
            sys.exit()
        print "in_files=", in_files
    else:
        # Default: glob --indir with the user-supplied wildcard fragment
        wildname = '*' + str(opts_dict['input_globs']) + '*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            glob_files = glob.glob(full_glob_str)
            in_files.extend(glob_files)
            num_file = len(in_files)
            if num_file == 0:
                print "ERROR: no matching files for wildcard=" + wildname + " found in specified --indir"
                sys.exit()
            else:
                print "Found " + str(num_file) + " matching files in specified --indir"
            if opts_dict['numRunFile'] > num_file:
                print "ERROR: more files needed (" + str(opts_dict['numRunFile']) + ") than available in the indir (" + str(num_file) + ")."
                sys.exit()

    #in_files_temp=os.listdir(opts_dict['indir'])
    in_files.sort()
    #print in_files

    if popens:
        #Partition the input file list
        in_files_list = me.partition(in_files, func=EqualStride(), involved=True)
    else:
        # Random pick non pop files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)
    #in_files_list=in_files

    # Open each selected testcase file with PyNIO
    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(Nio.open_file(frun_temp, "r"))
        else:
            print "ERROR: COULD NOT LOCATE FILE " + frun_temp
            sys.exit()

    if opts_dict['web_enabled']:
        if len(opts_dict['sumfile']) == 0:
            opts_dict['sumfile'] = '/glade/p/cesmdata/cseg/inputdata/validation/'
        # Locate the matching summary file for this machine/compiler
        opts_dict['sumfile'], machineid, compiler = pyEnsLib.search_sumfile(opts_dict, ifiles)
        if len(machineid) != 0 and len(compiler) != 0:
            print ' '
            print 'Validation file : machineid = ' + machineid + ', compiler = ' + compiler
            print 'Found summary file : ' + opts_dict['sumfile']
            print ' '
        else:
            print 'Warning: machine and compiler are unknown'

    if popens:
        # ---------------- POP-ECT branch ----------------
        # Read in the included var list
        if not os.path.exists(opts_dict['jsonfile']):
            print "ERROR: POP-ECT requires the specification of a valid json file via --jsonfile."
            sys.exit()
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print ' '
        print 'Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol'])
        print 'ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold'])
        zmall, n_timeslice = pyEnsLib.pop_compare_raw_score(opts_dict, ifiles, me.get_rank(), Var3d, Var2d)
        #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0)
        # NOTE(review): threshold=np.nan raises in numpy >= 1.14; use
        # threshold=sys.maxsize (or np.inf pre-1.22) on modern numpy.
        np.set_printoptions(threshold=np.nan)

        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(zmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(ifiles), opts_dict['nbin']))
            if me.get_rank() == 0:
                # NOTE(review): fout is never closed; a with-block would be safer.
                fout = open(opts_dict['outfile'], "w")
                for i in range(me.get_size()):
                    for j in zmall[i]:
                        np.savetxt(fout, j, fmt='%-7.2e')
    #cam
    else:
        # ---------------- CAM-ECT branch ----------------
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum, std_gm = pyEnsLib.read_ensemble_summary(opts_dict['sumfile'])

        # Summary files written with gmonly have no RMSZ data
        if len(ens_rmsz) == 0:
            gmonly = True
        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}
        if not gmonly:
            for k, v in ens_rmsz.iteritems():
                pyEnsLib.addvariables(variables, k, 'zscoreRange', v)

        for k, v in ens_gm.iteritems():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)

        # Get 3d variable name list and 2d variable name list separately
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

        # xor: warn when summary grid type and testcase grid type disagree
        if (is_SE ^ is_SE_sum):
            print 'Warning: please note the ensemble summary file is different from the testing files, they use different grids'

        # Compare the new run and the ensemble summary file to get rmsz score
        results = {}
        countzscore = np.zeros(len(ifiles), dtype=np.int32)
        countgm = np.zeros(len(ifiles), dtype=np.int32)

        if not gmonly:
            for fcount, fid in enumerate(ifiles):
                otimeSeries = fid.variables
                for var_name in ens_var_name:
                    orig = otimeSeries[var_name]
                    Zscore, has_zscore = pyEnsLib.calculate_raw_score(var_name, orig[opts_dict['tslice']], npts3d, npts2d, ens_avg, ens_stddev, is_SE, opts_dict, 0, 0, 0)
                    if has_zscore:
                        # Add the new run rmsz zscore to the dictionary "results"
                        pyEnsLib.addresults(results, 'zscore', Zscore, var_name, 'f' + str(fcount))

            # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
            for fcount, fid in enumerate(ifiles):
                countzscore[fcount] = pyEnsLib.evaluatestatus('zscore', 'zscoreRange', variables, 'ens', results, 'f' + str(fcount))

        # Calculate the new run global mean
        mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary(ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'], opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i], ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount, fid in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange', variables, 'gm', results, 'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores, var_list, comp_std_gm = pyEnsLib.standardized(means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict, ens_avg, me)
        run_index, decision = pyEnsLib.comparePCAscores(ifiles, new_scores, sigma_scores_gm, opts_dict, me)

        # If there is failure, plot out standardized mean and compared standardized mean in box plots
        if opts_dict['prn_std_mean'] and decision == 'FAILED':
            import seaborn as sns
            # Bucket variables by how many of the (3) runs fall outside the
            # ensemble's 99% band / one-sided inter-quartile region.
            category = {"all_outside99": [], "two_outside99": [], "one_outside99": [], "all_oneside_outside1QR": []}
            b = list(pyEnsLib.chunk(ens_var_name, 10))
            for f, alist in enumerate(b):
                for fc, avar in enumerate(alist):
                    # NOTE(review): flat index f + fc looks suspect for
                    # chunked iteration (should presumably be f*10 + fc) --
                    # confirm against pyCECT upstream.
                    dist_995 = np.percentile(std_gm[avar], 99.5)
                    dist_75 = np.percentile(std_gm[avar], 75)
                    dist_25 = np.percentile(std_gm[avar], 25)
                    dist_05 = np.percentile(std_gm[avar], 0.5)
                    c = 0
                    d = 0
                    p = 0
                    q = 0
                    for i in range(comp_std_gm[f + fc].size):
                        if comp_std_gm[f + fc][i] > dist_995:
                            c = c + 1
                        elif comp_std_gm[f + fc][i] < dist_05:
                            d = d + 1
                        elif (comp_std_gm[f + fc][i] < dist_995 and comp_std_gm[f + fc][i] > dist_75):
                            p = p + 1
                        elif (comp_std_gm[f + fc][i] > dist_05 and comp_std_gm[f + fc][i] < dist_25):
                            q = q + 1
                    if c == 3 or d == 3:
                        category["all_outside99"].append((avar, f + fc))
                    elif c == 2 or d == 2:
                        category["two_outside99"].append((avar, f + fc))
                    elif c == 1 or d == 1:
                        category["one_outside99"].append((avar, f + fc))
                    if p == 3 or q == 3:
                        category["all_oneside_outside1QR"].append((avar, f + fc))
            part_name = opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name = opts_dict['indir'].split('/')[-2]
            for key in sorted(category):
                list_array = []
                list_array2 = []
                list_var = []
                value = category[key]
                print "value len=", key, len(value)
                for each_var in value:
                    list_array.append(std_gm[each_var[0]])
                    list_array2.append(comp_std_gm[each_var[1]])
                    list_var.append(each_var[0])
                if len(value) != 0:
                    # NOTE(review): sns.plt was removed in seaborn >= 0.9;
                    # modern code imports matplotlib.pyplot directly.
                    ax = sns.boxplot(data=list_array, whis=[0.5, 99.5], fliersize=0.0)
                    sns.stripplot(data=list_array2, jitter=True, color="r")
                    sns.plt.xticks(range(len(list_array)), list_var, fontsize=8, rotation=-45)
                    if decision == 'FAILED':
                        sns.plt.savefig(part_name + "_" + key + "_fail.png")
                    else:
                        sns.plt.savefig(part_name + "_" + key + "_pass.png")
                    sns.plt.clf()

        # Dead code kept as a string literal (disabled plotting of failing runs
        # against matched ensemble members).
        '''
        if len(run_index)>0:
           json_file=opts_dict['json_case']
           if (os.path.exists(json_file)):
              fd=open(json_file)
              metainfo=json.load(fd)
              caseindex=metainfo['CaseIndex']
              enspath=str(metainfo['EnsPath'][0])
              #print caseindex
              if (os.path.exists(enspath)):
                 i=0
                 comp_file=[]
                 search = '\.[0-9]{3}\.'
                 for name in in_files_list:
                     s=re.search(search,name)
                     in_files_index=s.group(0)
                     if in_files_index[1:4] in caseindex:
                        ens_index=str(caseindex[in_files_index[1:4]])
                        wildname='*.'+ens_index+'.*'
                        full_glob_str=os.path.join(enspath,wildname)
                        glob_file=glob.glob(full_glob_str)
                        comp_file.extend(glob_file)
                 print "comp_file=",comp_file
                 pyEnsLib.plot_variable(in_files_list,comp_file,opts_dict,var_list,run_index,me)
        '''

        # Print out
        if opts_dict['printVarTest']:
            print '*********************************************** '
            print 'Variable-based testing (for reference only - not used to determine pass/fail)'
            print '*********************************************** '
            for fcount, fid in enumerate(ifiles):
                print ' '
                print 'Run ' + str(fcount + 1) + ":"
                print ' '
                if not gmonly:
                    print '***' + str(countzscore[fcount]), " of " + str(len(ens_var_name)) + ' variables are outside of ensemble RMSZ distribution***'
                    pyEnsLib.printsummary(results, 'ens', 'zscore', 'zscoreRange', (fcount), variables, 'RMSZ')
                    print ' '
                print '***' + str(countgm[fcount]), " of " + str(len(ens_var_name)) + ' variables are outside of ensemble global mean distribution***'
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange', fcount, variables, 'global mean')
                print ' '
                print '----------------------------------------------------------------------------'

    if me.get_rank() == 0:
        print ' '
        print "Testing complete."
        print ' '
def __init__(self, specifier, serial=False, verbosity=1, wmode='w',
             once=False, simplecomm=None):
    """
    Constructor

    Parameters:
        specifier (Specifier): An instance of the Specifier class,
            defining the input specification for this reshaper operation.

        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel
            (False).  The default is to assume parallel operation
            (but serial will be chosen if the mpi4py cannot be
            found when trying to initialize decomposition.

        verbosity(int): Level of printed output (stdout).  A value of 0
            means no output, and a higher value means more output.  The
            default value is 1.

        wmode (str): The mode to use for writing output.  Can be 'w' for
            normal write operation, 's' to skip the output generation for
            existing time-series files, 'o' to overwrite existing
            time-series files, 'a' to append to existing time-series files.

        once (bool): True or False, indicating whether the Reshaper should
            write all metadata to a 'once' file (separately).

        simplecomm (SimpleComm): A SimpleComm object to handle the parallel
            communication, if necessary

    Raises:
        TypeError: If any argument has the wrong type.
        ValueError: If wmode is not one of 'w', 's', 'o', 'a'.
    """

    # Type checking (or double-checking)
    if not isinstance(specifier, Specifier):
        err_msg = "Input must be given in the form of a Specifier object"
        raise TypeError(err_msg)
    if type(serial) is not bool:
        err_msg = "Serial indicator must be True or False."
        raise TypeError(err_msg)
    if type(verbosity) is not int:
        err_msg = "Verbosity level must be an integer."
        raise TypeError(err_msg)
    if type(wmode) is not str:
        err_msg = "Write mode flag must be a str."
        raise TypeError(err_msg)
    if type(once) is not bool:
        err_msg = "Once-file indicator must be True or False."
        raise TypeError(err_msg)
    if simplecomm is not None:
        if not isinstance(simplecomm, SimpleComm):
            err_msg = "Simple communicator object is not a SimpleComm"
            raise TypeError(err_msg)
    if wmode not in ['w', 's', 'o', 'a']:
        err_msg = "Write mode '{0}' not recognized".format(wmode)
        raise ValueError(err_msg)

    # Whether to write a once file
    self._use_once_file = once

    # The output write mode to use
    self._write_mode = wmode

    # Internal timer data
    self._timer = TimeKeeper()

    self._timer.start('Initializing Simple Communicator')
    if simplecomm is None:
        # No communicator supplied: build one (serial or parallel)
        simplecomm = create_comm(serial=serial)

    # Reference to the simple communicator
    self._simplecomm = simplecomm
    self._timer.stop('Initializing Simple Communicator')

    # Dictionary storing read/write data amounts
    self.assumed_block_size = float(4 * 1024 * 1024)
    self._byte_counts = {}

    # Contruct the print header, e.g. "[rank/size] "
    header = ''.join(['[', str(self._simplecomm.get_rank()),
                      '/', str(self._simplecomm.get_size()), '] '])

    # Reference to the verbose printer tool
    self._vprint = VPrinter(header=header, verbosity=verbosity)

    # Debug output starting
    if self._simplecomm.is_manager():
        self._vprint('Initializing Reshaper...', verbosity=0)
        self._vprint('  MPI Communicator Size: {}'.format(
            self._simplecomm.get_size()), verbosity=1)

    # Validate the user input data
    self._timer.start('Specifier Validation')
    specifier.validate()
    self._timer.stop('Specifier Validation')
    if self._simplecomm.is_manager():
        self._vprint('  Specifier validated', verbosity=1)

    # The I/O backend to use (fall back to whatever iobackend selects when
    # the requested backend is not installed)
    if iobackend.is_available(specifier.io_backend):
        self._backend = specifier.io_backend
    else:
        self._backend = iobackend.get_backend()
        self._vprint(('  I/O Backend {0} not available.  Using {1} '
                      'instead').format(specifier.io_backend, self._backend),
                     verbosity=1)

    # Store the input file names
    self._input_filenames = specifier.input_file_list

    # Store the time-series variable names
    self._time_series_names = specifier.time_series
    if self._time_series_names is not None:
        vnames = ', '.join(self._time_series_names)
        if self._simplecomm.is_manager():
            self._vprint('WARNING: Extracting only variables: {0}'.format(
                vnames), verbosity=-1)

    # Store the list of metadata names
    self._metadata_names = specifier.time_variant_metadata

    # Store whether to treat 1D time-variant variables as metadata
    self._1d_metadata = specifier.assume_1d_time_variant_metadata

    # Store the metadata filename
    self._metadata_filename = specifier.metadata_filename

    # Store time invariant variables that should be excluded from the timeseries files
    self._exclude_list = specifier.exclude_list

    # Store the output file prefix and suffix
    self._output_prefix = specifier.output_file_prefix
    self._output_suffix = specifier.output_file_suffix

    # Setup NetCDF file options
    self._netcdf_format = specifier.netcdf_format
    self._netcdf_compression = specifier.compression_level
    self._netcdf_least_significant_digit = specifier.least_significant_digit
    if self._simplecomm.is_manager():
        self._vprint('  NetCDF I/O Backend: {0}'.format(self._backend),
                     verbosity=1)
        self._vprint('  NetCDF Output Format: {0}'.format(
            self._netcdf_format), verbosity=1)
        self._vprint('  NetCDF Compression: {0}'.format(
            self._netcdf_compression), verbosity=1)
        trunc_str = ('{} decimal places'.format(
            self._netcdf_least_significant_digit)
            if self._netcdf_least_significant_digit else 'Disabled')
        self._vprint('  NetCDF Truncation: {0}'.format(
            trunc_str), verbosity=1)

    # Helpful debugging message
    if self._simplecomm.is_manager():
        self._vprint('...Reshaper initialized.', verbosity=0)

    # Sync before continuing..
    self._simplecomm.sync()
def main(argv): # Get command line stuff and store in a dictionary s="""verbose sumfile= indir= input_globs= tslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVarTest popens jsonfile= mpi_enable nbin= minrange= maxrange= outfile= casejson= npick= pepsi_gm test_failure pop_tol= web_enabled pop_threshold= prn_std_mean fIndex= lev= eet= json_case= """ optkeys = s.split() try: opts, args = getopt.getopt(argv,"h",optkeys) except getopt.GetoptError: pyEnsLib.CECT_usage() sys.exit(2) # Set the default value for options opts_dict = {} opts_dict['input_globs'] = '' opts_dict['indir'] = '' opts_dict['tslice'] = 1 opts_dict['nPC'] = 50 opts_dict['sigMul'] = 2 opts_dict['verbose'] = False opts_dict['minPCFail'] = 3 opts_dict['minRunFail'] = 2 opts_dict['numRunFile'] = 3 opts_dict['printVarTest'] = False opts_dict['popens'] = False opts_dict['jsonfile'] = '' opts_dict['mpi_enable'] = False opts_dict['nbin'] = 40 opts_dict['minrange'] = 0.0 opts_dict['maxrange'] = 4.0 opts_dict['outfile'] = 'testcase.result' opts_dict['casejson'] = '' opts_dict['npick'] = 10 opts_dict['pepsi_gm'] = False opts_dict['test_failure'] = True opts_dict['pop_tol'] = 3.0 opts_dict['pop_threshold'] = 0.90 opts_dict['prn_std_mean'] = False opts_dict['lev'] = 0 opts_dict['eet'] = 0 opts_dict['json_case'] = '' opts_dict['sumfile'] = '' opts_dict['web_enabled'] = False # Call utility library getopt_parseconfig to parse the option keys # and save to the dictionary caller = 'CECT' gmonly = False opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,caller,opts_dict) popens = opts_dict['popens'] # Create a mpi simplecomm object if opts_dict['mpi_enable']: me=simplecomm.create_comm() else: me=simplecomm.create_comm(not opts_dict['mpi_enable']) # Print out timestamp, input ensemble file and new run directory dt=datetime.now() verbose = opts_dict['verbose'] if me.get_rank()==0: print '--------pyCECT--------' print ' ' print dt.strftime("%A, %d. 
%B %Y %I:%M%p") print ' ' if not opts_dict['web_enabled']: print 'Ensemble summary file = '+opts_dict['sumfile'] print ' ' print 'Testcase file directory = '+opts_dict['indir'] print ' ' print ' ' # Ensure sensible EET value if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']: pyEnsLib.CECT_usage() sys.exit(2) ifiles=[] in_files=[] # Random pick pop files from not_pick_files list if opts_dict['casejson']: with open(opts_dict['casejson']) as fin: result=json.load(fin) in_files_first=result['not_pick_files'] in_files=random.sample(in_files_first,opts_dict['npick']) print 'Testcase files:' print '\n'.join(in_files) elif opts_dict['json_case']: json_file=opts_dict['json_case'] if (os.path.exists(json_file)): fd=open(json_file) metainfo=json.load(fd) if 'CaseName' in metainfo: casename=metainfo['CaseName'] if (os.path.exists(opts_dict['indir'])): for name in casename: wildname='*.'+name+'.*' full_glob_str=os.path.join(opts_dict['indir'],wildname) glob_file=glob.glob(full_glob_str) in_files.extend(glob_file) else: print "Error: "+opts_dict['json_case']+" does not exist" sys.exit() print "in_files=",in_files else: wildname='*'+opts_dict['input_globs']+'*' # Open all input files if (os.path.exists(opts_dict['indir'])): full_glob_str=os.path.join(opts_dict['indir'],wildname) glob_files=glob.glob(full_glob_str) in_files.extend(glob_files) num_file=len(in_files) if opts_dict['numRunFile'] > num_file: print "You requested more numRunFile than it is available at the indir, please change" sys.exit() #in_files_temp=os.listdir(opts_dict['indir']) in_files.sort() if popens: #Partition the input file list in_files_list=me.partition(in_files,func=EqualStride(),involved=True) else: # Random pick non pop files in_files_list=pyEnsLib.Random_pickup(in_files,opts_dict) #in_files_list=in_files for frun_file in in_files_list: if frun_file.find(opts_dict['indir']) != -1: frun_temp=frun_file else: frun_temp=opts_dict['indir']+'/'+frun_file if (os.path.isfile(frun_temp)): 
ifiles.append(Nio.open_file(frun_temp,"r")) else: print "COULD NOT LOCATE FILE " +frun_temp+" EXISTING" sys.exit() if opts_dict['web_enabled']: if len(opts_dict['sumfile'])==0: opts_dict['sumfile']='/glade/p/cesmdata/cseg/inputdata/validation/' opts_dict['sumfile'],machineid,compiler=pyEnsLib.search_sumfile(opts_dict,ifiles) if len(machineid)!=0 and len(compiler)!=0: print ' ' print 'Validation file : machineid = '+machineid+', compiler = '+compiler print 'Found summery file : '+opts_dict['sumfile'] print ' ' else: print 'Warning: machineid and compiler are unknown' if popens: # Read in the included var list Var2d,Var3d=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ESP') print ' ' print 'Z-score tolerance = '+'{:3.2f}'.format(opts_dict['pop_tol']) print 'ZPR = '+'{:.2%}'.format(opts_dict['pop_threshold']) zmall,n_timeslice=pyEnsLib.compare_raw_score(opts_dict,ifiles,me.get_rank(),Var3d,Var2d) #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0) np.set_printoptions(threshold=np.nan) if opts_dict['mpi_enable']: zmall = pyEnsLib.gather_npArray_pop(zmall,me,(me.get_size(),len(Var3d)+len(Var2d),len(ifiles),opts_dict['nbin'])) if me.get_rank()==0: fout = open(opts_dict['outfile'],"w") for i in range(me.get_size()): for j in zmall[i]: np.savetxt(fout,j,fmt='%-7.2e') else: # Read all variables from the ensemble summary file ens_var_name,ens_avg,ens_stddev,ens_rmsz,ens_gm,num_3d,mu_gm,sigma_gm,loadings_gm,sigma_scores_gm,is_SE_sum,std_gm=pyEnsLib.read_ensemble_summary(opts_dict['sumfile']) if len(ens_rmsz) == 0: gmonly = True # Add ensemble rmsz and global mean to the dictionary "variables" variables={} if not gmonly: for k,v in ens_rmsz.iteritems(): pyEnsLib.addvariables(variables,k,'zscoreRange',v) for k,v in ens_gm.iteritems(): pyEnsLib.addvariables(variables,k,'gmRange',v) # Get 3d variable name list and 2d variable name list seperately var_name3d=[] var_name2d=[] for vcount,v in enumerate(ens_var_name): if vcount < num_3d: var_name3d.append(v) else: var_name2d.append(v) 
# Get ncol and nlev value npts3d,npts2d,is_SE=pyEnsLib.get_ncol_nlev(ifiles[0]) if (is_SE ^ is_SE_sum): print 'Warning: please note the ensemble summary file is different from the testing files, they use different grids' # Compare the new run and the ensemble summary file to get rmsz score results={} countzscore=np.zeros(len(ifiles),dtype=np.int32) countgm=np.zeros(len(ifiles),dtype=np.int32) if not gmonly: for fcount,fid in enumerate(ifiles): otimeSeries = fid.variables for var_name in ens_var_name: orig=otimeSeries[var_name] Zscore,has_zscore=pyEnsLib.calculate_raw_score(var_name,orig[opts_dict['tslice']],npts3d,npts2d,ens_avg,ens_stddev,is_SE,opts_dict,0,0,0) if has_zscore: # Add the new run rmsz zscore to the dictionary "results" pyEnsLib.addresults(results,'zscore',Zscore,var_name,'f'+str(fcount)) # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range for fcount,fid in enumerate(ifiles): countzscore[fcount]=pyEnsLib.evaluatestatus('zscore','zscoreRange',variables,'ens',results,'f'+str(fcount)) # Calculate the new run global mean mean3d,mean2d,varlist=pyEnsLib.generate_global_mean_for_summary(ifiles,var_name3d,var_name2d,is_SE,opts_dict['pepsi_gm'],opts_dict) means=np.concatenate((mean3d,mean2d),axis=0) # Add the new run global mean to the dictionary "results" for i in range(means.shape[1]): for j in range(means.shape[0]): pyEnsLib.addresults(results,'means',means[j][i],ens_var_name[j],'f'+str(i)) # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range for fcount,fid in enumerate(ifiles): countgm[fcount]=pyEnsLib.evaluatestatus('means','gmRange',variables,'gm',results,'f'+str(fcount)) # Calculate the PCA scores of the new run new_scores,var_list,comp_std_gm=pyEnsLib.standardized(means,mu_gm,sigma_gm,loadings_gm,ens_var_name,opts_dict,ens_avg,me) run_index,decision=pyEnsLib.comparePCAscores(ifiles,new_scores,sigma_scores_gm,opts_dict,me) # If there is failure, plot out 
standardized mean and compared standardized mean in box plots if opts_dict['prn_std_mean'] and decision == 'FAILED': import seaborn as sns category={"all_outside99":[],"two_outside99":[],"one_outside99":[],"all_oneside_outside1QR":[]} b=list(pyEnsLib.chunk(ens_var_name,10)) for f,alist in enumerate(b): for fc,avar in enumerate(alist): dist_995=np.percentile(std_gm[avar],99.5) dist_75=np.percentile(std_gm[avar],75) dist_25=np.percentile(std_gm[avar],25) dist_05=np.percentile(std_gm[avar],0.5) c=0 d=0 p=0 q=0 for i in range(comp_std_gm[f+fc].size): if comp_std_gm[f+fc][i]>dist_995: c=c+1 elif comp_std_gm[f+fc][i]<dist_05: d=d+1 elif (comp_std_gm[f+fc][i]<dist_995 and comp_std_gm[f+fc][i]>dist_75): p=p+1 elif (comp_std_gm[f+fc][i]>dist_05 and comp_std_gm[f+fc][i]<dist_25): q=q+1 if c == 3 or d == 3: category["all_outside99"].append((avar,f+fc)) elif c == 2 or d == 2: category["two_outside99"].append((avar,f+fc)) elif c == 1 or d == 1: category["one_outside99"].append((avar,f+fc)) if p == 3 or q == 3: category["all_oneside_outside1QR"].append((avar,f+fc)) part_name=opts_dict['indir'].split('/')[-1] if not part_name: part_name=opts_dict['indir'].split('/')[-2] for key in sorted(category): list_array=[] list_array2=[] list_var=[] value=category[key] print "value len=",key,len(value) for each_var in value: list_array.append(std_gm[each_var[0]]) list_array2.append(comp_std_gm[each_var[1]]) list_var.append(each_var[0]) if len(value) !=0 : ax=sns.boxplot(data=list_array,whis=[0.5,99.5],fliersize=0.0) sns.stripplot(data=list_array2,jitter=True,color="r") sns.plt.xticks(range(len(list_array)),list_var,fontsize=8,rotation=-45) if decision == 'FAILED': sns.plt.savefig(part_name+"_"+key+"_fail.png") else: sns.plt.savefig(part_name+"_"+key+"_pass.png") sns.plt.clf() ''' if len(run_index)>0: json_file=opts_dict['json_case'] if (os.path.exists(json_file)): fd=open(json_file) metainfo=json.load(fd) caseindex=metainfo['CaseIndex'] enspath=str(metainfo['EnsPath'][0]) #print caseindex 
if (os.path.exists(enspath)): i=0 comp_file=[] search = '\.[0-9]{3}\.' for name in in_files_list: s=re.search(search,name) in_files_index=s.group(0) if in_files_index[1:4] in caseindex: ens_index=str(caseindex[in_files_index[1:4]]) wildname='*.'+ens_index+'.*' full_glob_str=os.path.join(enspath,wildname) glob_file=glob.glob(full_glob_str) comp_file.extend(glob_file) print "comp_file=",comp_file pyEnsLib.plot_variable(in_files_list,comp_file,opts_dict,var_list,run_index,me) ''' # Print out if opts_dict['printVarTest']: print '*********************************************** ' print 'Variable-based testing (for reference only - not used to determine pass/fail)' print '*********************************************** ' for fcount,fid in enumerate(ifiles): print ' ' print 'Run '+str(fcount+1)+":" print ' ' if not gmonly: print '***'+str(countzscore[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble RMSZ distribution***' pyEnsLib.printsummary(results,'ens','zscore','zscoreRange',(fcount),variables,'RMSZ') print ' ' print '***'+str(countgm[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble global mean distribution***' pyEnsLib.printsummary(results,'gm','means','gmRange',fcount,variables,'global mean') print ' ' print '----------------------------------------------------------------------------' if me.get_rank() == 0: print ' ' print "Testing complete." print ' '
def main(argv):
    """Driver for pyEnsSum: build a CAM ensemble summary NetCDF file
    (global means, RMSZ scores, and PCA loadings) from an ensemble of runs.

    argv -- command-line arguments (typically sys.argv[1:]); recognized
            options are listed in pyEnsLib.EnsSum_usage().
    Exits the process (sys.exit) on usage errors or missing inputs.
    """
    print('Running pyEnsSum!')

    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}
    # Defaults
    opts_dict['tag'] = ''
    opts_dict['compset'] = ''
    opts_dict['mach'] = ''
    opts_dict['esize'] = 151
    opts_dict['tslice'] = 0
    opts_dict['res'] = ''
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = False
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']
    st = opts_dict['esize']
    esize = int(st)

    if (verbose == True):
        print(opts_dict)
        print('Ensemble size for summary = ', esize)

    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']):
        print('Please specify --tag, --compset, --mach and --res options')
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist = []

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            # Read in the excluded var list
            ex_varlist = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ES')
    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)

    in_files = []
    if (os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)
        #print in_files
        # Make sure we have enough
        num_files = len(in_files)
        if (verbose == True):
            print('Number of files in input directory = ', num_files)
        if (num_files < esize):
            print('Number of files in input directory (', num_files, ') is less than specified ensemble size of ', esize)
            sys.exit(2)
        if (num_files > esize):
            print('NOTE: Number of files in ', input_dir, 'is greater than specified ensemble size of ', esize, '\nwill just use the first ', esize, 'files')
    else:
        print('Input directory: ', input_dir, ' not found')
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'], opts_dict['regx'])
        in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
        if me.get_rank() == 0:
            print('in_files=', in_files)

    # Open the files in the input directory
    o_files = []
    for onefile in in_files[0:esize]:
        if (os.path.isfile(input_dir + '/' + onefile)):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            # BUGFIX: the reported path was missing the '/' separator used
            # in the os.path.isfile test above
            print("COULD NOT LOCATE FILE " + input_dir + '/' + onefile + "! EXITING....")
            sys.exit()

    # Store dimensions of the input fields
    if (verbose == True):
        print("Getting spatial dimensions")
    nlev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)
    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key == "lon"):
            nlon = input_dims[key]
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey = key

    if (nlev == -1):
        print("COULD NOT LOCATE valid dimension lev => EXITING....")
        sys.exit()
    if ((ncol == -1) and ((nlat == -1) or (nlon == -1))):
        print("Need either lat/lon or ncol => EXITING....")
        sys.exit()

    # Check if this is SE or FV data
    if (ncol != -1):
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if (verbose == True):
        print("Checking dimensions across files....")
        print('lev = ', nlev)
        if (is_SE == True):
            print('ncol = ', ncol)
        else:
            print('nlat = ', nlat)
            print('nlon = ', nlon)
    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (is_SE == True):
            if (nlev != int(input_dims["lev"]) or (ncol != int(input_dims["ncol"]))):
                # BUGFIX: second name was in_files[0], so the offending file
                # was never identified
                print("Dimension mismatch between ", in_files[0], 'and', in_files[count], '!!!')
                sys.exit()
        else:
            if (nlev != int(input_dims["lev"]) or (nlat != int(input_dims[latkey]))
                    or (nlon != int(input_dims[lonkey]))):
                print("Dimension mismatch between ", in_files[0], 'and', in_files[count], '!!!')
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    if ex_varlist:
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    num_vars = len(vars_dict)
    if (verbose == True):
        print('Number of variables (including metadata) found = ', num_vars)
    str_size = 0
    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.iteritems():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = v.rank  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True):  # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):
                is_2d = True
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev)):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):
                is_2d = True
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and vs[1] == nlev)):
                is_3d = True
                num_3d += 1
        if (is_3d == True):
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif (is_2d == True):
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    if (verbose == True):
        print('num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")")

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]
    if (verbose == True):
        print("Creating ", this_sumfile, " ...")
    # BUGFIX: was "me.get_rank() == 0 | opts_dict['popens']", which parses as
    # rank == (0 | popens) and so tested rank against 0/1 instead of OR-ing
    if (me.get_rank() == 0 or opts_dict["popens"]):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if (verbose == True):
            print("Setting dimensions .....")
        if (is_SE == True):
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if (verbose == True):
            print("Setting global attributes .....")
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if (verbose == True):
            print("Creating variables .....")
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            if (is_SE == True):
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol'))
            else:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))
            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm', 'f', ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm', 'f', ('nvars',))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm', 'f', ('nvars',))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm', 'f', ('nvars',))

        # Assign vars, var3d and var2d
        if (verbose == True):
            print("Assigning vars, var3d, and var2d .....")

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if (verbose == True):
            print("Assigning time invariant metadata .....")
        lev_data = vars_dict["lev"]
        v_lev = lev_data

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable, we also do max norm also (currently done in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True)
        var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # Calculate global means
    if (verbose == True):
        print("Calculating global means .....")
    if not opts_dict['cumul']:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict)
    if (verbose == True):
        print("Finish calculating global means .....")

    # Calculate RMSZ scores
    if (verbose == True):
        print("Calculating RMSZ scores .....")
    if (not opts_dict['gmonly']) | (opts_dict['cumul']):
        zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if (verbose == True):
            print("Calculating max norm of ensembles .....")
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict['mpi_enable'] & (not opts_dict['popens']):
        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)
            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index, (len(d3_var_names), len(o_files)))
            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d = gather_npArray(zscore3d, me, slice_index, (len(d3_var_names), len(o_files)))
                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names), me.get_rank())
                ens_avg3d = gather_npArray(ens_avg3d, me, slice_index, shape_tuple3d)
                ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index, shape_tuple3d)

            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)
            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index, (len(d2_var_names), len(o_files)))
            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d = gather_npArray(zscore2d, me, slice_index, (len(d2_var_names), len(o_files)))
                # Gather ens_avg2d and ens_stddev2d results
                shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names), me.get_rank())
                ens_avg2d = gather_npArray(ens_avg2d, me, slice_index, shape_tuple2d)
                ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index, shape_tuple2d)
        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names)))

    # Assign to file:
    # BUGFIX: same precedence bug as above ("== 0 | ..." -> "== 0 or ...")
    if me.get_rank() == 0 or opts_dict['popens']:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            if not opts_dict['gmonly']:
                Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0)
                v_RMSZ[:, :] = Zscoreall[:, :]
            if not opts_dict['gmonly']:
                if (is_SE == True):
                    v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :]
                    v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :]
                    v_ens_avg2d[:, :] = ens_avg2d[:, :]
                    v_ens_stddev2d[:, :] = ens_stddev2d[:, :]
                else:
                    v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :]
                    v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :]
                    v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :]
                    v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :]
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(gmall)
        v_gm[:, :] = gmall[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:].astype(np.float32)
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]
        print("All Done")


def get_cumul_filelist(opts_dict, indir, regx):
    """Collect the cumulative-mode input file names from *indir* whose names
    match the generated per-compiler/month regular expressions."""
    if not opts_dict['indir']:
        print('input dir is not specified')
        sys.exit(2)
    #regx='(pgi(.)*-(01|02))'
    regx_list = ["mon", "gnu", "pgi"]
    all_files = []
    for prefix in regx_list:
        # BUGFIX: use floor division so the range bound stays an int on py3
        for i in range(opts_dict['fIndex'], opts_dict['fIndex'] + opts_dict['esize'] // 3):
            for j in range(opts_dict['startMon'], opts_dict['endMon'] + 1):
                mon_str = str(j).zfill(2)
                regx = '(^' + prefix + '(.)*' + str(i) + '(.)*-(' + mon_str + '))'
                print('regx=', regx)
                res = [f for f in os.listdir(indir) if re.search(regx, f)]
                in_files = sorted(res)
                all_files.extend(in_files)
    print("all_files=", all_files)
    #in_files=res
    return all_files


#
# Get the shape of all variable list in tuple for all processor
#
def get_shape(shape_tuple, shape1, rank):
    """Return *shape_tuple* with its leading dimension replaced by *shape1*."""
    lst = list(shape_tuple)
    lst[0] = shape1
    shape_tuple = tuple(lst)
    return shape_tuple


#
# Get the mpi partition list for each processor
#
def get_stride_list(len_of_list, me):
    """Return, per rank, the strided index array this rank owns out of a
    list of length *len_of_list*."""
    slice_index = []
    for i in range(me.get_size()):
        index_arr = np.arange(len_of_list)
        slice_index.append(index_arr[i::me.get_size()])
    return slice_index


#
# Gather arrays from each processor by the var_list to the master processor
# and make it an array
#
def gather_npArray(npArray, me, slice_index, array_shape):
    """Gather strided per-rank arrays onto rank 0 into one array of
    *array_shape*; non-root ranks send their data and get zeros back."""
    the_array = np.zeros(array_shape, dtype=np.float32)
    if me.get_rank() == 0:
        k = 0
        for j in slice_index[me.get_rank()]:
            the_array[j, :] = npArray[k, :]
            k = k + 1
    for i in range(1, me.get_size()):
        if me.get_rank() == 0:
            rank, npArray = me.collect()
            k = 0
            for j in slice_index[rank]:
                the_array[j, :] = npArray[k, :]
                k = k + 1
    if me.get_rank() != 0:
        message = {"from_rank": me.get_rank(), "shape": npArray.shape}
        me.collect(npArray)
    me.sync()
    return the_array


if __name__ == "__main__":
    main(sys.argv[1:])
def setUp(self):
    """Build the per-test fixtures: a SimpleComm plus the reference
    MPI world size and rank to compare it against."""
    # Communicator under test
    self.gcomm = simplecomm.create_comm()
    # Reference values taken directly from the MPI world communicator
    self.size, self.rank = MPI_COMM_WORLD.Get_size(), MPI_COMM_WORLD.Get_rank()
def main(argv):
    """Driver for pyCECT: compare a set of test-case runs against a CAM/POP
    ensemble summary file and report pass/fail.

    argv -- command-line arguments (typically sys.argv[1:]); recognized
            options are listed in pyEnsLib.CECT_usage().
    Exits the process (sys.exit) on usage errors or missing files.
    """
    # Get command line stuff and store in a dictionary
    s = 'verbose sumfile= indir= input_globs= tslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVarTest popens jsonfile= mpi_enable nbin= minrange= maxrange= outfile= casejson= npick= pepsi_gm test_failure pop_tol= pop_threshold='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']
    print('--------pyCECT--------')
    print(' ')
    print(dt.strftime("%A, %d. %B %Y %I:%M%p"))
    print(' ')
    print('Ensemble summary file = ' + opts_dict['sumfile'])
    print(' ')
    print('Testcase file directory = ' + opts_dict['indir'])
    print(' ')
    print(' ')

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    ifiles = []
    in_files = []
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
            in_files_first = result['not_pick_files']
            in_files = random.sample(in_files_first, opts_dict['npick'])
            print('Testcase files:')
            print('\n'.join(in_files))
    else:
        wildname = '*' + opts_dict['input_globs'] + '*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            glob_files = glob.glob(full_glob_str)
            in_files.extend(glob_files)

    #in_files_temp=os.listdir(opts_dict['indir'])
    in_files.sort()
    if popens:
        #Partition the input file list
        in_files_list = me.partition(in_files, func=EqualStride(), involved=True)
    else:
        # Random pick non pop files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)
    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(Nio.open_file(frun_temp, "r"))
        else:
            # BUGFIX: message said "EXISTING"; the process is exiting
            print("COULD NOT LOCATE FILE " + frun_temp + " EXITING")
            sys.exit()

    if popens:
        # Read in the included var list
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print(' ')
        print('Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol']))
        print('ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold']))
        zmall, n_timeslice = pyEnsLib.compare_raw_score(opts_dict, ifiles, me.get_rank(), Var3d, Var2d)
        #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0)
        # BUGFIX: np.nan is no longer an accepted threshold (raises in
        # NumPy >= 1.22); sys.maxsize means "never summarize", as intended.
        np.set_printoptions(threshold=sys.maxsize)
        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(zmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(ifiles), opts_dict['nbin']))
        if me.get_rank() == 0:
            fout = open(opts_dict['outfile'], "w")
            for i in range(me.get_size()):
                for j in zmall[i]:
                    np.savetxt(fout, j, fmt='%-7.2e')
    else:
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum = pyEnsLib.read_ensemble_summary(opts_dict['sumfile'])

        # An empty rmsz dict means the summary holds global means only
        if len(ens_rmsz) == 0:
            gmonly = True

        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}
        if not gmonly:
            for k, v in ens_rmsz.iteritems():
                pyEnsLib.addvariables(variables, k, 'zscoreRange', v)
        for k, v in ens_gm.iteritems():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)

        # Get 3d variable name list and 2d variable name list seperately
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])
        if (is_SE ^ is_SE_sum):
            print('Warning: please note the ensemble summary file is different from the testing files, they use different grids')

        # Compare the new run and the ensemble summary file to get rmsz score
        results = {}
        countzscore = np.zeros(len(ifiles), dtype=np.int32)
        countgm = np.zeros(len(ifiles), dtype=np.int32)
        if not gmonly:
            for fcount, fid in enumerate(ifiles):
                otimeSeries = fid.variables
                for var_name in ens_var_name:
                    orig = otimeSeries[var_name]
                    Zscore, has_zscore = pyEnsLib.calculate_raw_score(var_name, orig[opts_dict['tslice']], npts3d, npts2d, ens_avg, ens_stddev, is_SE, opts_dict, 0, 0, 0)
                    if has_zscore:
                        # Add the new run rmsz zscore to the dictionary "results"
                        pyEnsLib.addresults(results, 'zscore', Zscore, var_name, 'f' + str(fcount))
            # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
            for fcount, fid in enumerate(ifiles):
                countzscore[fcount] = pyEnsLib.evaluatestatus('zscore', 'zscoreRange', variables, 'ens', results, 'f' + str(fcount))

        # Calculate the new run global mean
        mean3d, mean2d = pyEnsLib.generate_global_mean_for_summary(ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'], opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i], ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount, fid in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange', variables, 'gm', results, 'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores = pyEnsLib.standardized(means, mu_gm, sigma_gm, loadings_gm)
        pyEnsLib.comparePCAscores(ifiles, new_scores, sigma_scores_gm, opts_dict)

        # Print out
        if opts_dict['printVarTest']:
            print('*********************************************** ')
            print('Variable-based testing (for reference only - not used to determine pass/fail)')
            print('*********************************************** ')
            for fcount, fid in enumerate(ifiles):
                print(' ')
                print('Run ' + str(fcount + 1) + ":")
                print(' ')
                if not gmonly:
                    print('***' + str(countzscore[fcount]), " of " + str(len(ens_var_name)) + ' variables are outside of ensemble RMSZ distribution***')
                    pyEnsLib.printsummary(results, 'ens', 'zscore', 'zscoreRange', (fcount), variables, 'RMSZ')
                    print(' ')
                print('***' + str(countgm[fcount]), " of " + str(len(ens_var_name)) + ' variables are outside of ensemble global mean distribution***')
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange', fcount, variables, 'global mean')
                print(' ')
                print('----------------------------------------------------------------------------')


if __name__ == "__main__":
    main(sys.argv[1:])
    print(' ')
    print("Testing complete.")
debugMsg('calling initialize_main', header=True, verbosity=1) envDict = initialize_main(envDict, caseroot, debugMsg, options.standalone) debugMsg('calling setup_config', header=True, verbosity=1) setup_config(envDict) main_comm.sync() #=================================== if __name__ == "__main__": # initialize simplecomm object main_comm = simplecomm.create_comm(serial=True) # setup an overall timer timer = timekeeper.TimeKeeper() # get commandline options options = commandline_options() # initialize global vprinter object for printing debug messages print("debug level = {0}".format(options.debug[0])) if options.debug: header = "[" + str(main_comm.get_rank()) + "/" + str(main_comm.get_size()) + "]: DEBUG... " debugMsg = vprinter.VPrinter(header=header, verbosity=options.debug[0]) try: timer.start("Total Time")
def __init__(self, specifiers, serial=False, verbosity=1,
             skip_existing=False, overwrite=False, once=False,
             simplecomm=None):
    """
    Constructor

    Parameters:
        specifiers (dict): A dict of named Specifier instances, each
            defining an input specification for this reshaper operation.

    Keyword Arguments:
        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel (False).
            The default is to assume parallel operation (but serial will
            be chosen if the mpi4py cannot be found when trying to
            initialize decomposition.
        verbosity(int): Level of printed output (stdout).  A value of 0
            means no output, and a higher value means more output.  The
            default value is 1.
        skip_existing (bool): Flag specifying whether to skip the generation
            of time-series for variables with time-series files that already
            exist.  Default is False.
        overwrite (bool): Flag specifying whether to forcefully overwrite
            output files if they already exist.  Default is False.
        once (bool): True or False, indicating whether the Reshaper should
            write all metadata to a 'once' file (separately).
        simplecomm (SimpleComm): A SimpleComm object to handle the parallel
            communication, if necessary

    Raises:
        TypeError: If any argument is not of the expected type.
    """

    # Check types
    if not isinstance(specifiers, dict):
        err_msg = "Input must be given in a dictionary of Specifiers"
        raise TypeError(err_msg)
    if type(serial) is not bool:
        err_msg = "Serial indicator must be True or False."
        raise TypeError(err_msg)
    if type(verbosity) is not int:
        err_msg = "Verbosity level must be an integer."
        raise TypeError(err_msg)
    if type(skip_existing) is not bool:
        err_msg = "Skip_existing flag must be True or False."
        raise TypeError(err_msg)
    # FIX (consistency): overwrite was the only boolean flag without a type
    # check; validate it like the others.
    if type(overwrite) is not bool:
        err_msg = "Overwrite flag must be True or False."
        raise TypeError(err_msg)
    if type(once) is not bool:
        err_msg = "Once-file indicator must be True or False."
        raise TypeError(err_msg)
    if simplecomm is not None:
        # BUG FIX: the original tested "simplecomm is not isinstance(...)",
        # an identity comparison against a bool that is True for EVERY
        # object, so any user-supplied SimpleComm raised TypeError.
        if not isinstance(simplecomm, SimpleComm):
            err_msg = "Simple communicator object is not a SimpleComm"
            raise TypeError(err_msg)

    # Whether to write to a once file
    self._use_once_file = once

    # Whether to skip generating time-series whose output already exists
    # (the original comment here was a copy-paste of the once-file comment)
    self._skip_existing = skip_existing

    # Whether to overwrite existing output files
    self._overwrite = overwrite

    # Store the list of specifiers
    self._specifiers = specifiers

    # Store the serial specifier
    self._serial = serial

    # Check for a SimpleComm, and if none create it
    if simplecomm is None:
        simplecomm = create_comm(serial=serial)

    # Pointer to its own messenger
    self._simplecomm = simplecomm

    # Store the verbosity
    self._verbosity = verbosity

    # Set the verbose printer
    self._vprint = VPrinter(verbosity=verbosity)

    # Storage for timing data
    self._times = {}

    # Orders for printing timing data
    self._time_orders = {}

    # Storage for all byte counters
    self._byte_counts = {}
# --- Fragment (Python 2): pairing of new/old test result files, then a
# summary printout. Starts mid-loop; test_name, olddir, filename, item_dict,
# oldfiles, items_to_check, unchecked_* and args are defined earlier.
oldfile = os.path.join(olddir, filename)
if oldfile in oldfiles:
    item_dict["old"] = oldfile
    oldfiles.remove(oldfile)
    items_to_check.append(item_dict)
else:
    item_dict["old"] = None
    unchecked_new_items.append(item_dict)

# Any old files left over have no matching new file
for oldfile in oldfiles:
    item_dict = {"test": test_name}
    item_dict["new"] = None
    item_dict["old"] = oldfile
    unchecked_old_items.append(item_dict)

# Get a basic MPI comm
comm = create_comm(serial=(args.serial or args.list_tests))

# Print tests that will be checked
if comm.is_manager():
    if args.multispec:
        print "Checking multitest results."
    else:
        print "Checking individual test results."
    print
    for test_name in tests_to_check:
        print "Test {0!s}:".format(test_name)
        # counts: items present in both / only new / only old
        num_chk = sum(1 for i in items_to_check if i["test"] == test_name)
        num_new = num_chk + sum(1 for i in unchecked_new_items if i["test"] == test_name)
        num_old = num_chk + sum(1 for i in unchecked_old_items if i["test"] == test_name)
        print "  Checking {0!s} of {1!s}".format(num_chk, num_new),
def main(argv=None): args = cli(argv) # Create the necessary SimpleComm scomm = create_comm(serial=args.serial) # Do setup only on manager node if scomm.is_manager(): # Check that the specfile exists if not exists(args.stdfile): raise OSError(("Output specification file {!r} not " "found").format(args.stdfile)) # Read the specfile into a dictionary print("Reading standardization file: {}".format(args.stdfile)) dsdict = json_load(open(args.stdfile, "r"), object_pairs_hook=OrderedDict) # Parse the output Dataset print( "Creating output dataset descriptor from standardization file...") outds = OutputDatasetDesc(dsdict=dsdict) else: outds = None # Send the output descriptor to all nodes outds = scomm.partition(outds, func=Duplicate(), involved=True) # Sync scomm.sync() # Continue setup only on manager node if scomm.is_manager(): # Gather the list of input files infiles = [] for infile in args.infiles: infiles.extend(glob(infile)) # If no input files, stop here if len(infiles) == 0: print("Standardization file validated.") return # Parse the input Dataset print( "Creating input dataset descriptor from {} input files...".format( len(infiles))) inpds = InputDatasetDesc(filenames=infiles) else: inpds = None # Send the input descriptor to all nodes inpds = scomm.partition(inpds, func=Duplicate(), involved=True) # Sync and continue process on all nodes scomm.sync() # Check for warn/error if args.error: simplefilter("error", ValidationWarning) # Try importing all of the necessary user-defined modules if args.module is not None: for i, modpath in enumerate(args.module): load_source("user{}".format(i), modpath) # Setup the PyConform data flow on all nodes if scomm.is_manager(): print("Creating the data flow...") dataflow = DataFlow(inpds, outds) # Execute the data flow (write to files) history = not args.no_history dataflow.execute( chunks=dict(args.chunks), scomm=scomm, history=history, deflate=args.deflate, debug=args.debug, )
# --- Fragment: tail of a batch-setup main() plus the script entry point.
# envDict and imb_name are defined earlier (not visible here).
debugMsg('calling setup_config', header=True, verbosity=1)
setup_config(envDict)

debugMsg('expanding variables in batch script', header=True, verbosity=1)
expand_batch_vars(envDict, imb_name)

main_comm.sync()

#===================================

if __name__ == "__main__":
    # initialize simplecomm object
    main_comm = simplecomm.create_comm(serial=True)

    # setup an overall timer
    timer = timekeeper.TimeKeeper()

    # get commandline options
    options = commandline_options()

    # initialize global vprinter object for printing debug messages
    # NOTE(review): options.debug[0] is indexed before the truthiness check
    # below - confirm options.debug always has a non-empty default.
    print("debug level = {0}".format(options.debug[0]))
    if options.debug:
        header = "[" + str(main_comm.get_rank()) + "/" + str(
            main_comm.get_size()) + "]: DEBUG... "
        debugMsg = vprinter.VPrinter(header=header, verbosity=options.debug[0])

    try:
        # NOTE(review): fragment truncated here - the try body and its
        # except/finally lie outside the visible chunk.
def main(argv): # Get command line stuff and store in a dictionary s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex=' optkeys = s.split() try: opts, args = getopt.getopt(argv, "h", optkeys) except getopt.GetoptError: pyEnsLib.EnsSum_usage() sys.exit(2) # Put command line options in a dictionary - also set defaults opts_dict={} # Defaults opts_dict['tag'] = 'cesm2_0_beta10' opts_dict['compset'] = 'F2000climo' opts_dict['mach'] = 'cheyenne' opts_dict['esize'] = 350 opts_dict['tslice'] = 1 opts_dict['res'] = 'f19_f19' opts_dict['sumfile'] = 'ens.summary.nc' opts_dict['indir'] = './' opts_dict['sumfiledir'] = './' opts_dict['jsonfile'] = 'exclude_empty.json' opts_dict['verbose'] = False opts_dict['mpi_enable'] = False opts_dict['maxnorm'] = False opts_dict['gmonly'] = True opts_dict['popens'] = False opts_dict['cumul'] = False opts_dict['regx'] = 'test' opts_dict['startMon'] = 1 opts_dict['endMon'] = 1 opts_dict['fIndex'] = 151 # This creates the dictionary of input arguments opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,'ES',opts_dict) verbose = opts_dict['verbose'] st = opts_dict['esize'] esize = int(st) if opts_dict['popens' == True]: print "Error: Please use pyEnsSumPop.py for a POP ensemble (not --popens)." sys.exit() if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']): print 'Please specify --tag, --compset, --mach and --res options' sys.exit() # Now find file names in indir input_dir = opts_dict['indir'] # The var list that will be excluded ex_varlist=[] inc_varlist=[] # Create a mpi simplecomm object if opts_dict['mpi_enable']: me=simplecomm.create_comm() else: me=simplecomm.create_comm(not opts_dict['mpi_enable']) if me.get_rank() == 0: print 'Running pyEnsSum!' 
# --- Fragment (Python 2): continuation of the pyEnsSum main() above -
# reads the include/exclude variable list, validates the input directory,
# opens the ensemble files, and classifies dimensions/variables.
if me.get_rank() == 0 and (verbose == True):
    print opts_dict
    print 'Ensemble size for summary = ', esize

exclude = False
if me.get_rank() == 0:
    if opts_dict['jsonfile']:
        inc_varlist = []
        # Read in the excluded or included var list
        ex_varlist, exclude = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ES')
        if exclude == False:
            # the json list is an INCLUDE list; swap it over
            inc_varlist = ex_varlist
            ex_varlist = []
        # Read in the included var list
        #inc_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')

# Broadcast the excluded var list to each processor
#if opts_dict['mpi_enable']:
#    ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)

# Broadcast the excluded var list to each processor
if opts_dict['mpi_enable']:
    exclude = me.partition(exclude, func=Duplicate(), involved=True)
    if exclude:
        ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)
    else:
        inc_varlist = me.partition(inc_varlist, func=Duplicate(), involved=True)

in_files = []
if (os.path.exists(input_dir)):
    # Get the list of files
    in_files_temp = os.listdir(input_dir)
    in_files = sorted(in_files_temp)
    # Make sure we have enough
    num_files = len(in_files)
    if me.get_rank() == 0 and (verbose == True):
        print 'Number of files in input directory = ', num_files
    if (num_files < esize):
        if me.get_rank() == 0 and (verbose == True):
            print 'Number of files in input directory (', num_files,\
                ') is less than specified ensemble size of ', esize
        sys.exit(2)
    if (num_files > esize):
        if me.get_rank() == 0 and (verbose == True):
            print 'NOTE: Number of files in ', input_dir, \
                'is greater than specified ensemble size of ', esize,\
                '\nwill just use the first ', esize, 'files'
else:
    if me.get_rank() == 0:
        print 'Input directory: ', input_dir, ' not found'
    sys.exit(2)

if opts_dict['cumul']:
    if opts_dict['regx']:
        in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'], opts_dict['regx'])
    in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
    if me.get_rank() == 0 and (verbose == True):
        print 'in_files=', in_files

# Open the files in the input directory
o_files = []
if me.get_rank() == 0 and opts_dict['verbose']:
    print 'Input files are: '
    print "\n".join(in_files)
    #for i in in_files:
    #    print "in_files =",i

for onefile in in_files[0:esize]:
    if (os.path.isfile(input_dir + '/' + onefile)):
        o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
    else:
        if me.get_rank() == 0:
            print "COULD NOT LOCATE FILE ", input_dir + '/' + onefile, "! EXITING...."
        sys.exit()

# Store dimensions of the input fields
if me.get_rank() == 0 and (verbose == True):
    print "Getting spatial dimensions"
nlev = -1
nilev = -1
ncol = -1
nlat = -1
nlon = -1
lonkey = ''
latkey = ''

# Look at first file and get dims
input_dims = o_files[0].dimensions
ndims = len(input_dims)

for key in input_dims:
    if key == "lev":
        nlev = input_dims["lev"]
    elif key == "ilev":
        nilev = input_dims["ilev"]
    elif key == "ncol":
        ncol = input_dims["ncol"]
    elif (key == "nlon") or (key == "lon"):
        nlon = input_dims[key]
        lonkey = key
    elif (key == "nlat") or (key == "lat"):
        nlat = input_dims[key]
        latkey = key

if (nlev == -1):
    if me.get_rank() == 0:
        print "COULD NOT LOCATE valid dimension lev => EXITING...."
    sys.exit()

if ((ncol == -1) and ((nlat == -1) or (nlon == -1))):
    if me.get_rank() == 0:
        print "Need either lat/lon or ncol => EXITING...."
    sys.exit()

# Check if this is SE or FV data
if (ncol != -1):
    is_SE = True
else:
    is_SE = False

# Make sure all files have the same dimensions
if me.get_rank() == 0 and (verbose == True):
    print "Checking dimensions across files...."
    print 'lev = ', nlev
    if (is_SE == True):
        print 'ncol = ', ncol
    else:
        print 'nlat = ', nlat
        print 'nlon = ', nlon

for count, this_file in enumerate(o_files):
    input_dims = this_file.dimensions
    if (is_SE == True):
        if (nlev != int(input_dims["lev"]) or (ncol != int(input_dims["ncol"]))):
            if me.get_rank() == 0:
                # NOTE(review): both operands are in_files[0] - the message
                # never names the mismatching file (in_files[count]).
                print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
            sys.exit()
    else:
        if (nlev != int(input_dims["lev"]) or (nlat != int(input_dims[latkey]))\
                or (nlon != int(input_dims[lonkey]))):
            if me.get_rank() == 0:
                print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
            sys.exit()

# Get 2d vars, 3d vars and all vars (For now include all variables)
vars_dict_all = o_files[0].variables

# Remove the excluded variables (specified in json file) from variable dictionary
#print len(vars_dict_all)
if exclude:
    vars_dict = vars_dict_all
    for i in ex_varlist:
        if i in vars_dict:
            del vars_dict[i]
#Given an included var list, remove all float var that are not on the list
else:
    vars_dict = vars_dict_all.copy()
    for k, v in vars_dict_all.iteritems():
        if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
            #print vars_dict_all[k].typecode()
            #print k
            del vars_dict[k]

num_vars = len(vars_dict)
#print num_vars
#if me.get_rank() == 0:
#    for k,v in vars_dict.iteritems():
#        print 'vars_dict',k,vars_dict[k].typecode()

str_size = 0
def __init__(self, in_directory, out_directory, prefix, suffix, file_pattern='null',
             date_pattern='null', m_id=['-999'], hist_type='slice', avg_list=[],
             weighted=False, split=False, split_files='null', split_orig_size='null',
             ncformat='netcdf4c', varlist=[], serial=False,
             mean_diff_rms_obs_dir='null', region_nc_var='null', regions={},
             region_wgt_var='null', obs_file='null', reg_obs_file_suffix='null',
             obs_dir='null', main_comm=None, clobber=False, ice_obs_file='null',
             reg_file='null', ncl_location='null', year0=-99, year1=-99,
             collapse_dim='', vertical_levels=60):
    # NOTE(review): mutable default arguments (m_id, avg_list, varlist,
    # regions) are shared across calls - confirm no caller mutates them.
    '''
    Initializes the internal data with optional arguments

    @param in_directory      Where the input directory resides (needs full path).

    @param out_directory     Where the output will be produced (needs full path).

    @param prefix            String specifying the full file name before the date string.

    @param suffix            String specifying the suffix of the file names

    @param file_pattern      File pattern used put the prefix, date, and suffix together for input files.

    @param date_pattern      The pattern used to decipher the date string within the file name.

    @param m_id              Array of member identifiers.  All averages will be done on each member
                             individually and then across all members.

    @param hist_type         Type of file ('slice' or 'series').  Default is 'slice'.

    @param avg_list          List of averages that need to be computed.  Elements should contain
                             aveType:year0:year1.  year2 is only required for multi year averaging.

    @param weighted          Boolean variable to selected if weights will be applied to the averaging.
                             True = weights will be applied.  Default is False.

    @param split             Boolean variable.  True = the file is split spatially and the final average
                             needs to be pieced together.  (ie. CICE times series files) Default is False.

    @param split_files       The strings indicating the naming difference between split files.  Expects
                             a string with elements separated by a comma.  Defualt is 'null'.

    @param split_orig_size   A string listing the lat and lon values of the origianl grid size.  Needed
                             in case some of the grid has been deleted.  (example: 'lon=288,lat=192').
                             Default is 'null'.

    @param ncformat          Format to output the averaged file(s) in.  Default is 'netcdf4c'.  Other
                             options: 'netcdf','netcdf4','netcdf4c'

    @param varlist           Optional variables list, if not averaging all variables

    @param serial            Boolean to run in serial mode.  True=serial (without MPI) False=run
                             in parallel(with MPI)  False requires mpi4py to be installed.  Default
                             is False.

    @param regions           Dictionary that contains regions to average over.  Fromat is
                             'string region name: int region value'.  Default is an empty dictionary.

    @param region_nc_var     String that identifies the netcdf variable that contains the region mask
                             used by a regional average.

    @param region_wgt_var    String that identifies the netcdf variable that contains the weights.

    @param obs_file          Observational file used for the creation of the mean_diff_rms file.  This
                             file must contain all of the variables within the variable list (or if a
                             variable list is not specified, must contain all hist file variables).
                             Dimension must be nlon and nlat.

    @param reg_obs_file_suffix  The suffix of the regional, weighted averages of the 'obs_file'.  Used
                             for the creation of the mean_diff_rms file.

    @param obs_dir           Full path to the observational files used for the mean_diff_rms file.

    @param main_comm         A simplecomm to be used by the PyAverager.  If not specified, one will be
                             created by this specifier.  Default None.

    @param clobber           Remove netcdf output file(s) if they exist.  Default False - will exit
                             if an output file of the same name exists.

    @param ice_obs_file      Full path to the observational file used to create the cice model pre_proc file

    @param reg_file          Full path to the regional file used to create the cice model pre_proc file

    @param ncl_location      Location of where the ncl scripts reside

    @param year0             The first year - only used to create the cice pre_proc file.

    @param year1             The last year - only used to create the cice pre_proc file.

    @param collapse_dims     Used to collapse/average over one dim.

    @param vertical_levels   Number of ocean vertical levels
    '''

    # Where the input is located
    self.in_directory = in_directory

    # Where the output should be produced
    self.out_directory = out_directory

    # Full file name up to the date string
    self.prefix = prefix

    # The suffix of the data files
    self.suffix = suffix

    # Type of file
    self.hist_type = hist_type

    # List of averages to compute
    self.avg_list = avg_list

    # Should weights be applied?
    self.weighted = weighted

    # Are files split spatially?
    self.split = split

    # Split file name indicators
    self.split_files = split_files

    # The original grid size of the split files
    self.split_orig_size = split_orig_size

    # The netcdf output format
    self.ncformat = ncformat

    # Varlist to average (if not all variables)
    self.varlist = varlist

    # Run in serial mode?  If True, will be ran without MPI
    self.serial = serial

    # Directory where to find the regional obds files for the mean_diff_rms climo file
    self.mean_diff_rms_obs_dir = mean_diff_rms_obs_dir

    # Regions to average over
    self.regions = regions

    # Netcdf variable name that contains a region mask
    self.region_nc_var = region_nc_var

    # Netcdf variable name that contains the weights
    self.region_wgt_var = region_wgt_var

    # String that indicates the suffix of the regional obs files used for the mean_diff_rms file
    self.reg_obs_file_suffix = reg_obs_file_suffix

    # String that indicates the name of the observational file
    self.obs_file = obs_file

    # String indicating the path to the observational files used for the mean_diff_rms file
    self.obs_dir = obs_dir

    # File pattern used to piece together a full file name
    if (file_pattern == 'null'):
        if (hist_type == 'slice'):
            self.file_pattern = ['$prefix', '.', '$date_pattern', '.', '$suffix']
        if (hist_type == 'series'):
            if split:
                self.file_pattern = ['$prefix', '.', '$var', '_', '$hem', '.', '$date_pattern', '.', '$suffix']
            else:
                self.file_pattern = ['$prefix', '.', '$var', '.', '$date_pattern', '.', '$suffix']
    else:
        self.file_pattern = file_pattern

    # The date pattern to decipher the date within the file name
    self.date_pattern = date_pattern

    # Member identifiers
    self.m_id = m_id

    # Get first and last years used in the averaging by parsing the avg_list
    dates = []
    for avg in avg_list:
        avg_descr = avg.split(':')
        for yr in avg_descr[1:]:
            dates.append(int(yr))
    if (year0 == -99 and year1 == -99):
        self.year0 = int(min(dates))
        self.year1 = int(max(dates))
    else:
        self.year0 = int(year0)
        self.year1 = int(year1)

    # Initialize a simple_comm object if one was not passed in by the user
    if (main_comm is None):
        from asaptools import simplecomm
        self.main_comm = simplecomm.create_comm(serial=serial)
    else:
        self.main_comm = main_comm

    # True/False, rm average file(s) is it has already been created
    self.clobber = clobber

    # File that contains the weight/area information
    self.ice_obs_file = ice_obs_file

    # File that exists or will be created that contains a region mask for ice
    self.reg_file = reg_file

    # Location of the ncl script that will be used to create reg_file if it doesn't exist
    self.ncl_location = ncl_location

    # Used to collapse/average over one dim.
    self.collapse_dim = collapse_dim

    # Used to specify the number of ocean vertical levels
    self.vertical_levels = vertical_levels
def setUp(self):
    """Create one serial and one parallel SimpleComm, and record this
    process's COMM_WORLD size and rank for the tests."""
    make_comm = simplecomm.create_comm
    self.scomm = make_comm(serial=True)
    self.pcomm = make_comm(serial=False)
    world = MPI_COMM_WORLD
    self.size = world.Get_size()
    self.rank = world.Get_rank()
def main(argv):
    """Build a CAM ensemble summary file (older pyEnsSum, Python 2)."""
    print "Running pyEnsSum!"

    # Get command line stuff and store in a dictionary
    s = "tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex="
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict["tag"] = ""
    opts_dict["compset"] = ""
    opts_dict["mach"] = ""
    opts_dict["esize"] = 151
    opts_dict["tslice"] = 0
    opts_dict["res"] = ""
    opts_dict["sumfile"] = "ens.summary.nc"
    opts_dict["indir"] = "./"
    opts_dict["sumfiledir"] = "./"
    opts_dict["jsonfile"] = ""
    opts_dict["verbose"] = True
    opts_dict["mpi_enable"] = False
    opts_dict["maxnorm"] = False
    opts_dict["gmonly"] = False
    opts_dict["popens"] = False
    opts_dict["cumul"] = False
    opts_dict["regx"] = "test"
    opts_dict["startMon"] = 1
    opts_dict["endMon"] = 1
    opts_dict["fIndex"] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, "ES", opts_dict)

    verbose = opts_dict["verbose"]
    st = opts_dict["esize"]
    esize = int(st)

    if verbose == True:
        print opts_dict
        print "Ensemble size for summary = ", esize

    if not (opts_dict["tag"] and opts_dict["compset"] and opts_dict["mach"] or opts_dict["res"]):
        print "Please specify --tag, --compset, --mach and --res options"
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict["indir"]
    # The var list that will be excluded
    ex_varlist = []

    # Create a mpi simplecomm object
    if opts_dict["mpi_enable"]:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict["mpi_enable"])

    if me.get_rank() == 0:
        if opts_dict["jsonfile"]:
            # Read in the excluded var list
            ex_varlist = pyEnsLib.read_jsonlist(opts_dict["jsonfile"], "ES")

    # Broadcast the excluded var list to each processor
    if opts_dict["mpi_enable"]:
        ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)

    in_files = []
    if os.path.exists(input_dir):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)
        # Make sure we have enough
        num_files = len(in_files)
        if verbose == True:
            print "Number of files in input directory = ", num_files
        if num_files < esize:
            print "Number of files in input directory (", num_files, ") is less than specified ensemble size of ", esize
            sys.exit(2)
        if num_files > esize:
            print "NOTE: Number of files in ", input_dir, "is greater than specified ensemble size of ", esize, "\nwill just use the first ", esize, "files"
    else:
        print "Input directory: ", input_dir, " not found"
        sys.exit(2)

    if opts_dict["cumul"]:
        if opts_dict["regx"]:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict["indir"], opts_dict["regx"])
        in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
        if me.get_rank() == 0:
            print "in_files=", in_files

    # Open the files in the input directory
    o_files = []
    for onefile in in_files[0:esize]:
        if os.path.isfile(input_dir + "/" + onefile):
            o_files.append(Nio.open_file(input_dir + "/" + onefile, "r"))
        else:
            print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...."
            sys.exit()

    # Store dimensions of the input fields
    if verbose == True:
        print "Getting spatial dimensions"
    nlev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ""
    latkey = ""
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key == "lon"):
            nlon = input_dims[key]
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey = key

    if nlev == -1:
        print "COULD NOT LOCATE valid dimension lev => EXITING...."
        sys.exit()

    if (ncol == -1) and ((nlat == -1) or (nlon == -1)):
        print "Need either lat/lon or ncol => EXITING...."
sys.exit() # Check if this is SE or FV data if ncol != -1: is_SE = True else: is_SE = False # Make sure all files have the same dimensions if verbose == True: print "Checking dimensions across files...." print "lev = ", nlev if is_SE == True: print "ncol = ", ncol else: print "nlat = ", nlat print "nlon = ", nlon for count, this_file in enumerate(o_files): input_dims = this_file.dimensions if is_SE == True: if nlev != int(input_dims["lev"]) or (ncol != int(input_dims["ncol"])): print "Dimension mismatch between ", in_files[0], "and", in_files[0], "!!!" sys.exit() else: if nlev != int(input_dims["lev"]) or (nlat != int(input_dims[latkey])) or (nlon != int(input_dims[lonkey])): print "Dimension mismatch between ", in_files[0], "and", in_files[0], "!!!" sys.exit() # Get 2d vars, 3d vars and all vars (For now include all variables) vars_dict = o_files[0].variables # Remove the excluded variables (specified in json file) from variable dictionary if ex_varlist: for i in ex_varlist: del vars_dict[i] num_vars = len(vars_dict) if verbose == True: print "Number of variables (including metadata) found = ", num_vars str_size = 0 d2_var_names = [] d3_var_names = [] num_2d = 0 num_3d = 0 # Which are 2d, which are 3d and max str_size for k, v in vars_dict.iteritems(): var = k vd = v.dimensions # all the variable's dimensions (names) vr = v.rank # num dimension vs = v.shape # dim values is_2d = False is_3d = False if is_SE == True: # (time, lev, ncol) or (time, ncol) if (vr == 2) and (vs[1] == ncol): is_2d = True num_2d += 1 elif (vr == 3) and (vs[2] == ncol and vs[1] == nlev): is_3d = True num_3d += 1 else: # (time, lev, nlon, nlon) or (time, nlat, nlon) if (vr == 3) and (vs[1] == nlat and vs[2] == nlon): is_2d = True num_2d += 1 elif (vr == 4) and (vs[2] == nlat and vs[3] == nlon and vs[1] == nlev): is_3d = True num_3d += 1 if is_3d == True: str_size = max(str_size, len(k)) d3_var_names.append(k) elif is_2d == True: str_size = max(str_size, len(k)) d2_var_names.append(k) # Now 
sort these and combine (this sorts caps first, then lower case - # which is what we want) d2_var_names.sort() d3_var_names.sort() # All vars is 3d vars first (sorted), the 2d vars all_var_names = list(d3_var_names) all_var_names += d2_var_names n_all_var_names = len(all_var_names) if verbose == True: print "num vars = ", n_all_var_names, "(3d = ", num_3d, " and 2d = ", num_2d, ")" # Create new summary ensemble file this_sumfile = opts_dict["sumfile"] if verbose == True: print "Creating ", this_sumfile, " ..." if me.get_rank() == 0 | opts_dict["popens"]: if os.path.exists(this_sumfile): os.unlink(this_sumfile) opt = Nio.options() opt.PreFill = False opt.Format = "NetCDF4Classic" nc_sumfile = Nio.open_file(this_sumfile, "w", options=opt) # Set dimensions if verbose == True: print "Setting dimensions ....." if is_SE == True: nc_sumfile.create_dimension("ncol", ncol) else: nc_sumfile.create_dimension("nlat", nlat) nc_sumfile.create_dimension("nlon", nlon) nc_sumfile.create_dimension("nlev", nlev) nc_sumfile.create_dimension("ens_size", esize) nc_sumfile.create_dimension("nvars", num_3d + num_2d) nc_sumfile.create_dimension("nvars3d", num_3d) nc_sumfile.create_dimension("nvars2d", num_2d) nc_sumfile.create_dimension("str_size", str_size) # Set global attributes now = time.strftime("%c") if verbose == True: print "Setting global attributes ....." setattr(nc_sumfile, "creation_date", now) setattr(nc_sumfile, "title", "CAM verification ensemble summary file") setattr(nc_sumfile, "tag", opts_dict["tag"]) setattr(nc_sumfile, "compset", opts_dict["compset"]) setattr(nc_sumfile, "resolution", opts_dict["res"]) setattr(nc_sumfile, "machine", opts_dict["mach"]) # Create variables if verbose == True: print "Creating variables ....." 
        # --- Fragment (Python 2): create the summary-file variables; still
        # inside the manager/popens branch opened above.
        v_lev = nc_sumfile.create_variable("lev", "f", ("nlev",))
        v_vars = nc_sumfile.create_variable("vars", "S1", ("nvars", "str_size"))
        v_var3d = nc_sumfile.create_variable("var3d", "S1", ("nvars3d", "str_size"))
        v_var2d = nc_sumfile.create_variable("var2d", "S1", ("nvars2d", "str_size"))
        # Per-ensemble statistics are only written when not in gm-only mode
        if not opts_dict["gmonly"]:
            if is_SE == True:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", "f", ("nvars3d", "nlev", "ncol"))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", "f", ("nvars3d", "nlev", "ncol"))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", "f", ("nvars2d", "ncol"))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", "f", ("nvars2d", "ncol"))
            else:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", "f", ("nvars3d", "nlev", "nlat", "nlon"))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", "f", ("nvars3d", "nlev", "nlat", "nlon"))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", "f", ("nvars2d", "nlat", "nlon"))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", "f", ("nvars2d", "nlat", "nlon"))
            v_RMSZ = nc_sumfile.create_variable("RMSZ", "f", ("nvars", "ens_size"))
        v_gm = nc_sumfile.create_variable("global_mean", "f", ("nvars", "ens_size"))
        v_loadings_gm = nc_sumfile.create_variable("loadings_gm", "f", ("nvars", "nvars"))
        v_mu_gm = nc_sumfile.create_variable("mu_gm", "f", ("nvars",))
        v_sigma_gm = nc_sumfile.create_variable("sigma_gm", "f", ("nvars",))
        v_sigma_scores_gm = nc_sumfile.create_variable("sigma_scores_gm", "f", ("nvars",))

        # Assign vars, var3d and var2d
        if verbose == True:
            print "Assigning vars, var3d, and var2d ....."
        # --- Fragment (Python 2): pad variable names to str_size characters
        # so they fit the fixed-width "S1" character arrays created above.
        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(" ") * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(" ") * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(" ") * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if verbose == True:
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        v_lev = lev_data

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable, we also do max norm also (currently done in pyStats)
    tslice = opts_dict["tslice"]

    if not opts_dict["cumul"]:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True)
        var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # Calculate global means #
    if verbose == True:
        print "Calculating global means ....."
    if not opts_dict["cumul"]:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
            o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict
        )
    if verbose == True:
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    if verbose == True:
        print "Calculating RMSZ scores ....."
if (not opts_dict["gmonly"]) | (opts_dict["cumul"]): zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz( o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict ) # Calculate max norm ensemble if opts_dict["maxnorm"]: if verbose == True: print "Calculating max norm of ensembles ....." pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc) pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc) if opts_dict["mpi_enable"] & (not opts_dict["popens"]): if not opts_dict["cumul"]: # Gather the 3d variable results from all processors to the master processor slice_index = get_stride_list(len(d3_var_names), me) # Gather global means 3d results gm3d = gather_npArray(gm3d, me, slice_index, (len(d3_var_names), len(o_files))) if not opts_dict["gmonly"]: # Gather zscore3d results zscore3d = gather_npArray(zscore3d, me, slice_index, (len(d3_var_names), len(o_files))) # Gather ens_avg3d and ens_stddev3d results shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names), me.get_rank()) ens_avg3d = gather_npArray(ens_avg3d, me, slice_index, shape_tuple3d) ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index, shape_tuple3d) # Gather 2d variable results from all processors to the master processor slice_index = get_stride_list(len(d2_var_names), me) # Gather global means 2d results gm2d = gather_npArray(gm2d, me, slice_index, (len(d2_var_names), len(o_files))) if not opts_dict["gmonly"]: # Gather zscore2d results zscore2d = gather_npArray(zscore2d, me, slice_index, (len(d2_var_names), len(o_files))) # Gather ens_avg3d and ens_stddev2d results shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names), me.get_rank()) ens_avg2d = gather_npArray(ens_avg2d, me, slice_index, shape_tuple2d) ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index, shape_tuple2d) else: gmall = np.concatenate((temp1, temp2), axis=0) gmall = pyEnsLib.gather_npArray_pop(gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names))) # Assign 
to file: if me.get_rank() == 0 | opts_dict["popens"]: if not opts_dict["cumul"]: gmall = np.concatenate((gm3d, gm2d), axis=0) if not opts_dict["gmonly"]: Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0) v_RMSZ[:, :] = Zscoreall[:, :] if not opts_dict["gmonly"]: if is_SE == True: v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :] v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :] v_ens_avg2d[:, :] = ens_avg2d[:, :] v_ens_stddev2d[:, :] = ens_stddev2d[:, :] else: v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :] v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :] v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :] v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :] else: gmall_temp = np.transpose(gmall[:, :]) gmall = gmall_temp mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(gmall) v_gm[:, :] = gmall[:, :] v_mu_gm[:] = mu_gm[:] v_sigma_gm[:] = sigma_gm[:].astype(np.float32) v_loadings_gm[:, :] = loadings_gm[:, :] v_sigma_scores_gm[:] = scores_gm[:] print "All Done"
def __init__(self, specifiers, serial=False, verbosity=1, wmode='w',
             once=False, simplecomm=None):
    """
    Constructor

    Parameters:
        specifiers (dict): A dict of named Specifier instances, each
            defining an input specification for this reshaper operation.
        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel (False).
            The default is to assume parallel operation (but serial will
            be chosen if the mpi4py cannot be found when trying to
            initialize decomposition.
        verbosity (int): Level of printed output (stdout).  A value of 0
            means no output, and a higher value means more output.  The
            default value is 1.
        wmode (str): The mode to use for writing output.  Can be 'w' for
            normal write operation, 's' to skip the output generation for
            existing time-series files, 'o' to overwrite existing
            time-series files, 'a' to append to existing time-series files.
        once (bool): True or False, indicating whether the Reshaper should
            write all metadata to a 'once' file (separately).
        simplecomm (SimpleComm): A SimpleComm object to handle the
            parallel communication, if necessary

    Raises:
        TypeError: If any argument has an unexpected type.
        ValueError: If wmode is not one of 'w', 's', 'o', 'a'.
    """
    # Validate argument types (same checks, same order as before)
    if not isinstance(specifiers, dict):
        raise TypeError("Input must be given in a dictionary of Specifiers")
    if type(serial) is not bool:
        raise TypeError("Serial indicator must be True or False.")
    if type(verbosity) is not int:
        raise TypeError("Verbosity level must be an integer.")
    if type(wmode) is not str:
        raise TypeError("Write mode flag must be a str.")
    if type(once) is not bool:
        raise TypeError("Once-file indicator must be True or False.")
    if simplecomm is not None and not isinstance(simplecomm, SimpleComm):
        raise TypeError("Simple communicator object is not a SimpleComm")
    if wmode not in ['w', 's', 'o', 'a']:
        raise ValueError("Write mode '{}' not recognized".format(wmode))

    # Whether to write to a once file
    self._use_once_file = once

    # Output file write mode
    self._write_mode = wmode

    # Store the list of specifiers
    self._specifiers = specifiers

    # Store the serial specifier
    self._serial = serial

    # Create a SimpleComm if the caller did not supply one
    if simplecomm is None:
        simplecomm = create_comm(serial=serial)

    # Pointer to its own messenger
    self._simplecomm = simplecomm

    # Store the verbosity and its matching verbose printer
    self._verbosity = verbosity
    self._vprint = VPrinter(verbosity=verbosity)

    # Storage and print ordering for timing data
    self._times = {}
    self._time_orders = {}

    # Storage for all byte counters
    self._byte_counts = {}
def __init__(self, specifier, serial=False, verbosity=1,
             skip_existing=False, overwrite=False, once=False,
             simplecomm=None):
    """
    Constructor

    Parameters:
        specifier (Specifier): An instance of the Specifier class,
            defining the input specification for this reshaper operation.

    Keyword Arguments:
        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel (False).
            The default is to assume parallel operation (but serial will
            be chosen if the mpi4py cannot be found when trying to
            initialize decomposition.
        verbosity (int): Level of printed output (stdout).  A value of 0
            means no output, and a higher value means more output.  The
            default value is 1.
        skip_existing (bool): Flag specifying whether to skip the
            generation of time-series for variables with time-series
            files that already exist.  Default is False.
        overwrite (bool): Flag specifying whether to forcefully overwrite
            output files if they already exist.  Default is False.
        once (bool): True or False, indicating whether the Reshaper
            should write all metadata to a 'once' file (separately).
        simplecomm (SimpleComm): A SimpleComm object to handle the
            parallel communication, if necessary

    Raises:
        TypeError: If any argument is not of its documented type.
    """
    # Type checking (or double-checking)
    if not isinstance(specifier, Specifier):
        err_msg = "Input must be given in the form of a Specifier object"
        raise TypeError(err_msg)
    if type(serial) is not bool:
        err_msg = "Serial indicator must be True or False."
        raise TypeError(err_msg)
    if type(verbosity) is not int:
        err_msg = "Verbosity level must be an integer."
        raise TypeError(err_msg)
    if type(skip_existing) is not bool:
        err_msg = "Skip_existing flag must be True or False."
        raise TypeError(err_msg)
    # FIX: 'overwrite' is documented as a bool but was the only flag not
    # validated; check it like its siblings
    if type(overwrite) is not bool:
        err_msg = "Overwrite flag must be True or False."
        raise TypeError(err_msg)
    if type(once) is not bool:
        err_msg = "Once-file indicator must be True or False."
        raise TypeError(err_msg)
    if simplecomm is not None:
        if not isinstance(simplecomm, (SimpleComm, SimpleCommMPI)):
            # BUG FIX: a stray comma previously made err_msg a 2-tuple of
            # strings instead of one message string
            err_msg = ("Simple communicator object is not a SimpleComm or "
                       "SimpleCommMPI")
            raise TypeError(err_msg)

    # Whether to write a once file
    self._use_once_file = once

    # Internal timer data
    self._timer = TimeKeeper()

    # Dictionary storing read/write data amounts
    self.assumed_block_size = float(4 * 1024 * 1024)
    self._byte_counts = {}

    self._timer.start('Initializing Simple Communicator')
    if simplecomm is None:
        simplecomm = create_comm(serial=serial)
    # Reference to the simple communicator
    self._simplecomm = simplecomm
    self._timer.stop('Initializing Simple Communicator')

    # Construct the print header: "[rank/size] "
    header = ''.join(['[', str(self._simplecomm.get_rank()), '/',
                      str(self._simplecomm.get_size()), '] '])

    # Reference to the verbose printer tool
    self._vprint = VPrinter(header=header, verbosity=verbosity)

    # Debug output starting
    if self._simplecomm.is_manager():
        self._vprint('Initializing Reshaper', verbosity=1)

    # Validate the user input data
    self._timer.start('Specifier Validation')
    specifier.validate()
    self._timer.stop('Specifier Validation')
    if self._simplecomm.is_manager():
        self._vprint('Specifier validated', verbosity=1)

    # Setup PyNIO options (including disabling the default PreFill option)
    opt = Nio.options()
    opt.PreFill = False

    # Determine the Format and CompressionLevel options
    # from the NetCDF format string in the Specifier
    if specifier.netcdf_format == 'netcdf':
        opt.Format = 'Classic'
    elif specifier.netcdf_format == 'netcdf4':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = 0
    elif specifier.netcdf_format == 'netcdf4c':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = specifier.netcdf_deflate
        if self._simplecomm.is_manager():
            self._vprint('PyNIO compression level: {0}'.format(
                specifier.netcdf_deflate), verbosity=2)
    self._nio_options = opt
    if self._simplecomm.is_manager():
        self._vprint('PyNIO options set', verbosity=2)

    # Open all of the input files
    self._timer.start('Open Input Files')
    self._input_files = []
    for filename in specifier.input_file_list:
        self._input_files.append(Nio.open_file(filename, "r"))
    self._timer.stop('Open Input Files')
    if self._simplecomm.is_manager():
        self._vprint('Input files opened', verbosity=2)

    # Validate the input files themselves
    self._timer.start('Input File Validation')
    self._validate_input_files(specifier)
    self._timer.stop('Input File Validation')
    if self._simplecomm.is_manager():
        self._vprint('Input files validated', verbosity=2)

    # Sort the input files by time
    self._timer.start('Sort Input Files')
    self._sort_input_files_by_time(specifier)
    self._timer.stop('Sort Input Files')
    if self._simplecomm.is_manager():
        self._vprint('Input files sorted', verbosity=2)

    # Retrieve and sort the variables in each time-slice file
    # (To determine if it is time-invariant metadata, time-variant
    # metadata, or if it is a time-series variable)
    self._timer.start('Sort Variables')
    self._sort_variables(specifier)
    self._timer.stop('Sort Variables')
    if self._simplecomm.is_manager():
        self._vprint('Variables sorted', verbosity=2)

    # Validate the output files
    self._timer.start('Output File Validation')
    self._validate_output_files(specifier, skip_existing, overwrite)
    self._timer.stop('Output File Validation')
    if self._simplecomm.is_manager():
        self._vprint('Output files validated', verbosity=2)

    # Helpful debugging message
    if self._simplecomm.is_manager():
        self._vprint('Reshaper initialized.', verbosity=1)

    # Sync before continuing..
    self._simplecomm.sync()
oldfile = os.path.join(olddir, filename) if oldfile in oldfiles: item_dict['old'] = oldfile oldfiles.remove(oldfile) items_to_check.append(item_dict) else: item_dict['old'] = None unchecked_new_items.append(item_dict) for oldfile in oldfiles: item_dict = {'test': test_name} item_dict['new'] = None item_dict['old'] = oldfile unchecked_old_items.append(item_dict) # Get a basic MPI comm comm = create_comm(serial=(opts.serial or opts.list_tests)) # Print tests that will be checked if comm.is_manager(): print 'Checking test results.' for test_name in tests_to_check: print 'Test {0!s}:'.format(test_name) num_chk = sum(1 for i in items_to_check if i['test'] == test_name) num_new = num_chk + sum( 1 for i in unchecked_new_items if i['test'] == test_name) num_old = num_chk + sum( 1 for i in unchecked_old_items if i['test'] == test_name) print ' Checking {0!s} of {1!s}'.format(num_chk, num_new), print 'new files generated against {0!s}'.format(num_old), print 'old files found.'
def main(argv):
    """
    Driver for pyEnsSumPop: build a POP ensemble summary netCDF file.

    Parses command-line options, lists/opens the ensemble member history
    files in the input directory (optionally partitioned over MPI ranks),
    computes global means, RMSZ scores, and ensemble average/stddev
    statistics, and has rank 0 write everything to the summary file.

    Parameters:
        argv: command-line argument list (as from sys.argv[1:]).
    """
    print 'Running pyEnsSumPop!'

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable zscoreonly nrand= rand seq= jsondir='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm1_2_0'
    opts_dict['compset'] = 'FC5'
    opts_dict['mach'] = 'yellowstone'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 3
    opts_dict['nmonth'] = 12
    opts_dict['npert'] = 40
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'ne30_ne30'
    opts_dict['sumfile'] = 'ens.pop.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['zscoreonly'] = False
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = '/glade/scratch/haiyingx/'

    # This creates the dictionary of input arguments
    print "before parseconfig"
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if verbose:
        print opts_dict

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object
    # NOTE(review): an identical communicator is created again below after
    # the file listing; the duplication looks redundant - confirm.
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if opts_dict['jsonfile']:
        # Read in the included var list
        # NOTE(review): Var2d/Var3d/str_size are only bound when a jsonfile
        # is given, but are used unconditionally below - confirm a jsonfile
        # is effectively required.
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        # Find the longest variable name (sizes the 'str_size' dimension).
        # NOTE(review): the loop variable shadows the builtin 'str'.
        str_size = 0
        for str in Var3d:
            if str_size < len(str):
                str_size = len(str)
        for str in Var2d:
            if str_size < len(str):
                str_size = len(str)

    in_files = []
    if (os.path.exists(input_dir)):
        # Pick up the 'nrand' random number of input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict, opts_dict['nrand'])
        else:
            # Get the list of files
            in_files_temp = os.listdir(input_dir)
            in_files = sorted(in_files_temp)
        # Make sure we have enough
        num_files = len(in_files)
    else:
        print 'Input directory: ', input_dir, ' not found'
        sys.exit(2)

    # Create a mpi simplecomm object (duplicate of the one created above)
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Partition the input file list
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Open the files in the input directory
    o_files = []
    for onefile in in_file_list:
        if (os.path.isfile(input_dir + '/' + onefile)):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...."
            sys.exit()
    print in_file_list

    # Store dimensions of the input fields
    if (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    # Make sure all files have the same dimensions
    for key in input_dims:
        if key == "z_t":
            nlev = input_dims["z_t"]
        elif key == "nlon":
            nlon = input_dims["nlon"]
        elif key == "nlat":
            nlat = input_dims["nlat"]

    # Compare every file's dims against those found in the first file
    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if ( nlev != int(input_dims["z_t"]) or ( nlat != int(input_dims["nlat"]))\
            or ( nlon != int(input_dims["nlon"]))):
            print "Dimension mismatch between ", in_file_list[0], 'and', in_file_list[count], '!!!'
            sys.exit()

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if verbose:
        print "Creating ", this_sumfile, " ..."
    # Only the master rank creates the summary file and writes its
    # dimensions, attributes, variables, and time-invariant metadata
    if (me.get_rank() == 0):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if (verbose == True):
            print "Setting dimensions ....."
        nc_sumfile.create_dimension('nlat', nlat)
        nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('time', None)
        nc_sumfile.create_dimension('ens_size', opts_dict['npert'])
        nc_sumfile.create_dimension('nbin', opts_dict['nbin'])
        nc_sumfile.create_dimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.create_dimension('nvars3d', len(Var3d))
        nc_sumfile.create_dimension('nvars2d', len(Var2d))
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'POP verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        v_time = nc_sumfile.create_variable("time", 'd', ('time',))
        v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))
        if not opts_dict['zscoreonly']:
            v_gm = nc_sumfile.create_variable("global_mean", 'f', ('time', 'nvars', 'ens_size'))

        # Assign vars, var3d and var2d
        if (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        # Pad every variable name with blanks out to str_size characters
        # so the names fit the fixed-width 'S1' character arrays
        all_var_names = list(Var3d)
        all_var_names += Var2d
        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(Var3d)
        for i in range(l_eq):
            tt = list(Var3d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(Var2d)
        for i in range(l_eq):
            tt = list(Var2d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if (verbose == True):
            print "Assigning time invariant metadata ....."
        vars_dict = o_files[0].variables
        lev_data = vars_dict["z_t"]
        # NOTE(review): this rebinds the local name rather than writing
        # into the netCDF variable (v_lev[:] = lev_data) - confirm the
        # 'lev' variable is intentionally left unwritten.
        v_lev = lev_data

    # Time-varient metadata (all ranks gather; rank 0 writes)
    if verbose:
        print "Assigning time variant metadata ....."
    vars_dict = o_files[0].variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(),))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    # Calculate global mean, average, standard deviation
    if verbose:
        print "Calculating global means ....."
    is_SE = False
    tslice = 0
    if not opts_dict['zscoreonly']:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(o_files, Var3d, Var2d, is_SE, False, opts_dict)
    if verbose:
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    if (verbose == True):
        print "Calculating RMSZ scores ....."
    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(o_files, Var3d, Var2d, is_SE, opts_dict)

    # Collect from all processors
    if opts_dict['mpi_enable']:
        # Gather the 3d variable results from all processors to the master processor
        # Gather global means 3d results
        if not opts_dict['zscoreonly']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            #print "before gather, gmall.shape=",gmall.shape
            gmall = pyEnsLib.gather_npArray_pop(gmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(o_files)))
        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(zmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(o_files), nbin))
        #print 'zmall=',zmall
        #print "after gather, gmall.shape=",gmall.shape
        ens_avg3d = pyEnsLib.gather_npArray_pop(ens_avg3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(ens_avg2d, me, (me.get_size(), len(Var2d), (nlat), nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(ens_stddev2d, me, (me.get_size(), len(Var2d), (nlat), nlon))

    # Assign to file:
    # NOTE(review): zmall is only defined when mpi_enable is True -
    # confirm the serial path is not expected to reach this block.
    if me.get_rank() == 0:
        #Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0)
        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]
        if not opts_dict['zscoreonly']:
            v_gm[:, :, :] = gmall[:, :, :]
    print "All done"
def execute(self, chunks={}, serial=False, history=False, scomm=None, deflate=None): """ Execute the Data Flow Parameters: chunks (dict): A dictionary of output dimension names and chunk sizes for each dimension given. Output dimensions not included in the dictionary will not be chunked. (Use OrderedDict to preserve order of dimensions, where the first dimension will be assumed to correspond to the fastest-varying index and the last dimension will be assumed to correspond to the slowest-varying index.) serial (bool): Whether to run in serial (True) or parallel (False) history (bool): Whether to write a history attribute generated during execution for each variable in the file scomm (SimpleComm): An externally created SimpleComm object to use for managing parallel operation deflate (int): Override all output file deflate levels with given value """ # Check chunks type if not isinstance(chunks, dict): raise TypeError('Chunks must be specified with a dictionary') # Make sure that the specified chunking dimensions are valid for odname, odsize in chunks.iteritems(): if odname not in self._o2imap: raise ValueError( 'Cannot chunk over unknown output dimension {!r}'.format( odname)) if not isinstance(odsize, int): raise TypeError( ('Chunk size invalid for output dimension {!r}: ' '{}').format(odname, odsize)) # Check that we are not chunking over any "sum-like" dimensions sumlike_chunk_dims = sorted(d for d in chunks if d in self._sumlike_dimensions) if len(sumlike_chunk_dims) > 0: raise ValueError(( 'Cannot chunk over dimensions that are summed over (or "sum-like")' ': {}'.format(', '.join(sumlike_chunk_dims)))) # Create the simple communicator, if necessary if scomm is None: scomm = create_comm(serial=bool(serial)) elif isinstance(scomm, SimpleComm): if scomm.is_manager(): print 'Inheriting SimpleComm object from parent. 
(Ignoring serial argument.)' else: raise TypeError('Communication object is not a SimpleComm!') # Start general output prefix = '[{}/{}]'.format(scomm.get_rank(), scomm.get_size()) if scomm.is_manager(): print 'Beginning execution of data flow...' print 'Mapping Input Dimensions to Output Dimensions:' for d in sorted(self._i2omap): print ' {} --> {}'.format(d, self._i2omap[d]) if len(chunks) > 0: print 'Chunking over Output Dimensions:' for d in chunks: print ' {}: {}'.format(d, chunks[d]) else: print 'Not chunking output.' # Partition the output files/variables over available parallel (MPI) ranks fnames = scomm.partition(self._filesizes.items(), func=WeightBalanced(), involved=True) if scomm.is_manager(): print 'Writing {} files across {} MPI processes.'.format( len(self._filesizes), scomm.get_size()) scomm.sync() # Standard output print '{}: Writing {} files: {}'.format(prefix, len(fnames), ', '.join(fnames)) scomm.sync() # Loop over output files and write using given chunking for fname in fnames: print '{}: Writing file: {}'.format(prefix, fname) if history: self._writenodes[fname].enable_history() else: self._writenodes[fname].disable_history() self._writenodes[fname].execute(chunks=chunks, deflate=deflate) print '{}: Finished writing file: {}'.format(prefix, fname) scomm.sync() if scomm.is_manager(): print 'All output variables written.' print
def __init__(self, in_directory, out_directory, prefix, suffix,
             file_pattern='null', date_pattern='null', m_id=None,
             hist_type='slice', avg_list=None, weighted=False, split=False,
             split_files='null', split_orig_size='null', ncformat='netcdf4c',
             varlist=None, serial=False, mean_diff_rms_obs_dir='null',
             region_nc_var='null', regions=None, region_wgt_var='null',
             obs_file='null', reg_obs_file_suffix='null', obs_dir='null',
             main_comm=None, clobber=False, ice_obs_file='null',
             reg_file='null', ncl_location='null', year0=-99, year1=-99,
             collapse_dim='', vertical_levels=60):
    '''
    Initializes the internal data with optional arguments

    @param in_directory Where the input directory resides (needs full path).
    @param out_directory Where the output will be produced (needs full path).
    @param prefix String specifying the full file name before the date string.
    @param suffix String specifying the suffix of the file names
    @param file_pattern File pattern used put the prefix, date, and suffix
           together for input files.
    @param date_pattern The pattern used to decipher the date string within
           the file name.
    @param m_id Array of member identifiers.  All averages will be done on
           each member individually and then across all members.
           Default is ['-999'].
    @param hist_type Type of file ('slice' or 'series').  Default is 'slice'.
    @param avg_list List of averages that need to be computed.  Elements
           should contain aveType:year0:year1.  year1 is only required for
           multi year averaging.  Default is an empty list.
    @param weighted Boolean variable to select if weights will be applied to
           the averaging.  True = weights will be applied.  Default is False.
    @param split Boolean variable.  True = the file is split spatially and
           the final average needs to be pieced together (ie. CICE time
           series files).  Default is False.
    @param split_files The strings indicating the naming difference between
           split files.  Expects a string with elements separated by a
           comma.  Default is 'null'.
    @param split_orig_size A string listing the lat and lon values of the
           original grid size, needed in case some of the grid has been
           deleted (example: 'lon=288,lat=192').  Default is 'null'.
    @param ncformat Format to output the averaged file(s) in.  Default is
           'netcdf4c'.  Other options: 'netcdf', 'netcdf4', 'netcdf4c'
    @param varlist Optional variables list, if not averaging all variables.
           Default is an empty list.
    @param serial Boolean to run in serial mode.  True = serial (without
           MPI); False = run in parallel (with MPI).  False requires mpi4py
           to be installed.  Default is False.
    @param mean_diff_rms_obs_dir Directory where the regional obs files for
           the mean_diff_rms climo file reside.
    @param region_nc_var String that identifies the netcdf variable that
           contains the region mask used by a regional average.
    @param regions Dictionary that contains regions to average over.
           Format is 'string region name: int region value'.  Default is an
           empty dictionary.
    @param region_wgt_var String that identifies the netcdf variable that
           contains the weights.
    @param obs_file Observational file used for the creation of the
           mean_diff_rms file.  This file must contain all of the variables
           within the variable list (or if a variable list is not
           specified, must contain all hist file variables).  Dimensions
           must be nlon and nlat.
    @param reg_obs_file_suffix The suffix of the regional, weighted
           averages of the 'obs_file'.  Used for the creation of the
           mean_diff_rms file.
    @param obs_dir Full path to the observational files used for the
           mean_diff_rms file.
    @param main_comm A simplecomm to be used by the PyAverager.  If not
           specified, one will be created by this specifier.  Default None.
    @param clobber Remove netcdf output file(s) if they exist.  Default
           False - will exit if an output file of the same name exists.
    @param ice_obs_file Full path to the observational file used to create
           the cice model pre_proc file.
    @param reg_file Full path to the regional file used to create the cice
           model pre_proc file.
    @param ncl_location Location of where the ncl scripts reside.
    @param year0 The first year - only used to create the cice pre_proc
           file.
    @param year1 The last year - only used to create the cice pre_proc
           file.
    @param collapse_dim Used to collapse/average over one dim.
    @param vertical_levels Number of ocean vertical levels
    '''
    # BUG FIX: m_id, avg_list, varlist, and regions previously used
    # mutable default arguments (lists/dicts shared across all calls and
    # stored on self); use None sentinels with unchanged effective
    # defaults instead.
    if m_id is None:
        m_id = ['-999']
    if avg_list is None:
        avg_list = []
    if varlist is None:
        varlist = []
    if regions is None:
        regions = {}

    # Where the input is located
    self.in_directory = in_directory

    # Where the output should be produced
    self.out_directory = out_directory

    # Full file name up to the date string
    self.prefix = prefix

    # The suffix of the data files
    self.suffix = suffix

    # Type of file
    self.hist_type = hist_type

    # List of averages to compute
    self.avg_list = avg_list

    # Should weights be applied?
    self.weighted = weighted

    # Are files split spatially?
    self.split = split

    # Split file name indicators
    self.split_files = split_files

    # The original grid size of the split files
    self.split_orig_size = split_orig_size

    # The netcdf output format
    self.ncformat = ncformat

    # Varlist to average (if not all variables)
    self.varlist = varlist

    # Run in serial mode?  If True, will be run without MPI
    self.serial = serial

    # Directory where to find the regional obs files for the mean_diff_rms climo file
    self.mean_diff_rms_obs_dir = mean_diff_rms_obs_dir

    # Regions to average over
    self.regions = regions

    # Netcdf variable name that contains a region mask
    self.region_nc_var = region_nc_var

    # Netcdf variable name that contains the weights
    self.region_wgt_var = region_wgt_var

    # String that indicates the suffix of the regional obs files used for the mean_diff_rms file
    self.reg_obs_file_suffix = reg_obs_file_suffix

    # String that indicates the name of the observational file
    self.obs_file = obs_file

    # String indicating the path to the observational files used for the mean_diff_rms file
    self.obs_dir = obs_dir

    # File pattern used to piece together a full file name
    if (file_pattern == 'null'):
        if (hist_type == 'slice'):
            self.file_pattern = [
                '$prefix', '.', '$date_pattern', '.', '$suffix'
            ]
        if (hist_type == 'series'):
            if split:
                self.file_pattern = [
                    '$prefix', '.', '$var', '_', '$hem', '.',
                    '$date_pattern', '.', '$suffix'
                ]
            else:
                self.file_pattern = [
                    '$prefix', '.', '$var', '.', '$date_pattern', '.',
                    '$suffix'
                ]
    else:
        self.file_pattern = file_pattern

    # The date pattern to decipher the date within the file name
    self.date_pattern = date_pattern

    # Array of member identifiers
    self.m_id = m_id

    # Get first and last years used in the averaging by parsing the avg_list
    dates = []
    for avg in avg_list:
        avg_descr = avg.split(':')
        for yr in avg_descr[1:]:
            dates.append(int(yr))
    if (year0 == -99 and year1 == -99):
        # NOTE: raises ValueError if avg_list supplies no years and
        # year0/year1 were not given explicitly
        self.year0 = int(min(dates))
        self.year1 = int(max(dates))
    else:
        self.year0 = int(year0)
        self.year1 = int(year1)

    # Initialize a simple_comm object if one was not passed in by the user
    if (main_comm is None):
        from asaptools import simplecomm
        self.main_comm = simplecomm.create_comm(serial=serial)
    else:
        self.main_comm = main_comm

    # True/False, rm average file(s) if it has already been created
    self.clobber = clobber

    # File that contains the weight/area information
    self.ice_obs_file = ice_obs_file

    # File that exists or will be created that contains a region mask for ice
    self.reg_file = reg_file

    # Location of the ncl script that will be used to create reg_file if it doesn't exist
    self.ncl_location = ncl_location

    # Used to collapse/average over one dim
    self.collapse_dim = collapse_dim

    # Used to specify the number of ocean vertical levels
    self.vertical_levels = vertical_levels
workdir = '{0}/climo/{1}/{2}/{3}/'.format(envDict['PTMPDIR_'+t], envDict['caseid_'+t], subdir, m_dir) timer_tag = '{0}_{1}'.format(t, climo_file) timer.start(timer_tag) debugMsg('Before call to lnd_regrid using workdir = {0}/{1}'.format(workdir, ext_dir), header=True, verbosity=1) diagUtilsLib.lnd_regrid(climo_file, regrid_script, t, workdir, ext_dir, envDict) timer.stop(timer_tag) debugMsg("Total time to regrid file {0} = {1}".format(climo_file, timer.get_time(timer_tag)), header=True, verbosity=1) #=================================== if __name__ == "__main__": # initialize simplecomm object main_comm = simplecomm.create_comm(serial=False) # setup an overall timer timer = timekeeper.TimeKeeper() # get commandline options options = commandline_options() # initialize global vprinter object for printing debug messages if options.debug: header = "[" + str(main_comm.get_rank()) + "/" + str(main_comm.get_size()) + "]: DEBUG... " debugMsg = vprinter.VPrinter(header=header, verbosity=options.debug[0]) try: timer.start("Total Time") status = main(options, main_comm, debugMsg, timer)
def __init__(self, specifier, serial=False, verbosity=1,
             skip_existing=False, overwrite=False, once=False,
             simplecomm=None):
    """
    Constructor

    Parameters:
        specifier (Specifier): An instance of the Specifier class,
            defining the input specification for this reshaper operation.

    Keyword Arguments:
        serial (bool): True or False, indicating whether the operation
            should be performed in serial (True) or parallel (False).
            The default is to assume parallel operation (but serial will
            be chosen if the mpi4py cannot be found when trying to
            initialize decomposition.
        verbosity (int): Level of printed output (stdout).  A value of 0
            means no output, and a higher value means more output.  The
            default value is 1.
        skip_existing (bool): Flag specifying whether to skip the
            generation of time-series for variables with time-series
            files that already exist.  Default is False.
        overwrite (bool): Flag specifying whether to forcefully overwrite
            output files if they already exist.  Default is False.
        once (bool): True or False, indicating whether the Reshaper
            should write all metadata to a 'once' file (separately).
        simplecomm (SimpleComm): A SimpleComm object to handle the
            parallel communication, if necessary

    Raises:
        TypeError: If any argument is not of its documented type.
    """
    # Type checking (or double-checking)
    if not isinstance(specifier, Specifier):
        err_msg = "Input must be given in the form of a Specifier object"
        raise TypeError(err_msg)
    if type(serial) is not bool:
        err_msg = "Serial indicator must be True or False."
        raise TypeError(err_msg)
    if type(verbosity) is not int:
        err_msg = "Verbosity level must be an integer."
        raise TypeError(err_msg)
    if type(skip_existing) is not bool:
        err_msg = "Skip_existing flag must be True or False."
        raise TypeError(err_msg)
    # FIX: 'overwrite' is documented as a bool but was the only flag not
    # validated; check it like its siblings
    if type(overwrite) is not bool:
        err_msg = "Overwrite flag must be True or False."
        raise TypeError(err_msg)
    if type(once) is not bool:
        err_msg = "Once-file indicator must be True or False."
        raise TypeError(err_msg)
    if simplecomm is not None:
        if not isinstance(simplecomm, (SimpleComm, SimpleCommMPI)):
            # BUG FIX: a stray comma previously made err_msg a 2-tuple of
            # strings instead of one message string
            err_msg = ("Simple communicator object is not a SimpleComm or "
                       "SimpleCommMPI")
            raise TypeError(err_msg)

    # Whether to write a once file
    self._use_once_file = once

    # Internal timer data
    self._timer = TimeKeeper()

    # Dictionary storing read/write data amounts
    self.assumed_block_size = float(4 * 1024 * 1024)
    self._byte_counts = {}

    self._timer.start('Initializing Simple Communicator')
    if simplecomm is None:
        simplecomm = create_comm(serial=serial)
    # Reference to the simple communicator
    self._simplecomm = simplecomm
    self._timer.stop('Initializing Simple Communicator')

    # Construct the print header: "[rank/size] "
    header = ''.join(['[', str(self._simplecomm.get_rank()), '/',
                      str(self._simplecomm.get_size()), '] '])

    # Reference to the verbose printer tool
    self._vprint = VPrinter(header=header, verbosity=verbosity)

    # Debug output starting
    if self._simplecomm.is_manager():
        self._vprint('Initializing Reshaper', verbosity=1)

    # Validate the user input data
    self._timer.start('Specifier Validation')
    specifier.validate()
    self._timer.stop('Specifier Validation')
    if self._simplecomm.is_manager():
        self._vprint('Specifier validated', verbosity=1)

    # Setup PyNIO options (including disabling the default PreFill option)
    opt = Nio.options()
    opt.PreFill = False

    # Determine the Format and CompressionLevel options
    # from the NetCDF format string in the Specifier
    if specifier.netcdf_format == 'netcdf':
        opt.Format = 'Classic'
    elif specifier.netcdf_format == 'netcdf4':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = 0
    elif specifier.netcdf_format == 'netcdf4c':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = specifier.netcdf_deflate
        if self._simplecomm.is_manager():
            self._vprint('PyNIO compression level: {0}'.format(
                specifier.netcdf_deflate), verbosity=2)
    self._nio_options = opt
    if self._simplecomm.is_manager():
        self._vprint('PyNIO options set', verbosity=2)

    # Open all of the input files
    self._timer.start('Open Input Files')
    self._input_files = []
    for filename in specifier.input_file_list:
        self._input_files.append(Nio.open_file(filename, "r"))
    self._timer.stop('Open Input Files')
    if self._simplecomm.is_manager():
        self._vprint('Input files opened', verbosity=2)

    # Validate the input files themselves
    self._timer.start('Input File Validation')
    self._validate_input_files(specifier)
    self._timer.stop('Input File Validation')
    if self._simplecomm.is_manager():
        self._vprint('Input files validated', verbosity=2)

    # Sort the input files by time
    self._timer.start('Sort Input Files')
    self._sort_input_files_by_time(specifier)
    self._timer.stop('Sort Input Files')
    if self._simplecomm.is_manager():
        self._vprint('Input files sorted', verbosity=2)

    # Retrieve and sort the variables in each time-slice file
    # (To determine if it is time-invariant metadata, time-variant
    # metadata, or if it is a time-series variable)
    self._timer.start('Sort Variables')
    self._sort_variables(specifier)
    self._timer.stop('Sort Variables')
    if self._simplecomm.is_manager():
        self._vprint('Variables sorted', verbosity=2)

    # Validate the output files
    self._timer.start('Output File Validation')
    self._validate_output_files(specifier, skip_existing, overwrite)
    self._timer.stop('Output File Validation')
    if self._simplecomm.is_manager():
        self._vprint('Output files validated', verbosity=2)

    # Helpful debugging message
    if self._simplecomm.is_manager():
        self._vprint('Reshaper initialized.', verbosity=1)

    # Sync before continuing..
    self._simplecomm.sync()
def main(argv): # Get command line stuff and store in a dictionary s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex=' optkeys = s.split() try: opts, args = getopt.getopt(argv, "h", optkeys) except getopt.GetoptError: pyEnsLib.EnsSum_usage() sys.exit(2) # Put command line options in a dictionary - also set defaults opts_dict = {} # Defaults opts_dict['tag'] = 'cesm2_0_beta08' opts_dict['compset'] = 'F2000' opts_dict['mach'] = 'cheyenne' opts_dict['esize'] = 350 opts_dict['tslice'] = 1 opts_dict['res'] = 'f19_f19' opts_dict['sumfile'] = 'ens.summary.nc' opts_dict['indir'] = './' opts_dict['sumfiledir'] = './' opts_dict['jsonfile'] = 'exclude_empty.json' opts_dict['verbose'] = False opts_dict['mpi_enable'] = False opts_dict['maxnorm'] = False opts_dict['gmonly'] = True opts_dict['popens'] = False opts_dict['cumul'] = False opts_dict['regx'] = 'test' opts_dict['startMon'] = 1 opts_dict['endMon'] = 1 opts_dict['fIndex'] = 151 # This creates the dictionary of input arguments opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict) verbose = opts_dict['verbose'] st = opts_dict['esize'] esize = int(st) if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']): print 'Please specify --tag, --compset, --mach and --res options' sys.exit() # Now find file names in indir input_dir = opts_dict['indir'] # The var list that will be excluded ex_varlist = [] inc_varlist = [] # Create a mpi simplecomm object if opts_dict['mpi_enable']: me = simplecomm.create_comm() else: me = simplecomm.create_comm(not opts_dict['mpi_enable']) if me.get_rank() == 0: print 'Running pyEnsSum!' 
if me.get_rank() == 0 and (verbose == True): print opts_dict print 'Ensemble size for summary = ', esize exclude = False if me.get_rank() == 0: if opts_dict['jsonfile']: inc_varlist = [] # Read in the excluded or included var list ex_varlist, exclude = pyEnsLib.read_jsonlist( opts_dict['jsonfile'], 'ES') if exclude == False: inc_varlist = ex_varlist ex_varlist = [] # Read in the included var list #inc_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES') # Broadcast the excluded var list to each processor #if opts_dict['mpi_enable']: # ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True) # Broadcast the excluded var list to each processor if opts_dict['mpi_enable']: exclude = me.partition(exclude, func=Duplicate(), involved=True) if exclude: ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True) else: inc_varlist = me.partition(inc_varlist, func=Duplicate(), involved=True) in_files = [] if (os.path.exists(input_dir)): # Get the list of files in_files_temp = os.listdir(input_dir) in_files = sorted(in_files_temp) # Make sure we have enough num_files = len(in_files) if me.get_rank() == 0 and (verbose == True): print 'Number of files in input directory = ', num_files if (num_files < esize): if me.get_rank() == 0 and (verbose == True): print 'Number of files in input directory (',num_files,\ ') is less than specified ensemble size of ', esize sys.exit(2) if (num_files > esize): if me.get_rank() == 0 and (verbose == True): print 'NOTE: Number of files in ', input_dir, \ 'is greater than specified ensemble size of ', esize ,\ '\nwill just use the first ', esize, 'files' else: if me.get_rank() == 0: print 'Input directory: ', input_dir, ' not found' sys.exit(2) if opts_dict['cumul']: if opts_dict['regx']: in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'], opts_dict['regx']) in_files = me.partition(in_files_list, func=EqualLength(), involved=True) if me.get_rank() == 0 and (verbose == True): print 'in_files=', in_files # 
Open the files in the input directory o_files = [] if me.get_rank() == 0 and opts_dict['verbose']: print 'Input files are: ' print "\n".join(in_files) #for i in in_files: # print "in_files =",i for onefile in in_files[0:esize]: if (os.path.isfile(input_dir + '/' + onefile)): o_files.append(Nio.open_file(input_dir + '/' + onefile, "r")) else: if me.get_rank() == 0: print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...." sys.exit() # Store dimensions of the input fields if me.get_rank() == 0 and (verbose == True): print "Getting spatial dimensions" nlev = -1 nilev = -1 ncol = -1 nlat = -1 nlon = -1 lonkey = '' latkey = '' # Look at first file and get dims input_dims = o_files[0].dimensions ndims = len(input_dims) for key in input_dims: if key == "lev": nlev = input_dims["lev"] elif key == "ilev": nilev = input_dims["ilev"] elif key == "ncol": ncol = input_dims["ncol"] elif (key == "nlon") or (key == "lon"): nlon = input_dims[key] lonkey = key elif (key == "nlat") or (key == "lat"): nlat = input_dims[key] latkey = key if (nlev == -1): if me.get_rank() == 0: print "COULD NOT LOCATE valid dimension lev => EXITING...." sys.exit() if ((ncol == -1) and ((nlat == -1) or (nlon == -1))): if me.get_rank() == 0: print "Need either lat/lon or ncol => EXITING...." sys.exit() # Check if this is SE or FV data if (ncol != -1): is_SE = True else: is_SE = False # Make sure all files have the same dimensions if me.get_rank() == 0 and (verbose == True): print "Checking dimensions across files...." print 'lev = ', nlev if (is_SE == True): print 'ncol = ', ncol else: print 'nlat = ', nlat print 'nlon = ', nlon for count, this_file in enumerate(o_files): input_dims = this_file.dimensions if (is_SE == True): if (nlev != int(input_dims["lev"]) or (ncol != int(input_dims["ncol"]))): if me.get_rank() == 0: print "Dimension mismatch between ", in_files[ 0], 'and', in_files[0], '!!!' 
sys.exit() else: if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\ or ( nlon != int(input_dims[lonkey]))): if me.get_rank() == 0: print "Dimension mismatch between ", in_files[ 0], 'and', in_files[0], '!!!' sys.exit() # Get 2d vars, 3d vars and all vars (For now include all variables) vars_dict_all = o_files[0].variables # Remove the excluded variables (specified in json file) from variable dictionary #print len(vars_dict_all) if exclude: vars_dict = vars_dict_all for i in ex_varlist: if i in vars_dict: del vars_dict[i] #Given an included var list, remove all float var that are not on the list else: vars_dict = vars_dict_all.copy() for k, v in vars_dict_all.iteritems(): if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'): #print vars_dict_all[k].typecode() #print k del vars_dict[k] num_vars = len(vars_dict) #print num_vars #if me.get_rank() == 0: # for k,v in vars_dict.iteritems(): # print 'vars_dict',k,vars_dict[k].typecode() str_size = 0 d2_var_names = [] d3_var_names = [] num_2d = 0 num_3d = 0 # Which are 2d, which are 3d and max str_size for k, v in vars_dict.iteritems(): var = k vd = v.dimensions # all the variable's dimensions (names) vr = v.rank # num dimension vs = v.shape # dim values is_2d = False is_3d = False if (is_SE == True): # (time, lev, ncol) or (time, ncol) if ((vr == 2) and (vs[1] == ncol)): is_2d = True num_2d += 1 elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev)): is_3d = True num_3d += 1 else: # (time, lev, nlon, nlon) or (time, nlat, nlon) if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)): is_2d = True num_2d += 1 elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and (vs[1] == nlev or vs[1] == nilev))): is_3d = True num_3d += 1 if (is_3d == True): str_size = max(str_size, len(k)) d3_var_names.append(k) elif (is_2d == True): str_size = max(str_size, len(k)) d2_var_names.append(k) #else: # print 'var=',k if me.get_rank() == 0 and (verbose == True): print 'Number of variables found: ', num_3d + 
num_2d print '3D variables: ' + str(num_3d) + ', 2D variables: ' + str(num_2d) # Now sort these and combine (this sorts caps first, then lower case - # which is what we want) d2_var_names.sort() d3_var_names.sort() if esize < num_2d + num_3d: if me.get_rank() == 0: print "************************************************************************************************************************************" print " Error: the total number of 3D and 2D variables " + str( num_2d + num_3d ) + " is larger than the number of ensemble files " + str(esize) print " Cannot generate ensemble summary file, please remove more variables from your included variable list," print " or add more varaibles in your excluded variable list!!!" print "************************************************************************************************************************************" sys.exit() # All vars is 3d vars first (sorted), the 2d vars all_var_names = list(d3_var_names) all_var_names += d2_var_names n_all_var_names = len(all_var_names) #if me.get_rank() == 0 and (verbose == True): # print 'num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")" # Create new summary ensemble file this_sumfile = opts_dict["sumfile"] if me.get_rank() == 0 and (verbose == True): print "Creating ", this_sumfile, " ..." if (me.get_rank() == 0 | opts_dict["popens"]): if os.path.exists(this_sumfile): os.unlink(this_sumfile) opt = Nio.options() opt.PreFill = False opt.Format = 'NetCDF4Classic' nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt) # Set dimensions if me.get_rank() == 0 and (verbose == True): print "Setting dimensions ....." 
if (is_SE == True): nc_sumfile.create_dimension('ncol', ncol) else: nc_sumfile.create_dimension('nlat', nlat) nc_sumfile.create_dimension('nlon', nlon) nc_sumfile.create_dimension('nlev', nlev) nc_sumfile.create_dimension('ens_size', esize) nc_sumfile.create_dimension('nvars', num_3d + num_2d) nc_sumfile.create_dimension('nvars3d', num_3d) nc_sumfile.create_dimension('nvars2d', num_2d) nc_sumfile.create_dimension('str_size', str_size) # Set global attributes now = time.strftime("%c") if me.get_rank() == 0 and (verbose == True): print "Setting global attributes ....." setattr(nc_sumfile, 'creation_date', now) setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file') setattr(nc_sumfile, 'tag', opts_dict["tag"]) setattr(nc_sumfile, 'compset', opts_dict["compset"]) setattr(nc_sumfile, 'resolution', opts_dict["res"]) setattr(nc_sumfile, 'machine', opts_dict["mach"]) # Create variables if me.get_rank() == 0 and (verbose == True): print "Creating variables ....." v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev', )) v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size')) v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size')) v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size')) if not opts_dict['gmonly']: if (is_SE == True): v_ens_avg3d = nc_sumfile.create_variable( "ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol')) v_ens_stddev3d = nc_sumfile.create_variable( "ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol')) v_ens_avg2d = nc_sumfile.create_variable( "ens_avg2d", 'f', ('nvars2d', 'ncol')) v_ens_stddev2d = nc_sumfile.create_variable( "ens_stddev2d", 'f', ('nvars2d', 'ncol')) else: v_ens_avg3d = nc_sumfile.create_variable( "ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon')) v_ens_stddev3d = nc_sumfile.create_variable( "ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon')) v_ens_avg2d = nc_sumfile.create_variable( "ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon')) v_ens_stddev2d = 
nc_sumfile.create_variable( "ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon')) v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size')) v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size')) v_standardized_gm = nc_sumfile.create_variable("standardized_gm", 'f', ('nvars', 'ens_size')) v_loadings_gm = nc_sumfile.create_variable('loadings_gm', 'f', ('nvars', 'nvars')) v_mu_gm = nc_sumfile.create_variable('mu_gm', 'f', ('nvars', )) v_sigma_gm = nc_sumfile.create_variable('sigma_gm', 'f', ('nvars', )) v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm', 'f', ('nvars', )) # Assign vars, var3d and var2d if me.get_rank() == 0 and (verbose == True): print "Assigning vars, var3d, and var2d ....." eq_all_var_names = [] eq_d3_var_names = [] eq_d2_var_names = [] l_eq = len(all_var_names) for i in range(l_eq): tt = list(all_var_names[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ') * (str_size - l_tt) tt.extend(extra) eq_all_var_names.append(tt) l_eq = len(d3_var_names) for i in range(l_eq): tt = list(d3_var_names[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ') * (str_size - l_tt) tt.extend(extra) eq_d3_var_names.append(tt) l_eq = len(d2_var_names) for i in range(l_eq): tt = list(d2_var_names[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ') * (str_size - l_tt) tt.extend(extra) eq_d2_var_names.append(tt) v_vars[:] = eq_all_var_names[:] v_var3d[:] = eq_d3_var_names[:] v_var2d[:] = eq_d2_var_names[:] # Time-invarient metadata if me.get_rank() == 0 and (verbose == True): print "Assigning time invariant metadata ....." 
lev_data = vars_dict["lev"] v_lev = lev_data # Form ensembles, each missing one member; compute RMSZs and global means #for each variable, we also do max norm also (currently done in pyStats) tslice = opts_dict['tslice'] if not opts_dict['cumul']: # Partition the var list var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True) var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True) else: var3_list_loc = d3_var_names var2_list_loc = d2_var_names # Calculate global means # if me.get_rank() == 0 and (verbose == True): print "Calculating global means ....." if not opts_dict['cumul']: gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary( o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict) if me.get_rank() == 0 and (verbose == True): print "Finish calculating global means ....." # Calculate RMSZ scores if (not opts_dict['gmonly']) | (opts_dict['cumul']): if me.get_rank() == 0 and (verbose == True): print "Calculating RMSZ scores ....." zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz( o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict) # Calculate max norm ensemble if opts_dict['maxnorm']: if me.get_rank() == 0 and (verbose == True): print "Calculating max norm of ensembles ....." 
pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc) pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc) if opts_dict['mpi_enable'] & (not opts_dict['popens']): if not opts_dict['cumul']: # Gather the 3d variable results from all processors to the master processor slice_index = get_stride_list(len(d3_var_names), me) # Gather global means 3d results gm3d = gather_npArray(gm3d, me, slice_index, (len(d3_var_names), len(o_files))) if not opts_dict['gmonly']: # Gather zscore3d results zscore3d = gather_npArray(zscore3d, me, slice_index, (len(d3_var_names), len(o_files))) # Gather ens_avg3d and ens_stddev3d results shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names), me.get_rank()) ens_avg3d = gather_npArray(ens_avg3d, me, slice_index, shape_tuple3d) ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index, shape_tuple3d) # Gather 2d variable results from all processors to the master processor slice_index = get_stride_list(len(d2_var_names), me) # Gather global means 2d results gm2d = gather_npArray(gm2d, me, slice_index, (len(d2_var_names), len(o_files))) var_list = gather_list(var_list, me) if not opts_dict['gmonly']: # Gather zscore2d results zscore2d = gather_npArray(zscore2d, me, slice_index, (len(d2_var_names), len(o_files))) # Gather ens_avg3d and ens_stddev2d results shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names), me.get_rank()) ens_avg2d = gather_npArray(ens_avg2d, me, slice_index, shape_tuple2d) ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index, shape_tuple2d) else: gmall = np.concatenate((temp1, temp2), axis=0) gmall = pyEnsLib.gather_npArray_pop( gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names))) # Assign to file: if me.get_rank() == 0 | opts_dict['popens']: if not opts_dict['cumul']: gmall = np.concatenate((gm3d, gm2d), axis=0) if not opts_dict['gmonly']: Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0) v_RMSZ[:, :] = Zscoreall[:, :] if not opts_dict['gmonly']: if (is_SE == True): 
v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :] v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :] v_ens_avg2d[:, :] = ens_avg2d[:, :] v_ens_stddev2d[:, :] = ens_stddev2d[:, :] else: v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :] v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :] v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :] v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :] else: gmall_temp = np.transpose(gmall[:, :]) gmall = gmall_temp mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA( gmall, all_var_names, var_list, me) v_gm[:, :] = gmall[:, :] v_standardized_gm[:, :] = standardized_global_mean[:, :] v_mu_gm[:] = mu_gm[:] v_sigma_gm[:] = sigma_gm[:].astype(np.float32) v_loadings_gm[:, :] = loadings_gm[:, :] v_sigma_scores_gm[:] = scores_gm[:] if me.get_rank() == 0: print "All Done"
def main(argv):
    """pyEnsSum driver (Python 3 / netCDF4): build a CAM ensemble summary file.

    Parses command-line options, validates the ensemble of CAM history files
    in --indir, computes per-variable global means across the ensemble, runs
    the PCA prep step, and writes the results to the NetCDF summary file
    named by --sumfile. Runs serially or under MPI via simplecomm.
    """
    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex= mpi_disable'
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_0'
    opts_dict['compset'] = 'F2000climo'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = True
    opts_dict['mpi_disable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)

    if opts_dict['popens'] == True:
        print(
            "ERROR: Please use pyEnsSumPop.py for a POP ensemble (not --popens) => EXITING...."
        )
        sys.exit()

    # BUG FIX: was "... and opts_dict['mach'] or opts_dict['res']" -- since
    # 'and' binds tighter than 'or', --res was never actually required.
    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach']
            and opts_dict['res']):
        print(
            'ERROR: Please specify --tag, --compset, --mach and --res options => EXITING....'
        )
        sys.exit()

    if opts_dict['mpi_disable'] == True:
        opts_dict['mpi_enable'] = False

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist = []
    inc_varlist = []

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        print('STATUS: Running pyEnsSum.py')

    if me.get_rank() == 0 and (verbose == True):
        print(opts_dict)
        print('STATUS: Ensemble size for summary = ', esize)

    exclude = False
    # Only rank 0 reads the json file; the result is broadcast below
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist = []
            # Read in the excluded or included var list
            ex_varlist, exclude = pyEnsLib.read_jsonlist(
                opts_dict['jsonfile'], 'ES')
            if exclude == False:
                # The json file held an *include* list, not an exclude list
                inc_varlist = ex_varlist
                ex_varlist = []

    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude = me.partition(exclude, func=Duplicate(), involved=True)
        if exclude:
            ex_varlist = me.partition(ex_varlist, func=Duplicate(),
                                      involved=True)
        else:
            inc_varlist = me.partition(inc_varlist, func=Duplicate(),
                                       involved=True)

    in_files = []
    if (os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank() == 0 and (verbose == True):
            print('VERBOSE: Number of files in input directory = ', num_files)
        if (num_files < esize):
            if me.get_rank() == 0 and (verbose == True):
                print('VERBOSE: Number of files in input directory (',
                      num_files, ') is less than specified ensemble size of ',
                      esize)
            sys.exit(2)
        if (num_files > esize):
            if me.get_rank() == 0 and (verbose == True):
                print('VERBOSE: Note that the number of files in ', input_dir,
                      'is greater than specified ensemble size of ', esize,
                      '\nwill just use the first ', esize, 'files')
    else:
        if me.get_rank() == 0:
            print('ERROR: Input directory: ', input_dir, ' not found')
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'],
                                               opts_dict['regx'])
        in_files = me.partition(in_files_list, func=EqualLength(),
                                involved=True)
        if me.get_rank() == 0 and (verbose == True):
            print('VERBOSE: in_files = ', in_files)

    # Check full file names in input directory (don't open yet)
    full_in_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print('VERBOSE: Input files are: ')

    for onefile in in_files[0:esize]:
        fname = input_dir + '/' + onefile
        if me.get_rank() == 0 and opts_dict['verbose']:
            print(fname)
        if (os.path.isfile(fname)):
            full_in_files.append(fname)
        else:
            if me.get_rank() == 0:
                print("ERROR: Could not locate file ", fname,
                      " => EXITING....")
            sys.exit()

    # open just the first file
    first_file = nc.Dataset(full_in_files[0], "r")

    # Store dimensions of the input fields
    if me.get_rank() == 0 and (verbose == True):
        print("VERBOSE: Getting spatial dimensions")
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''
    # Look at first file and get dims
    input_dims = first_file.dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = len(input_dims["lev"])
        elif key == "ilev":
            nilev = len(input_dims["ilev"])
        elif key == "ncol":
            ncol = len(input_dims["ncol"])
        elif (key == "nlon") or (key == "lon"):
            nlon = len(input_dims[key])
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = len(input_dims[key])
            latkey = key

    if (nlev == -1):
        if me.get_rank() == 0:
            print(
                "ERROR: could not locate a valid dimension (lev) => EXITING...."
            )
        sys.exit()

    if ((ncol == -1) and ((nlat == -1) or (nlon == -1))):
        if me.get_rank() == 0:
            print("ERROR: Need either lat/lon or ncol => EXITING....")
        sys.exit()

    # Check if this is SE or FV data
    if (ncol != -1):
        is_SE = True
    else:
        is_SE = False

    # output dimensions
    if me.get_rank() == 0 and (verbose == True):
        print('lev = ', nlev)
        if (is_SE == True):
            print('ncol = ', ncol)
        else:
            print('nlat = ', nlat)
            print('nlon = ', nlon)

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = first_file.variables

    # Remove the excluded variables (specified in json file) from variable
    # dictionary
    if exclude:
        vars_dict = vars_dict_all
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    # Given an included var list, remove all the variables that are not on
    # the list
    else:
        vars_dict = vars_dict_all.copy()
        for k, v in vars_dict_all.items():
            # NOTE(review): netCDF4 variables typically expose 'dtype', not
            # 'typecode()' -- confirm this call works with the nc module in
            # use before relying on the include-list path
            if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
                del vars_dict[k]

    num_vars = len(vars_dict)

    str_size = 0
    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.items():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = len(v.dimensions)  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True):  # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):
                is_2d = True
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev)):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):
                is_2d = True
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon
                                 and (vs[1] == nlev or vs[1] == nilev))):
                is_3d = True
                num_3d += 1
        if (is_3d == True):
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif (is_2d == True):
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    if me.get_rank() == 0 and (verbose == True):
        print('VERBOSE: Number of variables found: ', num_3d + num_2d)
        print('VERBOSE: 3D variables: ' + str(num_3d) + ', 2D variables: ' +
              str(num_2d))

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    if esize < num_2d + num_3d:
        if me.get_rank() == 0:
            print(
                "************************************************************************************************************************************"
            )
            print(" ERROR: the total number of 3D and 2D variables " +
                  str(num_2d + num_3d) +
                  " is larger than the number of ensemble files " + str(esize))
            print(
                " Cannot generate ensemble summary file, please remove more variables from your included variable list,"
            )
            print(
                " or add more variables in your excluded variable list => EXITING...."
            )
            print(
                "************************************************************************************************************************************"
            )
        sys.exit()

    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    # Rank 0 - Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    # check if directory is valid
    sum_dir = os.path.dirname(this_sumfile)
    if len(sum_dir) == 0:
        sum_dir = '.'
    if (os.path.exists(sum_dir) == False):
        if me.get_rank() == 0:
            print('ERROR: Summary file directory: ', sum_dir, ' not found')
        sys.exit(2)

    if (me.get_rank() == 0):
        if (verbose == True):
            print("VERBOSE: Creating ", this_sumfile, " ...")

        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        nc_sumfile = nc.Dataset(this_sumfile, "w", format="NETCDF4_CLASSIC")

        # Set dimensions
        if (verbose == True):
            print("VERBOSE: Setting dimensions .....")
        if (is_SE == True):
            nc_sumfile.createDimension('ncol', ncol)
        else:
            nc_sumfile.createDimension('nlat', nlat)
            nc_sumfile.createDimension('nlon', nlon)
        nc_sumfile.createDimension('nlev', nlev)
        nc_sumfile.createDimension('ens_size', esize)
        nc_sumfile.createDimension('nvars', num_3d + num_2d)
        nc_sumfile.createDimension('nvars3d', num_3d)
        nc_sumfile.createDimension('nvars2d', num_2d)
        nc_sumfile.createDimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if (verbose == True):
            print("VERBOSE: Setting global attributes .....")
        nc_sumfile.creation_date = now
        nc_sumfile.title = 'CAM verification ensemble summary file'
        nc_sumfile.tag = opts_dict["tag"]
        nc_sumfile.compset = opts_dict["compset"]
        nc_sumfile.resolution = opts_dict["res"]
        nc_sumfile.machine = opts_dict["mach"]

        # Create variables
        if (verbose == True):
            print("VERBOSE: Creating variables .....")
        v_lev = nc_sumfile.createVariable("lev", 'f8', ('nlev', ))
        v_vars = nc_sumfile.createVariable("vars", 'S1',
                                           ('nvars', 'str_size'))
        v_var3d = nc_sumfile.createVariable("var3d", 'S1',
                                            ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.createVariable("var2d", 'S1',
                                            ('nvars2d', 'str_size'))
        v_gm = nc_sumfile.createVariable("global_mean", 'f8',
                                         ('nvars', 'ens_size'))
        v_standardized_gm = nc_sumfile.createVariable(
            "standardized_gm", 'f8', ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.createVariable('loadings_gm', 'f8',
                                                  ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.createVariable('mu_gm', 'f8', ('nvars', ))
        v_sigma_gm = nc_sumfile.createVariable('sigma_gm', 'f8', ('nvars', ))
        v_sigma_scores_gm = nc_sumfile.createVariable(
            'sigma_scores_gm', 'f8', ('nvars', ))

        # Assign vars, var3d and var2d
        if (verbose == True):
            print("VERBOSE: Assigning vars, var3d, and var2d .....")

        # Pad each variable name with blanks out to str_size characters so
        # the names fit the fixed-width 'S1' character arrays
        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if (verbose == True):
            print("VERBOSE: Assigning time invariant metadata .....")
        lev_data = first_file.variables["lev"]
        v_lev[:] = lev_data[:]
    # end of rank=0 work

    # All:
    tslice = opts_dict['tslice']
    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names, func=EqualStride(),
                                     involved=True)
        var2_list_loc = me.partition(d2_var_names, func=EqualStride(),
                                     involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # close first_file
    first_file.close()

    # Calculate global means
    if me.get_rank() == 0 and (verbose == True):
        print("VERBOSE: Calculating global means .....")
    if not opts_dict['cumul']:
        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(
            full_in_files, var3_list_loc, var2_list_loc, is_SE, False,
            opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print("VERBOSE: Finished calculating global means .....")

    # gather to rank = 0
    if opts_dict['mpi_enable']:
        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the
            # master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index,
                                  (len(d3_var_names), len(full_in_files)))

            # Gather 2d variable results from all processors to the master
            # processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index,
                                  (len(d2_var_names), len(full_in_files)))

            # gather variables to exclude (in pre_pca)
            var_list = gather_list(var_list, me)
        else:
            # NOTE(review): temp1/temp2 are not defined anywhere in this
            # function, so the --cumul path raises NameError here -- looks
            # like leftovers from a removed calc_rmsz call; confirm intent
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(d3_var_names) + len(d2_var_names)))

    # rank =0 : complete calculations for summary file
    if me.get_rank() == 0:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp

        # PCA prep and calculation
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm, b_exit = pyEnsLib.pre_PCA(
            gmall, all_var_names, var_list, me)

        # if PCA calc encounters an error, then remove the summary file and
        # exit
        if b_exit:
            nc_sumfile.close()
            os.unlink(this_sumfile)
            print("STATUS: Summary could not be created.")
            sys.exit(2)

        v_gm[:, :] = gmall[:, :]
        v_standardized_gm[:, :] = standardized_global_mean[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:]
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        print("STATUS: Summary file is complete.")

        nc_sumfile.close()
def main(argv):
    """Entry point for pyEnsSumPop: build a POP verification-ensemble summary file.

    Parses command-line options, reads the included 2d/3d variable lists from a
    JSON file, checks/partitions the input ensemble files across MPI ranks,
    creates the netCDF summary file on rank 0, computes RMSZ scores plus
    ensemble averages/standard deviations (gathered to rank 0 when MPI is
    enabled), and writes everything into the summary file.

    Parameters:
        argv: command-line argument list (sys.argv[1:] style) parsed by getopt.

    Side effects: reads files from --indir, deletes/creates the --sumfile
    netCDF file, prints status output, and may call sys.exit on errors.
    """
    # Get command line stuff and store in a dictionary
    # Option keys: names ending in '=' take a value; bare names are flags.
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable mpi_disable nrand= rand seq= jsondir= esize='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_1_0'
    opts_dict['compset'] = 'G'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 1
    opts_dict['nmonth'] = 12
    opts_dict['esize'] = 40
    opts_dict['npert'] = 0  #for backwards compatible
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'T62_g17'
    opts_dict['sumfile'] = 'pop.ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = 'pop_ensemble.json'
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = True
    opts_dict['mpi_disable'] = False
    #opts_dict['zscoreonly'] = True
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = './'

    # This creates the dictionary of input arguments
    #print "before parseconfig"
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    # --mpi_disable overrides the mpi_enable default
    if opts_dict['mpi_disable']:
        opts_dict['mpi_enable'] = False

    #still have npert for backwards compatibility - check if it was set
    #and override esize
    if opts_dict['npert'] > 0:
        user_size = opts_dict['npert']
        print(
            'WARNING: User specified value for --npert will override --esize. Please consider using --esize instead of --npert in the future.'
        )
        opts_dict['esize'] = user_size

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object (serial communicator when MPI is disabled)
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(False)

    if opts_dict['jsonfile']:
        # Read in the included var list
        # NOTE(review): if --jsonfile is empty, Var2d/Var3d/str_size are never
        # bound and later uses will raise NameError — confirm intended.
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        # str_size = length of the longest variable name (used for the netCDF
        # fixed-width string dimension).
        # NOTE(review): loop variable 'str' shadows the builtin str here.
        str_size = 0
        for str in Var3d:
            if str_size < len(str):
                str_size = len(str)
        for str in Var2d:
            if str_size < len(str):
                str_size = len(str)

    if me.get_rank() == 0:
        print('STATUS: Running pyEnsSumPop!')
        if verbose:
            print("VERBOSE: opts_dict = ")
            print(opts_dict)

    # Build the candidate input-file list (every rank does this).
    in_files = []
    if (os.path.exists(input_dir)):
        # Pick up the 'nrand' random number of input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict,
                                                  opts_dict['nrand'])
        else:
            # Get the list of files
            in_files_temp = os.listdir(input_dir)
            in_files = sorted(in_files_temp)
        num_files = len(in_files)
    else:
        if me.get_rank() == 0:
            print('ERROR: Input directory: ', input_dir,
                  ' not found => EXITING....')
        sys.exit(2)

    #make sure we have enough files
    files_needed = opts_dict['nmonth'] * opts_dict['esize'] * opts_dict['nyear']
    if (num_files < files_needed):
        if me.get_rank() == 0:
            print(
                'ERROR: Input directory does not contain enough files (must be esize*nyear*nmonth = ',
                files_needed, ' ) and it has only ', num_files, ' files).')
        sys.exit(2)

    #Partition the input file list (ideally we have one processor per month)
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Check the files in the input directory
    full_in_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print('VERBOSE: Input files are:')

    for onefile in in_file_list:
        fname = input_dir + '/' + onefile
        if opts_dict['verbose']:
            print("my_rank = ", me.get_rank(), " ", fname)
        if (os.path.isfile(fname)):
            full_in_files.append(fname)
        else:
            print("ERROR: Could not locate file: " + fname + " => EXITING....")
            # NOTE(review): bare sys.exit() exits with status 0 even though
            # this is an error path — confirm whether sys.exit(2) was intended.
            sys.exit()

    #open just the first file (all procs)
    first_file = nc.Dataset(full_in_files[0], "r")

    # Store dimensions of the input fields
    if (verbose == True) and me.get_rank() == 0:
        print("VERBOSE: Getting spatial dimensions")
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = first_file.dimensions
    ndims = len(input_dims)  # NOTE(review): ndims is unused below

    # Make sure all files have the same dimensions
    # (dims are taken from the first file only; z_t = depth levels,
    # nlat/nlon = horizontal grid)
    if (verbose == True) and me.get_rank() == 0:
        print("VERBOSE: Checking dimensions ...")
    for key in input_dims:
        if key == "z_t":
            nlev = len(input_dims["z_t"])
        elif key == "nlon":
            nlon = len(input_dims["nlon"])
        elif key == "nlat":
            nlat = len(input_dims["nlat"])

    # Rank 0: prepare new summary ensemble file
    this_sumfile = opts_dict["sumfile"]
    if (me.get_rank() == 0):
        # Overwrite any pre-existing summary file
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        if verbose:
            print("VERBOSE: Creating ", this_sumfile, " ...")

        nc_sumfile = nc.Dataset(this_sumfile, "w", format="NETCDF4_CLASSIC")

        # Set dimensions
        if verbose:
            print("VERBOSE: Setting dimensions .....")
        nc_sumfile.createDimension('nlat', nlat)
        nc_sumfile.createDimension('nlon', nlon)
        nc_sumfile.createDimension('nlev', nlev)
        nc_sumfile.createDimension('time', None)  # unlimited time dimension
        nc_sumfile.createDimension('ens_size', opts_dict['esize'])
        nc_sumfile.createDimension('nbin', opts_dict['nbin'])
        nc_sumfile.createDimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.createDimension('nvars3d', len(Var3d))
        nc_sumfile.createDimension('nvars2d', len(Var2d))
        nc_sumfile.createDimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("VERBOSE: Setting global attributes .....")
        nc_sumfile.creation_date = now
        nc_sumfile.title = 'POP verification ensemble summary file'
        nc_sumfile.tag = opts_dict["tag"]
        nc_sumfile.compset = opts_dict["compset"]
        nc_sumfile.resolution = opts_dict["res"]
        nc_sumfile.machine = opts_dict["mach"]

        # Create variables
        if verbose:
            print("VERBOSE: Creating variables .....")
        v_lev = nc_sumfile.createVariable("z_t", 'f', ('nlev', ))
        # Variable-name tables stored as fixed-width char arrays ('S1')
        v_vars = nc_sumfile.createVariable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.createVariable("var3d", 'S1',
                                            ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.createVariable("var2d", 'S1',
                                            ('nvars2d', 'str_size'))
        v_time = nc_sumfile.createVariable("time", 'd', ('time', ))
        v_ens_avg3d = nc_sumfile.createVariable(
            "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.createVariable(
            "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.createVariable(
            "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.createVariable(
            "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_RMSZ = nc_sumfile.createVariable(
            "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))

        # Assign vars, var3d and var2d
        if verbose:
            print("VERBOSE: Assigning vars, var3d, and var2d .....")

        # Pad each variable name with spaces to exactly str_size characters
        # so it fits the fixed-width char arrays above.
        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        all_var_names = list(Var3d)
        all_var_names += Var2d
        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(Var3d)
        for i in range(l_eq):
            tt = list(Var3d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(Var2d)
        for i in range(l_eq):
            tt = list(Var2d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata (depth levels copied from the first file)
        if verbose:
            print("VERBOSE: Assigning time invariant metadata .....")
        vars_dict = first_file.variables
        lev_data = vars_dict["z_t"]
        v_lev[:] = lev_data[:]
    #end of rank 0

    #All:
    # Time-varient metadata: each rank contributes its file's time value,
    # gathered to rank 0 and written to the summary file.
    if verbose:
        if me.get_rank() == 0:
            print("VERBOSE: Assigning time variant metadata .....")
    vars_dict = first_file.variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(), ))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    #Assign zero values to first time slice of RMSZ and avg and stddev for 2d & 3d
    #in case of a calculation problem before finishing
    e_size = opts_dict['esize']
    b_size = opts_dict['nbin']
    z_ens_avg3d = np.zeros((len(Var3d), nlev, nlat, nlon), dtype=np.float32)
    z_ens_stddev3d = np.zeros((len(Var3d), nlev, nlat, nlon), dtype=np.float32)
    z_ens_avg2d = np.zeros((len(Var2d), nlat, nlon), dtype=np.float32)
    z_ens_stddev2d = np.zeros((len(Var2d), nlat, nlon), dtype=np.float32)
    z_RMSZ = np.zeros(((len(Var3d) + len(Var2d)), e_size, b_size),
                      dtype=np.float32)

    #rank 0 (put zero values in summary file)
    if me.get_rank() == 0:
        v_RMSZ[0, :, :, :] = z_RMSZ[:, :, :]
        v_ens_avg3d[0, :, :, :, :] = z_ens_avg3d[:, :, :, :]
        v_ens_stddev3d[0, :, :, :, :] = z_ens_stddev3d[:, :, :, :]
        v_ens_avg2d[0, :, :, :] = z_ens_avg2d[:, :, :]
        v_ens_stddev2d[0, :, :, :] = z_ens_stddev2d[:, :, :]

    #close file[0]
    first_file.close()

    # Calculate RMSZ scores (each rank works on its partition of the files)
    if (verbose == True and me.get_rank() == 0):
        print("VERBOSE: Calculating RMSZ scores .....")

    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d = pyEnsLib.calc_rmsz(
        full_in_files, Var3d, Var2d, opts_dict)

    if (verbose == True and me.get_rank() == 0):
        print("VERBOSE: Finished with RMSZ scores .....")

    # Collect from all processors
    if opts_dict['mpi_enable']:
        # Gather the 3d variable results from all processors to the master processor
        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(
            zmall, me,
            (me.get_size(), len(Var3d) + len(Var2d), len(full_in_files), nbin))
        ens_avg3d = pyEnsLib.gather_npArray_pop(
            ens_avg3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(ens_avg2d, me,
                                                (me.get_size(), len(Var2d),
                                                 (nlat), nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(
            ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(ens_stddev2d, me,
                                                   (me.get_size(), len(Var2d),
                                                    (nlat), nlon))

    # Assign to summary file:
    # NOTE(review): zmall is only defined when mpi_enable is True — with
    # --mpi_disable this rank-0 block would raise NameError; confirm intended.
    if me.get_rank() == 0:
        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]

        print("STATUS: PyEnsSumPop has completed.")
        nc_sumfile.close()
#!/usr/bin/env python from asaptools import simplecomm scomm = simplecomm.create_comm() rank = scomm.get_rank() size = scomm.get_size() if scomm.is_manager(): l = range(10) for i in l: scomm.ration(i) print '{0}/{1}: Sent {2!r}'.format(rank, size, i) for i in range(scomm.get_size() - 1): scomm.ration(None) print '{0}/{1}: Sent None'.format(rank, size) else: i = -1 while i is not None: i = scomm.ration() print '{0}/{1}: Recvd {2!r}'.format(rank, size, i) print '{0}/{1}: Out of loop'.format(rank, size) scomm.sync() if scomm.is_manager():