def testPartitionList(self):
    """Serial and parallel EqualStride partitions of a list must agree."""
    input_data = range(5 + self.rank)
    serial_out = self.scomm.partition(input_data, func=EqualStride())
    parallel_out = self.pcomm.partition(input_data, func=EqualStride())
    info = test_info_msg('partition(list)', input_data, serial_out, parallel_out)
    print(info)
    self.assertEqual(serial_out, parallel_out, info)
def testPartitionListInvolved(self):
    """Serial and parallel involved EqualStride partitions of a list must agree."""
    data = range(5 + self.rank)
    sresult = self.scomm.partition(data, func=EqualStride(), involved=True)
    presult = self.pcomm.partition(data, func=EqualStride(), involved=True)
    msg = test_info_msg('partition(list, T)', data, sresult, presult)
    # Fixed: was a Python 2 'print msg' statement in an otherwise Python 3 file.
    print(msg)
    self.assertEqual(sresult, presult, msg)
def testPartitionArrayInvolved(self):
    """An involved EqualStride partition of an array gives each rank its stride slice."""
    source = np.arange(10) if self.gcomm.is_manager() else None
    result = self.gcomm.partition(source, func=EqualStride(), involved=True)
    # Every rank (manager included) receives elements rank, rank+size, ...
    np.testing.assert_array_equal(result, np.arange(self.rank, 10, self.size))
def testPartitionArrayInvolved(self):
    """An involved EqualStride partition of an array gives each rank its stride slice."""
    if self.gcomm.is_manager():
        data = np.arange(10)
    else:
        data = None
    actual = self.gcomm.partition(data, func=EqualStride(), involved=True)
    expected = np.arange(self.rank, 10, self.size)
    msg = test_info_msg(self.rank, self.size, 'partition(array, T)', data, actual, expected)
    # Fixed: was a Python 2 'print msg' statement.
    print(msg)
    np.testing.assert_array_equal(actual, expected, msg)
def testMonoPartitionListInvolved(self):
    """Involved EqualStride partition across the mono intracommunicator."""
    data = list(range(10 + self.grank)) if self.monocomm.is_manager() else None
    result = self.monocomm.partition(data, func=EqualStride(), involved=True)
    stride = self.monocomm.get_size()
    self.assertEqual(result, list(range(self.rank, 10 + self.color, stride)))
def testMonoPartitionListInvolved(self):
    """Involved EqualStride partition across the mono intracommunicator."""
    if self.monocomm.is_manager():
        # Fixed: wrap range() in list() as in the sibling test; under Python 2
        # range() returned a list, so this preserves the original semantics.
        data = list(range(10 + self.grank))
    else:
        data = None
    actual = self.monocomm.partition(data, func=EqualStride(), involved=True)
    expected = list(range(self.rank, 10 + self.color, self.monocomm.get_size()))
    msg = test_info_msg(self.grank, self.gsize, 'mono.partition(list,T)', data, actual, expected)
    # Fixed: was a Python 2 'print msg' statement.
    print(msg)
    self.assertEqual(actual, expected, msg)
def testPartitionArray(self):
    """Uninvolved EqualStride partition: the manager distributes and keeps no piece."""
    on_manager = self.gcomm.is_manager()
    data = np.arange(10) if on_manager else None
    actual = self.gcomm.partition(data, func=EqualStride())
    if on_manager:
        # The manager hands the data out and receives nothing back.
        self.assertEqual(actual, None)
    else:
        expected = np.arange(self.rank - 1, 10, self.size - 1)
        np.testing.assert_array_equal(actual, expected)
def testMultiPartitionListInvolved(self):
    """Involved EqualStride partition across the multi intercommunicator."""
    data = None
    if self.multicomm.is_manager():
        data = list(range(10 + self.grank))
    result = self.multicomm.partition(data, func=EqualStride(), involved=True)
    stop = 10 + self.rank * len(self.groups)
    expected = list(range(self.color, stop, self.multicomm.get_size()))
    info = test_info_msg(self.grank, self.gsize, 'multi.partition(list,T)', data, result, expected)
    print(info)
    self.assertEqual(result, expected, info)
def testPartitionStrArray(self):
    """Uninvolved EqualStride partition of a string array."""
    letters = list('abcdefghi')
    on_manager = self.gcomm.is_manager()
    data = np.array(letters) if on_manager else None
    actual = self.gcomm.partition(data, func=EqualStride())
    if on_manager:
        # Manager is not involved, so it gets nothing back.
        self.assertEqual(actual, None)
    else:
        np.testing.assert_array_equal(
            actual, np.array(letters[self.rank - 1::self.size - 1]))
def testPartitionArray(self):
    """Uninvolved EqualStride partition of an array: the manager receives no piece."""
    if self.gcomm.is_manager():
        data = np.arange(10)
    else:
        data = None
    actual = self.gcomm.partition(data, func=EqualStride())
    if self.gcomm.is_manager():
        expected = None
    else:
        expected = np.arange(self.rank - 1, 10, self.size - 1)
    msg = test_info_msg(self.rank, self.size, 'partition(array)', data, actual, expected)
    # Fixed: was a Python 2 'print msg' statement.
    print(msg)
    if self.gcomm.is_manager():
        self.assertEqual(actual, expected, msg)
    else:
        np.testing.assert_array_equal(actual, expected, msg)
def testPartitionStrArray(self):
    """Uninvolved EqualStride partition of a string array: the manager receives no piece."""
    indata = list('abcdefghi')
    if self.gcomm.is_manager():
        # Fixed: original had a duplicated assignment 'data = data = np.array(indata)'.
        data = np.array(indata)
    else:
        data = None
    actual = self.gcomm.partition(data, func=EqualStride())
    if self.gcomm.is_manager():
        expected = None
    else:
        expected = np.array(indata[self.rank - 1::self.size - 1])
    msg = test_info_msg(self.rank, self.size, 'partition(string-array)', data, actual, expected)
    # Fixed: was a Python 2 'print msg' statement.
    print(msg)
    if self.gcomm.is_manager():
        self.assertEqual(actual, expected, msg)
    else:
        np.testing.assert_array_equal(actual, expected, msg)
def main(argv):
    """Driver for pyEnsSum: build a CAM verification ensemble summary file.

    Parses command-line options, reads the ensemble history files found in
    --indir, computes per-variable global means across the ensemble, runs the
    PCA preparation, and writes the results to the netCDF summary file named
    by --sumfile.  MPI-parallel via simplecomm when --mpi_enable is set.

    NOTE(review): the original source was collapsed onto single physical lines
    and could not parse; this is a reconstruction with conventional formatting.
    Statement nesting at a few ambiguous points follows the obvious reading --
    confirm against upstream pyEnsSum.py.
    """
    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex= mpi_disable'
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_0'
    opts_dict['compset'] = 'F2000climo'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = True
    opts_dict['mpi_disable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']
    esize = int(opts_dict['esize'])

    if opts_dict['popens']:
        print("ERROR: Please use pyEnsSumPop.py for a POP ensemble (not --popens) => EXITING....")
        sys.exit()

    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']):
        print('ERROR: Please specify --tag, --compset, --mach and --res options => EXITING....')
        sys.exit()

    if opts_dict['mpi_disable']:
        opts_dict['mpi_enable'] = False

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist = []
    inc_varlist = []

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        print('STATUS: Running pyEnsSum.py')
    if me.get_rank() == 0 and verbose:
        print(opts_dict)
        print('STATUS: Ensemble size for summary = ', esize)

    exclude = False
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist = []
            # Read in the excluded or included var list
            ex_varlist, exclude = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ES')
            if exclude == False:
                # The json list was an *include* list, not an exclude list.
                inc_varlist = ex_varlist
                ex_varlist = []

    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude = me.partition(exclude, func=Duplicate(), involved=True)
        if exclude:
            ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)
        else:
            inc_varlist = me.partition(inc_varlist, func=Duplicate(), involved=True)

    in_files = []
    if os.path.exists(input_dir):
        # Get the list of files
        in_files = sorted(os.listdir(input_dir))

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank() == 0 and verbose:
            print('VERBOSE: Number of files in input directory = ', num_files)
        if num_files < esize:
            if me.get_rank() == 0 and verbose:
                print('VERBOSE: Number of files in input directory (', num_files,
                      ') is less than specified ensemble size of ', esize)
            sys.exit(2)
        if num_files > esize:
            if me.get_rank() == 0 and verbose:
                print('VERBOSE: Note that the number of files in ', input_dir,
                      'is greater than specified ensemble size of ', esize,
                      '\nwill just use the first ', esize, 'files')
    else:
        if me.get_rank() == 0:
            print('ERROR: Input directory: ', input_dir, ' not found')
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'], opts_dict['regx'])
        in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
        if me.get_rank() == 0 and verbose:
            print('VERBOSE: in_files = ', in_files)

    # Check full file names in input directory (don't open yet)
    full_in_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print('VERBOSE: Input files are: ')
    for onefile in in_files[0:esize]:
        fname = input_dir + '/' + onefile
        if me.get_rank() == 0 and opts_dict['verbose']:
            print(fname)
        if os.path.isfile(fname):
            full_in_files.append(fname)
        else:
            if me.get_rank() == 0:
                print("ERROR: Could not locate file ", fname, " => EXITING....")
            sys.exit()

    # open just the first file
    first_file = nc.Dataset(full_in_files[0], "r")

    # Store dimensions of the input fields
    if me.get_rank() == 0 and verbose:
        print("VERBOSE: Getting spatial dimensions")
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''

    # Look at first file and get dims
    input_dims = first_file.dimensions
    for key in input_dims:
        if key == "lev":
            nlev = len(input_dims["lev"])
        elif key == "ilev":
            nilev = len(input_dims["ilev"])
        elif key == "ncol":
            ncol = len(input_dims["ncol"])
        elif (key == "nlon") or (key == "lon"):
            nlon = len(input_dims[key])
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = len(input_dims[key])
            latkey = key

    if nlev == -1:
        if me.get_rank() == 0:
            print("ERROR: could not locate a valid dimension (lev) => EXITING....")
        sys.exit()

    if (ncol == -1) and ((nlat == -1) or (nlon == -1)):
        if me.get_rank() == 0:
            print("ERROR: Need either lat/lon or ncol => EXITING....")
        sys.exit()

    # Check if this is SE or FV data
    is_SE = (ncol != -1)

    # output dimensions
    if me.get_rank() == 0 and verbose:
        print('lev = ', nlev)
        if is_SE:
            print('ncol = ', ncol)
        else:
            print('nlat = ', nlat)
            print('nlon = ', nlon)

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = first_file.variables

    # Remove the excluded variables (specified in json file) from variable dictionary
    if exclude:
        # NOTE(review): no .copy() here (unlike the else branch), so this
        # mutates first_file.variables directly - confirm intent upstream.
        vars_dict = vars_dict_all
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    # Given an included var list, remove all the variables that are not on the list
    else:
        vars_dict = vars_dict_all.copy()
        for k, v in vars_dict_all.items():
            if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
                del vars_dict[k]

    num_vars = len(vars_dict)

    str_size = 0
    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.items():
        vr = len(v.dimensions)  # num dimensions
        vs = v.shape            # dim values
        is_2d = False
        is_3d = False
        if is_SE:
            # (time, lev, ncol) or (time, ncol)
            if (vr == 2) and (vs[1] == ncol):
                is_2d = True
                num_2d += 1
            elif (vr == 3) and (vs[2] == ncol and vs[1] == nlev):
                is_3d = True
                num_3d += 1
        else:
            # (time, lev, nlat, nlon) or (time, nlat, nlon)
            if (vr == 3) and (vs[1] == nlat and vs[2] == nlon):
                is_2d = True
                num_2d += 1
            elif (vr == 4) and (vs[2] == nlat and vs[3] == nlon
                                and (vs[1] == nlev or vs[1] == nilev)):
                is_3d = True
                num_3d += 1
        if is_3d:
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif is_2d:
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    if me.get_rank() == 0 and verbose:
        print('VERBOSE: Number of variables found: ', num_3d + num_2d)
        print('VERBOSE: 3D variables: ' + str(num_3d) + ', 2D variables: ' + str(num_2d))

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    if esize < num_2d + num_3d:
        if me.get_rank() == 0:
            print("************************************************************************************************************************************")
            print(" ERROR: the total number of 3D and 2D variables " + str(num_2d + num_3d)
                  + " is larger than the number of ensemble files " + str(esize))
            print(" Cannot generate ensemble summary file, please remove more variables from your included variable list,")
            print(" or add more variables in your excluded variable list => EXITING....")
            print("************************************************************************************************************************************")
        sys.exit()

    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    # Rank 0 - Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    # check if directory is valid
    sum_dir = os.path.dirname(this_sumfile)
    if len(sum_dir) == 0:
        sum_dir = '.'
    if not os.path.exists(sum_dir):
        if me.get_rank() == 0:
            print('ERROR: Summary file directory: ', sum_dir, ' not found')
        sys.exit(2)

    if me.get_rank() == 0:
        if verbose:
            print("VERBOSE: Creating ", this_sumfile, " ...")
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        nc_sumfile = nc.Dataset(this_sumfile, "w", format="NETCDF4_CLASSIC")

        # Set dimensions
        if verbose:
            print("VERBOSE: Setting dimensions .....")
        if is_SE:
            nc_sumfile.createDimension('ncol', ncol)
        else:
            nc_sumfile.createDimension('nlat', nlat)
            nc_sumfile.createDimension('nlon', nlon)
        nc_sumfile.createDimension('nlev', nlev)
        nc_sumfile.createDimension('ens_size', esize)
        nc_sumfile.createDimension('nvars', num_3d + num_2d)
        nc_sumfile.createDimension('nvars3d', num_3d)
        nc_sumfile.createDimension('nvars2d', num_2d)
        nc_sumfile.createDimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("VERBOSE: Setting global attributes .....")
        nc_sumfile.creation_date = now
        nc_sumfile.title = 'CAM verification ensemble summary file'
        nc_sumfile.tag = opts_dict["tag"]
        nc_sumfile.compset = opts_dict["compset"]
        nc_sumfile.resolution = opts_dict["res"]
        nc_sumfile.machine = opts_dict["mach"]

        # Create variables
        if verbose:
            print("VERBOSE: Creating variables .....")
        v_lev = nc_sumfile.createVariable("lev", 'f8', ('nlev',))
        v_vars = nc_sumfile.createVariable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.createVariable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.createVariable("var2d", 'S1', ('nvars2d', 'str_size'))
        v_gm = nc_sumfile.createVariable("global_mean", 'f8', ('nvars', 'ens_size'))
        v_standardized_gm = nc_sumfile.createVariable("standardized_gm", 'f8', ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.createVariable('loadings_gm', 'f8', ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.createVariable('mu_gm', 'f8', ('nvars',))
        v_sigma_gm = nc_sumfile.createVariable('sigma_gm', 'f8', ('nvars',))
        v_sigma_scores_gm = nc_sumfile.createVariable('sigma_scores_gm', 'f8', ('nvars',))

        # Assign vars, var3d and var2d
        if verbose:
            print("VERBOSE: Assigning vars, var3d, and var2d .....")
        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        # Pad every name with blanks to the common str_size width.
        for name in all_var_names:
            tt = list(name)
            if len(tt) < str_size:
                tt.extend(list(' ') * (str_size - len(tt)))
            eq_all_var_names.append(tt)
        for name in d3_var_names:
            tt = list(name)
            if len(tt) < str_size:
                tt.extend(list(' ') * (str_size - len(tt)))
            eq_d3_var_names.append(tt)
        for name in d2_var_names:
            tt = list(name)
            if len(tt) < str_size:
                tt.extend(list(' ') * (str_size - len(tt)))
            eq_d2_var_names.append(tt)
        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if verbose:
            print("VERBOSE: Assigning time invariant metadata .....")
        lev_data = first_file.variables["lev"]
        v_lev[:] = lev_data[:]
    # end of rank=0 work

    # All:
    tslice = opts_dict['tslice']
    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True)
        var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # close first_file
    first_file.close()

    # Calculate global means #
    if me.get_rank() == 0 and verbose:
        print("VERBOSE: Calculating global means .....")
    if not opts_dict['cumul']:
        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(
            full_in_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict)
    if me.get_rank() == 0 and verbose:
        print("VERBOSE: Finished calculating global means .....")

    # gather to rank = 0
    if opts_dict['mpi_enable']:
        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)
            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index,
                                  (len(d3_var_names), len(full_in_files)))
            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)
            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index,
                                  (len(d2_var_names), len(full_in_files)))
            # gather variables to exclude (in pre_pca)
            var_list = gather_list(var_list, me)
        else:
            # NOTE(review): temp1/temp2 are not defined anywhere in this
            # function - this cumul branch would raise NameError. TODO confirm
            # against upstream before relying on --cumul.
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names)))

    # rank =0 : complete calculations for summary file
    if me.get_rank() == 0:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
        else:
            gmall = np.transpose(gmall[:, :])

        # PCA prep and calculation
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm, b_exit = \
            pyEnsLib.pre_PCA(gmall, all_var_names, var_list, me)

        # if PCA calc encounters an error, then remove the summary file and exit
        if b_exit:
            nc_sumfile.close()
            os.unlink(this_sumfile)
            print("STATUS: Summary could not be created.")
            sys.exit(2)

        v_gm[:, :] = gmall[:, :]
        v_standardized_gm[:, :] = standardized_global_mean[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:]
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        print("STATUS: Summary file is complete.")
        nc_sumfile.close()
def main(argv):
    """Driver for pyEnsSumPop: build a POP verification ensemble summary file.

    Parses command-line options, opens this rank's share of the POP history
    files in --indir, computes global means and RMSZ scores across the
    ensemble, and writes the results to the summary file named by --sumfile.

    NOTE(review): the original source was collapsed onto single physical lines
    (and written in Python 2); this is a reconstruction with conventional
    formatting and Python 3 print calls. The extent of the rank-0 file-creation
    block at a few points follows the obvious reading - confirm against
    upstream pyEnsSumPop.py.
    """
    print('Running pyEnsSumPop!')

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable zscoreonly nrand= rand seq= jsondir='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm1_2_0'
    opts_dict['compset'] = 'FC5'
    opts_dict['mach'] = 'yellowstone'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 3
    opts_dict['nmonth'] = 12
    opts_dict['npert'] = 40
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'ne30_ne30'
    opts_dict['sumfile'] = 'ens.pop.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['zscoreonly'] = False
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = '/glade/scratch/haiyingx/'

    # This creates the dictionary of input arguments
    print("before parseconfig")
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)
    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']
    if verbose:
        print(opts_dict)

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if opts_dict['jsonfile']:
        # Read in the included var list
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        # Longest variable name decides the string dimension of the file.
        # (Loop variable renamed from 'str' to avoid shadowing the builtin.)
        str_size = 0
        for name in Var3d:
            if str_size < len(name):
                str_size = len(name)
        for name in Var2d:
            if str_size < len(name):
                str_size = len(name)

    in_files = []
    if os.path.exists(input_dir):
        # Pick up the 'nrand' random number of input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict, opts_dict['nrand'])
        else:
            # Get the list of files
            in_files = sorted(os.listdir(input_dir))
            # Make sure we have enough
            num_files = len(in_files)
    else:
        print('Input directory: ', input_dir, ' not found')
        sys.exit(2)

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Partition the input file list
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Open the files in the input directory
    o_files = []
    for onefile in in_file_list:
        if os.path.isfile(input_dir + '/' + onefile):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            print("COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING....")
            sys.exit()
    print(in_file_list)

    # Store dimensions of the input fields
    if verbose:
        print("Getting spatial dimensions")
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    for key in input_dims:
        if key == "z_t":
            nlev = input_dims["z_t"]
        elif key == "nlon":
            nlon = input_dims["nlon"]
        elif key == "nlat":
            nlat = input_dims["nlat"]

    # Make sure all files have the same dimensions
    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (nlev != int(input_dims["z_t"]) or nlat != int(input_dims["nlat"])
                or nlon != int(input_dims["nlon"])):
            print("Dimension mismatch between ", in_file_list[0], 'and',
                  in_file_list[count], '!!!')
            sys.exit()

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]
    if verbose:
        print("Creating ", this_sumfile, " ...")

    if me.get_rank() == 0:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if verbose:
            print("Setting dimensions .....")
        nc_sumfile.create_dimension('nlat', nlat)
        nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('time', None)
        nc_sumfile.create_dimension('ens_size', opts_dict['npert'])
        nc_sumfile.create_dimension('nbin', opts_dict['nbin'])
        nc_sumfile.create_dimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.create_dimension('nvars3d', len(Var3d))
        nc_sumfile.create_dimension('nvars2d', len(Var2d))
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("Setting global attributes .....")
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'POP verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if verbose:
            print("Creating variables .....")
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        v_time = nc_sumfile.create_variable("time", 'd', ('time',))
        v_ens_avg3d = nc_sumfile.create_variable(
            "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.create_variable(
            "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.create_variable(
            "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.create_variable(
            "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_RMSZ = nc_sumfile.create_variable(
            "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))
        if not opts_dict['zscoreonly']:
            v_gm = nc_sumfile.create_variable(
                "global_mean", 'f', ('time', 'nvars', 'ens_size'))

        # Assign vars, var3d and var2d
        if verbose:
            print("Assigning vars, var3d, and var2d .....")
        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        all_var_names = list(Var3d)
        all_var_names += Var2d
        # Pad every name with blanks to the common str_size width.
        for name in all_var_names:
            tt = list(name)
            if len(tt) < str_size:
                tt.extend(list(' ') * (str_size - len(tt)))
            eq_all_var_names.append(tt)
        for name in Var3d:
            tt = list(name)
            if len(tt) < str_size:
                tt.extend(list(' ') * (str_size - len(tt)))
            eq_d3_var_names.append(tt)
        for name in Var2d:
            tt = list(name)
            if len(tt) < str_size:
                tt.extend(list(' ') * (str_size - len(tt)))
            eq_d2_var_names.append(tt)
        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if verbose:
            print("Assigning time invariant metadata .....")
        vars_dict = o_files[0].variables
        lev_data = vars_dict["z_t"]
        v_lev = lev_data

    # Time-varient metadata
    if verbose:
        print("Assigning time variant metadata .....")
    vars_dict = o_files[0].variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(),))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    # Calculate global mean, average, standard deviation
    if verbose:
        print("Calculating global means .....")
    is_SE = False
    tslice = 0
    if not opts_dict['zscoreonly']:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
            o_files, Var3d, Var2d, is_SE, False, opts_dict)
    if verbose:
        print("Finish calculating global means .....")

    # Calculate RMSZ scores
    if verbose:
        print("Calculating RMSZ scores .....")
    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = \
        pyEnsLib.calc_rmsz(o_files, Var3d, Var2d, is_SE, opts_dict)

    # Collect from all processors
    if opts_dict['mpi_enable']:
        # Gather the 3d variable results from all processors to the master processor
        # Gather global means 3d results
        if not opts_dict['zscoreonly']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(o_files)))
        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(
            zmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(o_files), nbin))
        ens_avg3d = pyEnsLib.gather_npArray_pop(
            ens_avg3d, me, (me.get_size(), len(Var3d), nlev, nlat, nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(
            ens_avg2d, me, (me.get_size(), len(Var2d), nlat, nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(
            ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, nlat, nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(
            ens_stddev2d, me, (me.get_size(), len(Var2d), nlat, nlon))

    # Assign to file:
    if me.get_rank() == 0:
        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]
        if not opts_dict['zscoreonly']:
            v_gm[:, :, :] = gmall[:, :, :]
        print("All done")
v_var2d[:] = eq_d2_var_names[:] # Time-invarient metadata if me.get_rank() == 0 and (verbose == True): print "Assigning time invariant metadata ....." lev_data = vars_dict["lev"] v_lev = lev_data # Form ensembles, each missing one member; compute RMSZs and global means #for each variable, we also do max norm also (currently done in pyStats) tslice = opts_dict['tslice'] if not opts_dict['cumul']: # Partition the var list var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True) var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True) else: var3_list_loc=d3_var_names var2_list_loc=d2_var_names # Calculate global means # if me.get_rank() == 0 and (verbose == True): print "Calculating global means ....." if not opts_dict['cumul']: gm3d,gm2d,var_list = pyEnsLib.generate_global_mean_for_summary(o_files,var3_list_loc,var2_list_loc , is_SE, False,opts_dict) if me.get_rank() == 0 and (verbose == True): print "Finish calculating global means ....." # Calculate RMSZ scores if (not opts_dict['gmonly']) | (opts_dict['cumul']):
def main(argv):
    """pyEnsSumPop driver: build a POP ensemble summary NetCDF file.

    Parses command-line options, partitions the ensemble member history
    files across MPI ranks (ideally one rank per month), computes RMSZ
    scores and ensemble averages / standard deviations, gathers the
    results on rank 0, and writes them to the summary file.

    :param argv: command-line argument list (sys.argv[1:])
    """
    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable mpi_disable nrand= rand seq= jsondir= esize='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_1_0'
    opts_dict['compset'] = 'G'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 1
    opts_dict['nmonth'] = 12
    opts_dict['esize'] = 40
    opts_dict['npert'] = 0  #for backwards compatible
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'T62_g17'
    opts_dict['sumfile'] = 'pop.ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = 'pop_ensemble.json'
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = True
    opts_dict['mpi_disable'] = False
    #opts_dict['zscoreonly'] = True
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = './'

    # This creates the dictionary of input arguments
    #print "before parseconfig"
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if opts_dict['mpi_disable']:
        opts_dict['mpi_enable'] = False

    #still have npert for backwards compatibility - check if it was set
    #and override esize
    if opts_dict['npert'] > 0:
        user_size = opts_dict['npert']
        print(
            'WARNING: User specified value for --npert will override --esize. Please consider using --esize instead of --npert in the future.'
        )
        opts_dict['esize'] = user_size

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(False)

    if opts_dict['jsonfile']:
        # Read in the included var list; str_size becomes the length of the
        # longest variable name (used for the NetCDF character dimension).
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        str_size = 0
        # NOTE(review): the loop variable shadows the builtin 'str' — kept as-is.
        for str in Var3d:
            if str_size < len(str):
                str_size = len(str)
        for str in Var2d:
            if str_size < len(str):
                str_size = len(str)

    if me.get_rank() == 0:
        print('STATUS: Running pyEnsSumPop!')
        if verbose:
            print("VERBOSE: opts_dict = ")
            print(opts_dict)

    in_files = []
    if (os.path.exists(input_dir)):
        # Pick up the 'nrand' random number of input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict,
                                                  opts_dict['nrand'])
        else:
            # Get the list of files
            in_files_temp = os.listdir(input_dir)
            in_files = sorted(in_files_temp)
        num_files = len(in_files)
    else:
        if me.get_rank() == 0:
            print('ERROR: Input directory: ', input_dir,
                  ' not found => EXITING....')
        sys.exit(2)

    #make sure we have enough files
    files_needed = opts_dict['nmonth'] * opts_dict['esize'] * opts_dict['nyear']
    if (num_files < files_needed):
        if me.get_rank() == 0:
            print(
                'ERROR: Input directory does not contain enough files (must be esize*nyear*nmonth = ',
                files_needed, ' ) and it has only ', num_files, ' files).')
        sys.exit(2)

    #Partition the input file list (ideally we have one processor per month)
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Check the files in the input directory
    full_in_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print('VERBOSE: Input files are:')

    for onefile in in_file_list:
        fname = input_dir + '/' + onefile
        if opts_dict['verbose']:
            print("my_rank = ", me.get_rank(), " ", fname)
        if (os.path.isfile(fname)):
            full_in_files.append(fname)
        else:
            print("ERROR: Could not locate file: " + fname + " => EXITING....")
            sys.exit()

    #open just the first file (all procs)
    first_file = nc.Dataset(full_in_files[0], "r")

    # Store dimensions of the input fields
    if (verbose == True) and me.get_rank() == 0:
        print("VERBOSE: Getting spatial dimensions")
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = first_file.dimensions
    ndims = len(input_dims)

    # Make sure all files have the same dimensions
    if (verbose == True) and me.get_rank() == 0:
        print("VERBOSE: Checking dimensions ...")
    for key in input_dims:
        if key == "z_t":
            nlev = len(input_dims["z_t"])
        elif key == "nlon":
            nlon = len(input_dims["nlon"])
        elif key == "nlat":
            nlat = len(input_dims["nlat"])

    # Rank 0: prepare new summary ensemble file
    this_sumfile = opts_dict["sumfile"]
    if (me.get_rank() == 0):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        if verbose:
            print("VERBOSE: Creating ", this_sumfile, " ...")
        nc_sumfile = nc.Dataset(this_sumfile, "w", format="NETCDF4_CLASSIC")

        # Set dimensions
        if verbose:
            print("VERBOSE: Setting dimensions .....")
        nc_sumfile.createDimension('nlat', nlat)
        nc_sumfile.createDimension('nlon', nlon)
        nc_sumfile.createDimension('nlev', nlev)
        nc_sumfile.createDimension('time', None)
        nc_sumfile.createDimension('ens_size', opts_dict['esize'])
        nc_sumfile.createDimension('nbin', opts_dict['nbin'])
        nc_sumfile.createDimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.createDimension('nvars3d', len(Var3d))
        nc_sumfile.createDimension('nvars2d', len(Var2d))
        nc_sumfile.createDimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("VERBOSE: Setting global attributes .....")
        nc_sumfile.creation_date = now
        nc_sumfile.title = 'POP verification ensemble summary file'
        nc_sumfile.tag = opts_dict["tag"]
        nc_sumfile.compset = opts_dict["compset"]
        nc_sumfile.resolution = opts_dict["res"]
        nc_sumfile.machine = opts_dict["mach"]

        # Create variables
        if verbose:
            print("VERBOSE: Creating variables .....")
        v_lev = nc_sumfile.createVariable("z_t", 'f', ('nlev', ))
        v_vars = nc_sumfile.createVariable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.createVariable("var3d", 'S1',
                                            ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.createVariable("var2d", 'S1',
                                            ('nvars2d', 'str_size'))
        v_time = nc_sumfile.createVariable("time", 'd', ('time', ))
        v_ens_avg3d = nc_sumfile.createVariable(
            "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.createVariable(
            "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.createVariable(
            "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.createVariable(
            "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_RMSZ = nc_sumfile.createVariable(
            "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))

        # Assign vars, var3d and var2d
        if verbose:
            print("VERBOSE: Assigning vars, var3d, and var2d .....")

        # Pad every variable name with blanks to str_size characters so the
        # names fit the fixed-width 'S1' character arrays.
        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        all_var_names = list(Var3d)
        all_var_names += Var2d
        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(Var3d)
        for i in range(l_eq):
            tt = list(Var3d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(Var2d)
        for i in range(l_eq):
            tt = list(Var2d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if verbose:
            print("VERBOSE: Assigning time invariant metadata .....")
        vars_dict = first_file.variables
        lev_data = vars_dict["z_t"]
        v_lev[:] = lev_data[:]
    #end of rank 0

    #All:
    # Time-varient metadata
    if verbose:
        if me.get_rank() == 0:
            print("VERBOSE: Assigning time variant metadata .....")
    vars_dict = first_file.variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(), ))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    #Assign zero values to first time slice of RMSZ and avg and stddev for 2d & 3d
    #in case of a calculation problem before finishing
    e_size = opts_dict['esize']
    b_size = opts_dict['nbin']
    z_ens_avg3d = np.zeros((len(Var3d), nlev, nlat, nlon), dtype=np.float32)
    z_ens_stddev3d = np.zeros((len(Var3d), nlev, nlat, nlon), dtype=np.float32)
    z_ens_avg2d = np.zeros((len(Var2d), nlat, nlon), dtype=np.float32)
    z_ens_stddev2d = np.zeros((len(Var2d), nlat, nlon), dtype=np.float32)
    z_RMSZ = np.zeros(((len(Var3d) + len(Var2d)), e_size, b_size),
                      dtype=np.float32)

    #rank 0 (put zero values in summary file)
    if me.get_rank() == 0:
        v_RMSZ[0, :, :, :] = z_RMSZ[:, :, :]
        v_ens_avg3d[0, :, :, :, :] = z_ens_avg3d[:, :, :, :]
        v_ens_stddev3d[0, :, :, :, :] = z_ens_stddev3d[:, :, :, :]
        v_ens_avg2d[0, :, :, :] = z_ens_avg2d[:, :, :]
        v_ens_stddev2d[0, :, :, :] = z_ens_stddev2d[:, :, :]

    #close file[0]
    first_file.close()

    # Calculate RMSZ scores
    if (verbose == True and me.get_rank() == 0):
        print("VERBOSE: Calculating RMSZ scores .....")
    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d = pyEnsLib.calc_rmsz(
        full_in_files, Var3d, Var2d, opts_dict)
    if (verbose == True and me.get_rank() == 0):
        print("VERBOSE: Finished with RMSZ scores .....")

    # Collect from all processors
    if opts_dict['mpi_enable']:
        # Gather the 3d variable results from all processors to the master processor
        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(
            zmall, me,
            (me.get_size(), len(Var3d) + len(Var2d), len(full_in_files), nbin))
        ens_avg3d = pyEnsLib.gather_npArray_pop(
            ens_avg3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(ens_avg2d, me,
                                                (me.get_size(), len(Var2d),
                                                 (nlat), nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(
            ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(ens_stddev2d, me,
                                                   (me.get_size(), len(Var2d),
                                                    (nlat), nlon))

    # Assign to summary file (only rank 0 holds the open NetCDF handle):
    if me.get_rank() == 0:
        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]
        print("STATUS: PyEnsSumPop has completed.")
        nc_sumfile.close()
def main(argv):
    """pyCECT driver (Python 2): validate new run(s) against an ensemble summary.

    For POP ensembles (--popens) computes and writes raw Z-scores; for CAM
    ensembles compares RMSZ scores, global means, and PCA scores against the
    distributions stored in the ensemble summary file.

    :param argv: command-line argument list (sys.argv[1:])
    """
    # Get command line stuff and store in a dictionary
    s = """verbose sumfile= indir= input_globs= tslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVarTest popens jsonfile= mpi_enable nbin= minrange= maxrange= outfile= casejson= npick= pepsi_gm pop_tol= web_enabled pop_threshold= prn_std_mean fIndex= lev= eet= json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['prn_std_mean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']

    #some mods for POP-ECT
    if popens == True:
        opts_dict['tslice'] = 0
        opts_dict['numRunFile'] = 1
        opts_dict['eet'] = 0
        opts_dict['mpi_enable'] = False
    #print opts_dict

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']
    if me.get_rank() == 0:
        print '--------pyCECT--------'
        print ' '
        print dt.strftime("%A, %d. %B %Y %I:%M%p")
        print ' '
        if not opts_dict['web_enabled']:
            print 'Ensemble summary file = ' + opts_dict['sumfile']
        print ' '
        print 'Testcase file directory = ' + opts_dict['indir']
        print ' '
        print ' '

    # Ensure sensible EET value
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    ifiles = []
    in_files = []
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
            in_files_first = result['not_pick_files']
            in_files = random.sample(in_files_first, opts_dict['npick'])
            print 'Testcase files:'
            print '\n'.join(in_files)
    elif opts_dict['json_case']:
        # Select input files by the case names listed in a json file.
        json_file = opts_dict['json_case']
        if (os.path.exists(json_file)):
            fd = open(json_file)
            metainfo = json.load(fd)
            if 'CaseName' in metainfo:
                casename = metainfo['CaseName']
                if (os.path.exists(opts_dict['indir'])):
                    for name in casename:
                        wildname = '*.' + name + '.*'
                        full_glob_str = os.path.join(opts_dict['indir'],
                                                     wildname)
                        glob_file = glob.glob(full_glob_str)
                        in_files.extend(glob_file)
        else:
            print "ERROR: " + opts_dict['json_case'] + " does not exist."
            sys.exit()
        print "in_files=", in_files
    else:
        wildname = '*' + str(opts_dict['input_globs']) + '*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            glob_files = glob.glob(full_glob_str)
            in_files.extend(glob_files)
            num_file = len(in_files)
            if num_file == 0:
                print "ERROR: no matching files for wildcard=" + wildname + " found in specified --indir"
                sys.exit()
            else:
                print "Found " + str(
                    num_file) + " matching files in specified --indir"
            if opts_dict['numRunFile'] > num_file:
                print "ERROR: more files needed (" + str(
                    opts_dict['numRunFile']
                ) + ") than available in the indir (" + str(num_file) + ")."
                sys.exit()

    #in_files_temp=os.listdir(opts_dict['indir'])
    in_files.sort()
    #print in_files

    if popens:
        #Partition the input file list
        in_files_list = me.partition(in_files, func=EqualStride(),
                                     involved=True)
    else:
        # Random pick non pop files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)
    #in_files_list=in_files

    # Open every selected test file.
    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(Nio.open_file(frun_temp, "r"))
        else:
            print "ERROR: COULD NOT LOCATE FILE " + frun_temp
            sys.exit()

    if opts_dict['web_enabled']:
        # Locate the matching ensemble summary file under the validation tree.
        if len(opts_dict['sumfile']) == 0:
            opts_dict[
                'sumfile'] = '/glade/p/cesmdata/cseg/inputdata/validation/'
        opts_dict['sumfile'], machineid, compiler = pyEnsLib.search_sumfile(
            opts_dict, ifiles)
        if len(machineid) != 0 and len(compiler) != 0:
            print ' '
            print 'Validation file : machineid = ' + machineid + ', compiler = ' + compiler
            print 'Found summary file : ' + opts_dict['sumfile']
            print ' '
        else:
            print 'Warning: machine and compiler are unknown'

    if popens:
        # Read in the included var list
        if not os.path.exists(opts_dict['jsonfile']):
            print "ERROR: POP-ECT requires the specification of a valid json file via --jsonfile."
            sys.exit()
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print ' '
        print 'Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol'])
        print 'ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold'])
        zmall, n_timeslice = pyEnsLib.pop_compare_raw_score(
            opts_dict, ifiles, me.get_rank(), Var3d, Var2d)
        #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0)
        np.set_printoptions(threshold=np.nan)
        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(
                zmall, me,
                (me.get_size(), len(Var3d) + len(Var2d), len(ifiles),
                 opts_dict['nbin']))
        if me.get_rank() == 0:
            fout = open(opts_dict['outfile'], "w")
            for i in range(me.get_size()):
                for j in zmall[i]:
                    np.savetxt(fout, j, fmt='%-7.2e')
    #cam
    else:
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum, std_gm = pyEnsLib.read_ensemble_summary(
            opts_dict['sumfile'])

        if len(ens_rmsz) == 0:
            gmonly = True

        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}
        if not gmonly:
            for k, v in ens_rmsz.iteritems():
                pyEnsLib.addvariables(variables, k, 'zscoreRange', v)
        for k, v in ens_gm.iteritems():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)

        # Get 3d variable name list and 2d variable name list separately
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

        if (is_SE ^ is_SE_sum):
            print 'Warning: please note the ensemble summary file is different from the testing files, they use different grids'

        # Compare the new run and the ensemble summary file to get rmsz score
        results = {}
        countzscore = np.zeros(len(ifiles), dtype=np.int32)
        countgm = np.zeros(len(ifiles), dtype=np.int32)
        if not gmonly:
            for fcount, fid in enumerate(ifiles):
                otimeSeries = fid.variables
                for var_name in ens_var_name:
                    orig = otimeSeries[var_name]
                    Zscore, has_zscore = pyEnsLib.calculate_raw_score(
                        var_name, orig[opts_dict['tslice']], npts3d, npts2d,
                        ens_avg, ens_stddev, is_SE, opts_dict, 0, 0, 0)
                    if has_zscore:
                        # Add the new run rmsz zscore to the dictionary "results"
                        pyEnsLib.addresults(results, 'zscore', Zscore,
                                            var_name, 'f' + str(fcount))

            # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
            for fcount, fid in enumerate(ifiles):
                countzscore[fcount] = pyEnsLib.evaluatestatus(
                    'zscore', 'zscoreRange', variables, 'ens', results,
                    'f' + str(fcount))

        # Calculate the new run global mean
        mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary(
            ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'],
            opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i],
                                    ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount, fid in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange',
                                                      variables, 'gm', results,
                                                      'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores, var_list, comp_std_gm = pyEnsLib.standardized(
            means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict,
            ens_avg, me)
        run_index, decision = pyEnsLib.comparePCAscores(
            ifiles, new_scores, sigma_scores_gm, opts_dict, me)

        # If there is failure, plot out standardized mean and compared standardized mean in box plots
        if opts_dict['prn_std_mean'] and decision == 'FAILED':
            import seaborn as sns
            # Bucket variables by how many of the run values fall outside the
            # ensemble 99% band (c/d counters) or one-sided quartile bands (p/q).
            category = {
                "all_outside99": [],
                "two_outside99": [],
                "one_outside99": [],
                "all_oneside_outside1QR": []
            }
            b = list(pyEnsLib.chunk(ens_var_name, 10))
            for f, alist in enumerate(b):
                for fc, avar in enumerate(alist):
                    dist_995 = np.percentile(std_gm[avar], 99.5)
                    dist_75 = np.percentile(std_gm[avar], 75)
                    dist_25 = np.percentile(std_gm[avar], 25)
                    dist_05 = np.percentile(std_gm[avar], 0.5)
                    c = 0
                    d = 0
                    p = 0
                    q = 0
                    for i in range(comp_std_gm[f + fc].size):
                        if comp_std_gm[f + fc][i] > dist_995:
                            c = c + 1
                        elif comp_std_gm[f + fc][i] < dist_05:
                            d = d + 1
                        elif (comp_std_gm[f + fc][i] < dist_995
                              and comp_std_gm[f + fc][i] > dist_75):
                            p = p + 1
                        elif (comp_std_gm[f + fc][i] > dist_05
                              and comp_std_gm[f + fc][i] < dist_25):
                            q = q + 1
                    if c == 3 or d == 3:
                        category["all_outside99"].append((avar, f + fc))
                    elif c == 2 or d == 2:
                        category["two_outside99"].append((avar, f + fc))
                    elif c == 1 or d == 1:
                        category["one_outside99"].append((avar, f + fc))
                    if p == 3 or q == 3:
                        category["all_oneside_outside1QR"].append(
                            (avar, f + fc))
            part_name = opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name = opts_dict['indir'].split('/')[-2]
            for key in sorted(category):
                list_array = []
                list_array2 = []
                list_var = []
                value = category[key]
                print "value len=", key, len(value)
                for each_var in value:
                    list_array.append(std_gm[each_var[0]])
                    list_array2.append(comp_std_gm[each_var[1]])
                    list_var.append(each_var[0])
                if len(value) != 0:
                    ax = sns.boxplot(data=list_array,
                                     whis=[0.5, 99.5],
                                     fliersize=0.0)
                    sns.stripplot(data=list_array2, jitter=True, color="r")
                    sns.plt.xticks(range(len(list_array)),
                                   list_var,
                                   fontsize=8,
                                   rotation=-45)
                    if decision == 'FAILED':
                        sns.plt.savefig(part_name + "_" + key + "_fail.png")
                    else:
                        sns.plt.savefig(part_name + "_" + key + "_pass.png")
                    sns.plt.clf()
        # NOTE(review): the block below is commented-out code kept as a
        # triple-quoted string by the original authors.
        '''
        if len(run_index)>0:
            json_file=opts_dict['json_case']
            if (os.path.exists(json_file)):
                fd=open(json_file)
                metainfo=json.load(fd)
                caseindex=metainfo['CaseIndex']
                enspath=str(metainfo['EnsPath'][0])
                #print caseindex
                if (os.path.exists(enspath)):
                    i=0
                    comp_file=[]
                    search = '\.[0-9]{3}\.'
                    for name in in_files_list:
                        s=re.search(search,name)
                        in_files_index=s.group(0)
                        if in_files_index[1:4] in caseindex:
                            ens_index=str(caseindex[in_files_index[1:4]])
                            wildname='*.'+ens_index+'.*'
                            full_glob_str=os.path.join(enspath,wildname)
                            glob_file=glob.glob(full_glob_str)
                            comp_file.extend(glob_file)
                    print "comp_file=",comp_file
                    pyEnsLib.plot_variable(in_files_list,comp_file,opts_dict,var_list,run_index,me)
        '''

        # Print out
        if opts_dict['printVarTest']:
            print '*********************************************** '
            print 'Variable-based testing (for reference only - not used to determine pass/fail)'
            print '*********************************************** '
            for fcount, fid in enumerate(ifiles):
                print ' '
                print 'Run ' + str(fcount + 1) + ":"
                print ' '
                if not gmonly:
                    print '***' + str(countzscore[fcount]), " of " + str(
                        len(ens_var_name)
                    ) + ' variables are outside of ensemble RMSZ distribution***'
                    pyEnsLib.printsummary(results, 'ens', 'zscore',
                                          'zscoreRange', (fcount), variables,
                                          'RMSZ')
                    print ' '
                print '***' + str(countgm[fcount]), " of " + str(
                    len(ens_var_name)
                ) + ' variables are outside of ensemble global mean distribution***'
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange',
                                      fcount, variables, 'global mean')
                print ' '
                print '----------------------------------------------------------------------------'

    if me.get_rank() == 0:
        print ' '
        print "Testing complete."
        print ' '
def main(argv):
    """Legacy pyCECT driver: compare test run(s) against an ensemble summary.

    NOTE(review): this chunk arrived with mangled statement order around the
    module-level ``__main__`` guard; the trailing "Testing complete." prints
    are placed at the end of main() and the guard after the function, which
    is the only syntactically coherent reading — confirm against upstream.

    :param argv: command-line argument list (sys.argv[1:])
    """
    # Get command line stuff and store in a dictionary
    s='verbose sumfile= indir= input_globs= tslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVarTest popens jsonfile= mpi_enable nbin= minrange= maxrange= outfile= casejson= npick= pepsi_gm test_failure pop_tol= pop_threshold='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv,"h",optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,caller,opts_dict)
    popens = opts_dict['popens']

    # Print out timestamp, input ensemble file and new run directory
    dt=datetime.now()
    verbose = opts_dict['verbose']
    print('--------pyCECT--------')
    print(' ')
    print(dt.strftime("%A, %d. %B %Y %I:%M%p"))
    print(' ')
    print('Ensemble summary file = '+opts_dict['sumfile'])
    print(' ')
    print('Testcase file directory = '+opts_dict['indir'] )
    print(' ')
    print(' ')

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])

    ifiles=[]
    in_files=[]
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result=json.load(fin)
            in_files_first=result['not_pick_files']
            in_files=random.sample(in_files_first,opts_dict['npick'])
            print('Testcase files:')
            print('\n'.join(in_files))
    else:
        wildname='*'+opts_dict['input_globs']+'*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str=os.path.join(opts_dict['indir'],wildname)
            glob_files=glob.glob(full_glob_str)
            in_files.extend(glob_files)

    #in_files_temp=os.listdir(opts_dict['indir'])
    in_files.sort()

    if popens:
        #Partition the input file list
        in_files_list=me.partition(in_files,func=EqualStride(),involved=True)
    else:
        # Random pick non pop files
        in_files_list=pyEnsLib.Random_pickup(in_files,opts_dict)

    # Open every selected test file.
    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp=frun_file
        else:
            frun_temp=opts_dict['indir']+'/'+frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(Nio.open_file(frun_temp,"r"))
        else:
            print("COULD NOT LOCATE FILE " +frun_temp+" EXISTING")
            sys.exit()

    if popens:
        # Read in the included var list
        Var2d,Var3d=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ESP')
        print(' ')
        print('Z-score tolerance = '+'{:3.2f}'.format(opts_dict['pop_tol']))
        print('ZPR = '+'{:.2%}'.format(opts_dict['pop_threshold']))
        zmall,n_timeslice=pyEnsLib.compare_raw_score(opts_dict,ifiles,me.get_rank(),Var3d,Var2d)
        #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0)
        np.set_printoptions(threshold=np.nan)
        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(zmall,me,(me.get_size(),len(Var3d)+len(Var2d),len(ifiles),opts_dict['nbin']))
        if me.get_rank()==0:
            fout = open(opts_dict['outfile'],"w")
            for i in range(me.get_size()):
                for j in zmall[i]:
                    np.savetxt(fout,j,fmt='%-7.2e')
    else:
        # Read all variables from the ensemble summary file
        ens_var_name,ens_avg,ens_stddev,ens_rmsz,ens_gm,num_3d,mu_gm,sigma_gm,loadings_gm,sigma_scores_gm,is_SE_sum=pyEnsLib.read_ensemble_summary(opts_dict['sumfile'])

        if len(ens_rmsz) == 0:
            gmonly = True

        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables={}
        if not gmonly:
            for k,v in ens_rmsz.iteritems():
                pyEnsLib.addvariables(variables,k,'zscoreRange',v)
        for k,v in ens_gm.iteritems():
            pyEnsLib.addvariables(variables,k,'gmRange',v)

        # Get 3d variable name list and 2d variable name list seperately
        var_name3d=[]
        var_name2d=[]
        for vcount,v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d,npts2d,is_SE=pyEnsLib.get_ncol_nlev(ifiles[0])

        if (is_SE ^ is_SE_sum):
            print('Warning: please note the ensemble summary file is different from the testing files, they use different grids')

        # Compare the new run and the ensemble summary file to get rmsz score
        results={}
        countzscore=np.zeros(len(ifiles),dtype=np.int32)
        countgm=np.zeros(len(ifiles),dtype=np.int32)
        if not gmonly:
            for fcount,fid in enumerate(ifiles):
                otimeSeries = fid.variables
                for var_name in ens_var_name:
                    orig=otimeSeries[var_name]
                    Zscore,has_zscore=pyEnsLib.calculate_raw_score(var_name,orig[opts_dict['tslice']],npts3d,npts2d,ens_avg,ens_stddev,is_SE,opts_dict,0,0,0)
                    if has_zscore:
                        # Add the new run rmsz zscore to the dictionary "results"
                        pyEnsLib.addresults(results,'zscore',Zscore,var_name,'f'+str(fcount))

            # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
            for fcount,fid in enumerate(ifiles):
                countzscore[fcount]=pyEnsLib.evaluatestatus('zscore','zscoreRange',variables,'ens',results,'f'+str(fcount))

        # Calculate the new run global mean
        mean3d,mean2d=pyEnsLib.generate_global_mean_for_summary(ifiles,var_name3d,var_name2d,is_SE,opts_dict['pepsi_gm'],opts_dict)
        means=np.concatenate((mean3d,mean2d),axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results,'means',means[j][i],ens_var_name[j],'f'+str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount,fid in enumerate(ifiles):
            countgm[fcount]=pyEnsLib.evaluatestatus('means','gmRange',variables,'gm',results,'f'+str(fcount))

        # Calculate the PCA scores of the new run
        new_scores=pyEnsLib.standardized(means,mu_gm,sigma_gm,loadings_gm)
        pyEnsLib.comparePCAscores(ifiles,new_scores,sigma_scores_gm,opts_dict)

        # Print out
        if opts_dict['printVarTest']:
            print('*********************************************** ')
            print('Variable-based testing (for reference only - not used to determine pass/fail)')
            print('*********************************************** ')
            for fcount,fid in enumerate(ifiles):
                print(' ')
                print('Run '+str(fcount+1)+":")
                print(' ')
                if not gmonly:
                    print('***'+str(countzscore[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble RMSZ distribution***')
                    pyEnsLib.printsummary(results,'ens','zscore','zscoreRange',(fcount),variables,'RMSZ')
                    print(' ')
                print('***'+str(countgm[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble global mean distribution***')
                pyEnsLib.printsummary(results,'gm','means','gmRange',fcount,variables,'global mean')
                print(' ')
                print('----------------------------------------------------------------------------')

    print(' ')
    print("Testing complete.")


if __name__ == "__main__":
    main(sys.argv[1:])
def testPartitionListInvolved(self):
    """Serial and parallel partition of a list with involved=True must agree."""
    values = range(5 + self.rank)
    serial_part = self.scomm.partition(values, func=EqualStride(), involved=True)
    parallel_part = self.pcomm.partition(values, func=EqualStride(), involved=True)
    self.assertEqual(serial_part, parallel_part)
def testPartitionList(self):
    """Serial and parallel partition of a list must produce the same result."""
    values = range(5 + self.rank)
    serial_part = self.scomm.partition(values, func=EqualStride())
    parallel_part = self.pcomm.partition(values, func=EqualStride())
    self.assertEqual(serial_part, parallel_part)
def main(argv):
    """pyCECT driver: compare new test run(s) against an ensemble summary file.

    Parses command-line options, locates the test-case files, then either runs
    the POP-ECT comparison (popens) or the CAM global-mean/PCA comparison.
    Exits via sys.exit() on any invalid input.  NOTE(review): indentation was
    reconstructed from a whitespace-mangled source; nesting follows the
    upstream PyCECT layout — confirm against the original file.
    """
    # Get command line stuff and store in a dictionary
    s = """verbose sumfile= indir= input_globs= tslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVars popens jsonfile= mpi_enable nbin= minrange= maxrange= outfile= casejson= npick= pepsi_gm pop_tol= web_enabled pop_threshold= printStdMean fIndex= lev= eet= saveResults json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVars'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['printStdMean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False
    opts_dict['saveResults'] = False

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']

    # some mods for POP-ECT: POP runs use slice 0, a single file, and no MPI
    if popens == True:
        opts_dict['tslice'] = 0
        opts_dict['numRunFile'] = 1
        opts_dict['eet'] = 0
        opts_dict['mpi_enable'] = False

    # Create a mpi simplecomm object (serial comm when mpi is disabled)
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']
    if me.get_rank() == 0:
        print(' ')
        print('--------pyCECT--------')
        print(' ')
        print(dt.strftime("%A, %d. %B %Y %I:%M%p"))
        print(' ')
        if not opts_dict['web_enabled']:
            print('Ensemble summary file = ' + opts_dict['sumfile'])
        print(' ')
        print('Testcase file directory = ' + opts_dict['indir'])
        print(' ')
        print(' ')

    # make sure these are valid
    if opts_dict['web_enabled'] == False and os.path.isfile(opts_dict['sumfile']) == False:
        print("ERROR: Summary file name is not valid.")
        sys.exit()
    if os.path.exists(opts_dict['indir']) == False:
        print("ERROR: --indir path is not valid.")
        sys.exit()

    # Ensure sensible EET value (cannot exceed the number of run files)
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    ifiles = []
    in_files = []
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
            in_files_first = result['not_pick_files']
            in_files = random.sample(in_files_first, opts_dict['npick'])
            print('Testcase files:')
            print('\n'.join(in_files))
    elif opts_dict['json_case']:
        # Case names come from a JSON metadata file; glob matching files per name
        json_file = opts_dict['json_case']
        if (os.path.exists(json_file)):
            fd = open(json_file)  # NOTE(review): fd is never closed — confirm intent
            metainfo = json.load(fd)
            if 'CaseName' in metainfo:
                casename = metainfo['CaseName']
                if (os.path.exists(opts_dict['indir'])):
                    for name in casename:
                        wildname = '*.' + name + '.*'
                        full_glob_str = os.path.join(opts_dict['indir'], wildname)
                        glob_file = glob.glob(full_glob_str)
                        in_files.extend(glob_file)
        else:
            print("ERROR: " + opts_dict['json_case'] + " does not exist.")
            sys.exit()
        print("in_files=", in_files)
    else:
        wildname = '*' + str(opts_dict['input_globs']) + '*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            glob_files = glob.glob(full_glob_str)
            in_files.extend(glob_files)
            num_file = len(in_files)
            if num_file == 0:
                print("ERROR: no matching files for wildcard=" + wildname + " found in specified --indir")
                sys.exit()
            else:
                print("Found " + str(num_file) + " matching files in specified --indir")
            if opts_dict['numRunFile'] > num_file:
                print("ERROR: more files needed (" + str(opts_dict['numRunFile']) + ") than available in the indir (" + str(num_file) + ").")
                sys.exit()

    in_files.sort()
    # print in_files

    if popens:
        # Partition the input file list across ranks
        in_files_list = me.partition(in_files, func=EqualStride(), involved=True)
    else:
        # Random pick cam files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)

    # Resolve each selected file to a full path and verify it exists
    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(frun_temp)
        else:
            print("ERROR: COULD NOT LOCATE FILE " + frun_temp)
            sys.exit()

    if opts_dict['web_enabled']:
        if len(opts_dict['sumfile']) == 0:
            # Default validation-file search root for web mode
            opts_dict['sumfile'] = '/glade/p/cesmdata/cseg/inputdata/validation/'
        # need to open ifiles
        opts_dict['sumfile'], machineid, compiler = pyEnsLib.search_sumfile(opts_dict, ifiles)
        if len(machineid) != 0 and len(compiler) != 0:
            print(' ')
            print('Validation file : machineid = ' + machineid + ', compiler = ' + compiler)
            print('Found summary file : ' + opts_dict['sumfile'])
            print(' ')
        else:
            print('Warning: machine and compiler are unknown')

    if popens:
        # ----- POP-ECT path -----
        # Read in the included var list
        if not os.path.exists(opts_dict['jsonfile']):
            print("ERROR: POP-ECT requires the specification of a valid json file via --jsonfile.")
            sys.exit()
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print(' ')
        print('Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol']))
        print('ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold']))
        zmall, n_timeslice = pyEnsLib.pop_compare_raw_score(opts_dict, ifiles, me.get_rank(), Var3d, Var2d)
        np.set_printoptions(threshold=sys.maxsize)
        if opts_dict['mpi_enable']:
            # Gather per-rank z-score histograms onto the manager and dump them
            zmall = pyEnsLib.gather_npArray_pop(zmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(ifiles), opts_dict['nbin']))
            if me.get_rank() == 0:
                fout = open(opts_dict['outfile'], "w")
                for i in range(me.get_size()):
                    for j in zmall[i]:
                        np.savetxt(fout, j, fmt='%-7.2e')
    # cam
    else:
        # ----- CAM-ECT path -----
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum, std_gm, std_gm_array, str_size = pyEnsLib.read_ensemble_summary(opts_dict['sumfile'])

        # Only doing gm
        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}
        for k, v in ens_gm.items():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)
        # Get 3d variable name list and 2d variable name list separately
        # (summary lists the 3d vars first)
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

        # XOR: warn when exactly one of summary/test files is spectral-element
        if (is_SE ^ is_SE_sum):
            print('Warning: please note the ensemble summary file is different from the testing files: they use different grids')

        # Compare the new run and the ensemble summary file
        results = {}
        countgm = np.zeros(len(ifiles), dtype=np.int32)

        # Calculate the new run global mean
        mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary(ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'], opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i], ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount, fid in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange', variables, 'gm', results, 'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores, var_list, comp_std_gm = pyEnsLib.standardized(means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict, ens_avg, me)
        run_index, decision = pyEnsLib.comparePCAscores(ifiles, new_scores, sigma_scores_gm, opts_dict, me)

        # If there is failure, plot out standardized mean and compared standardized mean in box plots
        # if opts_dict['printStdMean'] and decision == 'FAILED':
        if opts_dict['printStdMean']:
            # Plotting imports are deferred so normal runs need no plotting stack
            import seaborn as sns
            import matplotlib
            matplotlib.use('Agg')  # don't display figures
            import matplotlib.pyplot as plt

            print(" ")
            print('***************************************************************************** ')
            print('Test run variable standardized means (for reference only - not used to determine pass/fail)')
            print('***************************************************************************** ')
            print(" ")

            # Bucket variables by how many of the test runs fall outside the
            # ensemble distribution percentiles
            category = {"all_outside99": [], "two_outside99": [], "one_outside99": [], "all_oneside_outside1QR": []}
            b = list(pyEnsLib.chunk(ens_var_name, 10))
            for f, alist in enumerate(b):
                for fc, avar in enumerate(alist):
                    # Percentile bounds of the ensemble's standardized gm for this var
                    dist_995 = np.percentile(std_gm[avar], 99.5)
                    dist_75 = np.percentile(std_gm[avar], 75)
                    dist_25 = np.percentile(std_gm[avar], 25)
                    dist_05 = np.percentile(std_gm[avar], 0.5)
                    c = 0  # above 99.5th
                    d = 0  # below 0.5th
                    p = 0  # between 75th and 99.5th
                    q = 0  # between 0.5th and 25th
                    for i in range(comp_std_gm[f + fc].size):
                        if comp_std_gm[f + fc][i] > dist_995:
                            c = c + 1
                        elif comp_std_gm[f + fc][i] < dist_05:
                            d = d + 1
                        elif (comp_std_gm[f + fc][i] < dist_995 and comp_std_gm[f + fc][i] > dist_75):
                            p = p + 1
                        elif (comp_std_gm[f + fc][i] > dist_05 and comp_std_gm[f + fc][i] < dist_25):
                            q = q + 1
                    if c == 3 or d == 3:
                        category["all_outside99"].append((avar, f + fc))
                    elif c == 2 or d == 2:
                        category["two_outside99"].append((avar, f + fc))
                    elif c == 1 or d == 1:
                        category["one_outside99"].append((avar, f + fc))
                    if p == 3 or q == 3:
                        category["all_oneside_outside1QR"].append((avar, f + fc))

            # Use the last (or second-to-last) path component for plot file names
            part_name = opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name = opts_dict['indir'].split('/')[-2]

            for key in sorted(category):
                list_array = []
                list_array2 = []
                list_var = []
                value = category[key]
                if key == "all_outside99":
                    print("*** ", len(value), " variables have 3 test run global means outside of the 99th percentile.")
                elif key == "two_outside99":
                    print("*** ", len(value), " variables have 2 test run global means outside of the 99th percentile.")
                elif key == "one_outside99":
                    print("*** ", len(value), " variables have 1 test run global mean outside of the 99th percentile.")
                elif key == "all_oneside_outside1QR":
                    print("*** ", len(value), " variables have all test run global means outside of the first quartile (but not outside the 99th percentile).")

                if len(value) > 0:
                    print(" => generating plot ...")
                    if len(value) > 20:
                        print(" NOTE: truncating to only plot the first 20 variables.")
                        value = value[0:20]
                for each_var in value:
                    list_array.append(std_gm[each_var[0]])
                    list_array2.append(comp_std_gm[each_var[1]])
                    name = each_var[0]
                    if isinstance(name, str) == False:
                        name = name.decode("utf-8")  # bytes var name from the summary file
                    list_var.append(name)
                if len(value) != 0:
                    # Ensemble distribution as box plot, test-run values as red dots
                    ax = sns.boxplot(data=list_array, whis=[0.5, 99.5], fliersize=0.0)
                    sns.stripplot(data=list_array2, jitter=True, color="r")
                    plt.xticks(list(range(len(list_array))), list_var, fontsize=8, rotation=-45)
                    if decision == 'FAILED':
                        plt.savefig(part_name + "_" + key + "_fail.png")
                    else:
                        plt.savefig(part_name + "_" + key + "_pass.png")
                    plt.close()

        ##
        # Print file with info about new test runs....to a netcdf file
        ##
        if opts_dict['saveResults']:
            num_vars = comp_std_gm.shape[0]
            tsize = comp_std_gm.shape[1]
            esize = std_gm_array.shape[1]
            this_savefile = 'savefile.nc'
            if (verbose == True):
                print("VERBOSE: Creating ", this_savefile, " ...")
            if os.path.exists(this_savefile):
                os.unlink(this_savefile)
            nc_savefile = nc.Dataset(this_savefile, "w", format="NETCDF4_CLASSIC")
            nc_savefile.createDimension('ens_size', esize)
            nc_savefile.createDimension('test_size', tsize)
            nc_savefile.createDimension('nvars', num_vars)
            nc_savefile.createDimension('str_size', str_size)
            # Set global attributes
            now = time.strftime("%c")
            nc_savefile.creation_date = now
            nc_savefile.title = 'PyCECT compare results file'
            nc_savefile.summaryfile = opts_dict['sumfile']
            # nc_savefile.testfiles = in_files

            # variables
            v_vars = nc_savefile.createVariable("vars", 'S1', ('nvars', 'str_size'))
            v_std_gm = nc_savefile.createVariable("std_gm", 'f8', ('nvars', 'test_size'))
            v_scores = nc_savefile.createVariable("scores", 'f8', ('nvars', 'test_size'))
            v_ens_sigma_scores = nc_savefile.createVariable('ens_sigma_scores', 'f8', ('nvars', ))
            v_ens_std_gm = nc_savefile.createVariable("ens_std_gm", 'f8', ('nvars', 'ens_size'))

            # hard-coded size
            str_out = nc.stringtochar(np.array(ens_var_name, 'S10'))

            v_vars[:] = str_out
            v_std_gm[:, :] = comp_std_gm[:, :]
            v_scores[:, :] = new_scores[:, :]
            v_ens_sigma_scores[:] = sigma_scores_gm[:]
            v_ens_std_gm[:, :] = std_gm_array[:, :]
            nc_savefile.close()

        # Print variables (optional)
        if opts_dict['printVars']:
            print(" ")
            print('***************************************************************************** ')
            print('Variable global mean information (for reference only - not used to determine pass/fail)')
            print('***************************************************************************** ')
            for fcount, fid in enumerate(ifiles):
                print(' ')
                print('Run ' + str(fcount + 1) + ":")
                print(' ')
                print('***' + str(countgm[fcount]), " of " + str(len(ens_var_name)) + ' variables are outside of ensemble global mean distribution***')
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange', fcount, variables, 'global mean')
                print(' ')
                print('----------------------------------------------------------------------------')

    if me.get_rank() == 0:
        print(' ')
        print("Testing complete.")
        print(' ')
def main(argv):
    """pyEnsSum driver: build a CAM ensemble summary NetCDF file.

    Reads an ensemble of history files, classifies 2D/3D variables, computes
    global means (and optionally RMSZ scores / max norms), then writes the
    summary file with PCA loadings for later use by pyCECT.

    NOTE(review): this block is Python 2 code (print statements, iteritems),
    unlike the Python 3 pyCECT main elsewhere in this file; indentation was
    reconstructed from a whitespace-mangled source — confirm nesting against
    the upstream pyEnsSum layout.
    """
    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_0_beta08'
    opts_dict['compset'] = 'F2000'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)

    # NOTE(review): 'and' binds tighter than 'or', so a missing --res alone
    # does not trigger this error — confirm the intended logic
    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']):
        print 'Please specify --tag, --compset, --mach and --res options'
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist = []
    inc_varlist = []

    # Create a mpi simplecomm object (serial comm when mpi is disabled)
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        print 'Running pyEnsSum!'

    if me.get_rank() == 0 and (verbose == True):
        print opts_dict
        print 'Ensemble size for summary = ', esize

    exclude = False
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist = []
            # Read in the excluded or included var list
            # (read_jsonlist's flag tells us which kind the list is)
            ex_varlist, exclude = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ES')
            if exclude == False:
                inc_varlist = ex_varlist
                ex_varlist = []
            # Read in the included var list
            # inc_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')

    # Broadcast the excluded var list to each processor
    # if opts_dict['mpi_enable']:
    #    ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)
    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude = me.partition(exclude, func=Duplicate(), involved=True)
        if exclude:
            ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)
        else:
            inc_varlist = me.partition(inc_varlist, func=Duplicate(), involved=True)

    in_files = []
    if (os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank() == 0 and (verbose == True):
            print 'Number of files in input directory = ', num_files
        if (num_files < esize):
            if me.get_rank() == 0 and (verbose == True):
                print 'Number of files in input directory (', num_files,\
                    ') is less than specified ensemble size of ', esize
            sys.exit(2)
        if (num_files > esize):
            if me.get_rank() == 0 and (verbose == True):
                print 'NOTE: Number of files in ', input_dir, \
                    'is greater than specified ensemble size of ', esize,\
                    '\nwill just use the first ', esize, 'files'
    else:
        if me.get_rank() == 0:
            print 'Input directory: ', input_dir, ' not found'
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'], opts_dict['regx'])
        in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
        if me.get_rank() == 0 and (verbose == True):
            print 'in_files=', in_files

    # Open the files in the input directory
    o_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print 'Input files are: '
        print "\n".join(in_files)
        # for i in in_files:
        #    print "in_files =",i
    for onefile in in_files[0:esize]:
        if (os.path.isfile(input_dir + '/' + onefile)):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            if me.get_rank() == 0:
                print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...."
            sys.exit()

    # Store dimensions of the input fields
    if me.get_rank() == 0 and (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ilev":
            nilev = input_dims["ilev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key == "lon"):
            nlon = input_dims[key]
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey = key

    if (nlev == -1):
        if me.get_rank() == 0:
            print "COULD NOT LOCATE valid dimension lev => EXITING...."
        sys.exit()

    if ((ncol == -1) and ((nlat == -1) or (nlon == -1))):
        if me.get_rank() == 0:
            print "Need either lat/lon or ncol => EXITING...."
        sys.exit()

    # Check if this is SE or FV data: ncol present => spectral-element grid
    if (ncol != -1):
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if me.get_rank() == 0 and (verbose == True):
        print "Checking dimensions across files...."
        print 'lev = ', nlev
        if (is_SE == True):
            print 'ncol = ', ncol
        else:
            print 'nlat = ', nlat
            print 'nlon = ', nlon

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (is_SE == True):
            if (nlev != int(input_dims["lev"]) or (ncol != int(input_dims["ncol"]))):
                if me.get_rank() == 0:
                    # NOTE(review): prints in_files[0] twice; likely meant
                    # in_files[count] for the second operand — confirm
                    print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
                sys.exit()
        else:
            if (nlev != int(input_dims["lev"]) or (nlat != int(input_dims[latkey]))\
                    or (nlon != int(input_dims[lonkey]))):
                if me.get_rank() == 0:
                    print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    # print len(vars_dict_all)
    if exclude:
        vars_dict = vars_dict_all
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    # Given an included var list, remove all float var that are not on the list
    else:
        vars_dict = vars_dict_all.copy()
        for k, v in vars_dict_all.iteritems():
            if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
                # print vars_dict_all[k].typecode()
                # print k
                del vars_dict[k]

    num_vars = len(vars_dict)
    # print num_vars
    # if me.get_rank() == 0:
    #    for k,v in vars_dict.iteritems():
    #        print 'vars_dict',k,vars_dict[k].typecode()

    str_size = 0
    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.iteritems():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = v.rank  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True):  # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):
                is_2d = True
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev)):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):
                is_2d = True
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and (vs[1] == nlev or vs[1] == nilev))):
                is_3d = True
                num_3d += 1
        if (is_3d == True):
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif (is_2d == True):
            str_size = max(str_size, len(k))
            d2_var_names.append(k)
        # else:
        #    print 'var=',k

    if me.get_rank() == 0 and (verbose == True):
        print 'Number of variables found: ', num_3d + num_2d
        print '3D variables: ' + str(num_3d) + ', 2D variables: ' + str(num_2d)

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    if esize < num_2d + num_3d:
        if me.get_rank() == 0:
            print "************************************************************************************************************************************"
            print " Error: the total number of 3D and 2D variables " + str(num_2d + num_3d) + " is larger than the number of ensemble files " + str(esize)
            print " Cannot generate ensemble summary file, please remove more variables from your included variable list,"
            print " or add more varaibles in your excluded variable list!!!"
            print "************************************************************************************************************************************"
        sys.exit()

    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    # if me.get_rank() == 0 and (verbose == True):
    #    print 'num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")"

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if me.get_rank() == 0 and (verbose == True):
        print "Creating ", this_sumfile, " ..."
    # NOTE(review): '|' binds tighter than '==', so this is
    # me.get_rank() == (0 | opts_dict["popens"]) — works while popens is
    # False/0 but probably intended (rank == 0) or popens; confirm
    if (me.get_rank() == 0 | opts_dict["popens"]):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if me.get_rank() == 0 and (verbose == True):
            print "Setting dimensions ....."
        if (is_SE == True):
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if me.get_rank() == 0 and (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if me.get_rank() == 0 and (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev', ))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            if (is_SE == True):
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol'))
            else:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))
            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size'))
        v_standardized_gm = nc_sumfile.create_variable("standardized_gm", 'f', ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm', 'f', ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm', 'f', ('nvars', ))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm', 'f', ('nvars', ))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm', 'f', ('nvars', ))

        # Assign vars, var3d and var2d (names are space-padded to str_size
        # so they fit the fixed-width 'S1' character arrays)
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        # NOTE(review): this rebinds the local name rather than writing into
        # the netCDF variable (v_lev[:] = ...); confirm intent
        v_lev = lev_data

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable, we also do max norm also (currently done in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True)
        var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # Calculate global means #
    if me.get_rank() == 0 and (verbose == True):
        print "Calculating global means ....."
    if not opts_dict['cumul']:
        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    if (not opts_dict['gmonly']) | (opts_dict['cumul']):
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating RMSZ scores ....."
        zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict['mpi_enable'] & (not opts_dict['popens']):
        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index, (len(d3_var_names), len(o_files)))
            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d = gather_npArray(zscore3d, me, slice_index, (len(d3_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names), me.get_rank())
                ens_avg3d = gather_npArray(ens_avg3d, me, slice_index, shape_tuple3d)
                ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index, shape_tuple3d)

            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index, (len(d2_var_names), len(o_files)))
            var_list = gather_list(var_list, me)
            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d = gather_npArray(zscore2d, me, slice_index, (len(d2_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev2d results
                shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names), me.get_rank())
                ens_avg2d = gather_npArray(ens_avg2d, me, slice_index, shape_tuple2d)
                ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index, shape_tuple2d)
        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names)))

    # Assign to file:
    # NOTE(review): same precedence caveat — evaluates
    # me.get_rank() == (0 | opts_dict['popens']); confirm intended grouping
    if me.get_rank() == 0 | opts_dict['popens']:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            if not opts_dict['gmonly']:
                Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0)
                v_RMSZ[:, :] = Zscoreall[:, :]
            if not opts_dict['gmonly']:
                if (is_SE == True):
                    v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :]
                    v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :]
                    v_ens_avg2d[:, :] = ens_avg2d[:, :]
                    v_ens_stddev2d[:, :] = ens_stddev2d[:, :]
                else:
                    v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :]
                    v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :]
                    v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :]
                    v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :]
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(gmall, all_var_names, var_list, me)
        v_gm[:, :] = gmall[:, :]
        v_standardized_gm[:, :] = standardized_global_mean[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:].astype(np.float32)
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        if me.get_rank() == 0:
            print "All Done"
def _inspect_input_files(self):
    """
    Inspect the input data files themselves.

    We check the file contents here, which means opening and reading heading
    information from the files.

    Overall flow (collective over self._simplecomm):
      1. Manager opens the first input file, finds the unlimited dimension,
         and categorizes every variable (time-invariant metadata,
         time-variant metadata, or time-series variable).
      2. The categorization results are broadcast (Duplicate partition) to
         all ranks.
      3. The remaining files are inspected in parallel (EqualStride
         partition) for consistency and per-file time values.
      4. Missing-variable sets and time values are collected back on the
         manager, files are sorted by their first time value, and overlap
         between files' time spans raises ValueError.
      5. Time-series variables are partitioned across ranks by weight
         (WeightBalanced) into self._time_series_variables.

    Raises:
        LookupError: if the unlimited dimension (or its variable) is missing
            or not actually unlimited in any input file.
        ValueError: if the time spans of two input files overlap.
    """
    # Set the I/O backend according to what is specified
    iobackend.set_backend(self._backend)

    # Initialize the list of variable names for each category
    udim = None          # name of the unlimited (time) dimension
    timeta = []          # time-invariant metadata variable names
    xtra_timeta = []     # time-invariant vars found only in the metadata file
    tvmeta = []          # time-variant metadata variable names

    # Initialize the local dictionary of time-series variables and sizes
    # (maps variable name -> byte size of one full variable)
    all_tsvars = {}
    file_times = {}      # maps filename -> array of time values in that file

    #===== INSPECT FIRST INPUT FILE (ON MASTER PROCESS ONLY) =====

    # Open first file
    if self._simplecomm.is_manager():
        ifile = iobackend.NCFile(self._input_filenames[0])

        # Look for the 'unlimited' dimension
        try:
            udim = next(
                dim for dim in ifile.dimensions if ifile.unlimited(dim))
        except StopIteration:
            err_msg = 'Unlimited dimension not found.'
            raise LookupError(err_msg)

        # Get the first file's time values
        file_times[self._input_filenames[0]] = ifile.variables[udim][:]

        # Categorize each variable (only looking at first file).
        # A variable without the unlimited dimension is time-invariant
        # metadata (unless excluded); with it, it is either time-variant
        # metadata (explicitly named, or 1-D when _1d_metadata is set) or a
        # time-series variable.
        for var_name, var in ifile.variables.iteritems():
            if udim not in var.dimensions:
                if var_name not in self._exclude_list:
                    timeta.append(var_name)
            elif var_name in self._metadata_names or (self._1d_metadata and len(var.dimensions) == 1):
                tvmeta.append(var_name)
            elif self._time_series_names is None or var_name in self._time_series_names:
                # Record approximate byte size for load balancing later
                all_tsvars[var_name] = var.datatype.itemsize * var.size

        # Close the first file
        ifile.close()

        # Find variables only in the metadata file
        if self._metadata_filename is not None:
            ifile = iobackend.NCFile(self._metadata_filename)
            for var_name, var in ifile.variables.iteritems():
                if udim not in var.dimensions and var_name not in timeta:
                    xtra_timeta.append(var_name)
            ifile.close()

    self._simplecomm.sync()

    # Send information to worker processes (Duplicate partition acts as a
    # broadcast: every rank receives the manager's values)
    self._unlimited_dim = self._simplecomm.partition(
        udim, func=Duplicate(), involved=True)
    self._time_invariant_metadata = self._simplecomm.partition(
        timeta, func=Duplicate(), involved=True)
    self._time_invariant_metafile_vars = self._simplecomm.partition(
        xtra_timeta, func=Duplicate(), involved=True)
    self._time_variant_metadata = self._simplecomm.partition(
        tvmeta, func=Duplicate(), involved=True)
    all_tsvars = self._simplecomm.partition(
        all_tsvars, func=Duplicate(), involved=True)

    self._simplecomm.sync()
    if self._simplecomm.is_manager():
        self._vprint(' First input file inspected.', verbosity=2)

    #===== INSPECT REMAINING INPUT FILES (IN PARALLEL) =====

    # Get the list of variable names and missing variables
    # (py2 list concatenation: dict.keys() returns a list here)
    var_names = set(
        all_tsvars.keys() + self._time_invariant_metadata +
        self._time_invariant_metafile_vars + self._time_variant_metadata)
    missing_vars = set()

    # Partition the remaining filenames to inspect
    input_filenames = self._simplecomm.partition(
        self._input_filenames[1:], func=EqualStride(), involved=True)

    # Make a pass through remaining files and:
    # (1) Make sure it has the 'unlimited' dimension
    # (2) Make sure this dimension is truely 'unlimited'
    # (3) Check that this dimension has a corresponding variable
    # (4) Check if there are any missing variables
    # (5) Get the time values from the files
    for ifilename in input_filenames:
        ifile = iobackend.NCFile(ifilename)

        # Determine the unlimited dimension
        if self._unlimited_dim not in ifile.dimensions:
            err_msg = 'Unlimited dimension not found in file "{0}"'.format(
                ifilename)
            raise LookupError(err_msg)
        if not ifile.unlimited(self._unlimited_dim):
            err_msg = 'Dimension "{0}" not unlimited in file "{1}"'.format(
                self._unlimited_dim, ifilename)
            raise LookupError(err_msg)
        if self._unlimited_dim not in ifile.variables:
            err_msg = 'Unlimited dimension variable not found in file "{0}"'.format(
                ifilename)
            raise LookupError(err_msg)

        # Get the time values (list of NDArrays)
        file_times[ifilename] = ifile.variables[self._unlimited_dim][:]

        # Get the missing variables
        var_names_next = set(ifile.variables.keys())
        missing_vars.update(var_names - var_names_next)

        # Close the file
        ifile.close()

    self._simplecomm.sync()
    if self._simplecomm.is_manager():
        self._vprint(' Remaining input files inspected.', verbosity=2)

    #===== CHECK FOR MISSING VARIABLES =====

    # Gather all missing variables on the master process.
    # NOTE(review): each worker sends once and the manager collects once per
    # worker; the [1] index takes the payload from the (rank, data) pair —
    # this pattern assumes simplecomm.collect() returns such a pair.
    if self._simplecomm.get_size() > 1:
        if self._simplecomm.is_manager():
            for _ in range(1, self._simplecomm.get_size()):
                missing_vars.update(self._simplecomm.collect()[1])
        else:
            self._simplecomm.collect(missing_vars)
    self._simplecomm.sync()

    # Check for missing variables only on master process
    if self._simplecomm.is_manager():
        # Remove metafile variables from missing vars set
        # (they are expected to be absent from the input files)
        missing_vars -= set(self._time_invariant_metafile_vars)

        # Make sure that the list of variables in each file is the same
        if len(missing_vars) != 0:
            warning = ("WARNING: Some variables are not in all input files:{0} "
                       "{1}").format(linesep, ', '.join(sorted(missing_vars)))
            self._vprint(warning, header=False, verbosity=0)

        self._vprint(' Checked for missing variables.', verbosity=2)

    #===== SORT INPUT FILES BY TIME =====

    # Gather the file time values onto the master process
    if self._simplecomm.get_size() > 1:
        if self._simplecomm.is_manager():
            for _ in range(1, self._simplecomm.get_size()):
                file_times.update(self._simplecomm.collect()[1])
        else:
            self._simplecomm.collect(file_times)
    self._simplecomm.sync()

    # Check the order of the input files based on the time values
    if self._simplecomm.is_manager():

        # Determine the sort order based on the first time in the time
        # values
        old_order = range(len(self._input_filenames))
        new_order = sorted(
            old_order, key=lambda i: file_times[self._input_filenames[i]][0])

        # Re-order the list of input filenames and time values
        new_filenames = [self._input_filenames[i] for i in new_order]
        new_values = [file_times[self._input_filenames[i]]
                      for i in new_order]

        # Now, check that the largest time in each file is less than the smallest time
        # in the next file (so that the time spans of each file do not
        # overlap)
        for i in xrange(1, len(new_values)):
            if new_values[i - 1][-1] >= new_values[i][0]:
                err_msg = ('Times in input files {0} and {1} appear to '
                           'overlap').format(new_filenames[i - 1], new_filenames[i])
                raise ValueError(err_msg)

    else:
        new_filenames = None

    # Now that this is validated, save the time values and filename in the
    # new order (broadcast the sorted list from the manager to all ranks)
    self._input_filenames = self._simplecomm.partition(
        new_filenames, func=Duplicate(), involved=True)

    if self._simplecomm.is_manager():
        self._vprint(' Input files sorted by time.', verbosity=2)

    #===== FINALIZING OUTPUT =====
    self._simplecomm.sync()

    # Debug output
    if self._simplecomm.is_manager():
        self._vprint(' Time-Invariant Metadata: {0}'.format(
            ', '.join(self._time_invariant_metadata)), verbosity=1)
        if len(self._time_invariant_metafile_vars) > 0:
            self._vprint(' Additional Time-Invariant Metadata: {0}'.format(
                ', '.join(self._time_invariant_metafile_vars)), verbosity=1)
        self._vprint(' Time-Variant Metadata: {0}'.format(
            ', '.join(self._time_variant_metadata)), verbosity=1)
        self._vprint(
            ' Time-Series Variables: {0}'.format(', '.join(all_tsvars.keys())),
            verbosity=1)

    # Add 'once' variable if writing to a once file
    # NOTE: This is a "cheat"!  There is no 'once' variable.  It's just
    # a catch for all metadata IFF the 'once-file' is enabled.
    if self._use_once_file:
        all_tsvars['once'] = max(all_tsvars.values())

    # Partition the time-series variables across processors
    # (weighted by variable byte size so ranks get similar I/O loads)
    self._time_series_variables = self._simplecomm.partition(
        all_tsvars.items(), func=WeightBalanced(), involved=True)
def main(argv): print('Running pyEnsSum!') # Get command line stuff and store in a dictionary s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex=' optkeys = s.split() try: opts, args = getopt.getopt(argv, "h", optkeys) except getopt.GetoptError: pyEnsLib.EnsSum_usage() sys.exit(2) # Put command line options in a dictionary - also set defaults opts_dict={} # Defaults opts_dict['tag'] = '' opts_dict['compset'] = '' opts_dict['mach'] = '' opts_dict['esize'] = 151 opts_dict['tslice'] = 0 opts_dict['res'] = '' opts_dict['sumfile'] = 'ens.summary.nc' opts_dict['indir'] = './' opts_dict['sumfiledir'] = './' opts_dict['jsonfile'] = '' opts_dict['verbose'] = True opts_dict['mpi_enable'] = False opts_dict['maxnorm'] = False opts_dict['gmonly'] = False opts_dict['popens'] = False opts_dict['cumul'] = False opts_dict['regx'] = 'test' opts_dict['startMon'] = 1 opts_dict['endMon'] = 1 opts_dict['fIndex'] = 151 # This creates the dictionary of input arguments opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,'ES',opts_dict) verbose = opts_dict['verbose'] st = opts_dict['esize'] esize = int(st) if (verbose == True): print(opts_dict) print('Ensemble size for summary = ', esize) if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']): print('Please specify --tag, --compset, --mach and --res options') sys.exit() # Now find file names in indir input_dir = opts_dict['indir'] # The var list that will be excluded ex_varlist=[] # Create a mpi simplecomm object if opts_dict['mpi_enable']: me=simplecomm.create_comm() else: me=simplecomm.create_comm(not opts_dict['mpi_enable']) if me.get_rank() == 0: if opts_dict['jsonfile']: # Read in the excluded var list ex_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES') # Broadcast the excluded var list to each processor if opts_dict['mpi_enable']: 
ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True) in_files=[] if(os.path.exists(input_dir)): # Get the list of files in_files_temp = os.listdir(input_dir) in_files=sorted(in_files_temp) #print in_files # Make sure we have enough num_files = len(in_files) if (verbose == True): print('Number of files in input directory = ', num_files) if (num_files < esize): print('Number of files in input directory (',num_files, ') is less than specified ensemble size of ', esize) sys.exit(2) if (num_files > esize): print('NOTE: Number of files in ', input_dir, 'is greater than specified ensemble size of ', esize, '\nwill just use the first ', esize, 'files') else: print('Input directory: ',input_dir,' not found') sys.exit(2) if opts_dict['cumul']: if opts_dict['regx']: in_files_list=get_cumul_filelist(opts_dict,opts_dict['indir'],opts_dict['regx']) in_files=me.partition(in_files_list,func=EqualLength(),involved=True) if me.get_rank()==0: print('in_files=',in_files) # Open the files in the input directory o_files=[] for onefile in in_files[0:esize]: if (os.path.isfile(input_dir+'/' + onefile)): o_files.append(Nio.open_file(input_dir+'/' + onefile,"r")) else: print("COULD NOT LOCATE FILE "+ input_dir + onefile + "! 
EXITING....") sys.exit() # Store dimensions of the input fields if (verbose == True): print("Getting spatial dimensions") nlev = -1 ncol = -1 nlat = -1 nlon = -1 lonkey='' latkey='' # Look at first file and get dims input_dims = o_files[0].dimensions ndims = len(input_dims) for key in input_dims: if key == "lev": nlev = input_dims["lev"] elif key == "ncol": ncol = input_dims["ncol"] elif (key == "nlon") or (key =="lon"): nlon = input_dims[key] lonkey=key elif (key == "nlat") or (key == "lat"): nlat = input_dims[key] latkey=key if (nlev == -1) : print("COULD NOT LOCATE valid dimension lev => EXITING....") sys.exit() if (( ncol == -1) and ((nlat == -1) or (nlon == -1))): print("Need either lat/lon or ncol => EXITING....") sys.exit() # Check if this is SE or FV data if (ncol != -1): is_SE = True else: is_SE = False # Make sure all files have the same dimensions if (verbose == True): print("Checking dimensions across files....") print('lev = ', nlev) if (is_SE == True): print('ncol = ', ncol) else: print('nlat = ', nlat) print('nlon = ', nlon) for count, this_file in enumerate(o_files): input_dims = this_file.dimensions if (is_SE == True): if ( nlev != int(input_dims["lev"]) or ( ncol != int(input_dims["ncol"]))): print("Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!') sys.exit() else: if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\ or ( nlon != int(input_dims[lonkey]))): print("Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!') sys.exit() # Get 2d vars, 3d vars and all vars (For now include all variables) vars_dict = o_files[0].variables # Remove the excluded variables (specified in json file) from variable dictionary if ex_varlist: for i in ex_varlist: if i in vars_dict: del vars_dict[i] num_vars = len(vars_dict) if (verbose == True): print('Number of variables (including metadata) found = ', num_vars) str_size = 0 d2_var_names = [] d3_var_names = [] num_2d = 0 num_3d = 0 # Which are 2d, which are 3d 
and max str_size for k,v in vars_dict.iteritems(): var = k vd = v.dimensions # all the variable's dimensions (names) vr = v.rank # num dimension vs = v.shape # dim values is_2d = False is_3d = False if (is_SE == True): # (time, lev, ncol) or (time, ncol) if ((vr == 2) and (vs[1] == ncol)): is_2d = True num_2d += 1 elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev )): is_3d = True num_3d += 1 else: # (time, lev, nlon, nlon) or (time, nlat, nlon) if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)): is_2d = True num_2d += 1 elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and vs[1] == nlev )): is_3d = True num_3d += 1 if (is_3d == True) : str_size = max(str_size, len(k)) d3_var_names.append(k) elif (is_2d == True): str_size = max(str_size, len(k)) d2_var_names.append(k) # Now sort these and combine (this sorts caps first, then lower case - # which is what we want) d2_var_names.sort() d3_var_names.sort() # All vars is 3d vars first (sorted), the 2d vars all_var_names = list(d3_var_names) all_var_names += d2_var_names n_all_var_names = len(all_var_names) if (verbose == True): print('num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")") # Create new summary ensemble file this_sumfile = opts_dict["sumfile"] if (verbose == True): print("Creating ", this_sumfile, " ...") if(me.get_rank() ==0 | opts_dict["popens"]): if os.path.exists(this_sumfile): os.unlink(this_sumfile) opt = Nio.options() opt.PreFill = False opt.Format = 'NetCDF4Classic' nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt) # Set dimensions if (verbose == True): print("Setting dimensions .....") if (is_SE == True): nc_sumfile.create_dimension('ncol', ncol) else: nc_sumfile.create_dimension('nlat', nlat) nc_sumfile.create_dimension('nlon', nlon) nc_sumfile.create_dimension('nlev', nlev) nc_sumfile.create_dimension('ens_size', esize) nc_sumfile.create_dimension('nvars', num_3d + num_2d) nc_sumfile.create_dimension('nvars3d', num_3d) nc_sumfile.create_dimension('nvars2d', 
num_2d) nc_sumfile.create_dimension('str_size', str_size) # Set global attributes now = time.strftime("%c") if (verbose == True): print("Setting global attributes .....") setattr(nc_sumfile, 'creation_date',now) setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file') setattr(nc_sumfile, 'tag', opts_dict["tag"]) setattr(nc_sumfile, 'compset', opts_dict["compset"]) setattr(nc_sumfile, 'resolution', opts_dict["res"]) setattr(nc_sumfile, 'machine', opts_dict["mach"]) # Create variables if (verbose == True): print("Creating variables .....") v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',)) v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size')) v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size')) v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size')) if not opts_dict['gmonly']: if (is_SE == True): v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol')) v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol')) v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol')) v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol')) else: v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon')) v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon')) v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon')) v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon')) v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size')) v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size')) v_loadings_gm = nc_sumfile.create_variable('loadings_gm','f',('nvars','nvars')) v_mu_gm = nc_sumfile.create_variable('mu_gm','f',('nvars',)) v_sigma_gm = nc_sumfile.create_variable('sigma_gm','f',('nvars',)) v_sigma_scores_gm 
= nc_sumfile.create_variable('sigma_scores_gm','f',('nvars',)) # Assign vars, var3d and var2d if (verbose == True): print("Assigning vars, var3d, and var2d .....") eq_all_var_names =[] eq_d3_var_names = [] eq_d2_var_names = [] l_eq = len(all_var_names) for i in range(l_eq): tt = list(all_var_names[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ')*(str_size - l_tt) tt.extend(extra) eq_all_var_names.append(tt) l_eq = len(d3_var_names) for i in range(l_eq): tt = list(d3_var_names[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ')*(str_size - l_tt) tt.extend(extra) eq_d3_var_names.append(tt) l_eq = len(d2_var_names) for i in range(l_eq): tt = list(d2_var_names[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ')*(str_size - l_tt) tt.extend(extra) eq_d2_var_names.append(tt) v_vars[:] = eq_all_var_names[:] v_var3d[:] = eq_d3_var_names[:] v_var2d[:] = eq_d2_var_names[:] # Time-invarient metadata if (verbose == True): print("Assigning time invariant metadata .....") lev_data = vars_dict["lev"] v_lev = lev_data # Form ensembles, each missing one member; compute RMSZs and global means #for each variable, we also do max norm also (currently done in pyStats) tslice = opts_dict['tslice'] if not opts_dict['cumul']: # Partition the var list var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True) var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True) else: var3_list_loc=d3_var_names var2_list_loc=d2_var_names # Calculate global means # if (verbose == True): print("Calculating global means .....") if not opts_dict['cumul']: gm3d,gm2d = pyEnsLib.generate_global_mean_for_summary(o_files,var3_list_loc,var2_list_loc , is_SE, False,opts_dict) if (verbose == True): print("Finish calculating global means .....") # Calculate RMSZ scores if (verbose == True): print("Calculating RMSZ scores .....") if (not opts_dict['gmonly']) | (opts_dict['cumul']): 
zscore3d,zscore2d,ens_avg3d,ens_stddev3d,ens_avg2d,ens_stddev2d,temp1,temp2=pyEnsLib.calc_rmsz(o_files,var3_list_loc,var2_list_loc,is_SE,opts_dict) # Calculate max norm ensemble if opts_dict['maxnorm']: if (verbose == True): print("Calculating max norm of ensembles .....") pyEnsLib.calculate_maxnormens(opts_dict,var3_list_loc) pyEnsLib.calculate_maxnormens(opts_dict,var2_list_loc) if opts_dict['mpi_enable'] & ( not opts_dict['popens']): if not opts_dict['cumul']: # Gather the 3d variable results from all processors to the master processor slice_index=get_stride_list(len(d3_var_names),me) # Gather global means 3d results gm3d=gather_npArray(gm3d,me,slice_index,(len(d3_var_names),len(o_files))) if not opts_dict['gmonly']: # Gather zscore3d results zscore3d=gather_npArray(zscore3d,me,slice_index,(len(d3_var_names),len(o_files))) # Gather ens_avg3d and ens_stddev3d results shape_tuple3d=get_shape(ens_avg3d.shape,len(d3_var_names),me.get_rank()) ens_avg3d=gather_npArray(ens_avg3d,me,slice_index,shape_tuple3d) ens_stddev3d=gather_npArray(ens_stddev3d,me,slice_index,shape_tuple3d) # Gather 2d variable results from all processors to the master processor slice_index=get_stride_list(len(d2_var_names),me) # Gather global means 2d results gm2d=gather_npArray(gm2d,me,slice_index,(len(d2_var_names),len(o_files))) if not opts_dict['gmonly']: # Gather zscore2d results zscore2d=gather_npArray(zscore2d,me,slice_index,(len(d2_var_names),len(o_files))) # Gather ens_avg3d and ens_stddev2d results shape_tuple2d=get_shape(ens_avg2d.shape,len(d2_var_names),me.get_rank()) ens_avg2d=gather_npArray(ens_avg2d,me,slice_index,shape_tuple2d) ens_stddev2d=gather_npArray(ens_stddev2d,me,slice_index,shape_tuple2d) else: gmall=np.concatenate((temp1,temp2),axis=0) gmall=pyEnsLib.gather_npArray_pop(gmall,me,(me.get_size(),len(d3_var_names)+len(d2_var_names))) # Assign to file: if me.get_rank() == 0 | opts_dict['popens'] : if not opts_dict['cumul']: gmall=np.concatenate((gm3d,gm2d),axis=0) if not 
opts_dict['gmonly']: Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0) v_RMSZ[:,:]=Zscoreall[:,:] if not opts_dict['gmonly']: if (is_SE == True): v_ens_avg3d[:,:,:]=ens_avg3d[:,:,:] v_ens_stddev3d[:,:,:]=ens_stddev3d[:,:,:] v_ens_avg2d[:,:]=ens_avg2d[:,:] v_ens_stddev2d[:,:]=ens_stddev2d[:,:] else: v_ens_avg3d[:,:,:,:]=ens_avg3d[:,:,:,:] v_ens_stddev3d[:,:,:,:]=ens_stddev3d[:,:,:,:] v_ens_avg2d[:,:,:]=ens_avg2d[:,:,:] v_ens_stddev2d[:,:,:]=ens_stddev2d[:,:,:] else: gmall_temp=np.transpose(gmall[:,:]) gmall=gmall_temp mu_gm,sigma_gm,standardized_global_mean,loadings_gm,scores_gm=pyEnsLib.pre_PCA(gmall) v_gm[:,:]=gmall[:,:] v_mu_gm[:]=mu_gm[:] v_sigma_gm[:]=sigma_gm[:].astype(np.float32) v_loadings_gm[:,:]=loadings_gm[:,:] v_sigma_scores_gm[:]=scores_gm[:] print("All Done") def get_cumul_filelist(opts_dict,indir,regx): if not opts_dict['indir']: print('input dir is not specified') sys.exit(2) #regx='(pgi(.)*-(01|02))' regx_list=["mon","gnu","pgi"] all_files=[] for prefix in regx_list: for i in range(opts_dict['fIndex'],opts_dict['fIndex']+opts_dict['esize']/3): for j in range(opts_dict['startMon'],opts_dict['endMon']+1): mon_str=str(j).zfill(2) regx='(^'+prefix+'(.)*'+str(i)+'(.)*-('+mon_str+'))' print('regx=',regx) res=[f for f in os.listdir(indir) if re.search(regx,f)] in_files=sorted(res) all_files.extend(in_files) print("all_files=",all_files) #in_files=res return all_files # # Get the shape of all variable list in tuple for all processor # def get_shape(shape_tuple,shape1,rank): lst=list(shape_tuple) lst[0]=shape1 shape_tuple=tuple(lst) return shape_tuple # # Get the mpi partition list for each processor # def get_stride_list(len_of_list,me): slice_index=[] for i in range(me.get_size()): index_arr=np.arange(len_of_list) slice_index.append(index_arr[i::me.get_size()]) return slice_index # # Gather arrays from each processor by the var_list to the master processor and make it an array # def gather_npArray(npArray,me,slice_index,array_shape): 
the_array=np.zeros(array_shape,dtype=np.float32) if me.get_rank()==0: k=0 for j in slice_index[me.get_rank()]: the_array[j,:]=npArray[k,:] k=k+1 for i in range(1,me.get_size()): if me.get_rank() == 0: rank,npArray=me.collect() k=0 for j in slice_index[rank]: the_array[j,:]=npArray[k,:] k=k+1 if me.get_rank() != 0: message={"from_rank":me.get_rank(),"shape":npArray.shape} me.collect(npArray) me.sync() return the_array if __name__ == "__main__": main(sys.argv[1:])