Example #1
0
def _pad_var_names(names, width):
    """Return each name as a list of characters, blank-padded to *width*.

    Names that are already at least *width* characters long are returned
    unpadded (they are never truncated).
    """
    return [list(name.ljust(width)) for name in names]


def main(argv):
    """Create a CAM ensemble summary netCDF file from a directory of runs.

    Parses the long options in *argv*, takes the first ``esize`` files
    (sorted by name) found in ``--indir``, classifies the variables of the
    first file as 2D or 3D, obtains their global means from
    ``pyEnsLib.generate_global_mean_for_summary`` and, on rank 0, writes the
    global means together with the output of ``pyEnsLib.pre_PCA`` to
    ``--sumfile``.

    Args:
        argv: command line arguments, without the program name.

    The function terminates the process through ``sys.exit`` on invalid
    options, on missing input files or directories, when there are more
    variables than ensemble members, and when ``pyEnsLib.pre_PCA`` reports
    a failure (in which case the partially written summary file is removed).
    """

    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex= mpi_disable'
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Defaults - replaced below by whatever was given on the command line
    opts_dict = {
        'tag': 'cesm2_0',
        'compset': 'F2000climo',
        'mach': 'cheyenne',
        'esize': 350,
        'tslice': 1,
        'res': 'f19_f19',
        'sumfile': 'ens.summary.nc',
        'indir': './',
        'sumfiledir': './',
        'jsonfile': 'exclude_empty.json',
        'verbose': False,
        'mpi_enable': True,
        'mpi_disable': False,
        'maxnorm': False,
        'gmonly': True,
        'popens': False,
        'cumul': False,
        'regx': 'test',
        'startMon': 1,
        'endMon': 1,
        'fIndex': 151,
    }

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']
    esize = int(opts_dict['esize'])

    if opts_dict['popens']:
        print(
            "ERROR: Please use pyEnsSumPop.py for a POP ensemble (not --popens)  => EXITING...."
        )
        sys.exit()

    # All four identifying options are required.  (The previous test joined
    # the last one with "or", so it could never fail while --res was set.)
    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach']
            and opts_dict['res']):
        print(
            'ERROR: Please specify --tag, --compset, --mach and --res options  => EXITING....'
        )
        sys.exit()

    if opts_dict['mpi_disable']:
        opts_dict['mpi_enable'] = False

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var lists that will be excluded / included
    ex_varlist = []
    inc_varlist = []

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Most messages are printed by rank 0 only
    is_root = me.get_rank() == 0

    if is_root:
        print('STATUS: Running pyEnsSum.py')
        if verbose:
            print(opts_dict)
            print('STATUS: Ensemble size for summary = ', esize)

    exclude = False
    if is_root and opts_dict['jsonfile']:
        # Read in the excluded or included var list
        ex_varlist, exclude = pyEnsLib.read_jsonlist(opts_dict['jsonfile'],
                                                     'ES')
        if exclude == False:
            # The json file held an include list, not an exclude list
            inc_varlist = ex_varlist
            ex_varlist = []

    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude = me.partition(exclude, func=Duplicate(), involved=True)
        if exclude:
            ex_varlist = me.partition(ex_varlist,
                                      func=Duplicate(),
                                      involved=True)
        else:
            inc_varlist = me.partition(inc_varlist,
                                       func=Duplicate(),
                                       involved=True)

    in_files = []
    if os.path.exists(input_dir):
        # Get the (sorted) list of files
        in_files = sorted(os.listdir(input_dir))

        # Make sure we have enough
        num_files = len(in_files)
        if is_root and verbose:
            print('VERBOSE: Number of files in input directory = ', num_files)
        if num_files < esize:
            # This is fatal, so report it even when --verbose is not set
            if is_root:
                print('ERROR: Number of files in input directory (', num_files,
                      ') is less than specified ensemble size of ', esize)
            sys.exit(2)
        if num_files > esize:
            if is_root and verbose:
                print('VERBOSE: Note that the number of files in ', input_dir,
                      'is greater than specified ensemble size of ', esize,
                      '\nwill just use the first ', esize, 'files')
    else:
        if is_root:
            print('ERROR: Input directory: ', input_dir, ' not found')
        sys.exit(2)

    if opts_dict['cumul']:
        # NOTE(review): the --cumul path looks incomplete in this function:
        # in_files_list is unset when --regx is empty, and further down
        # temp1/temp2 (and, without MPI, gmall/var_list) are used without
        # being assigned here - confirm whether --cumul is still supported.
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'],
                                               opts_dict['regx'])
        in_files = me.partition(in_files_list,
                                func=EqualLength(),
                                involved=True)
        if is_root and verbose:
            print('VERBOSE: in_files  = ', in_files)

    # Check full file names in input directory (don't open yet)
    full_in_files = []
    if is_root and verbose:
        print('VERBOSE: Input files are: ')

    for onefile in in_files[0:esize]:
        fname = input_dir + '/' + onefile
        if is_root and verbose:
            print(fname)
        if os.path.isfile(fname):
            full_in_files.append(fname)
        else:
            if is_root:
                print("ERROR: Could not locate file ", fname,
                      " => EXITING....")
            sys.exit()

    # Nothing selected (e.g. --esize 0): fail with a message instead of an
    # IndexError on full_in_files[0] below
    if not full_in_files:
        if is_root:
            print('ERROR: No input files selected from ', input_dir,
                  ' => EXITING....')
        sys.exit(2)

    # open just the first file
    first_file = nc.Dataset(full_in_files[0], "r")

    # Store dimensions of the input fields (-1 means "not present")
    if is_root and verbose:
        print("VERBOSE: Getting spatial dimensions")
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    # Look at first file and get dims
    input_dims = first_file.dimensions

    for key in input_dims:
        if key == "lev":
            nlev = len(input_dims["lev"])
        elif key == "ilev":
            nilev = len(input_dims["ilev"])
        elif key == "ncol":
            ncol = len(input_dims["ncol"])
        elif key in ("nlon", "lon"):
            nlon = len(input_dims[key])
        elif key in ("nlat", "lat"):
            nlat = len(input_dims[key])

    if nlev == -1:
        if is_root:
            print(
                "ERROR: could not locate a valid dimension (lev) => EXITING...."
            )
        sys.exit()

    if (ncol == -1) and ((nlat == -1) or (nlon == -1)):
        if is_root:
            print("ERROR: Need either lat/lon or ncol  => EXITING....")
        sys.exit()

    # Check if this is SE or FV data: SE files carry an ncol dimension
    is_SE = ncol != -1

    # output dimensions
    if is_root and verbose:
        print('lev = ', nlev)
        if is_SE:
            print('ncol = ', ncol)
        else:
            print('nlat = ', nlat)
            print('nlon = ', nlon)

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = first_file.variables

    # Always filter a copy, so the variables mapping of the open file is
    # left intact ("lev" is read from it further down)
    vars_dict = vars_dict_all.copy()
    if exclude:
        # Remove the excluded variables (specified in json file)
        for name in ex_varlist:
            if name in vars_dict:
                del vars_dict[name]
    else:
        # Given an included var list, remove all the variables that are not
        # on the list
        # NOTE(review): typecode() is called on objects returned by
        # nc.Dataset(...).variables - confirm these provide typecode().
        for k, v in vars_dict_all.items():
            if (k not in inc_varlist) and (v.typecode() == 'f'):
                del vars_dict[k]

    str_size = 0
    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.items():
        vr = len(v.dimensions)  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if is_SE:  # (time, lev, ncol) or (time, ncol)
            if (vr == 2) and (vs[1] == ncol):
                is_2d = True
            elif (vr == 3) and (vs[2] == ncol and vs[1] == nlev):
                is_3d = True
        else:  # (time, lev, nlat, nlon) or (time, nlat, nlon)
            if (vr == 3) and (vs[1] == nlat and vs[2] == nlon):
                is_2d = True
            elif (vr == 4) and (vs[2] == nlat and vs[3] == nlon and
                                (vs[1] == nlev or vs[1] == nilev)):
                is_3d = True

        if is_3d:
            num_3d += 1
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif is_2d:
            num_2d += 1
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    if is_root and verbose:
        print('VERBOSE: Number of variables found:  ', num_3d + num_2d)
        print('VERBOSE: 3D variables: ' + str(num_3d) + ', 2D variables: ' +
              str(num_2d))

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    if esize < num_2d + num_3d:
        if is_root:
            banner = "************************************************************************************************************************************"
            print(banner)
            print("  ERROR: the total number of 3D and 2D variables " +
                  str(num_2d + num_3d) +
                  " is larger than the number of ensemble files " + str(esize))
            print(
                "  Cannot generate ensemble summary file, please remove more variables from your included variable list,"
            )
            print(
                "  or add more variables in your excluded variable list  => EXITING...."
            )
            print(banner)
        sys.exit()

    # All vars is 3d vars first (sorted), then the 2d vars
    all_var_names = d3_var_names + d2_var_names

    # Rank 0 - Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    # check if directory is valid (a bare file name means the current dir)
    sum_dir = os.path.dirname(this_sumfile) or '.'
    if not os.path.exists(sum_dir):
        if is_root:
            print('ERROR: Summary file directory: ', sum_dir, ' not found')
        sys.exit(2)

    if is_root:

        if verbose:
            print("VERBOSE: Creating ", this_sumfile, "  ...")

        # Start from a fresh file
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        nc_sumfile = nc.Dataset(this_sumfile, "w", format="NETCDF4_CLASSIC")

        # Set dimensions
        if verbose:
            print("VERBOSE: Setting dimensions .....")
        if is_SE:
            nc_sumfile.createDimension('ncol', ncol)
        else:
            nc_sumfile.createDimension('nlat', nlat)
            nc_sumfile.createDimension('nlon', nlon)

        nc_sumfile.createDimension('nlev', nlev)
        nc_sumfile.createDimension('ens_size', esize)
        nc_sumfile.createDimension('nvars', num_3d + num_2d)
        nc_sumfile.createDimension('nvars3d', num_3d)
        nc_sumfile.createDimension('nvars2d', num_2d)
        nc_sumfile.createDimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("VERBOSE: Setting global attributes .....")
        nc_sumfile.creation_date = now
        nc_sumfile.title = 'CAM verification ensemble summary file'
        nc_sumfile.tag = opts_dict["tag"]
        nc_sumfile.compset = opts_dict["compset"]
        nc_sumfile.resolution = opts_dict["res"]
        nc_sumfile.machine = opts_dict["mach"]

        # Create variables
        if verbose:
            print("VERBOSE: Creating variables .....")
        v_lev = nc_sumfile.createVariable("lev", 'f8', ('nlev', ))
        v_vars = nc_sumfile.createVariable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.createVariable("var3d", 'S1',
                                            ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.createVariable("var2d", 'S1',
                                            ('nvars2d', 'str_size'))

        v_gm = nc_sumfile.createVariable("global_mean", 'f8',
                                         ('nvars', 'ens_size'))
        v_standardized_gm = nc_sumfile.createVariable("standardized_gm", 'f8',
                                                      ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.createVariable('loadings_gm', 'f8',
                                                  ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.createVariable('mu_gm', 'f8', ('nvars', ))
        v_sigma_gm = nc_sumfile.createVariable('sigma_gm', 'f8', ('nvars', ))
        v_sigma_scores_gm = nc_sumfile.createVariable('sigma_scores_gm', 'f8',
                                                      ('nvars', ))

        # Assign vars, var3d and var2d
        if verbose:
            print("VERBOSE: Assigning vars, var3d, and var2d .....")

        # The name variables are character arrays, so every name is stored
        # as a row of str_size single characters
        v_vars[:] = _pad_var_names(all_var_names, str_size)
        v_var3d[:] = _pad_var_names(d3_var_names, str_size)
        v_var2d[:] = _pad_var_names(d2_var_names, str_size)

        # Time-invariant metadata
        if verbose:
            print("VERBOSE: Assigning time invariant metadata .....")

        lev_data = first_file.variables["lev"]
        v_lev[:] = lev_data[:]
    # end of rank=0 work

    # All:
    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names,
                                     func=EqualStride(),
                                     involved=True)
        var2_list_loc = me.partition(d2_var_names,
                                     func=EqualStride(),
                                     involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # close first_file
    first_file.close()

    # Calculate global means #
    if is_root and verbose:
        print("VERBOSE: Calculating global means .....")
    if not opts_dict['cumul']:
        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(
            full_in_files, var3_list_loc, var2_list_loc, is_SE, False,
            opts_dict)
    if is_root and verbose:
        print("VERBOSE: Finished calculating global means .....")

    # gather to rank = 0
    if opts_dict['mpi_enable']:

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index,
                                  (len(d3_var_names), len(full_in_files)))

            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index,
                                  (len(d2_var_names), len(full_in_files)))

            # gather variables to exclude (in pre_pca)
            var_list = gather_list(var_list, me)

        else:
            # NOTE(review): temp1 and temp2 are not assigned anywhere in
            # this function - see the note on the --cumul path above.
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(d3_var_names) + len(d2_var_names)))

    # rank =0 : complete calculations for summary file
    if is_root:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
        else:
            gmall = np.transpose(gmall[:, :])

        # PCA prep and calculation
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm, b_exit = pyEnsLib.pre_PCA(
            gmall, all_var_names, var_list, me)

        # if PCA calc encounters an error, then remove the summary file and exit
        if b_exit:
            nc_sumfile.close()
            os.unlink(this_sumfile)
            print("STATUS: Summary could not be created.")
            sys.exit(2)

        v_gm[:, :] = gmall[:, :]
        v_standardized_gm[:, :] = standardized_global_mean[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:]
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        print("STATUS: Summary file is complete.")

        nc_sumfile.close()
def main(argv):
    """Run pyCECT: test new run files against an ensemble summary file.

    Parses the long options in *argv*, selects and opens the test case
    files, and then either

    * (``--popens``) computes scores with ``pyEnsLib.compare_raw_score`` and,
      when MPI is enabled, writes them on rank 0 to ``--outfile``; or
    * reads the ensemble summary file, compares RMSZ scores (when the
      summary holds them) and global means of the new runs against the
      ensemble ranges, runs the PCA score comparison through
      ``pyEnsLib.comparePCAscores`` and optionally prints the per-variable
      results (``--printVarTest``).

    Args:
        argv: command line arguments, without the program name.

    The process is terminated through ``sys.exit`` on invalid options or
    when a selected test file cannot be found.
    """

    # Get command line stuff and store in a dictionary
    s='verbose sumfile= indir= input_globs= tslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVarTest popens jsonfile= mpi_enable nbin= minrange= maxrange= outfile= casejson= npick= pepsi_gm test_failure pop_tol= pop_threshold='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']

    # Print out timestamp, input ensemble file and new run directory
    # NOTE(review): 'sumfile' has no default above, so this relies on
    # getopt_parseconfig (or the user) always supplying it - confirm.
    dt = datetime.now()
    verbose = opts_dict['verbose']
    print('--------pyCECT--------')
    print(' ')
    print(dt.strftime("%A, %d. %B %Y %I:%M%p"))
    print(' ')
    print('Ensemble summary file = ' + opts_dict['sumfile'])
    print(' ')
    print('Testcase file directory = ' + opts_dict['indir'])
    print(' ')
    print(' ')

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    ifiles = []
    in_files = []
    if opts_dict['casejson']:
        # Random pick pop files from not_pick_files list
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
        in_files_first = result['not_pick_files']
        in_files = random.sample(in_files_first, opts_dict['npick'])
        print('Testcase files:')
        print('\n'.join(in_files))
    else:
        # Collect every file in indir whose name contains input_globs
        wildname = '*' + opts_dict['input_globs'] + '*'
        if os.path.exists(opts_dict['indir']):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            in_files.extend(glob.glob(full_glob_str))
    in_files.sort()

    if popens:
        # Partition the input file list
        in_files_list = me.partition(in_files,
                                     func=EqualStride(),
                                     involved=True)
    else:
        # Random pick non pop files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)

    # Open all selected input files
    for frun_file in in_files_list:
        # Entries that do not already contain indir are taken relative to it
        if opts_dict['indir'] in frun_file:
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if os.path.isfile(frun_temp):
            ifiles.append(Nio.open_file(frun_temp, "r"))
        else:
            print("COULD NOT LOCATE FILE " + frun_temp + " EXITING")
            sys.exit()

    if popens:

        # Read in the included var list
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print(' ')
        print('Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol']))
        print('ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold']))
        zmall, n_timeslice = pyEnsLib.compare_raw_score(
            opts_dict, ifiles, me.get_rank(), Var3d, Var2d)
        # Never abbreviate printed arrays.  (threshold=np.nan is rejected by
        # current NumPy releases; sys.maxsize is the documented replacement.)
        np.set_printoptions(threshold=sys.maxsize)

        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(
                zmall, me, (me.get_size(), len(Var3d) + len(Var2d),
                            len(ifiles), opts_dict['nbin']))
            if me.get_rank() == 0:
                # Only rank 0 holds the gathered scores; "with" makes sure
                # the output file is flushed and closed
                with open(opts_dict['outfile'], "w") as fout:
                    for i in range(me.get_size()):
                        for j in zmall[i]:
                            np.savetxt(fout, j, fmt='%-7.2e')
    else:
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum = pyEnsLib.read_ensemble_summary(
            opts_dict['sumfile'])

        # A summary without RMSZ data can only be tested on global means
        if len(ens_rmsz) == 0:
            gmonly = True
        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}
        if not gmonly:
            for k, v in ens_rmsz.items():
                pyEnsLib.addvariables(variables, k, 'zscoreRange', v)

        for k, v in ens_gm.items():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)

        # Get 3d variable name list and 2d variable name list separately:
        # the first num_3d names are the 3d variables
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

        if (is_SE ^ is_SE_sum):
            print('Warning: please note the ensemble summary file is different from the testing files, they use different grids')

        # Compare the new run and the ensemble summary file to get rmsz score
        results = {}
        countzscore = np.zeros(len(ifiles), dtype=np.int32)
        countgm = np.zeros(len(ifiles), dtype=np.int32)
        if not gmonly:
            for fcount, fid in enumerate(ifiles):
                otimeSeries = fid.variables
                for var_name in ens_var_name:
                    orig = otimeSeries[var_name]
                    Zscore, has_zscore = pyEnsLib.calculate_raw_score(
                        var_name, orig[opts_dict['tslice']], npts3d, npts2d,
                        ens_avg, ens_stddev, is_SE, opts_dict, 0, 0, 0)
                    if has_zscore:
                        # Add the new run rmsz zscore to the dictionary "results"
                        pyEnsLib.addresults(results, 'zscore', Zscore,
                                            var_name, 'f' + str(fcount))

            # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
            for fcount, fid in enumerate(ifiles):
                countzscore[fcount] = pyEnsLib.evaluatestatus(
                    'zscore', 'zscoreRange', variables, 'ens', results,
                    'f' + str(fcount))

        # Calculate the new run global mean
        # NOTE(review): two return values are unpacked here - confirm this
        # matches the pyEnsLib version in use.
        mean3d, mean2d = pyEnsLib.generate_global_mean_for_summary(
            ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'],
            opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        # (means is indexed [variable][file])
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i],
                                    ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount, fid in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange',
                                                      variables, 'gm',
                                                      results,
                                                      'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores = pyEnsLib.standardized(means, mu_gm, sigma_gm,
                                           loadings_gm)
        pyEnsLib.comparePCAscores(ifiles, new_scores, sigma_scores_gm,
                                  opts_dict)

        # Print out
        if opts_dict['printVarTest']:
            print('*********************************************** ')
            print('Variable-based testing (for reference only - not used to determine pass/fail)')
            print('*********************************************** ')
            for fcount, fid in enumerate(ifiles):
                print(' ')
                print('Run ' + str(fcount + 1) + ":")
                print(' ')
                if not gmonly:
                    print('***' + str(countzscore[fcount]), " of " + str(len(ens_var_name)) + ' variables are outside of ensemble RMSZ distribution***')
                    pyEnsLib.printsummary(results, 'ens', 'zscore',
                                          'zscoreRange', (fcount), variables,
                                          'RMSZ')
                    print(' ')
                print('***' + str(countgm[fcount]), " of " + str(len(ens_var_name)) + ' variables are outside of ensemble global mean distribution***')
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange',
                                      fcount, variables, 'global mean')
                print(' ')
                print('----------------------------------------------------------------------------')

if __name__ == "__main__":
    # Script entry point: run the test with the arguments that follow the
    # program name, then print a blank-looking line and a completion message.
    cli_args = sys.argv[1:]
    main(cli_args)
    for closing_line in (' ', "Testing complete."):
        print(closing_line)
Example #3
0
def main(argv):
    """Run the consistency test of new runs against an ensemble summary.

    The options in ``argv`` are parsed with ``getopt`` and merged over the
    defaults below by ``pyEnsLib.getopt_parseconfig``.  The test-run files are
    then located in ``--indir`` (via ``--casejson``, ``--json_case`` or the
    ``--input_globs`` wildcard) and one of two paths is taken:

    * ``--popens``: the POP path, which reads the variable list from
      ``--jsonfile`` and calls ``pyEnsLib.pop_compare_raw_score``.
    * otherwise: the CAM path, which reads the ensemble summary file,
      computes the global means of the new runs, and compares their PCA
      scores with ``pyEnsLib.comparePCAscores``.  Optional reports are
      produced for ``--printStdMean`` (box plots), ``--saveResults``
      (``savefile.nc``) and ``--printVars``.

    Args:
        argv: command-line arguments without the program name.

    Raises:
        SystemExit: on invalid options, invalid paths or missing input files.
    """

    # Get command line stuff and store in a dictionary
    s = """verbose sumfile= indir= input_globs= tslice= nPC= sigMul=
         minPCFail= minRunFail= numRunFile= printVars popens
         jsonfile= mpi_enable nbin= minrange= maxrange= outfile=
         casejson= npick= pepsi_gm pop_tol= web_enabled
         pop_threshold= printStdMean fIndex= lev= eet= saveResults json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVars'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['printStdMean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False
    opts_dict['saveResults'] = False

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']

    #some mods for POP-ECT
    if popens == True:
        opts_dict['tslice'] = 0
        opts_dict['numRunFile'] = 1
        opts_dict['eet'] = 0
        opts_dict['mpi_enable'] = False

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']
    if me.get_rank() == 0:
        print(' ')
        print('--------pyCECT--------')
        print(' ')
        print(dt.strftime("%A, %d. %B %Y %I:%M%p"))
        print(' ')
        if not opts_dict['web_enabled']:
            print('Ensemble summary file = ' + opts_dict['sumfile'])
        print(' ')
        print('Testcase file directory = ' + opts_dict['indir'])
        print(' ')
        print(' ')

    #make sure these are valid
    if opts_dict['web_enabled'] == False and not os.path.isfile(
            opts_dict['sumfile']):
        print("ERROR: Summary file name is not valid.")
        sys.exit()
    if not os.path.exists(opts_dict['indir']):
        print("ERROR: --indir path is not valid.")
        sys.exit()

    # Ensure sensible EET value
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    ifiles = []
    in_files = []
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
            in_files_first = result['not_pick_files']
            in_files = random.sample(in_files_first, opts_dict['npick'])
            print('Testcase files:')
            print('\n'.join(in_files))

    elif opts_dict['json_case']:
        json_file = opts_dict['json_case']
        if (os.path.exists(json_file)):
            # Read the case metadata inside a context manager so the handle
            # is released as soon as the JSON has been parsed.
            with open(json_file) as fd:
                metainfo = json.load(fd)
            if 'CaseName' in metainfo:
                casename = metainfo['CaseName']
                if (os.path.exists(opts_dict['indir'])):
                    for name in casename:
                        wildname = '*.' + name + '.*'
                        full_glob_str = os.path.join(opts_dict['indir'],
                                                     wildname)
                        glob_file = glob.glob(full_glob_str)
                        in_files.extend(glob_file)
        else:
            print("ERROR: " + opts_dict['json_case'] + " does not exist.")
            sys.exit()
        print("in_files=", in_files)
    else:
        wildname = '*' + str(opts_dict['input_globs']) + '*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            glob_files = glob.glob(full_glob_str)
            in_files.extend(glob_files)
            num_file = len(in_files)
            if num_file == 0:
                print("ERROR: no matching files for wildcard=" + wildname +
                      " found in specified --indir")
                sys.exit()
            else:
                print("Found " + str(num_file) +
                      " matching files in specified --indir")
            if opts_dict['numRunFile'] > num_file:
                print("ERROR: more files needed (" +
                      str(opts_dict['numRunFile']) +
                      ") than available in the indir (" + str(num_file) + ").")
                sys.exit()

    in_files.sort()

    if popens:
        #Partition the input file list
        in_files_list = me.partition(in_files,
                                     func=EqualStride(),
                                     involved=True)

    else:
        # Random pick cam files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)

    # Build the full path of every selected file and make sure it exists
    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            # The name already contains the input directory
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(frun_temp)
        else:
            print("ERROR: COULD NOT LOCATE FILE " + frun_temp)
            sys.exit()

    if opts_dict['web_enabled']:
        if len(opts_dict['sumfile']) == 0:
            opts_dict[
                'sumfile'] = '/glade/p/cesmdata/cseg/inputdata/validation/'
        #need to open ifiles

        opts_dict['sumfile'], machineid, compiler = pyEnsLib.search_sumfile(
            opts_dict, ifiles)
        if len(machineid) != 0 and len(compiler) != 0:
            print(' ')
            print('Validation file    : machineid = ' + machineid +
                  ', compiler = ' + compiler)
            print('Found summary file : ' + opts_dict['sumfile'])
            print(' ')
        else:
            print('Warning: machine and compiler are unknown')

    if popens:

        # Read in the included var list
        if not os.path.exists(opts_dict['jsonfile']):
            print(
                "ERROR: POP-ECT requires the specification of a valid json file via --jsonfile."
            )
            sys.exit()
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print(' ')
        print('Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol']))
        print('ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold']))
        zmall, n_timeslice = pyEnsLib.pop_compare_raw_score(
            opts_dict, ifiles, me.get_rank(), Var3d, Var2d)

        np.set_printoptions(threshold=sys.maxsize)

        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(
                zmall, me, (me.get_size(), len(Var3d) + len(Var2d),
                            len(ifiles), opts_dict['nbin']))
            if me.get_rank() == 0:
                # Write through a context manager so the output file is
                # flushed and closed once every rank's block is saved.
                with open(opts_dict['outfile'], "w") as fout:
                    for i in range(me.get_size()):
                        for j in zmall[i]:
                            np.savetxt(fout, j, fmt='%-7.2e')
    #cam
    else:
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum, std_gm, std_gm_array, str_size = pyEnsLib.read_ensemble_summary(
            opts_dict['sumfile'])

        #Only doing gm

        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}

        for k, v in ens_gm.items():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)

        # Get 3d variable name list and 2d variable name list separately
        # (the first num_3d names are treated as the 3d variables)
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

        if (is_SE ^ is_SE_sum):
            print(
                'Warning: please note the ensemble summary file is different from the testing files: they use different grids'
            )

        # Compare the new run and the ensemble summary file
        results = {}
        countgm = np.zeros(len(ifiles), dtype=np.int32)

        # Calculate the new run global mean
        mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary(
            ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'],
            opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i],
                                    ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount, _ in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange',
                                                      variables, 'gm', results,
                                                      'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores, var_list, comp_std_gm = pyEnsLib.standardized(
            means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict,
            ens_avg, me)
        run_index, decision = pyEnsLib.comparePCAscores(
            ifiles, new_scores, sigma_scores_gm, opts_dict, me)

        # If there is failure, plot out standardized mean and compared standardized mean in box plots
        #        if opts_dict['printStdMean'] and decision == 'FAILED':
        if opts_dict['printStdMean']:

            # Plotting packages are imported lazily: they are only needed
            # when this optional report is requested.
            import seaborn as sns
            import matplotlib
            matplotlib.use('Agg')  #don't display figures
            import matplotlib.pyplot as plt

            print(" ")
            print(
                '***************************************************************************** '
            )
            print(
                'Test run variable standardized means (for reference only - not used to determine pass/fail)'
            )
            print(
                '***************************************************************************** '
            )
            print(" ")

            # Each category collects (variable name, row index) pairs
            category = {
                "all_outside99": [],
                "two_outside99": [],
                "one_outside99": [],
                "all_oneside_outside1QR": []
            }
            b = list(pyEnsLib.chunk(ens_var_name, 10))
            for f, alist in enumerate(b):
                for fc, avar in enumerate(alist):
                    # NOTE(review): if pyEnsLib.chunk yields consecutive
                    # groups of 10 names, the row belonging to avar would be
                    # f * 10 + fc rather than f + fc - confirm against
                    # pyEnsLib.chunk and the row order of comp_std_gm.
                    dist_995 = np.percentile(std_gm[avar], 99.5)
                    dist_75 = np.percentile(std_gm[avar], 75)
                    dist_25 = np.percentile(std_gm[avar], 25)
                    dist_05 = np.percentile(std_gm[avar], 0.5)
                    # c / d: values above the 99.5th / below the 0.5th
                    # percentile; p / q: values between the 75th and 99.5th /
                    # between the 0.5th and 25th percentile.
                    c = 0
                    d = 0
                    p = 0
                    q = 0
                    for i in range(comp_std_gm[f + fc].size):
                        if comp_std_gm[f + fc][i] > dist_995:
                            c = c + 1
                        elif comp_std_gm[f + fc][i] < dist_05:
                            d = d + 1
                        elif (comp_std_gm[f + fc][i] < dist_995
                              and comp_std_gm[f + fc][i] > dist_75):
                            p = p + 1
                        elif (comp_std_gm[f + fc][i] > dist_05
                              and comp_std_gm[f + fc][i] < dist_25):
                            q = q + 1
                    if c == 3 or d == 3:
                        category["all_outside99"].append((avar, f + fc))
                    elif c == 2 or d == 2:
                        category["two_outside99"].append((avar, f + fc))
                    elif c == 1 or d == 1:
                        category["one_outside99"].append((avar, f + fc))
                    if p == 3 or q == 3:
                        category["all_oneside_outside1QR"].append(
                            (avar, f + fc))
            # Last non-empty component of --indir, used as the plot file prefix
            part_name = opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name = opts_dict['indir'].split('/')[-2]
            for key in sorted(category):
                list_array = []
                list_array2 = []
                list_var = []
                value = category[key]

                if key == "all_outside99":
                    print(
                        "*** ", len(value),
                        " variables have 3 test run global means outside of the 99th percentile."
                    )
                elif key == "two_outside99":
                    print(
                        "*** ", len(value),
                        " variables have 2 test run global means outside of the 99th percentile."
                    )
                elif key == "one_outside99":
                    print(
                        "*** ", len(value),
                        " variables have 1 test run global mean outside of the 99th percentile."
                    )
                elif key == "all_oneside_outside1QR":
                    print(
                        "*** ", len(value),
                        " variables have all test run global means outside of the first quartile (but not outside the 99th percentile)."
                    )

                if len(value) > 0:
                    print(" => generating plot ...")
                    if len(value) > 20:
                        print(
                            "    NOTE: truncating to only plot the first 20 variables."
                        )
                        value = value[0:20]

                for each_var in value:
                    list_array.append(std_gm[each_var[0]])
                    list_array2.append(comp_std_gm[each_var[1]])
                    name = each_var[0]
                    # Names that are not str are decoded for the tick labels
                    if not isinstance(name, str):
                        name = name.decode("utf-8")

                    list_var.append(name)

                if len(value) != 0:
                    sns.boxplot(data=list_array,
                                whis=[0.5, 99.5],
                                fliersize=0.0)
                    sns.stripplot(data=list_array2, jitter=True, color="r")
                    plt.xticks(list(range(len(list_array))),
                               list_var,
                               fontsize=8,
                               rotation=-45)

                    if decision == 'FAILED':
                        plt.savefig(part_name + "_" + key + "_fail.png")
                    else:
                        plt.savefig(part_name + "_" + key + "_pass.png")
                    plt.close()

        ##
        # Print file with info about new test runs....to a netcdf file
        ##
        if opts_dict['saveResults']:

            num_vars = comp_std_gm.shape[0]
            tsize = comp_std_gm.shape[1]
            esize = std_gm_array.shape[1]
            this_savefile = 'savefile.nc'
            if (verbose == True):
                print("VERBOSE: Creating ", this_savefile, "  ...")

            # Start from a fresh file
            if os.path.exists(this_savefile):
                os.unlink(this_savefile)
            nc_savefile = nc.Dataset(this_savefile,
                                     "w",
                                     format="NETCDF4_CLASSIC")
            nc_savefile.createDimension('ens_size', esize)
            nc_savefile.createDimension('test_size', tsize)
            nc_savefile.createDimension('nvars', num_vars)
            nc_savefile.createDimension('str_size', str_size)

            # Set global attributes
            now = time.strftime("%c")
            nc_savefile.creation_date = now
            nc_savefile.title = 'PyCECT compare results file'
            nc_savefile.summaryfile = opts_dict['sumfile']
            #nc_savefile.testfiles = in_files

            #variables
            v_vars = nc_savefile.createVariable("vars", 'S1',
                                                ('nvars', 'str_size'))
            v_std_gm = nc_savefile.createVariable("std_gm", 'f8',
                                                  ('nvars', 'test_size'))
            v_scores = nc_savefile.createVariable("scores", 'f8',
                                                  ('nvars', 'test_size'))
            v_ens_sigma_scores = nc_savefile.createVariable(
                'ens_sigma_scores', 'f8', ('nvars', ))
            v_ens_std_gm = nc_savefile.createVariable("ens_std_gm", 'f8',
                                                      ('nvars', 'ens_size'))

            #hard-coded size
            # NOTE(review): the width 10 is fixed here while the "vars"
            # variable uses the str_size dimension - confirm they always agree.
            str_out = nc.stringtochar(np.array(ens_var_name, 'S10'))

            v_vars[:] = str_out
            v_std_gm[:, :] = comp_std_gm[:, :]
            v_scores[:, :] = new_scores[:, :]
            v_ens_sigma_scores[:] = sigma_scores_gm[:]
            v_ens_std_gm[:, :] = std_gm_array[:, :]

            nc_savefile.close()

        # Print variables (optional)
        if opts_dict['printVars']:
            print(" ")
            print(
                '***************************************************************************** '
            )
            print(
                'Variable global mean information (for reference only - not used to determine pass/fail)'
            )
            print(
                '***************************************************************************** '
            )
            for fcount, _ in enumerate(ifiles):
                print(' ')
                print('Run ' + str(fcount + 1) + ":")
                print(' ')
                print(
                    '***' + str(countgm[fcount]),
                    " of " + str(len(ens_var_name)) +
                    ' variables are outside of ensemble global mean distribution***'
                )
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange',
                                      fcount, variables, 'global mean')
                print(' ')
                print(
                    '----------------------------------------------------------------------------'
                )

    if me.get_rank() == 0:
        print(' ')
        print("Testing complete.")
        print(' ')
Example #4
0
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list
        
        var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True)
        var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True)
    else:
        var3_list_loc=d3_var_names
        var2_list_loc=d2_var_names

    # Calculate global means #
    if me.get_rank() == 0 and (verbose == True):
        print "Calculating global means ....."
    if not opts_dict['cumul']:
        gm3d,gm2d,var_list = pyEnsLib.generate_global_mean_for_summary(o_files,var3_list_loc,var2_list_loc , is_SE, False,opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print "Finish calculating global means ....."

    # Calculate RMSZ scores  
    if (not opts_dict['gmonly']) | (opts_dict['cumul']):
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating RMSZ scores ....."
        zscore3d,zscore2d,ens_avg3d,ens_stddev3d,ens_avg2d,ens_stddev2d,temp1,temp2=pyEnsLib.calc_rmsz(o_files,var3_list_loc,var2_list_loc,is_SE,opts_dict)    

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict,var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict,var2_list_loc)
Example #5
0
def main(argv):
    """Build a POP ensemble summary file from the files in ``--indir``.

    The options in ``argv`` are parsed with ``getopt`` and merged over the
    defaults below by ``pyEnsLib.getopt_parseconfig``.  The variable lists are
    read from ``--jsonfile``, the input files are partitioned over the
    communicator and opened with Nio, and rank 0 creates the summary file
    (dimensions, global attributes and variables).  Global means (unless
    ``--zscoreonly``) and RMSZ scores are then computed, gathered when MPI is
    enabled, and written to the summary file by rank 0.

    All messages are written with single-argument ``print(...)`` calls so the
    emitted text is the same whether ``print`` is a statement or a function.

    Args:
        argv: command-line arguments without the program name.

    Raises:
        SystemExit: on invalid options, a missing ``--jsonfile``, a missing
            input directory or file, or a dimension mismatch between files.
    """
    print('Running pyEnsSumPop!')

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable zscoreonly nrand= rand seq= jsondir='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm1_2_0'
    opts_dict['compset'] = 'FC5'
    opts_dict['mach'] = 'yellowstone'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 3
    opts_dict['nmonth'] = 12
    opts_dict['npert'] = 40
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'ne30_ne30'
    opts_dict['sumfile'] = 'ens.pop.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['zscoreonly'] = False
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = '/glade/scratch/haiyingx/'

    # This creates the dictionary of input arguments
    print("before parseconfig")
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if verbose:
        print(opts_dict)

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object (once; it is reused for partitioning,
    # gathering and rank checks below)
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # The variable lists and str_size are required by everything below, so
    # stop here with a clear message instead of failing later on an
    # undefined name.
    if not opts_dict['jsonfile']:
        print("ERROR: a json file with the variable list is required (--jsonfile)  => EXITING....")
        sys.exit(2)

    # Read in the included var list
    Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
    # str_size is the length of the longest variable name
    str_size = 0
    for var_name in Var3d:
        if str_size < len(var_name):
            str_size = len(var_name)
    for var_name in Var2d:
        if str_size < len(var_name):
            str_size = len(var_name)

    in_files = []
    if (os.path.exists(input_dir)):
        # Pick up the 'nrand' random number of input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict,
                                                  opts_dict['nrand'])
        else:
            # Get the list of files
            in_files = sorted(os.listdir(input_dir))
    else:
        print('Input directory:  %s  not found' % (input_dir, ))
        sys.exit(2)

    #Partition the input file list
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Open the files in the input directory
    o_files = []
    for onefile in in_file_list:
        file_path = input_dir + '/' + onefile
        if (os.path.isfile(file_path)):
            o_files.append(Nio.open_file(file_path, "r"))
        else:
            # Report the same path that was just tested
            print("COULD NOT LOCATE FILE " + file_path + "! EXITING....")
            sys.exit()

    print(in_file_list)

    # Store dimensions of the input fields
    if (verbose == True):
        print("Getting spatial dimensions")
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = o_files[0].dimensions

    for key in input_dims:
        if key == "z_t":
            nlev = input_dims["z_t"]
        elif key == "nlon":
            nlon = input_dims["nlon"]
        elif key == "nlat":
            nlat = input_dims["nlat"]

    # Make sure all files have the same dimensions
    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (nlev != int(input_dims["z_t"])
                or nlat != int(input_dims["nlat"])
                or nlon != int(input_dims["nlon"])):
            print("Dimension mismatch between  %s and %s !!!" %
                  (in_file_list[0], in_file_list[count]))
            sys.exit()

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if verbose:
        print("Creating  %s   ..." % (this_sumfile, ))
    if (me.get_rank() == 0):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'

        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if (verbose == True):
            print("Setting dimensions .....")
        nc_sumfile.create_dimension('nlat', nlat)
        nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('time', None)
        nc_sumfile.create_dimension('ens_size', opts_dict['npert'])
        nc_sumfile.create_dimension('nbin', opts_dict['nbin'])
        nc_sumfile.create_dimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.create_dimension('nvars3d', len(Var3d))
        nc_sumfile.create_dimension('nvars2d', len(Var2d))
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if (verbose == True):
            print("Setting global attributes .....")
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'POP verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if (verbose == True):
            print("Creating variables .....")
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev', ))
        v_vars = nc_sumfile.create_variable("vars", 'S1',
                                            ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1',
                                             ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1',
                                             ('nvars2d', 'str_size'))
        v_time = nc_sumfile.create_variable("time", 'd', ('time', ))
        v_ens_avg3d = nc_sumfile.create_variable(
            "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.create_variable(
            "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.create_variable(
            "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.create_variable(
            "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))

        v_RMSZ = nc_sumfile.create_variable(
            "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))
        if not opts_dict['zscoreonly']:
            v_gm = nc_sumfile.create_variable("global_mean", 'f',
                                              ('time', 'nvars', 'ens_size'))

        # Assign vars, var3d and var2d
        if (verbose == True):
            print("Assigning vars, var3d, and var2d .....")

        # Every name becomes a list of characters, right-padded with blanks
        # to str_size so all rows have the same length.
        all_var_names = list(Var3d)
        all_var_names += Var2d
        eq_all_var_names = [list(name.ljust(str_size))
                            for name in all_var_names]
        eq_d3_var_names = [list(name.ljust(str_size)) for name in Var3d]
        eq_d2_var_names = [list(name.ljust(str_size)) for name in Var2d]

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if (verbose == True):
            print("Assigning time invariant metadata .....")
        vars_dict = o_files[0].variables
        lev_data = vars_dict["z_t"]
        # Copy the values into the file variable; a plain "v_lev = lev_data"
        # would only rebind the local name and leave "lev" unwritten.
        v_lev[:] = lev_data[:]

    # Time-varient metadata
    if verbose:
        print("Assigning time variant metadata .....")
    vars_dict = o_files[0].variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(), ))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    # Calculate global mean, average, standard deviation
    if verbose:
        print("Calculating global means .....")
    is_SE = False
    if not opts_dict['zscoreonly']:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
            o_files, Var3d, Var2d, is_SE, False, opts_dict)
    if verbose:
        print("Finish calculating global means .....")

    # Calculate RMSZ scores
    if (verbose == True):
        print("Calculating RMSZ scores .....")
    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
        o_files, Var3d, Var2d, is_SE, opts_dict)

    # Collect from all processors
    # NOTE(review): zmall and gmall are only assigned inside this branch, yet
    # they are written to the file below regardless of mpi_enable - confirm
    # how the non-MPI case is meant to be handled.
    if opts_dict['mpi_enable']:
        # Gather the 3d variable results from all processors to the master processor
        # Gather global means 3d results
        if not opts_dict['zscoreonly']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(Var3d) + len(Var2d), len(o_files)))
        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(
            zmall, me,
            (me.get_size(), len(Var3d) + len(Var2d), len(o_files), nbin))

        ens_avg3d = pyEnsLib.gather_npArray_pop(
            ens_avg3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(ens_avg2d, me,
                                                (me.get_size(), len(Var2d),
                                                 (nlat), nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(
            ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(ens_stddev2d, me,
                                                   (me.get_size(), len(Var2d),
                                                    (nlat), nlon))

    # Assign to file:
    if me.get_rank() == 0:
        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]
        if not opts_dict['zscoreonly']:
            v_gm[:, :, :] = gmall[:, :, :]
        # Close explicitly so everything written above is flushed to disk
        nc_sumfile.close()
        print("All done")
Example #6
0
def main(argv):
    """Create the POP verification ensemble summary file.

    Parses the long options in ``argv`` on top of a set of built-in defaults,
    reads the 2D/3D variable lists from the JSON file, partitions the input
    files over the communicator ranks and checks that every file agrees on the
    z_t/nlat/nlon dimensions.  Global means and RMSZ scores are computed
    through pyEnsLib, gathered, and written by rank 0 into a newly created
    NetCDF summary file.

    Parameters:
        argv: command-line arguments (program name excluded), handed to
            ``getopt.getopt``.

    Exits through ``sys.exit`` on an unrecognised option, a missing
    --jsonfile, a missing input directory or input file, an empty file list,
    or a dimension mismatch between input files.
    """
    print('Running pyEnsSumPop!')

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable zscoreonly nrand= rand seq= jsondir='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Defaults; getopt_parseconfig overlays whatever was given on the
    # command line.
    opts_dict = {
        'tag': 'cesm1_2_0',
        'compset': 'FC5',
        'mach': 'yellowstone',
        'tslice': 0,
        'nyear': 3,
        'nmonth': 12,
        'npert': 40,
        'nbin': 40,
        'minrange': 0.0,
        'maxrange': 4.0,
        'res': 'ne30_ne30',
        'sumfile': 'ens.pop.summary.nc',
        'indir': './',
        'jsonfile': '',
        'verbose': True,
        'mpi_enable': False,
        'zscoreonly': False,
        'popens': True,
        'nrand': 40,
        'rand': False,
        'seq': 0,
        'jsondir': '/glade/scratch/haiyingx/',
    }

    # This creates the dictionary of input arguments
    print("before parseconfig")
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if verbose:
        print(opts_dict)

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object (once; it is reused for the file
    # partitioning and for every gather below)
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # The variable lists are required by everything that follows, so a
    # missing --jsonfile is reported here instead of surfacing later as a
    # NameError on Var2d/Var3d/str_size.
    if not opts_dict['jsonfile']:
        print("ERROR: Please specify --jsonfile with the variable list! EXITING....")
        sys.exit(2)

    # Read in the included var list
    Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')

    # Longest variable name; this becomes the 'str_size' dimension
    str_size = 0
    for var_name in Var3d:
        if str_size < len(var_name):
            str_size = len(var_name)
    for var_name in Var2d:
        if str_size < len(var_name):
            str_size = len(var_name)

    in_files = []
    if os.path.exists(input_dir):
        if opts_dict['rand']:
            # Pick up the 'nrand' random number of input files to generate summary files
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict,
                                                  opts_dict['nrand'])
        else:
            # Get the (sorted) list of files
            in_files = sorted(os.listdir(input_dir))
    else:
        print('Input directory:  %s  not found' % (input_dir,))
        sys.exit(2)

    # Partition the input file list
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Open the files in the input directory
    o_files = []
    for onefile in in_file_list:
        file_path = input_dir + '/' + onefile
        if os.path.isfile(file_path):
            o_files.append(Nio.open_file(file_path, "r"))
        else:
            print("COULD NOT LOCATE FILE " + file_path + "! EXITING....")
            sys.exit()

    print(in_file_list)

    # Everything below reads o_files[0]; stop with a message rather than an
    # IndexError when this rank received no files.
    if not o_files:
        print("NO INPUT FILES FOUND IN " + input_dir + "! EXITING....")
        sys.exit(2)

    # Store dimensions of the input fields
    if verbose:
        print("Getting spatial dimensions")
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    for key in input_dims:
        if key == "z_t":
            nlev = input_dims["z_t"]
        elif key == "nlon":
            nlon = input_dims["nlon"]
        elif key == "nlat":
            nlat = input_dims["nlat"]

    # Make sure all files have the same dimensions
    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (nlev != int(input_dims["z_t"])
                or nlat != int(input_dims["nlat"])
                or nlon != int(input_dims["nlon"])):
            print("Dimension mismatch between  %s and %s !!!"
                  % (in_file_list[0], in_file_list[count]))
            sys.exit()

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if verbose:
        print("Creating  %s   ..." % (this_sumfile,))
    if me.get_rank() == 0:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'

        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if verbose:
            print("Setting dimensions .....")
        nc_sumfile.create_dimension('nlat', nlat)
        nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('time', None)
        nc_sumfile.create_dimension('ens_size', opts_dict['npert'])
        nc_sumfile.create_dimension('nbin', opts_dict['nbin'])
        nc_sumfile.create_dimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.create_dimension('nvars3d', len(Var3d))
        nc_sumfile.create_dimension('nvars2d', len(Var2d))
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("Setting global attributes .....")
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'POP verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if verbose:
            print("Creating variables .....")
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        v_time = nc_sumfile.create_variable("time", 'd', ('time',))
        v_ens_avg3d = nc_sumfile.create_variable(
            "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.create_variable(
            "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.create_variable(
            "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.create_variable(
            "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))

        v_RMSZ = nc_sumfile.create_variable(
            "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))
        if not opts_dict['zscoreonly']:
            v_gm = nc_sumfile.create_variable(
                "global_mean", 'f', ('time', 'nvars', 'ens_size'))

        # Assign vars, var3d and var2d
        if verbose:
            print("Assigning vars, var3d, and var2d .....")

        def pad_name(name):
            # One character per element, blank-padded on the right up to
            # str_size so that every row fits the ('nvars*', 'str_size')
            # character variables.
            chars = list(name)
            chars.extend([' '] * (str_size - len(chars)))
            return chars

        # All vars is 3d vars first, then the 2d vars
        all_var_names = list(Var3d)
        all_var_names += Var2d

        v_vars[:] = [pad_name(name) for name in all_var_names]
        v_var3d[:] = [pad_name(name) for name in Var3d]
        v_var2d[:] = [pad_name(name) for name in Var2d]

        # Time-invariant metadata
        if verbose:
            print("Assigning time invariant metadata .....")
        lev_data = o_files[0].variables["z_t"]
        # Slice-assign so the values land in the file variable; a plain
        # `v_lev = lev_data` only rebinds the local name and writes nothing.
        v_lev[:] = lev_data[:].astype(np.float32)

    # Time-variant metadata
    if verbose:
        print("Assigning time variant metadata .....")
    vars_dict = o_files[0].variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(),))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    # Calculate global mean, average, standard deviation
    if verbose:
        print("Calculating global means .....")
    is_SE = False
    if not opts_dict['zscoreonly']:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
            o_files, Var3d, Var2d, is_SE, False, opts_dict)
    if verbose:
        print("Finish calculating global means .....")

    # Calculate RMSZ scores
    if verbose:
        print("Calculating RMSZ scores .....")
    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
        o_files, Var3d, Var2d, is_SE, opts_dict)

    # Collect from all processors.  This runs whether or not MPI is enabled,
    # exactly like the time-axis gather above: the write step below needs
    # zmall/gmall and the leading per-rank axis in serial runs too, and used
    # to fail with a NameError when they were only built under mpi_enable.
    n_ranks = me.get_size()
    n_vars = len(Var3d) + len(Var2d)

    # Gather global means results
    if not opts_dict['zscoreonly']:
        gmall = np.concatenate((gm3d, gm2d), axis=0)
        gmall = pyEnsLib.gather_npArray_pop(
            gmall, me, (n_ranks, n_vars, len(o_files)))

    # Gather RMSZ scores
    zmall = np.concatenate((zscore3d, zscore2d), axis=0)
    zmall = pyEnsLib.gather_npArray_pop(
        zmall, me, (n_ranks, n_vars, len(o_files), nbin))

    # Gather ensemble averages and standard deviations
    ens_avg3d = pyEnsLib.gather_npArray_pop(
        ens_avg3d, me, (n_ranks, len(Var3d), nlev, nlat, nlon))
    ens_avg2d = pyEnsLib.gather_npArray_pop(
        ens_avg2d, me, (n_ranks, len(Var2d), nlat, nlon))
    ens_stddev3d = pyEnsLib.gather_npArray_pop(
        ens_stddev3d, me, (n_ranks, len(Var3d), nlev, nlat, nlon))
    ens_stddev2d = pyEnsLib.gather_npArray_pop(
        ens_stddev2d, me, (n_ranks, len(Var2d), nlat, nlon))

    # Assign to file:
    if me.get_rank() == 0:
        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]
        if not opts_dict['zscoreonly']:
            v_gm[:, :, :] = gmall[:, :, :]
        print("All done")
Example #7
0
def main(argv):
    """Create the CAM verification ensemble summary file.

    Parses the long options in ``argv`` on top of a set of built-in defaults,
    reads the include/exclude variable list from the JSON file on rank 0,
    opens the first ``esize`` files of the input directory and checks that
    they agree on their spatial dimensions.  The remaining variables are
    classified as 2D or 3D from their rank and shape.  Global means (and,
    unless --gmonly, RMSZ scores and ensemble average/standard deviation)
    are computed through pyEnsLib, gathered when MPI is enabled, and written
    to a newly created NetCDF summary file together with the output of
    ``pyEnsLib.pre_PCA``.

    Parameters:
        argv: command-line arguments (program name excluded), handed to
            ``getopt.getopt``.

    Exits through ``sys.exit`` on an unrecognised option, an empty
    tag/compset/mach/res, a missing input directory or input file, too few
    input files, missing or mismatching dimensions, or more variables than
    ensemble members.
    """
    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Defaults; getopt_parseconfig overlays whatever was given on the
    # command line.
    opts_dict = {
        'tag': 'cesm2_0_beta08',
        'compset': 'F2000',
        'mach': 'cheyenne',
        'esize': 350,
        'tslice': 1,
        'res': 'f19_f19',
        'sumfile': 'ens.summary.nc',
        'indir': './',
        'sumfiledir': './',
        'jsonfile': 'exclude_empty.json',
        'verbose': False,
        'mpi_enable': False,
        'maxnorm': False,
        'gmonly': True,
        'popens': False,
        'cumul': False,
        'regx': 'test',
        'startMon': 1,
        'endMon': 1,
        'fIndex': 151,
    }

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']
    esize = int(opts_dict['esize'])

    # All four options are required.  The check used to read
    # `... and mach or res`, which let a non-empty --res alone pass.
    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach']
            and opts_dict['res']):
        print('Please specify --tag, --compset, --mach and --res options')
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var lists that will be excluded / included
    ex_varlist = []
    inc_varlist = []

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    is_root = me.get_rank() == 0
    # Progress messages are printed by rank 0 only, and only when verbose
    root_verbose = bool(is_root and verbose)

    if is_root:
        print('Running pyEnsSum!')

    if root_verbose:
        print(opts_dict)
        print('Ensemble size for summary =  %s' % (esize,))

    exclude = False
    if is_root and opts_dict['jsonfile']:
        # Read in the excluded or included var list; `exclude` tells which
        # of the two the file contains.
        ex_varlist, exclude = pyEnsLib.read_jsonlist(opts_dict['jsonfile'],
                                                     'ES')
        if not exclude:
            inc_varlist = ex_varlist
            ex_varlist = []

    # Broadcast the excluded (or included) var list to each processor
    if opts_dict['mpi_enable']:
        exclude = me.partition(exclude, func=Duplicate(), involved=True)
        if exclude:
            ex_varlist = me.partition(ex_varlist,
                                      func=Duplicate(),
                                      involved=True)
        else:
            inc_varlist = me.partition(inc_varlist,
                                       func=Duplicate(),
                                       involved=True)

    in_files = []
    if os.path.exists(input_dir):
        # Get the list of files
        in_files = sorted(os.listdir(input_dir))

        # Make sure we have enough
        num_files = len(in_files)
        if root_verbose:
            print('Number of files in input directory =  %s' % (num_files,))
        if num_files < esize:
            # Fatal, so report it even when not verbose (the run used to
            # exit silently here unless --verbose was given).
            if is_root:
                print('Number of files in input directory ( %s ) is less than specified ensemble size of  %s'
                      % (num_files, esize))
            sys.exit(2)
        if num_files > esize:
            if root_verbose:
                print('NOTE: Number of files in  %s is greater than specified ensemble size of  %s \nwill just use the first  %s files'
                      % (input_dir, esize, esize))
    else:
        if is_root:
            print('Input directory:  %s  not found' % (input_dir,))
        sys.exit(2)

    if opts_dict['cumul']:
        # NOTE(review): in_files_list is only bound when 'regx' is non-empty;
        # an empty --regx together with --cumul fails on the partition call
        # below.  Left unchanged because the intended fallback is not
        # visible here.
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'],
                                               opts_dict['regx'])
        in_files = me.partition(in_files_list,
                                func=EqualLength(),
                                involved=True)
        if root_verbose:
            print('in_files= %s' % (in_files,))

    # Open the files in the input directory
    o_files = []
    if is_root and opts_dict['verbose']:
        print('Input files are: ')
        print("\n".join(in_files))
    for onefile in in_files[0:esize]:
        file_path = input_dir + '/' + onefile
        if os.path.isfile(file_path):
            o_files.append(Nio.open_file(file_path, "r"))
        else:
            if is_root:
                print("COULD NOT LOCATE FILE " + file_path + "! EXITING....")
            sys.exit()

    # Store dimensions of the input fields
    if root_verbose:
        print("Getting spatial dimensions")
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''

    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ilev":
            nilev = input_dims["ilev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key == "lon"):
            nlon = input_dims[key]
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey = key

    if nlev == -1:
        if is_root:
            print("COULD NOT LOCATE valid dimension lev => EXITING....")
        sys.exit()

    if (ncol == -1) and ((nlat == -1) or (nlon == -1)):
        if is_root:
            print("Need either lat/lon or ncol  => EXITING....")
        sys.exit()

    # Check if this is SE or FV data: an 'ncol' dimension selects SE
    is_SE = (ncol != -1)

    # Make sure all files have the same dimensions
    if root_verbose:
        print("Checking dimensions across files....")
        print('lev =  %s' % (nlev,))
        if is_SE:
            print('ncol =  %s' % (ncol,))
        else:
            print('nlat =  %s' % (nlat,))
            print('nlon =  %s' % (nlon,))

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if is_SE:
            mismatch = (nlev != int(input_dims["lev"])
                        or ncol != int(input_dims["ncol"]))
        else:
            mismatch = (nlev != int(input_dims["lev"])
                        or nlat != int(input_dims[latkey])
                        or nlon != int(input_dims[lonkey]))
        if mismatch:
            if is_root:
                # Name the offending file (the message used to print the
                # first file twice).
                print("Dimension mismatch between  %s and %s !!!"
                      % (in_files[0], in_files[count]))
            sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = o_files[0].variables
    if exclude:
        # Remove the excluded variables (specified in json file) from the
        # variable dictionary
        vars_dict = vars_dict_all
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    else:
        # Given an included var list, remove all float vars that are not on
        # the list
        vars_dict = vars_dict_all.copy()
        for k, v in vars_dict_all.items():
            if (k not in inc_varlist) and (v.typecode() == 'f'):
                del vars_dict[k]

    str_size = 0
    d2_var_names = []
    d3_var_names = []

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.items():
        vr = v.rank  # num dimension
        vs = v.shape  # dim values
        if is_SE:  # (time, lev, ncol) or (time, ncol)
            is_2d = (vr == 2) and (vs[1] == ncol)
            is_3d = (vr == 3) and (vs[2] == ncol and vs[1] == nlev)
        else:  # (time, lev, nlat, nlon) or (time, nlat, nlon)
            is_2d = (vr == 3) and (vs[1] == nlat and vs[2] == nlon)
            is_3d = (vr == 4) and (vs[2] == nlat and vs[3] == nlon and
                                   (vs[1] == nlev or vs[1] == nilev))

        if is_3d:
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif is_2d:
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    num_2d = len(d2_var_names)
    num_3d = len(d3_var_names)

    if root_verbose:
        print('Number of variables found:   %s' % (num_3d + num_2d,))
        print('3D variables: ' + str(num_3d) + ', 2D variables: ' + str(num_2d))

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    if esize < num_2d + num_3d:
        if is_root:
            print("************************************************************************************************************************************")
            print("  Error: the total number of 3D and 2D variables " + str(
                num_2d + num_3d
            ) + " is larger than the number of ensemble files " + str(esize))
            print("  Cannot generate ensemble summary file, please remove more variables from your included variable list,")
            print("  or add more varaibles in your excluded variable list!!!")
            print("************************************************************************************************************************************")
        sys.exit()

    # All vars is 3d vars first (sorted), then the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if root_verbose:
        print("Creating  %s   ..." % (this_sumfile,))
    # `rank == 0 | popens` parsed as `rank == (0 | popens)` because `|` binds
    # tighter than `==`; spell out the logical "or".  Identical for the
    # default popens=False.
    if is_root or opts_dict["popens"]:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if root_verbose:
            print("Setting dimensions .....")
        if is_SE:
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if root_verbose:
            print("Setting global attributes .....")
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if root_verbose:
            print("Creating variables .....")
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev', ))
        v_vars = nc_sumfile.create_variable("vars", 'S1',
                                            ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1',
                                             ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1',
                                             ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            if is_SE:
                dims3d = ('nvars3d', 'nlev', 'ncol')
                dims2d = ('nvars2d', 'ncol')
            else:
                dims3d = ('nvars3d', 'nlev', 'nlat', 'nlon')
                dims2d = ('nvars2d', 'nlat', 'nlon')
            v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', dims3d)
            v_ens_stddev3d = nc_sumfile.create_variable(
                "ens_stddev3d", 'f', dims3d)
            v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', dims2d)
            v_ens_stddev2d = nc_sumfile.create_variable(
                "ens_stddev2d", 'f', dims2d)

            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f',
                                                ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f',
                                          ('nvars', 'ens_size'))
        v_standardized_gm = nc_sumfile.create_variable("standardized_gm", 'f',
                                                       ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm', 'f',
                                                   ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm', 'f', ('nvars', ))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm', 'f', ('nvars', ))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm', 'f',
                                                       ('nvars', ))

        # Assign vars, var3d and var2d
        if root_verbose:
            print("Assigning vars, var3d, and var2d .....")

        def pad_name(name):
            # One character per element, blank-padded on the right up to
            # str_size so that every row fits the ('nvars*', 'str_size')
            # character variables.
            chars = list(name)
            chars.extend([' '] * (str_size - len(chars)))
            return chars

        v_vars[:] = [pad_name(name) for name in all_var_names]
        v_var3d[:] = [pad_name(name) for name in d3_var_names]
        v_var2d[:] = [pad_name(name) for name in d2_var_names]

        # Time-invariant metadata
        if root_verbose:
            print("Assigning time invariant metadata .....")
        lev_data = vars_dict["lev"]
        # Slice-assign so the values land in the file variable; a plain
        # `v_lev = lev_data` only rebinds the local name and writes nothing.
        # Cast to float32 to match the 'f' type the variable was created with.
        v_lev[:] = lev_data[:].astype(np.float32)

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable, we also do max norm also (currently done in pyStats)
    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names,
                                     func=EqualStride(),
                                     involved=True)
        var2_list_loc = me.partition(d2_var_names,
                                     func=EqualStride(),
                                     involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # Calculate global means #
    if root_verbose:
        print("Calculating global means .....")
    if not opts_dict['cumul']:
        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(
            o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict)
    if root_verbose:
        print("Finish calculating global means .....")

    # Calculate RMSZ scores
    if (not opts_dict['gmonly']) or opts_dict['cumul']:
        if root_verbose:
            print("Calculating RMSZ scores .....")
        zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
            o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if root_verbose:
            print("Calculating max norm of ensembles .....")
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict['mpi_enable'] and not opts_dict['popens']:

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index,
                                  (len(d3_var_names), len(o_files)))
            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d = gather_npArray(zscore3d, me, slice_index,
                                          (len(d3_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names),
                                          me.get_rank())
                ens_avg3d = gather_npArray(ens_avg3d, me, slice_index,
                                           shape_tuple3d)
                ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index,
                                              shape_tuple3d)

            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index,
                                  (len(d2_var_names), len(o_files)))

            var_list = gather_list(var_list, me)

            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d = gather_npArray(zscore2d, me, slice_index,
                                          (len(d2_var_names), len(o_files)))

                # Gather ens_avg2d and ens_stddev2d results
                shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names),
                                          me.get_rank())
                ens_avg2d = gather_npArray(ens_avg2d, me, slice_index,
                                           shape_tuple2d)
                ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index,
                                              shape_tuple2d)

        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(d3_var_names) + len(d2_var_names)))

    # Assign to file (same precedence fix as at file creation):
    if is_root or opts_dict['popens']:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            if not opts_dict['gmonly']:
                Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0)
                v_RMSZ[:, :] = Zscoreall[:, :]
                if is_SE:
                    v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :]
                    v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :]
                    v_ens_avg2d[:, :] = ens_avg2d[:, :]
                    v_ens_stddev2d[:, :] = ens_stddev2d[:, :]
                else:
                    v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :]
                    v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :]
                    v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :]
                    v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :]
        else:
            # NOTE(review): in cumulative mode gmall is only built in the MPI
            # gather branch above and var_list is never assigned, so this
            # path looks incomplete - confirm the intended inputs.
            gmall = np.transpose(gmall[:, :])
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(
            gmall, all_var_names, var_list, me)
        v_gm[:, :] = gmall[:, :]
        v_standardized_gm[:, :] = standardized_global_mean[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:].astype(np.float32)
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        if is_root:
            print("All Done")
Example #8
0
def main(argv):

    print "Running pyEnsSum!"

    # Get command line stuff and store in a dictionary
    s = "tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex="
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict["tag"] = ""
    opts_dict["compset"] = ""
    opts_dict["mach"] = ""
    opts_dict["esize"] = 151
    opts_dict["tslice"] = 0
    opts_dict["res"] = ""
    opts_dict["sumfile"] = "ens.summary.nc"
    opts_dict["indir"] = "./"
    opts_dict["sumfiledir"] = "./"
    opts_dict["jsonfile"] = ""
    opts_dict["verbose"] = True
    opts_dict["mpi_enable"] = False
    opts_dict["maxnorm"] = False
    opts_dict["gmonly"] = False
    opts_dict["popens"] = False
    opts_dict["cumul"] = False
    opts_dict["regx"] = "test"
    opts_dict["startMon"] = 1
    opts_dict["endMon"] = 1
    opts_dict["fIndex"] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, "ES", opts_dict)

    verbose = opts_dict["verbose"]

    st = opts_dict["esize"]
    esize = int(st)

    if verbose == True:
        print opts_dict
        print "Ensemble size for summary = ", esize

    if not (opts_dict["tag"] and opts_dict["compset"] and opts_dict["mach"] or opts_dict["res"]):
        print "Please specify --tag, --compset, --mach and --res options"
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict["indir"]
    # The var list that will be excluded
    ex_varlist = []

    # Create a mpi simplecomm object
    if opts_dict["mpi_enable"]:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict["mpi_enable"])

    if me.get_rank() == 0:
        if opts_dict["jsonfile"]:
            # Read in the excluded var list
            ex_varlist = pyEnsLib.read_jsonlist(opts_dict["jsonfile"], "ES")

        # Broadcast the excluded var list to each processor
    if opts_dict["mpi_enable"]:
        ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)

    in_files = []
    if os.path.exists(input_dir):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)
        # Make sure we have enough
        num_files = len(in_files)
        if verbose == True:
            print "Number of files in input directory = ", num_files
        if num_files < esize:
            print "Number of files in input directory (", num_files, ") is less than specified ensemble size of ", esize
            sys.exit(2)
        if num_files > esize:
            print "NOTE: Number of files in ", input_dir, "is greater than specified ensemble size of ", esize, "\nwill just use the first ", esize, "files"
    else:
        print "Input directory: ", input_dir, " not found"
        sys.exit(2)

    if opts_dict["cumul"]:
        if opts_dict["regx"]:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict["indir"], opts_dict["regx"])
        in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
        if me.get_rank() == 0:
            print "in_files=", in_files

    # Open the files in the input directory
    o_files = []
    for onefile in in_files[0:esize]:
        if os.path.isfile(input_dir + "/" + onefile):
            o_files.append(Nio.open_file(input_dir + "/" + onefile, "r"))
        else:
            print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...."
            sys.exit()

    # Store dimensions of the input fields
    if verbose == True:
        print "Getting spatial dimensions"
    nlev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ""
    latkey = ""
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key == "lon"):
            nlon = input_dims[key]
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey = key

    if nlev == -1:
        print "COULD NOT LOCATE valid dimension lev => EXITING...."
        sys.exit()

    if (ncol == -1) and ((nlat == -1) or (nlon == -1)):
        print "Need either lat/lon or ncol  => EXITING...."
        sys.exit()

    # Check if this is SE or FV data
    if ncol != -1:
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if verbose == True:
        print "Checking dimensions across files...."
        print "lev = ", nlev
        if is_SE == True:
            print "ncol = ", ncol
        else:
            print "nlat = ", nlat
            print "nlon = ", nlon

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if is_SE == True:
            if nlev != int(input_dims["lev"]) or (ncol != int(input_dims["ncol"])):
                print "Dimension mismatch between ", in_files[0], "and", in_files[0], "!!!"
                sys.exit()
        else:
            if nlev != int(input_dims["lev"]) or (nlat != int(input_dims[latkey])) or (nlon != int(input_dims[lonkey])):
                print "Dimension mismatch between ", in_files[0], "and", in_files[0], "!!!"
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    if ex_varlist:
        for i in ex_varlist:
            del vars_dict[i]
    num_vars = len(vars_dict)
    if verbose == True:
        print "Number of variables (including metadata) found =  ", num_vars
    str_size = 0

    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.iteritems():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = v.rank  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if is_SE == True:  # (time, lev, ncol) or (time, ncol)
            if (vr == 2) and (vs[1] == ncol):
                is_2d = True
                num_2d += 1
            elif (vr == 3) and (vs[2] == ncol and vs[1] == nlev):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if (vr == 3) and (vs[1] == nlat and vs[2] == nlon):
                is_2d = True
                num_2d += 1
            elif (vr == 4) and (vs[2] == nlat and vs[3] == nlon and vs[1] == nlev):
                is_3d = True
                num_3d += 1
        if is_3d == True:
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif is_2d == True:
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    if verbose == True:
        print "num vars = ", n_all_var_names, "(3d = ", num_3d, " and 2d = ", num_2d, ")"

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if verbose == True:
        print "Creating ", this_sumfile, "  ..."
    if me.get_rank() == 0 | opts_dict["popens"]:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        opt = Nio.options()
        opt.PreFill = False
        opt.Format = "NetCDF4Classic"
        nc_sumfile = Nio.open_file(this_sumfile, "w", options=opt)

        # Set dimensions
        if verbose == True:
            print "Setting dimensions ....."
        if is_SE == True:
            nc_sumfile.create_dimension("ncol", ncol)
        else:
            nc_sumfile.create_dimension("nlat", nlat)
            nc_sumfile.create_dimension("nlon", nlon)
        nc_sumfile.create_dimension("nlev", nlev)
        nc_sumfile.create_dimension("ens_size", esize)
        nc_sumfile.create_dimension("nvars", num_3d + num_2d)
        nc_sumfile.create_dimension("nvars3d", num_3d)
        nc_sumfile.create_dimension("nvars2d", num_2d)
        nc_sumfile.create_dimension("str_size", str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose == True:
            print "Setting global attributes ....."
        setattr(nc_sumfile, "creation_date", now)
        setattr(nc_sumfile, "title", "CAM verification ensemble summary file")
        setattr(nc_sumfile, "tag", opts_dict["tag"])
        setattr(nc_sumfile, "compset", opts_dict["compset"])
        setattr(nc_sumfile, "resolution", opts_dict["res"])
        setattr(nc_sumfile, "machine", opts_dict["mach"])

        # Create variables
        if verbose == True:
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", "f", ("nlev",))
        v_vars = nc_sumfile.create_variable("vars", "S1", ("nvars", "str_size"))
        v_var3d = nc_sumfile.create_variable("var3d", "S1", ("nvars3d", "str_size"))
        v_var2d = nc_sumfile.create_variable("var2d", "S1", ("nvars2d", "str_size"))
        if not opts_dict["gmonly"]:
            if is_SE == True:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", "f", ("nvars3d", "nlev", "ncol"))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", "f", ("nvars3d", "nlev", "ncol"))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", "f", ("nvars2d", "ncol"))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", "f", ("nvars2d", "ncol"))
            else:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", "f", ("nvars3d", "nlev", "nlat", "nlon"))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", "f", ("nvars3d", "nlev", "nlat", "nlon"))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", "f", ("nvars2d", "nlat", "nlon"))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", "f", ("nvars2d", "nlat", "nlon"))

            v_RMSZ = nc_sumfile.create_variable("RMSZ", "f", ("nvars", "ens_size"))
        v_gm = nc_sumfile.create_variable("global_mean", "f", ("nvars", "ens_size"))
        v_loadings_gm = nc_sumfile.create_variable("loadings_gm", "f", ("nvars", "nvars"))
        v_mu_gm = nc_sumfile.create_variable("mu_gm", "f", ("nvars",))
        v_sigma_gm = nc_sumfile.create_variable("sigma_gm", "f", ("nvars",))
        v_sigma_scores_gm = nc_sumfile.create_variable("sigma_scores_gm", "f", ("nvars",))

        # Assign vars, var3d and var2d
        if verbose == True:
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(" ") * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(" ") * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(" ") * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if verbose == True:
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        v_lev = lev_data

        # Form ensembles, each missing one member; compute RMSZs and global means
        # for each variable, we also do max norm also (currently done in pyStats)
    tslice = opts_dict["tslice"]

    if not opts_dict["cumul"]:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True)
        var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # Calculate global means #
    if verbose == True:
        print "Calculating global means ....."
    if not opts_dict["cumul"]:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
            o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict
        )
    if verbose == True:
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    if verbose == True:
        print "Calculating RMSZ scores ....."
    if (not opts_dict["gmonly"]) | (opts_dict["cumul"]):
        zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
            o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict
        )

    # Calculate max norm ensemble
    if opts_dict["maxnorm"]:
        if verbose == True:
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict["mpi_enable"] & (not opts_dict["popens"]):

        if not opts_dict["cumul"]:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index, (len(d3_var_names), len(o_files)))

            if not opts_dict["gmonly"]:
                # Gather zscore3d results
                zscore3d = gather_npArray(zscore3d, me, slice_index, (len(d3_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names), me.get_rank())
                ens_avg3d = gather_npArray(ens_avg3d, me, slice_index, shape_tuple3d)
                ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index, shape_tuple3d)

                # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index, (len(d2_var_names), len(o_files)))

            if not opts_dict["gmonly"]:
                # Gather zscore2d results
                zscore2d = gather_npArray(zscore2d, me, slice_index, (len(d2_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev2d results
                shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names), me.get_rank())
                ens_avg2d = gather_npArray(ens_avg2d, me, slice_index, shape_tuple2d)
                ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index, shape_tuple2d)

        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names)))
    # Assign to file:
    if me.get_rank() == 0 | opts_dict["popens"]:
        if not opts_dict["cumul"]:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            if not opts_dict["gmonly"]:
                Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0)
                v_RMSZ[:, :] = Zscoreall[:, :]
            if not opts_dict["gmonly"]:
                if is_SE == True:
                    v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :]
                    v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :]
                    v_ens_avg2d[:, :] = ens_avg2d[:, :]
                    v_ens_stddev2d[:, :] = ens_stddev2d[:, :]
                else:
                    v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :]
                    v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :]
                    v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :]
                    v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :]
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(gmall)
        v_gm[:, :] = gmall[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:].astype(np.float32)
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        print "All Done"
Example #9
0
def main(argv):

    # Get command line stuff and store in a dictionary
    s = 'verbose sumfile= indir= timeslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVarTest'
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['timeslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']
    print '--------pyCECT--------'
    print ' '
    print dt.strftime("%A, %d. %B %Y %I:%M%p")
    print ' '
    print 'Ensemble summary file = ' + opts_dict['sumfile']
    print ' '
    print 'Cam output directory = ' + opts_dict['indir']
    print ' '
    print ' '

    # Open all input files
    ifiles = []
    in_files_temp = os.listdir(opts_dict['indir'])
    in_files = sorted(in_files_temp)
    in_files_random = pyEnsLib.Random_pickup(in_files, opts_dict)
    for frun_file in in_files_random:
        if (os.path.isfile(opts_dict['indir'] + '/' + frun_file)):
            ifiles.append(
                Nio.open_file(opts_dict['indir'] + '/' + frun_file, "r"))
        else:
            print "COULD NOT LOCATE FILE " + opts_dict[
                'indir'] + frun_file + " EXISTING"
            sys.exit()

    # Read all variables from the ensemble summary file
    ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm = pyEnsLib.read_ensemble_summary(
        opts_dict['sumfile'])

    if len(ens_rmsz) == 0:
        gmonly = True
    # Add ensemble rmsz and global mean to the dictionary "variables"
    variables = {}
    if not gmonly:
        for k, v in ens_rmsz.iteritems():
            pyEnsLib.addvariables(variables, k, 'zscoreRange', v)

    for k, v in ens_gm.iteritems():
        pyEnsLib.addvariables(variables, k, 'gmRange', v)

    # Get 3d variable name list and 2d variable name list seperately
    var_name3d = []
    var_name2d = []
    for vcount, v in enumerate(ens_var_name):
        if vcount < num_3d:
            var_name3d.append(v)
        else:
            var_name2d.append(v)

    # Get ncol and nlev value
    npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

    # Compare the new run and the ensemble summary file to get rmsz score
    results = {}
    countzscore = np.zeros(len(ifiles), dtype=np.int32)
    countgm = np.zeros(len(ifiles), dtype=np.int32)
    if not gmonly:
        for fcount, fid in enumerate(ifiles):
            otimeSeries = fid.variables
            for var_name in ens_var_name:
                orig = otimeSeries[var_name]
                Zscore, has_zscore = pyEnsLib.calculate_raw_score(
                    var_name, orig[opts_dict['timeslice']], npts3d, npts2d,
                    ens_avg, ens_stddev, is_SE)
                if has_zscore:
                    # Add the new run rmsz zscore to the dictionary "results"
                    pyEnsLib.addresults(results, 'zscore', Zscore, var_name,
                                        'f' + str(fcount))

        # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
        for fcount, fid in enumerate(ifiles):
            countzscore[fcount] = pyEnsLib.evaluatestatus(
                'zscore', 'zscoreRange', variables, 'ens', results,
                'f' + str(fcount))

    # Calculate the new run global mean
    mean3d, mean2d = pyEnsLib.generate_global_mean_for_summary(
        ifiles, var_name3d, var_name2d, opts_dict['timeslice'], is_SE, verbose)
    means = np.concatenate((mean3d, mean2d), axis=0)

    # Add the new run global mean to the dictionary "results"
    for i in range(means.shape[1]):
        for j in range(means.shape[0]):
            pyEnsLib.addresults(results, 'means', means[j][i], ens_var_name[j],
                                'f' + str(i))

    # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
    for fcount, fid in enumerate(ifiles):
        countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange',
                                                  variables, 'gm', results,
                                                  'f' + str(fcount))

    # Calculate the PCA scores of the new run
    new_scores = pyEnsLib.standardized(means, mu_gm, sigma_gm, loadings_gm)
    pyEnsLib.comparePCAscores(ifiles, new_scores, sigma_scores_gm, opts_dict)

    # Print out
    if opts_dict['printVarTest']:
        print '*********************************************** '
        print 'Variable-based testing (for reference only - not used to determine pass/fail)'
        print '*********************************************** '
        for fcount, fid in enumerate(ifiles):
            print ' '
            print 'Run ' + str(fcount + 1) + ":"
            print ' '
            if not gmonly:
                print '***' + str(countzscore[fcount]), " of " + str(
                    len(ens_var_name)
                ) + ' variables are outside of ensemble RMSZ distribution***'
                pyEnsLib.printsummary(results, 'ens', 'zscore', 'zscoreRange',
                                      (fcount), variables, 'RMSZ')
                print ' '
            print '***' + str(countgm[fcount]), " of " + str(
                len(ens_var_name)
            ) + ' variables are outside of ensemble global mean distribution***'
            pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange', fcount,
                                  variables, 'global mean')
            print ' '
            print '----------------------------------------------------------------------------'
Example #10
0
def main(argv):


    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try: 
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict={}
    
    # Defaults
    opts_dict['tag'] = 'cesm2_0_beta08'
    opts_dict['compset'] = 'F2000'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments 
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,'ES',opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)


    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']):
       print 'Please specify --tag, --compset, --mach and --res options'
       sys.exit()
       
    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist=[]
    inc_varlist=[]

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])
    
    if me.get_rank() == 0:
       print 'Running pyEnsSum!'

    if me.get_rank() ==0 and (verbose == True):
        print opts_dict
        print 'Ensemble size for summary = ', esize

    exclude=False
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist=[]
            # Read in the excluded or included var list
            ex_varlist,exclude=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')
            if exclude == False:
               inc_varlist=ex_varlist
               ex_varlist=[]
            # Read in the included var list
            #inc_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')

    # Broadcast the excluded var list to each processor
    #if opts_dict['mpi_enable']:
    #   ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)
    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude=me.partition(exclude,func=Duplicate(),involved=True)
        if exclude:
           ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)
        else:
           inc_varlist=me.partition(inc_varlist,func=Duplicate(),involved=True)
        
    in_files=[]
    if(os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files=sorted(in_files_temp)

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank()==0 and (verbose == True):
            print 'Number of files in input directory = ', num_files
        if (num_files < esize):
            if me.get_rank()==0 and (verbose == True):
               print 'Number of files in input directory (',num_files,\
                ') is less than specified ensemble size of ', esize
            sys.exit(2)
        if (num_files > esize):
            if me.get_rank()==0 and (verbose == True):
               print 'NOTE: Number of files in ', input_dir, \
                'is greater than specified ensemble size of ', esize ,\
                '\nwill just use the first ',  esize, 'files'
    else:
        if me.get_rank()==0:
           print 'Input directory: ',input_dir,' not found'
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
           in_files_list=get_cumul_filelist(opts_dict,opts_dict['indir'],opts_dict['regx'])
        in_files=me.partition(in_files_list,func=EqualLength(),involved=True)
        if me.get_rank()==0 and (verbose == True):
           print 'in_files=',in_files

    # Open the files in the input directory
    o_files=[]
    if me.get_rank() == 0 and opts_dict['verbose']:
       print 'Input files are: '
       print "\n".join(in_files)
       #for i in in_files:
       #    print "in_files =",i
    for onefile in in_files[0:esize]:
        if (os.path.isfile(input_dir+'/' + onefile)):
            o_files.append(Nio.open_file(input_dir+'/' + onefile,"r"))
        else:
            if me.get_rank()==0:
               print "COULD NOT LOCATE FILE "+ input_dir + onefile + "! EXITING...."
            sys.exit() 

    # Store dimensions of the input fields
    if me.get_rank()==0 and (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey=''
    latkey=''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ilev":
            nilev = input_dims["ilev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key =="lon"):
            nlon = input_dims[key]
            lonkey=key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey=key
        
    if (nlev == -1) : 
        if me.get_rank()==0: 
           print "COULD NOT LOCATE valid dimension lev => EXITING...."
        sys.exit() 

    if (( ncol == -1) and ((nlat == -1) or (nlon == -1))):
        if me.get_rank()==0: 
           print "Need either lat/lon or ncol  => EXITING...."
        sys.exit()            

    # Check if this is SE or FV data
    if (ncol != -1):
        is_SE = True 
    else:
        is_SE = False    

    # Make sure all files have the same dimensions
    if me.get_rank()==0 and (verbose == True):
        print "Checking dimensions across files...."
        print 'lev = ', nlev
        if (is_SE == True):
            print 'ncol = ', ncol
        else:
            print 'nlat = ', nlat
            print 'nlon = ', nlon

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions     
        if (is_SE == True):
            if ( nlev != int(input_dims["lev"]) or ( ncol != int(input_dims["ncol"]))):
                if me.get_rank() == 0:
                   print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
                sys.exit() 
        else:
            if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\
                  or ( nlon != int(input_dims[lonkey]))): 
                if me.get_rank() == 0:
                   print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
                sys.exit() 

    # Get 2d vars, 3d vars and all vars (For now include all variables) 
    vars_dict_all = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    #print len(vars_dict_all)
    if exclude:
        vars_dict=vars_dict_all
        for i in ex_varlist:
          if i in vars_dict:
            del vars_dict[i]
    #Given an included var list, remove all float var that are not on the list
    else:
        vars_dict=vars_dict_all.copy()
        for k,v in vars_dict_all.iteritems():
           if (k not in inc_varlist) and (vars_dict_all[k].typecode()=='f'):
            #print vars_dict_all[k].typecode()
            #print k
            del vars_dict[k]
 
    num_vars = len(vars_dict)
    #print num_vars
    #if me.get_rank() == 0:
    #   for k,v in vars_dict.iteritems():
    #       print 'vars_dict',k,vars_dict[k].typecode()

    str_size = 0

    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size 
    for k,v in vars_dict.iteritems():  
        var = k
        vd = v.dimensions # all the variable's dimensions (names)
        vr = v.rank # num dimension
        vs = v.shape # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True): # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):  
                is_2d = True 
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev )):  
                is_3d = True 
                num_3d += 1
        else: # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):  
                is_2d = True 
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and (vs[1] == nlev or vs[1]==nilev ))):  
                is_3d = True 
                num_3d += 1
                    
        if (is_3d == True) :
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif  (is_2d == True):    
            str_size = max(str_size, len(k))
            d2_var_names.append(k)
        #else:
        #    print 'var=',k

    if me.get_rank() == 0 and (verbose == True):
        print 'Number of variables found:  ', num_3d+num_2d
        print '3D variables: '+str(num_3d)+', 2D variables: '+str(num_2d)

    # Now sort these and combine (this sorts caps first, then lower case - 
    # which is what we want)
    d2_var_names.sort()       
    d3_var_names.sort()

    if esize<num_2d+num_3d:
       if me.get_rank()==0:
          print "************************************************************************************************************************************"
          print "  Error: the total number of 3D and 2D variables "+str(num_2d+num_3d)+" is larger than the number of ensemble files "+str(esize)
          print "  Cannot generate ensemble summary file, please remove more variables from your included variable list,"
          print "  or add more varaibles in your excluded variable list!!!"
          print "************************************************************************************************************************************"
       sys.exit()
    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    #if me.get_rank() == 0 and (verbose == True):
    #    print 'num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")"

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if me.get_rank() == 0 and (verbose == True):
        print "Creating ", this_sumfile, "  ..."
    if(me.get_rank() ==0 | opts_dict["popens"]):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if me.get_rank() == 0 and (verbose == True):
            print "Setting dimensions ....."
        if (is_SE == True):
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if me.get_rank() == 0 and (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date',now)
        setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"]) 
        setattr(nc_sumfile, 'compset', opts_dict["compset"]) 
        setattr(nc_sumfile, 'resolution', opts_dict["res"]) 
        setattr(nc_sumfile, 'machine', opts_dict["mach"]) 

        # Create variables
        if me.get_rank() == 0 and (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            if (is_SE == True):
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol'))
            else:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))

            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size'))
        v_standardized_gm=nc_sumfile.create_variable("standardized_gm",'f',('nvars','ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm','f',('nvars','nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm','f',('nvars',))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm','f',('nvars',))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm','f',('nvars',))


        # Assign vars, var3d and var2d
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names =[]
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ')*(str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ')*(str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ')*(str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        v_lev = lev_data

    # Form ensembles, each missing one member; compute RMSZs and global means
    #for each variable, we also do max norm also (currently done in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list
        
        var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True)
        var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True)
    else:
        var3_list_loc=d3_var_names
        var2_list_loc=d2_var_names

    # Calculate global means #
    if me.get_rank() == 0 and (verbose == True):
        print "Calculating global means ....."
    if not opts_dict['cumul']:
        gm3d,gm2d,var_list = pyEnsLib.generate_global_mean_for_summary(o_files,var3_list_loc,var2_list_loc , is_SE, False,opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print "Finish calculating global means ....."

    # Calculate RMSZ scores  
    if (not opts_dict['gmonly']) | (opts_dict['cumul']):
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating RMSZ scores ....."
        zscore3d,zscore2d,ens_avg3d,ens_stddev3d,ens_avg2d,ens_stddev2d,temp1,temp2=pyEnsLib.calc_rmsz(o_files,var3_list_loc,var2_list_loc,is_SE,opts_dict)    

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict,var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict,var2_list_loc)

    if opts_dict['mpi_enable'] & ( not opts_dict['popens']):

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index=get_stride_list(len(d3_var_names),me)
         
            # Gather global means 3d results
            gm3d=gather_npArray(gm3d,me,slice_index,(len(d3_var_names),len(o_files)))
            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d=gather_npArray(zscore3d,me,slice_index,(len(d3_var_names),len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d=get_shape(ens_avg3d.shape,len(d3_var_names),me.get_rank())
                ens_avg3d=gather_npArray(ens_avg3d,me,slice_index,shape_tuple3d) 
                ens_stddev3d=gather_npArray(ens_stddev3d,me,slice_index,shape_tuple3d) 

            # Gather 2d variable results from all processors to the master processor
            slice_index=get_stride_list(len(d2_var_names),me)

            # Gather global means 2d results
            gm2d=gather_npArray(gm2d,me,slice_index,(len(d2_var_names),len(o_files)))

            var_list=gather_list(var_list,me)

            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d=gather_npArray(zscore2d,me,slice_index,(len(d2_var_names),len(o_files)))

                # Gather ens_avg3d and ens_stddev2d results
                shape_tuple2d=get_shape(ens_avg2d.shape,len(d2_var_names),me.get_rank())
                ens_avg2d=gather_npArray(ens_avg2d,me,slice_index,shape_tuple2d) 
                ens_stddev2d=gather_npArray(ens_stddev2d,me,slice_index,shape_tuple2d) 

        else:
            gmall=np.concatenate((temp1,temp2),axis=0)
            gmall=pyEnsLib.gather_npArray_pop(gmall,me,(me.get_size(),len(d3_var_names)+len(d2_var_names)))
    # Assign to file:
    if me.get_rank() == 0 | opts_dict['popens'] :
        if not opts_dict['cumul']:
            gmall=np.concatenate((gm3d,gm2d),axis=0)
            if not opts_dict['gmonly']:
                Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0)
                v_RMSZ[:,:]=Zscoreall[:,:]
            if not opts_dict['gmonly']:
                if (is_SE == True):
                    v_ens_avg3d[:,:,:]=ens_avg3d[:,:,:]
                    v_ens_stddev3d[:,:,:]=ens_stddev3d[:,:,:]
                    v_ens_avg2d[:,:]=ens_avg2d[:,:]
                    v_ens_stddev2d[:,:]=ens_stddev2d[:,:]
                else:
                    v_ens_avg3d[:,:,:,:]=ens_avg3d[:,:,:,:]
                    v_ens_stddev3d[:,:,:,:]=ens_stddev3d[:,:,:,:]
                    v_ens_avg2d[:,:,:]=ens_avg2d[:,:,:]
                    v_ens_stddev2d[:,:,:]=ens_stddev2d[:,:,:]
        else:
            gmall_temp=np.transpose(gmall[:,:])
            gmall=gmall_temp
        mu_gm,sigma_gm,standardized_global_mean,loadings_gm,scores_gm=pyEnsLib.pre_PCA(gmall,all_var_names,var_list,me)
        v_gm[:,:]=gmall[:,:]
        v_standardized_gm[:,:]=standardized_global_mean[:,:]
        v_mu_gm[:]=mu_gm[:]
        v_sigma_gm[:]=sigma_gm[:].astype(np.float32)
        v_loadings_gm[:,:]=loadings_gm[:,:]
        v_sigma_scores_gm[:]=scores_gm[:]

        if me.get_rank() == 0:
           print "All Done"
Example #11
0
def main(argv):


    # Get command line stuff and store in a dictionary
    s='verbose sumfile= indir= timeslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVarTest'
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv,"h",optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)
  
    
    # Set the default value for options
    opts_dict = {}
    opts_dict['timeslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,caller,opts_dict)

    # Print out timestamp, input ensemble file and new run directory
    dt=datetime.now()
    verbose = opts_dict['verbose']
    print '--------pyCECT--------'
    print ' '
    print dt.strftime("%A, %d. %B %Y %I:%M%p")
    print ' '
    print 'Ensemble summary file = '+opts_dict['sumfile']
    print ' '
    print 'Cam output directory = '+opts_dict['indir']    
    print ' '
    print ' '


    # Open all input files
    ifiles=[]
    in_files_temp=os.listdir(opts_dict['indir'])
    in_files=sorted(in_files_temp)
    in_files_random=pyEnsLib.Random_pickup(in_files,opts_dict)
    for frun_file in in_files_random:
         if (os.path.isfile(opts_dict['indir'] +'/'+ frun_file)):
             ifiles.append(Nio.open_file(opts_dict['indir']+'/'+frun_file,"r"))
         else:
             print "COULD NOT LOCATE FILE " +opts_dict['indir']+frun_file+" EXISTING"
             sys.exit()
    

 
    # Read all variables from the ensemble summary file
    ens_var_name,ens_avg,ens_stddev,ens_rmsz,ens_gm,num_3d,mu_gm,sigma_gm,loadings_gm,sigma_scores_gm=pyEnsLib.read_ensemble_summary(opts_dict['sumfile']) 

    if len(ens_rmsz) == 0:
        gmonly = True
    # Add ensemble rmsz and global mean to the dictionary "variables"
    variables={}
    if not gmonly:
	for k,v in ens_rmsz.iteritems():
	    pyEnsLib.addvariables(variables,k,'zscoreRange',v)

    for k,v in ens_gm.iteritems():
        pyEnsLib.addvariables(variables,k,'gmRange',v)

    # Get 3d variable name list and 2d variable name list seperately
    var_name3d=[]
    var_name2d=[]
    for vcount,v in enumerate(ens_var_name):
      if vcount < num_3d:
        var_name3d.append(v)
      else:
        var_name2d.append(v)

    # Get ncol and nlev value
    npts3d,npts2d,is_SE=pyEnsLib.get_ncol_nlev(ifiles[0])
 
    # Compare the new run and the ensemble summary file to get rmsz score
    results={}
    countzscore=np.zeros(len(ifiles),dtype=np.int32)
    countgm=np.zeros(len(ifiles),dtype=np.int32)
    if not gmonly:
	for fcount,fid in enumerate(ifiles): 
	    otimeSeries = fid.variables 
	    for var_name in ens_var_name: 
		orig=otimeSeries[var_name]
		Zscore,has_zscore=pyEnsLib.calculate_raw_score(var_name,orig[opts_dict['timeslice']],npts3d,npts2d,ens_avg,ens_stddev,is_SE) 
		if has_zscore:
		    # Add the new run rmsz zscore to the dictionary "results"
		    pyEnsLib.addresults(results,'zscore',Zscore,var_name,'f'+str(fcount))


	# Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
	for fcount,fid in enumerate(ifiles):
	    countzscore[fcount]=pyEnsLib.evaluatestatus('zscore','zscoreRange',variables,'ens',results,'f'+str(fcount))

    # Calculate the new run global mean
    mean3d,mean2d=pyEnsLib.generate_global_mean_for_summary(ifiles,var_name3d,var_name2d,opts_dict['timeslice'],is_SE,verbose)
    means=np.concatenate((mean3d,mean2d),axis=0)

    # Add the new run global mean to the dictionary "results"
    for i in range(means.shape[1]):
        for j in range(means.shape[0]):
	    pyEnsLib.addresults(results,'means',means[j][i],ens_var_name[j],'f'+str(i))

    # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
    for fcount,fid in enumerate(ifiles):
        countgm[fcount]=pyEnsLib.evaluatestatus('means','gmRange',variables,'gm',results,'f'+str(fcount))
  
    # Calculate the PCA scores of the new run
    new_scores=pyEnsLib.standardized(means,mu_gm,sigma_gm,loadings_gm)
    pyEnsLib.comparePCAscores(ifiles,new_scores,sigma_scores_gm,opts_dict)

    # Print out 
    if opts_dict['printVarTest']:
	print '*********************************************** '
	print 'Variable-based testing (for reference only - not used to determine pass/fail)'
	print '*********************************************** '
	for fcount,fid in enumerate(ifiles):
	    print ' '
	    print 'Run '+str(fcount+1)+":"
	    print ' '
	    if not gmonly:
		print '***'+str(countzscore[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble RMSZ distribution***'
		pyEnsLib.printsummary(results,'ens','zscore','zscoreRange',(fcount),variables,'RMSZ')
		print ' '
	    print '***'+str(countgm[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble global mean distribution***'
	    pyEnsLib.printsummary(results,'gm','means','gmRange',fcount,variables,'global mean')
	    print ' '
	    print '----------------------------------------------------------------------------'
Example #12
0
def main(argv):

    # Get command line stuff and store in a dictionary
    s = """verbose sumfile= indir= input_globs= tslice= nPC= sigMul= 
         minPCFail= minRunFail= numRunFile= printVarTest popens 
         jsonfile= mpi_enable nbin= minrange= maxrange= outfile= 
         casejson= npick= pepsi_gm pop_tol= web_enabled
         pop_threshold= prn_std_mean fIndex= lev= eet= json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['prn_std_mean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']
    #some mods for POP-ECT
    if popens == True:
        opts_dict['tslice'] = 0
        opts_dict['numRunFile'] = 1
        opts_dict['eet'] = 0
        opts_dict['mpi_enable'] = False

        #print opts_dict

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']
    if me.get_rank() == 0:
        print '--------pyCECT--------'
        print ' '
        print dt.strftime("%A, %d. %B %Y %I:%M%p")
        print ' '
        if not opts_dict['web_enabled']:
            print 'Ensemble summary file = ' + opts_dict['sumfile']
        print ' '
        print 'Testcase file directory = ' + opts_dict['indir']
        print ' '
        print ' '

    # Ensure sensible EET value
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    ifiles = []
    in_files = []
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
            in_files_first = result['not_pick_files']
            in_files = random.sample(in_files_first, opts_dict['npick'])
            print 'Testcase files:'
            print '\n'.join(in_files)

    elif opts_dict['json_case']:
        json_file = opts_dict['json_case']
        if (os.path.exists(json_file)):
            fd = open(json_file)
            metainfo = json.load(fd)
            if 'CaseName' in metainfo:
                casename = metainfo['CaseName']
                if (os.path.exists(opts_dict['indir'])):
                    for name in casename:
                        wildname = '*.' + name + '.*'
                        full_glob_str = os.path.join(opts_dict['indir'],
                                                     wildname)
                        glob_file = glob.glob(full_glob_str)
                        in_files.extend(glob_file)
        else:
            print "ERROR: " + opts_dict['json_case'] + " does not exist."
            sys.exit()
        print "in_files=", in_files
    else:
        wildname = '*' + str(opts_dict['input_globs']) + '*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            glob_files = glob.glob(full_glob_str)
            in_files.extend(glob_files)
            num_file = len(in_files)
            if num_file == 0:
                print "ERROR: no matching files for wildcard=" + wildname + " found in specified --indir"
                sys.exit()
            else:
                print "Found " + str(
                    num_file) + " matching files in specified --indir"
            if opts_dict['numRunFile'] > num_file:
                print "ERROR: more files needed (" + str(
                    opts_dict['numRunFile']
                ) + ") than available in the indir (" + str(num_file) + ")."
                sys.exit()
            #in_files_temp=os.listdir(opts_dict['indir'])
    in_files.sort()
    #print in_files

    if popens:
        #Partition the input file list
        in_files_list = me.partition(in_files,
                                     func=EqualStride(),
                                     involved=True)

    else:
        # Random pick non pop files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)
        #in_files_list=in_files

    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(Nio.open_file(frun_temp, "r"))
        else:
            print "ERROR: COULD NOT LOCATE FILE " + frun_temp
            sys.exit()

    if opts_dict['web_enabled']:
        if len(opts_dict['sumfile']) == 0:
            opts_dict[
                'sumfile'] = '/glade/p/cesmdata/cseg/inputdata/validation/'
        opts_dict['sumfile'], machineid, compiler = pyEnsLib.search_sumfile(
            opts_dict, ifiles)
        if len(machineid) != 0 and len(compiler) != 0:
            print ' '
            print 'Validation file    : machineid = ' + machineid + ', compiler = ' + compiler
            print 'Found summary file : ' + opts_dict['sumfile']
            print ' '
        else:
            print 'Warning: machine and compiler are unknown'

    if popens:

        # Read in the included var list
        if not os.path.exists(opts_dict['jsonfile']):
            print "ERROR: POP-ECT requires the specification of a valid json file via --jsonfile."
            sys.exit()
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print ' '
        print 'Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol'])
        print 'ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold'])
        zmall, n_timeslice = pyEnsLib.pop_compare_raw_score(
            opts_dict, ifiles, me.get_rank(), Var3d, Var2d)
        #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0)
        np.set_printoptions(threshold=np.nan)

        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(
                zmall, me, (me.get_size(), len(Var3d) + len(Var2d),
                            len(ifiles), opts_dict['nbin']))
            if me.get_rank() == 0:
                fout = open(opts_dict['outfile'], "w")
                for i in range(me.get_size()):
                    for j in zmall[i]:
                        np.savetxt(fout, j, fmt='%-7.2e')
    #cam
    else:
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum, std_gm = pyEnsLib.read_ensemble_summary(
            opts_dict['sumfile'])

        if len(ens_rmsz) == 0:
            gmonly = True
        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}
        if not gmonly:
            for k, v in ens_rmsz.iteritems():
                pyEnsLib.addvariables(variables, k, 'zscoreRange', v)

        for k, v in ens_gm.iteritems():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)

        # Get 3d variable name list and 2d variable name list separately
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

        if (is_SE ^ is_SE_sum):
            print 'Warning: please note the ensemble summary file is different from the testing files, they use different grids'

        # Compare the new run and the ensemble summary file to get rmsz score
        results = {}
        countzscore = np.zeros(len(ifiles), dtype=np.int32)
        countgm = np.zeros(len(ifiles), dtype=np.int32)
        if not gmonly:
            for fcount, fid in enumerate(ifiles):
                otimeSeries = fid.variables
                for var_name in ens_var_name:
                    orig = otimeSeries[var_name]
                    Zscore, has_zscore = pyEnsLib.calculate_raw_score(
                        var_name, orig[opts_dict['tslice']], npts3d, npts2d,
                        ens_avg, ens_stddev, is_SE, opts_dict, 0, 0, 0)
                    if has_zscore:
                        # Add the new run rmsz zscore to the dictionary "results"
                        pyEnsLib.addresults(results, 'zscore', Zscore,
                                            var_name, 'f' + str(fcount))

            # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
            for fcount, fid in enumerate(ifiles):
                countzscore[fcount] = pyEnsLib.evaluatestatus(
                    'zscore', 'zscoreRange', variables, 'ens', results,
                    'f' + str(fcount))

        # Calculate the new run global mean
        mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary(
            ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'],
            opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i],
                                    ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount, fid in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange',
                                                      variables, 'gm', results,
                                                      'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores, var_list, comp_std_gm = pyEnsLib.standardized(
            means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict,
            ens_avg, me)
        run_index, decision = pyEnsLib.comparePCAscores(
            ifiles, new_scores, sigma_scores_gm, opts_dict, me)

        # If there is failure, plot out standardized mean and compared standardized mean in box plots
        if opts_dict['prn_std_mean'] and decision == 'FAILED':
            import seaborn as sns
            category = {
                "all_outside99": [],
                "two_outside99": [],
                "one_outside99": [],
                "all_oneside_outside1QR": []
            }
            b = list(pyEnsLib.chunk(ens_var_name, 10))
            for f, alist in enumerate(b):
                for fc, avar in enumerate(alist):
                    dist_995 = np.percentile(std_gm[avar], 99.5)
                    dist_75 = np.percentile(std_gm[avar], 75)
                    dist_25 = np.percentile(std_gm[avar], 25)
                    dist_05 = np.percentile(std_gm[avar], 0.5)
                    c = 0
                    d = 0
                    p = 0
                    q = 0
                    for i in range(comp_std_gm[f + fc].size):
                        if comp_std_gm[f + fc][i] > dist_995:
                            c = c + 1
                        elif comp_std_gm[f + fc][i] < dist_05:
                            d = d + 1
                        elif (comp_std_gm[f + fc][i] < dist_995
                              and comp_std_gm[f + fc][i] > dist_75):
                            p = p + 1
                        elif (comp_std_gm[f + fc][i] > dist_05
                              and comp_std_gm[f + fc][i] < dist_25):
                            q = q + 1
                    if c == 3 or d == 3:
                        category["all_outside99"].append((avar, f + fc))
                    elif c == 2 or d == 2:
                        category["two_outside99"].append((avar, f + fc))
                    elif c == 1 or d == 1:
                        category["one_outside99"].append((avar, f + fc))
                    if p == 3 or q == 3:
                        category["all_oneside_outside1QR"].append(
                            (avar, f + fc))
            part_name = opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name = opts_dict['indir'].split('/')[-2]
            for key in sorted(category):
                list_array = []
                list_array2 = []
                list_var = []
                value = category[key]
                print "value len=", key, len(value)
                for each_var in value:
                    list_array.append(std_gm[each_var[0]])
                    list_array2.append(comp_std_gm[each_var[1]])
                    list_var.append(each_var[0])
                if len(value) != 0:
                    ax = sns.boxplot(data=list_array,
                                     whis=[0.5, 99.5],
                                     fliersize=0.0)
                    sns.stripplot(data=list_array2, jitter=True, color="r")
                    sns.plt.xticks(range(len(list_array)),
                                   list_var,
                                   fontsize=8,
                                   rotation=-45)
                    if decision == 'FAILED':
                        sns.plt.savefig(part_name + "_" + key + "_fail.png")
                    else:
                        sns.plt.savefig(part_name + "_" + key + "_pass.png")
                    sns.plt.clf()
            '''
            if len(run_index)>0:
               json_file=opts_dict['json_case']
               if (os.path.exists(json_file)):
                  fd=open(json_file)
                  metainfo=json.load(fd)
                  caseindex=metainfo['CaseIndex']
                  enspath=str(metainfo['EnsPath'][0])
                  #print caseindex
                  if (os.path.exists(enspath)):
                     i=0
                     comp_file=[]
                     search = '\.[0-9]{3}\.'
                     for name in in_files_list:
                        s=re.search(search,name)
                        in_files_index=s.group(0)
                        if in_files_index[1:4] in caseindex:
                           ens_index=str(caseindex[in_files_index[1:4]])
                           wildname='*.'+ens_index+'.*'
                           full_glob_str=os.path.join(enspath,wildname)
                           glob_file=glob.glob(full_glob_str)
                           comp_file.extend(glob_file)
                     print "comp_file=",comp_file                
                     pyEnsLib.plot_variable(in_files_list,comp_file,opts_dict,var_list,run_index,me)
            '''
        # Print out
        if opts_dict['printVarTest']:
            print '*********************************************** '
            print 'Variable-based testing (for reference only - not used to determine pass/fail)'
            print '*********************************************** '
            for fcount, fid in enumerate(ifiles):
                print ' '
                print 'Run ' + str(fcount + 1) + ":"
                print ' '
                if not gmonly:
                    print '***' + str(countzscore[fcount]), " of " + str(
                        len(ens_var_name)
                    ) + ' variables are outside of ensemble RMSZ distribution***'
                    pyEnsLib.printsummary(results, 'ens', 'zscore',
                                          'zscoreRange', (fcount), variables,
                                          'RMSZ')
                    print ' '
                print '***' + str(countgm[fcount]), " of " + str(
                    len(ens_var_name)
                ) + ' variables are outside of ensemble global mean distribution***'
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange',
                                      fcount, variables, 'global mean')
                print ' '
                print '----------------------------------------------------------------------------'
    if me.get_rank() == 0:
        print ' '
        print "Testing complete."
        print ' '
Example #13
0
def main(argv):


    # Get command line stuff and store in a dictionary
    s="""verbose sumfile= indir= input_globs= tslice= nPC= sigMul= 
         minPCFail= minRunFail= numRunFile= printVarTest popens 
         jsonfile= mpi_enable nbin= minrange= maxrange= outfile= 
         casejson= npick= pepsi_gm test_failure pop_tol= web_enabled
         pop_threshold= prn_std_mean fIndex= lev= eet= json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv,"h",optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)
  
    
    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['prn_std_mean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False
    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,caller,opts_dict)
    popens = opts_dict['popens']

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Print out timestamp, input ensemble file and new run directory
    dt=datetime.now()
    verbose = opts_dict['verbose']
    if me.get_rank()==0:
        print '--------pyCECT--------'
        print ' '
        print dt.strftime("%A, %d. %B %Y %I:%M%p")
        print ' '
        if not opts_dict['web_enabled']:
          print 'Ensemble summary file = '+opts_dict['sumfile']
        print ' '
        print 'Testcase file directory = '+opts_dict['indir']    
        print ' '
        print ' '

    # Ensure sensible EET value
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

  
    ifiles=[]
    in_files=[]
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
       with open(opts_dict['casejson']) as fin:
            result=json.load(fin)
            in_files_first=result['not_pick_files']
            in_files=random.sample(in_files_first,opts_dict['npick'])
            print 'Testcase files:'
            print '\n'.join(in_files)
           
    elif opts_dict['json_case']: 
       json_file=opts_dict['json_case']
       if (os.path.exists(json_file)):
          fd=open(json_file)
          metainfo=json.load(fd)
          if 'CaseName' in metainfo:
              casename=metainfo['CaseName']
              if (os.path.exists(opts_dict['indir'])):
                 for name in casename: 
                     wildname='*.'+name+'.*'
                     full_glob_str=os.path.join(opts_dict['indir'],wildname)
                     glob_file=glob.glob(full_glob_str)
                     in_files.extend(glob_file)
       else:
          print "Error: "+opts_dict['json_case']+" does not exist"
          sys.exit()
       print "in_files=",in_files
    else: 
       wildname='*'+opts_dict['input_globs']+'*'
       # Open all input files
       if (os.path.exists(opts_dict['indir'])):
          full_glob_str=os.path.join(opts_dict['indir'],wildname)
          glob_files=glob.glob(full_glob_str)
          in_files.extend(glob_files)
          num_file=len(in_files)
          if opts_dict['numRunFile'] > num_file:
             print "You requested more numRunFile than it is available at the indir, please change"
             sys.exit()
          #in_files_temp=os.listdir(opts_dict['indir'])
    in_files.sort()

    if popens:
        #Partition the input file list 
        in_files_list=me.partition(in_files,func=EqualStride(),involved=True)

    else:
        # Random pick non pop files
        in_files_list=pyEnsLib.Random_pickup(in_files,opts_dict)
        #in_files_list=in_files

    for frun_file in in_files_list:
         if frun_file.find(opts_dict['indir']) != -1:
            frun_temp=frun_file
         else:
            frun_temp=opts_dict['indir']+'/'+frun_file
         if (os.path.isfile(frun_temp)):
             ifiles.append(Nio.open_file(frun_temp,"r"))
         else:
             print "COULD NOT LOCATE FILE " +frun_temp+" EXISTING"
             sys.exit()
   
    if opts_dict['web_enabled']:
       if len(opts_dict['sumfile'])==0:
          opts_dict['sumfile']='/glade/p/cesmdata/cseg/inputdata/validation/'
       opts_dict['sumfile'],machineid,compiler=pyEnsLib.search_sumfile(opts_dict,ifiles) 
       if len(machineid)!=0 and len(compiler)!=0:
          print ' '
          print 'Validation file    : machineid = '+machineid+', compiler = '+compiler
          print 'Found summery file : '+opts_dict['sumfile']
          print ' '
       else:
          print 'Warning: machineid and compiler are unknown'

             

    if popens:
        
        # Read in the included var list
        Var2d,Var3d=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ESP')
        print ' '
        print 'Z-score tolerance = '+'{:3.2f}'.format(opts_dict['pop_tol'])
        print 'ZPR = '+'{:.2%}'.format(opts_dict['pop_threshold'])
        zmall,n_timeslice=pyEnsLib.compare_raw_score(opts_dict,ifiles,me.get_rank(),Var3d,Var2d)  
        #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0)
        np.set_printoptions(threshold=np.nan)

        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(zmall,me,(me.get_size(),len(Var3d)+len(Var2d),len(ifiles),opts_dict['nbin'])) 
            if me.get_rank()==0:
                fout = open(opts_dict['outfile'],"w")
                for i in range(me.get_size()):
                    for j in zmall[i]:
                        np.savetxt(fout,j,fmt='%-7.2e')
    else:
        # Read all variables from the ensemble summary file
        ens_var_name,ens_avg,ens_stddev,ens_rmsz,ens_gm,num_3d,mu_gm,sigma_gm,loadings_gm,sigma_scores_gm,is_SE_sum,std_gm=pyEnsLib.read_ensemble_summary(opts_dict['sumfile']) 

        if len(ens_rmsz) == 0:
            gmonly = True
        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables={}
        if not gmonly:
            for k,v in ens_rmsz.iteritems():
                pyEnsLib.addvariables(variables,k,'zscoreRange',v)

        for k,v in ens_gm.iteritems():
            pyEnsLib.addvariables(variables,k,'gmRange',v)

        # Get 3d variable name list and 2d variable name list seperately
        var_name3d=[]
        var_name2d=[]
        for vcount,v in enumerate(ens_var_name):
          if vcount < num_3d:
            var_name3d.append(v)
          else:
            var_name2d.append(v)

        # Get ncol and nlev value
        npts3d,npts2d,is_SE=pyEnsLib.get_ncol_nlev(ifiles[0])
 
        if (is_SE ^ is_SE_sum):
           print 'Warning: please note the ensemble summary file is different from the testing files, they use different grids'
           
     
        # Compare the new run and the ensemble summary file to get rmsz score
        results={}
        countzscore=np.zeros(len(ifiles),dtype=np.int32)
        countgm=np.zeros(len(ifiles),dtype=np.int32)
        if not gmonly:
            for fcount,fid in enumerate(ifiles): 
                otimeSeries = fid.variables 
                for var_name in ens_var_name: 
                    orig=otimeSeries[var_name]
                    Zscore,has_zscore=pyEnsLib.calculate_raw_score(var_name,orig[opts_dict['tslice']],npts3d,npts2d,ens_avg,ens_stddev,is_SE,opts_dict,0,0,0) 
                    if has_zscore:
                        # Add the new run rmsz zscore to the dictionary "results"
                        pyEnsLib.addresults(results,'zscore',Zscore,var_name,'f'+str(fcount))


            # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
            for fcount,fid in enumerate(ifiles):
                countzscore[fcount]=pyEnsLib.evaluatestatus('zscore','zscoreRange',variables,'ens',results,'f'+str(fcount))

        # Calculate the new run global mean
        mean3d,mean2d,varlist=pyEnsLib.generate_global_mean_for_summary(ifiles,var_name3d,var_name2d,is_SE,opts_dict['pepsi_gm'],opts_dict)
        means=np.concatenate((mean3d,mean2d),axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results,'means',means[j][i],ens_var_name[j],'f'+str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount,fid in enumerate(ifiles):
            countgm[fcount]=pyEnsLib.evaluatestatus('means','gmRange',variables,'gm',results,'f'+str(fcount))
      
        # Calculate the PCA scores of the new run
        new_scores,var_list,comp_std_gm=pyEnsLib.standardized(means,mu_gm,sigma_gm,loadings_gm,ens_var_name,opts_dict,ens_avg,me)
        run_index,decision=pyEnsLib.comparePCAscores(ifiles,new_scores,sigma_scores_gm,opts_dict,me)

        # If there is failure, plot out standardized mean and compared standardized mean in box plots
        if opts_dict['prn_std_mean'] and decision == 'FAILED':
            import seaborn as sns
            category={"all_outside99":[],"two_outside99":[],"one_outside99":[],"all_oneside_outside1QR":[]}
            b=list(pyEnsLib.chunk(ens_var_name,10))
            for f,alist in enumerate(b):
                for fc,avar in enumerate(alist):
                    dist_995=np.percentile(std_gm[avar],99.5)
                    dist_75=np.percentile(std_gm[avar],75)
                    dist_25=np.percentile(std_gm[avar],25)
                    dist_05=np.percentile(std_gm[avar],0.5)
                    c=0
                    d=0
                    p=0
                    q=0
                    for i in range(comp_std_gm[f+fc].size):
                        if comp_std_gm[f+fc][i]>dist_995:
                           c=c+1
                        elif comp_std_gm[f+fc][i]<dist_05:
                           d=d+1
                        elif (comp_std_gm[f+fc][i]<dist_995 and comp_std_gm[f+fc][i]>dist_75):
                           p=p+1
                        elif (comp_std_gm[f+fc][i]>dist_05 and comp_std_gm[f+fc][i]<dist_25):
                           q=q+1
                    if c == 3 or d == 3:
                       category["all_outside99"].append((avar,f+fc))
                    elif c == 2 or d == 2:    
                       category["two_outside99"].append((avar,f+fc))
                    elif c == 1 or d == 1:
                       category["one_outside99"].append((avar,f+fc))
                    if p == 3 or q == 3:
                       category["all_oneside_outside1QR"].append((avar,f+fc))
            part_name=opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name=opts_dict['indir'].split('/')[-2]
            for key in sorted(category):
                list_array=[]
                list_array2=[]
                list_var=[]
                value=category[key]
                print "value len=",key,len(value)
                for each_var in value:
                    list_array.append(std_gm[each_var[0]])
                    list_array2.append(comp_std_gm[each_var[1]])
                    list_var.append(each_var[0])
                if len(value) !=0 :
                    ax=sns.boxplot(data=list_array,whis=[0.5,99.5],fliersize=0.0)
                    sns.stripplot(data=list_array2,jitter=True,color="r")
                    sns.plt.xticks(range(len(list_array)),list_var,fontsize=8,rotation=-45)
                    if decision == 'FAILED':
                       sns.plt.savefig(part_name+"_"+key+"_fail.png")
                    else:
                       sns.plt.savefig(part_name+"_"+key+"_pass.png")
                    sns.plt.clf()
                
            '''
            if len(run_index)>0:
               json_file=opts_dict['json_case']
               if (os.path.exists(json_file)):
                  fd=open(json_file)
                  metainfo=json.load(fd)
                  caseindex=metainfo['CaseIndex']
                  enspath=str(metainfo['EnsPath'][0])
                  #print caseindex
                  if (os.path.exists(enspath)):
                     i=0
                     comp_file=[]
                     search = '\.[0-9]{3}\.'
                     for name in in_files_list:
                        s=re.search(search,name)
                        in_files_index=s.group(0)
                        if in_files_index[1:4] in caseindex:
                           ens_index=str(caseindex[in_files_index[1:4]])
                           wildname='*.'+ens_index+'.*'
                           full_glob_str=os.path.join(enspath,wildname)
                           glob_file=glob.glob(full_glob_str)
                           comp_file.extend(glob_file)
                     print "comp_file=",comp_file                
                     pyEnsLib.plot_variable(in_files_list,comp_file,opts_dict,var_list,run_index,me)
            '''
        # Print out 
        if opts_dict['printVarTest']:
            print '*********************************************** '
            print 'Variable-based testing (for reference only - not used to determine pass/fail)'
            print '*********************************************** '
            for fcount,fid in enumerate(ifiles):
                print ' '
                print 'Run '+str(fcount+1)+":"
                print ' '
                if not gmonly:
                    print '***'+str(countzscore[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble RMSZ distribution***'
                    pyEnsLib.printsummary(results,'ens','zscore','zscoreRange',(fcount),variables,'RMSZ')
                    print ' '
                print '***'+str(countgm[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble global mean distribution***'
                pyEnsLib.printsummary(results,'gm','means','gmRange',fcount,variables,'global mean')
                print ' '
                print '----------------------------------------------------------------------------'
    if me.get_rank() == 0:
        print ' '
        print "Testing complete."
        print ' '
def main(argv):
    """Build the CAM ensemble summary file (pyEnsSum driver).

    Steps performed, in order:
      1. Parse ``argv`` with getopt and merge the result into the defaults
         through ``pyEnsLib.getopt_parseconfig``.
      2. List the files in ``--indir`` (or, with ``--cumul``, the files
         returned by ``get_cumul_filelist``) and open the first ``esize``
         of them with Nio.
      3. Read the spatial dimensions from the first file, check that every
         file agrees with them, and classify the variables as 2d or 3d.
      4. On the writing rank, create the summary file with its dimensions,
         global attributes and variables.
      5. Compute global means and RMSZ scores through pyEnsLib, gather the
         per-rank pieces when MPI is enabled, and write the results plus the
         ``pyEnsLib.pre_PCA`` output to the summary file.

    Parameters:
        argv: command-line arguments without the program name.

    The function terminates the process through ``sys.exit`` on invalid
    options, a missing input directory or file, too few input files, or
    inconsistent dimensions.
    """
    print('Running pyEnsSum!')

    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {
        'tag': '',
        'compset': '',
        'mach': '',
        'esize': 151,
        'tslice': 0,
        'res': '',
        'sumfile': 'ens.summary.nc',
        'indir': './',
        'sumfiledir': './',
        'jsonfile': '',
        'verbose': True,
        'mpi_enable': False,
        'maxnorm': False,
        'gmonly': False,
        'popens': False,
        'cumul': False,
        'regx': 'test',
        'startMon': 1,
        'endMon': 1,
        'fIndex': 151,
    }

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']
    esize = int(opts_dict['esize'])

    if verbose:
        print(opts_dict)
        print('Ensemble size for summary = ', esize)

    # All four descriptive options are required, as the message states.
    # (The previous "... and mach or res" accepted --res on its own.)
    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach']
            and opts_dict['res']):
        print('Please specify --tag, --compset, --mach and --res options')
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist = []

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            # Read in the excluded var list
            ex_varlist = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ES')

    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)

    in_files = []
    if os.path.exists(input_dir):
        # Get the sorted list of files
        in_files = sorted(os.listdir(input_dir))
        # Make sure we have enough
        num_files = len(in_files)
        if verbose:
            print('Number of files in input directory = ', num_files)
        if num_files < esize:
            print('Number of files in input directory (', num_files,
                  ') is less than specified ensemble size of ', esize)
            sys.exit(2)
        if num_files > esize:
            print('NOTE: Number of files in ', input_dir,
                  'is greater than specified ensemble size of ', esize,
                  '\nwill just use the first ', esize, 'files')
    else:
        print('Input directory: ', input_dir, ' not found')
        sys.exit(2)

    if opts_dict['cumul']:
        if not opts_dict['regx']:
            # Previously this path failed later with an unbound
            # in_files_list; stop with an explicit message instead.
            print('ERROR: --cumul requires a non-empty --regx  => EXITING....')
            sys.exit(2)
        in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'], opts_dict['regx'])
        in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
        if me.get_rank() == 0:
            print('in_files=', in_files)

    # Open the files in the input directory
    o_files = []
    for onefile in in_files[0:esize]:
        file_path = os.path.join(input_dir, onefile)
        if os.path.isfile(file_path):
            o_files.append(Nio.open_file(file_path, "r"))
        else:
            print("COULD NOT LOCATE FILE " + file_path + "! EXITING....")
            sys.exit()

    # Store dimensions of the input fields
    if verbose:
        print("Getting spatial dimensions")
    nlev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif key in ("nlon", "lon"):
            nlon = input_dims[key]
            lonkey = key
        elif key in ("nlat", "lat"):
            nlat = input_dims[key]
            latkey = key

    if nlev == -1:
        print("COULD NOT LOCATE valid dimension lev => EXITING....")
        sys.exit()

    if (ncol == -1) and ((nlat == -1) or (nlon == -1)):
        print("Need either lat/lon or ncol  => EXITING....")
        sys.exit()

    # Check if this is SE or FV data: an "ncol" dimension marks SE
    is_SE = (ncol != -1)

    # Make sure all files have the same dimensions
    if verbose:
        print("Checking dimensions across files....")
        print('lev = ', nlev)
        if is_SE:
            print('ncol = ', ncol)
        else:
            print('nlat = ', nlat)
            print('nlon = ', nlon)

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if is_SE:
            mismatch = (nlev != int(input_dims["lev"])
                        or ncol != int(input_dims["ncol"]))
        else:
            mismatch = (nlev != int(input_dims["lev"])
                        or nlat != int(input_dims[latkey])
                        or nlon != int(input_dims[lonkey]))
        if mismatch:
            # o_files[count] was opened from in_files[count]; name the
            # offending file rather than the first file twice.
            print("Dimension mismatch between ", in_files[0], 'and', in_files[count], '!!!')
            sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    if ex_varlist:
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    num_vars = len(vars_dict)
    if verbose:
        print('Number of variables (including metadata) found =  ', num_vars)
    str_size = 0

    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.items():
        vr = v.rank   # number of dimensions
        vs = v.shape  # dimension sizes
        is_2d = False
        is_3d = False
        if is_SE:  # (time, lev, ncol) or (time, ncol)
            if (vr == 2) and (vs[1] == ncol):
                is_2d = True
                num_2d += 1
            elif (vr == 3) and (vs[2] == ncol and vs[1] == nlev):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlat, nlon) or (time, nlat, nlon)
            if (vr == 3) and (vs[1] == nlat and vs[2] == nlon):
                is_2d = True
                num_2d += 1
            elif (vr == 4) and (vs[2] == nlat and vs[3] == nlon and vs[1] == nlev):
                is_3d = True
                num_3d += 1
        if is_3d:
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif is_2d:
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    if verbose:
        print('num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")")

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if verbose:
        print("Creating ", this_sumfile, "  ...")

    # "rank == 0 | popens" parsed as "rank == (0 | popens)" because | binds
    # tighter than ==; the intended test is a logical or.
    is_writer = (me.get_rank() == 0) or opts_dict["popens"]

    if is_writer:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if verbose:
            print("Setting dimensions .....")
        if is_SE:
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("Setting global attributes .....")
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if verbose:
            print("Creating variables .....")
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            if is_SE:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol'))
            else:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))

            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm', 'f', ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm', 'f', ('nvars',))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm', 'f', ('nvars',))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm', 'f', ('nvars',))

        # Assign vars, var3d and var2d
        if verbose:
            print("Assigning vars, var3d, and var2d .....")

        # Each name becomes a list of single characters, right-padded with
        # blanks to str_size so it fills one row of the 'S1' variables.
        eq_all_var_names = [list(name.ljust(str_size)) for name in all_var_names]
        eq_d3_var_names = [list(name.ljust(str_size)) for name in d3_var_names]
        eq_d2_var_names = [list(name.ljust(str_size)) for name in d2_var_names]

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if verbose:
            print("Assigning time invariant metadata .....")
        lev_data = vars_dict["lev"]
        # Write the level values into the file variable; a plain
        # "v_lev = lev_data" only rebinds the local name.
        v_lev[:] = np.asarray(lev_data[:], dtype=np.float32)

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable, we also do max norm also (currently done in pyStats)
    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True)
        var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # Calculate global means #
    if verbose:
        print("Calculating global means .....")
    if not opts_dict['cumul']:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict)
    if verbose:
        print("Finish calculating global means .....")

    # Calculate RMSZ scores
    if verbose:
        print("Calculating RMSZ scores .....")
    if (not opts_dict['gmonly']) or opts_dict['cumul']:
        zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if verbose:
            print("Calculating max norm of ensembles .....")
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict['mpi_enable'] and not opts_dict['popens']:

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index, (len(d3_var_names), len(o_files)))

            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d = gather_npArray(zscore3d, me, slice_index, (len(d3_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names), me.get_rank())
                ens_avg3d = gather_npArray(ens_avg3d, me, slice_index, shape_tuple3d)
                ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index, shape_tuple3d)

            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index, (len(d2_var_names), len(o_files)))

            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d = gather_npArray(zscore2d, me, slice_index, (len(d2_var_names), len(o_files)))

                # Gather ens_avg2d and ens_stddev2d results
                shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names), me.get_rank())
                ens_avg2d = gather_npArray(ens_avg2d, me, slice_index, shape_tuple2d)
                ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index, shape_tuple2d)

        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names)))

    # Assign to file:
    if is_writer:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            if not opts_dict['gmonly']:
                Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0)
                v_RMSZ[:, :] = Zscoreall[:, :]
                if is_SE:
                    v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :]
                    v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :]
                    v_ens_avg2d[:, :] = ens_avg2d[:, :]
                    v_ens_stddev2d[:, :] = ens_stddev2d[:, :]
                else:
                    v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :]
                    v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :]
                    v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :]
                    v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :]
        else:
            # NOTE(review): gmall is only assigned in the MPI gather branch
            # above, so --cumul appears to require --mpi_enable - confirm.
            gmall = np.transpose(gmall[:, :])
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(gmall)
        v_gm[:, :] = gmall[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:].astype(np.float32)
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        # Close explicitly so the summary file is complete on disk before
        # completion is reported.
        nc_sumfile.close()

    print("All Done")

def get_cumul_filelist(opts_dict,indir,regx):
    """Collect the file names in ``indir`` that belong to a cumulative ensemble.

    For each prefix in ("mon", "gnu", "pgi"), each run index in
    ``[fIndex, fIndex + esize // 3)`` and each month in
    ``[startMon, endMon]``, the entries of ``indir`` matching
    ``^<prefix>.*<index>.*-<MM>`` are appended (sorted within each pattern).

    Parameters:
        opts_dict: option dictionary; reads 'indir', 'fIndex', 'esize',
            'startMon' and 'endMon'.
        indir: directory whose entries are searched.
        regx: unused; the pattern is rebuilt for every
            prefix/index/month combination (kept for interface
            compatibility).

    Returns:
        list of matching file names (base names, not full paths).

    Exits with status 2 when ``opts_dict['indir']`` is empty.
    """
    if not opts_dict['indir']:
        print('input dir is not specified')
        sys.exit(2)

    first_index = int(opts_dict['fIndex'])
    # Floor division: "/" yields a float on Python 3, which range() rejects.
    runs_per_prefix = int(opts_dict['esize']) // 3
    start_mon = int(opts_dict['startMon'])
    end_mon = int(opts_dict['endMon'])

    # The directory listing does not depend on the pattern, so read and
    # sort it once instead of once per pattern.
    dir_entries = sorted(os.listdir(indir))

    all_files = []
    for prefix in ("mon", "gnu", "pgi"):
        for i in range(first_index, first_index + runs_per_prefix):
            for j in range(start_mon, end_mon + 1):
                mon_str = str(j).zfill(2)
                regx = '(^' + prefix + '(.)*' + str(i) + '(.)*-(' + mon_str + '))'
                print('regx=',regx)
                pattern = re.compile(regx)
                all_files.extend(f for f in dir_entries if pattern.search(f))
    print("all_files=",all_files)
    return all_files





#
# Return the given shape tuple with its first dimension replaced by shape1
#
def get_shape(shape_tuple,shape1,rank):
    """Return ``shape_tuple`` as a tuple whose first entry is ``shape1``.

    ``rank`` is accepted but not used.
    """
    dims=list(shape_tuple)
    # Only the leading dimension changes; the rest is carried over.
    dims[0]=shape1
    return tuple(dims)

#
# Get the mpi partition list for each processor
#
def get_stride_list(len_of_list,me):
    """Return, for every rank, the strided indices it owns.

    Rank ``i`` of ``me.get_size()`` ranks gets the indices
    ``i, i + size, i + 2*size, ...`` below ``len_of_list``.

    Parameters:
        len_of_list: number of items being distributed.
        me: communicator object providing ``get_size()``.

    Returns:
        list with one NumPy index array per rank.
    """
    nprocs=me.get_size()
    # One index array is enough; every rank's share is a strided slice of it.
    index_arr=np.arange(len_of_list)
    return [index_arr[i::nprocs] for i in range(nprocs)]

#
# Gather each processor's rows on the master processor and assemble them into one array
#
def gather_npArray(npArray,me,slice_index,array_shape):
    """Assemble the per-rank pieces of an array on rank 0.

    Row ``k`` of the piece held by rank ``r`` is stored at row
    ``slice_index[r][k]`` of the result.

    Parameters:
        npArray: this rank's piece.
        me: communicator object providing ``get_rank()``, ``get_size()``,
            ``collect()`` and ``sync()``.
        slice_index: per-rank index arrays (see ``get_stride_list``).
        array_shape: shape of the assembled array.

    Returns:
        float32 array of shape ``array_shape``; filled on rank 0, all
        zeros on the other ranks.
    """
    the_array=np.zeros(array_shape,dtype=np.float32)
    if me.get_rank()==0:
        # Place the master's own rows first ...
        for k,j in enumerate(slice_index[0]):
            the_array[j,:]=npArray[k,:]
        # ... then one piece from each of the remaining ranks, in whatever
        # order collect() delivers them.
        for _ in range(1,me.get_size()):
            rank,piece=me.collect()
            for k,j in enumerate(slice_index[rank]):
                the_array[j,:]=piece[k,:]
    else:
        me.collect(npArray)
    me.sync()
    return the_array

if __name__ == "__main__":
    # Script entry point: pass everything after the program name to main().
    cli_args = sys.argv[1:]
    main(cli_args)