def testPartitionList(self):
    """Serial and parallel EqualStride partitions of a list must agree."""
    input_data = range(5 + self.rank)
    serial_out = self.scomm.partition(input_data, func=EqualStride())
    parallel_out = self.pcomm.partition(input_data, func=EqualStride())
    info = test_info_msg('partition(list)', input_data, serial_out, parallel_out)
    print(info)
    self.assertEqual(serial_out, parallel_out, info)
def testPartitionListInvolved(self):
    """Serial and parallel involved EqualStride partitions of a list must agree."""
    data = range(5 + self.rank)
    sresult = self.scomm.partition(data, func=EqualStride(), involved=True)
    presult = self.pcomm.partition(data, func=EqualStride(), involved=True)
    msg = test_info_msg('partition(list, T)', data, sresult, presult)
    # Fixed: was a Python 2 'print msg' statement in an otherwise Python 3 file.
    print(msg)
    self.assertEqual(sresult, presult, msg)
def testPartitionArrayInvolved(self):
    """An involved EqualStride partition of an array gives each rank its stride slice."""
    source = np.arange(10) if self.gcomm.is_manager() else None
    result = self.gcomm.partition(source, func=EqualStride(), involved=True)
    # Every rank (manager included) receives elements rank, rank+size, ...
    np.testing.assert_array_equal(result, np.arange(self.rank, 10, self.size))
def testPartitionArrayInvolved(self):
    """An involved EqualStride partition of an array gives each rank its stride slice."""
    if self.gcomm.is_manager():
        data = np.arange(10)
    else:
        data = None
    actual = self.gcomm.partition(data, func=EqualStride(), involved=True)
    expected = np.arange(self.rank, 10, self.size)
    msg = test_info_msg(self.rank, self.size, 'partition(array, T)', data, actual, expected)
    # Fixed: was a Python 2 'print msg' statement.
    print(msg)
    np.testing.assert_array_equal(actual, expected, msg)
def testMonoPartitionListInvolved(self):
    """Involved EqualStride partition across the mono intracommunicator."""
    data = list(range(10 + self.grank)) if self.monocomm.is_manager() else None
    result = self.monocomm.partition(data, func=EqualStride(), involved=True)
    stride = self.monocomm.get_size()
    self.assertEqual(result, list(range(self.rank, 10 + self.color, stride)))
def testMonoPartitionListInvolved(self):
    """Involved EqualStride partition across the mono intracommunicator."""
    if self.monocomm.is_manager():
        # Fixed: wrap range() in list() as in the sibling test; under Python 2
        # range() returned a list, so this preserves the original semantics.
        data = list(range(10 + self.grank))
    else:
        data = None
    actual = self.monocomm.partition(data, func=EqualStride(), involved=True)
    expected = list(range(self.rank, 10 + self.color, self.monocomm.get_size()))
    msg = test_info_msg(self.grank, self.gsize, 'mono.partition(list,T)', data, actual, expected)
    # Fixed: was a Python 2 'print msg' statement.
    print(msg)
    self.assertEqual(actual, expected, msg)
def testPartitionArray(self):
    """Uninvolved EqualStride partition: the manager distributes and keeps no piece."""
    on_manager = self.gcomm.is_manager()
    data = np.arange(10) if on_manager else None
    actual = self.gcomm.partition(data, func=EqualStride())
    if on_manager:
        # The manager hands the data out and receives nothing back.
        self.assertEqual(actual, None)
    else:
        expected = np.arange(self.rank - 1, 10, self.size - 1)
        np.testing.assert_array_equal(actual, expected)
def testMultiPartitionListInvolved(self):
    """Involved EqualStride partition across the multi intercommunicator."""
    data = None
    if self.multicomm.is_manager():
        data = list(range(10 + self.grank))
    result = self.multicomm.partition(data, func=EqualStride(), involved=True)
    stop = 10 + self.rank * len(self.groups)
    expected = list(range(self.color, stop, self.multicomm.get_size()))
    info = test_info_msg(self.grank, self.gsize, 'multi.partition(list,T)', data, result, expected)
    print(info)
    self.assertEqual(result, expected, info)
def testPartitionStrArray(self):
    """Uninvolved EqualStride partition of a string array."""
    letters = list('abcdefghi')
    on_manager = self.gcomm.is_manager()
    data = np.array(letters) if on_manager else None
    actual = self.gcomm.partition(data, func=EqualStride())
    if on_manager:
        # Manager is not involved, so it gets nothing back.
        self.assertEqual(actual, None)
    else:
        np.testing.assert_array_equal(
            actual, np.array(letters[self.rank - 1::self.size - 1]))
def testPartitionArray(self):
    """Uninvolved EqualStride partition of an array: the manager receives no piece."""
    if self.gcomm.is_manager():
        data = np.arange(10)
    else:
        data = None
    actual = self.gcomm.partition(data, func=EqualStride())
    if self.gcomm.is_manager():
        expected = None
    else:
        expected = np.arange(self.rank - 1, 10, self.size - 1)
    msg = test_info_msg(self.rank, self.size, 'partition(array)', data, actual, expected)
    # Fixed: was a Python 2 'print msg' statement.
    print(msg)
    if self.gcomm.is_manager():
        self.assertEqual(actual, expected, msg)
    else:
        np.testing.assert_array_equal(actual, expected, msg)
def testPartitionStrArray(self):
    """Uninvolved EqualStride partition of a string array: the manager receives no piece."""
    indata = list('abcdefghi')
    if self.gcomm.is_manager():
        # Fixed: original had a duplicated assignment 'data = data = np.array(indata)'.
        data = np.array(indata)
    else:
        data = None
    actual = self.gcomm.partition(data, func=EqualStride())
    if self.gcomm.is_manager():
        expected = None
    else:
        expected = np.array(indata[self.rank - 1::self.size - 1])
    msg = test_info_msg(self.rank, self.size, 'partition(string-array)', data, actual, expected)
    # Fixed: was a Python 2 'print msg' statement.
    print(msg)
    if self.gcomm.is_manager():
        self.assertEqual(actual, expected, msg)
    else:
        np.testing.assert_array_equal(actual, expected, msg)
def main(argv):
    """Driver for pyEnsSum: build a CAM verification ensemble summary file.

    Parses command-line options, reads the ensemble history files found in
    --indir, computes per-variable global means across the ensemble, runs the
    PCA preparation, and writes the results to the netCDF summary file named
    by --sumfile.  MPI-parallel via simplecomm when --mpi_enable is set.

    NOTE(review): the original source was collapsed onto single physical lines
    and could not parse; this is a reconstruction with conventional formatting.
    Statement nesting at a few ambiguous points follows the obvious reading --
    confirm against upstream pyEnsSum.py.
    """
    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex= mpi_disable'
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_0'
    opts_dict['compset'] = 'F2000climo'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = True
    opts_dict['mpi_disable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']
    esize = int(opts_dict['esize'])

    if opts_dict['popens']:
        print("ERROR: Please use pyEnsSumPop.py for a POP ensemble (not --popens) => EXITING....")
        sys.exit()

    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']):
        print('ERROR: Please specify --tag, --compset, --mach and --res options => EXITING....')
        sys.exit()

    if opts_dict['mpi_disable']:
        opts_dict['mpi_enable'] = False

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist = []
    inc_varlist = []

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        print('STATUS: Running pyEnsSum.py')
    if me.get_rank() == 0 and verbose:
        print(opts_dict)
        print('STATUS: Ensemble size for summary = ', esize)

    exclude = False
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist = []
            # Read in the excluded or included var list
            ex_varlist, exclude = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ES')
            if exclude == False:
                # The json list was an *include* list, not an exclude list.
                inc_varlist = ex_varlist
                ex_varlist = []

    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude = me.partition(exclude, func=Duplicate(), involved=True)
        if exclude:
            ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)
        else:
            inc_varlist = me.partition(inc_varlist, func=Duplicate(), involved=True)

    in_files = []
    if os.path.exists(input_dir):
        # Get the list of files
        in_files = sorted(os.listdir(input_dir))

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank() == 0 and verbose:
            print('VERBOSE: Number of files in input directory = ', num_files)
        if num_files < esize:
            if me.get_rank() == 0 and verbose:
                print('VERBOSE: Number of files in input directory (', num_files,
                      ') is less than specified ensemble size of ', esize)
            sys.exit(2)
        if num_files > esize:
            if me.get_rank() == 0 and verbose:
                print('VERBOSE: Note that the number of files in ', input_dir,
                      'is greater than specified ensemble size of ', esize,
                      '\nwill just use the first ', esize, 'files')
    else:
        if me.get_rank() == 0:
            print('ERROR: Input directory: ', input_dir, ' not found')
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'], opts_dict['regx'])
        in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
        if me.get_rank() == 0 and verbose:
            print('VERBOSE: in_files = ', in_files)

    # Check full file names in input directory (don't open yet)
    full_in_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print('VERBOSE: Input files are: ')
    for onefile in in_files[0:esize]:
        fname = input_dir + '/' + onefile
        if me.get_rank() == 0 and opts_dict['verbose']:
            print(fname)
        if os.path.isfile(fname):
            full_in_files.append(fname)
        else:
            if me.get_rank() == 0:
                print("ERROR: Could not locate file ", fname, " => EXITING....")
            sys.exit()

    # open just the first file
    first_file = nc.Dataset(full_in_files[0], "r")

    # Store dimensions of the input fields
    if me.get_rank() == 0 and verbose:
        print("VERBOSE: Getting spatial dimensions")
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''

    # Look at first file and get dims
    input_dims = first_file.dimensions
    for key in input_dims:
        if key == "lev":
            nlev = len(input_dims["lev"])
        elif key == "ilev":
            nilev = len(input_dims["ilev"])
        elif key == "ncol":
            ncol = len(input_dims["ncol"])
        elif (key == "nlon") or (key == "lon"):
            nlon = len(input_dims[key])
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = len(input_dims[key])
            latkey = key

    if nlev == -1:
        if me.get_rank() == 0:
            print("ERROR: could not locate a valid dimension (lev) => EXITING....")
        sys.exit()

    if (ncol == -1) and ((nlat == -1) or (nlon == -1)):
        if me.get_rank() == 0:
            print("ERROR: Need either lat/lon or ncol => EXITING....")
        sys.exit()

    # Check if this is SE or FV data
    is_SE = (ncol != -1)

    # output dimensions
    if me.get_rank() == 0 and verbose:
        print('lev = ', nlev)
        if is_SE:
            print('ncol = ', ncol)
        else:
            print('nlat = ', nlat)
            print('nlon = ', nlon)

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = first_file.variables

    # Remove the excluded variables (specified in json file) from variable dictionary
    if exclude:
        # NOTE(review): no .copy() here (unlike the else branch), so this
        # mutates first_file.variables directly - confirm intent upstream.
        vars_dict = vars_dict_all
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    # Given an included var list, remove all the variables that are not on the list
    else:
        vars_dict = vars_dict_all.copy()
        for k, v in vars_dict_all.items():
            if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
                del vars_dict[k]

    num_vars = len(vars_dict)

    str_size = 0
    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.items():
        vr = len(v.dimensions)  # num dimensions
        vs = v.shape            # dim values
        is_2d = False
        is_3d = False
        if is_SE:
            # (time, lev, ncol) or (time, ncol)
            if (vr == 2) and (vs[1] == ncol):
                is_2d = True
                num_2d += 1
            elif (vr == 3) and (vs[2] == ncol and vs[1] == nlev):
                is_3d = True
                num_3d += 1
        else:
            # (time, lev, nlat, nlon) or (time, nlat, nlon)
            if (vr == 3) and (vs[1] == nlat and vs[2] == nlon):
                is_2d = True
                num_2d += 1
            elif (vr == 4) and (vs[2] == nlat and vs[3] == nlon
                                and (vs[1] == nlev or vs[1] == nilev)):
                is_3d = True
                num_3d += 1
        if is_3d:
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif is_2d:
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    if me.get_rank() == 0 and verbose:
        print('VERBOSE: Number of variables found: ', num_3d + num_2d)
        print('VERBOSE: 3D variables: ' + str(num_3d) + ', 2D variables: ' + str(num_2d))

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    if esize < num_2d + num_3d:
        if me.get_rank() == 0:
            print("************************************************************************************************************************************")
            print(" ERROR: the total number of 3D and 2D variables " + str(num_2d + num_3d)
                  + " is larger than the number of ensemble files " + str(esize))
            print(" Cannot generate ensemble summary file, please remove more variables from your included variable list,")
            print(" or add more variables in your excluded variable list => EXITING....")
            print("************************************************************************************************************************************")
        sys.exit()

    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    # Rank 0 - Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    # check if directory is valid
    sum_dir = os.path.dirname(this_sumfile)
    if len(sum_dir) == 0:
        sum_dir = '.'
    if not os.path.exists(sum_dir):
        if me.get_rank() == 0:
            print('ERROR: Summary file directory: ', sum_dir, ' not found')
        sys.exit(2)

    if me.get_rank() == 0:
        if verbose:
            print("VERBOSE: Creating ", this_sumfile, " ...")
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        nc_sumfile = nc.Dataset(this_sumfile, "w", format="NETCDF4_CLASSIC")

        # Set dimensions
        if verbose:
            print("VERBOSE: Setting dimensions .....")
        if is_SE:
            nc_sumfile.createDimension('ncol', ncol)
        else:
            nc_sumfile.createDimension('nlat', nlat)
            nc_sumfile.createDimension('nlon', nlon)
        nc_sumfile.createDimension('nlev', nlev)
        nc_sumfile.createDimension('ens_size', esize)
        nc_sumfile.createDimension('nvars', num_3d + num_2d)
        nc_sumfile.createDimension('nvars3d', num_3d)
        nc_sumfile.createDimension('nvars2d', num_2d)
        nc_sumfile.createDimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("VERBOSE: Setting global attributes .....")
        nc_sumfile.creation_date = now
        nc_sumfile.title = 'CAM verification ensemble summary file'
        nc_sumfile.tag = opts_dict["tag"]
        nc_sumfile.compset = opts_dict["compset"]
        nc_sumfile.resolution = opts_dict["res"]
        nc_sumfile.machine = opts_dict["mach"]

        # Create variables
        if verbose:
            print("VERBOSE: Creating variables .....")
        v_lev = nc_sumfile.createVariable("lev", 'f8', ('nlev',))
        v_vars = nc_sumfile.createVariable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.createVariable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.createVariable("var2d", 'S1', ('nvars2d', 'str_size'))
        v_gm = nc_sumfile.createVariable("global_mean", 'f8', ('nvars', 'ens_size'))
        v_standardized_gm = nc_sumfile.createVariable("standardized_gm", 'f8', ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.createVariable('loadings_gm', 'f8', ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.createVariable('mu_gm', 'f8', ('nvars',))
        v_sigma_gm = nc_sumfile.createVariable('sigma_gm', 'f8', ('nvars',))
        v_sigma_scores_gm = nc_sumfile.createVariable('sigma_scores_gm', 'f8', ('nvars',))

        # Assign vars, var3d and var2d
        if verbose:
            print("VERBOSE: Assigning vars, var3d, and var2d .....")
        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        # Pad every name with blanks to the common str_size width.
        for name in all_var_names:
            tt = list(name)
            if len(tt) < str_size:
                tt.extend(list(' ') * (str_size - len(tt)))
            eq_all_var_names.append(tt)
        for name in d3_var_names:
            tt = list(name)
            if len(tt) < str_size:
                tt.extend(list(' ') * (str_size - len(tt)))
            eq_d3_var_names.append(tt)
        for name in d2_var_names:
            tt = list(name)
            if len(tt) < str_size:
                tt.extend(list(' ') * (str_size - len(tt)))
            eq_d2_var_names.append(tt)
        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if verbose:
            print("VERBOSE: Assigning time invariant metadata .....")
        lev_data = first_file.variables["lev"]
        v_lev[:] = lev_data[:]
    # end of rank=0 work

    # All:
    tslice = opts_dict['tslice']
    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True)
        var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # close first_file
    first_file.close()

    # Calculate global means #
    if me.get_rank() == 0 and verbose:
        print("VERBOSE: Calculating global means .....")
    if not opts_dict['cumul']:
        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(
            full_in_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict)
    if me.get_rank() == 0 and verbose:
        print("VERBOSE: Finished calculating global means .....")

    # gather to rank = 0
    if opts_dict['mpi_enable']:
        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)
            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index,
                                  (len(d3_var_names), len(full_in_files)))
            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)
            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index,
                                  (len(d2_var_names), len(full_in_files)))
            # gather variables to exclude (in pre_pca)
            var_list = gather_list(var_list, me)
        else:
            # NOTE(review): temp1/temp2 are not defined anywhere in this
            # function - this cumul branch would raise NameError. TODO confirm
            # against upstream before relying on --cumul.
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names)))

    # rank =0 : complete calculations for summary file
    if me.get_rank() == 0:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
        else:
            gmall = np.transpose(gmall[:, :])

        # PCA prep and calculation
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm, b_exit = \
            pyEnsLib.pre_PCA(gmall, all_var_names, var_list, me)

        # if PCA calc encounters an error, then remove the summary file and exit
        if b_exit:
            nc_sumfile.close()
            os.unlink(this_sumfile)
            print("STATUS: Summary could not be created.")
            sys.exit(2)

        v_gm[:, :] = gmall[:, :]
        v_standardized_gm[:, :] = standardized_global_mean[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:]
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        print("STATUS: Summary file is complete.")
        nc_sumfile.close()
def main(argv):
    """Driver for pyEnsSumPop: build a POP verification ensemble summary file.

    Parses command-line options, opens this rank's share of the POP history
    files in --indir, computes global means and RMSZ scores across the
    ensemble, and writes the results to the summary file named by --sumfile.

    NOTE(review): the original source was collapsed onto single physical lines
    (and written in Python 2); this is a reconstruction with conventional
    formatting and Python 3 print calls. The extent of the rank-0 file-creation
    block at a few points follows the obvious reading - confirm against
    upstream pyEnsSumPop.py.
    """
    print('Running pyEnsSumPop!')

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable zscoreonly nrand= rand seq= jsondir='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm1_2_0'
    opts_dict['compset'] = 'FC5'
    opts_dict['mach'] = 'yellowstone'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 3
    opts_dict['nmonth'] = 12
    opts_dict['npert'] = 40
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'ne30_ne30'
    opts_dict['sumfile'] = 'ens.pop.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['zscoreonly'] = False
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = '/glade/scratch/haiyingx/'

    # This creates the dictionary of input arguments
    print("before parseconfig")
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)
    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']
    if verbose:
        print(opts_dict)

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if opts_dict['jsonfile']:
        # Read in the included var list
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        # Longest variable name decides the string dimension of the file.
        # (Loop variable renamed from 'str' to avoid shadowing the builtin.)
        str_size = 0
        for name in Var3d:
            if str_size < len(name):
                str_size = len(name)
        for name in Var2d:
            if str_size < len(name):
                str_size = len(name)

    in_files = []
    if os.path.exists(input_dir):
        # Pick up the 'nrand' random number of input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict, opts_dict['nrand'])
        else:
            # Get the list of files
            in_files = sorted(os.listdir(input_dir))
            # Make sure we have enough
            num_files = len(in_files)
    else:
        print('Input directory: ', input_dir, ' not found')
        sys.exit(2)

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Partition the input file list
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Open the files in the input directory
    o_files = []
    for onefile in in_file_list:
        if os.path.isfile(input_dir + '/' + onefile):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            print("COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING....")
            sys.exit()
    print(in_file_list)

    # Store dimensions of the input fields
    if verbose:
        print("Getting spatial dimensions")
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    for key in input_dims:
        if key == "z_t":
            nlev = input_dims["z_t"]
        elif key == "nlon":
            nlon = input_dims["nlon"]
        elif key == "nlat":
            nlat = input_dims["nlat"]

    # Make sure all files have the same dimensions
    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (nlev != int(input_dims["z_t"]) or nlat != int(input_dims["nlat"])
                or nlon != int(input_dims["nlon"])):
            print("Dimension mismatch between ", in_file_list[0], 'and',
                  in_file_list[count], '!!!')
            sys.exit()

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]
    if verbose:
        print("Creating ", this_sumfile, " ...")

    if me.get_rank() == 0:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if verbose:
            print("Setting dimensions .....")
        nc_sumfile.create_dimension('nlat', nlat)
        nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('time', None)
        nc_sumfile.create_dimension('ens_size', opts_dict['npert'])
        nc_sumfile.create_dimension('nbin', opts_dict['nbin'])
        nc_sumfile.create_dimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.create_dimension('nvars3d', len(Var3d))
        nc_sumfile.create_dimension('nvars2d', len(Var2d))
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("Setting global attributes .....")
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'POP verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if verbose:
            print("Creating variables .....")
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        v_time = nc_sumfile.create_variable("time", 'd', ('time',))
        v_ens_avg3d = nc_sumfile.create_variable(
            "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.create_variable(
            "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.create_variable(
            "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.create_variable(
            "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_RMSZ = nc_sumfile.create_variable(
            "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))
        if not opts_dict['zscoreonly']:
            v_gm = nc_sumfile.create_variable(
                "global_mean", 'f', ('time', 'nvars', 'ens_size'))

        # Assign vars, var3d and var2d
        if verbose:
            print("Assigning vars, var3d, and var2d .....")
        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        all_var_names = list(Var3d)
        all_var_names += Var2d
        # Pad every name with blanks to the common str_size width.
        for name in all_var_names:
            tt = list(name)
            if len(tt) < str_size:
                tt.extend(list(' ') * (str_size - len(tt)))
            eq_all_var_names.append(tt)
        for name in Var3d:
            tt = list(name)
            if len(tt) < str_size:
                tt.extend(list(' ') * (str_size - len(tt)))
            eq_d3_var_names.append(tt)
        for name in Var2d:
            tt = list(name)
            if len(tt) < str_size:
                tt.extend(list(' ') * (str_size - len(tt)))
            eq_d2_var_names.append(tt)
        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if verbose:
            print("Assigning time invariant metadata .....")
        vars_dict = o_files[0].variables
        lev_data = vars_dict["z_t"]
        v_lev = lev_data

    # Time-varient metadata
    if verbose:
        print("Assigning time variant metadata .....")
    vars_dict = o_files[0].variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(),))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    # Calculate global mean, average, standard deviation
    if verbose:
        print("Calculating global means .....")
    is_SE = False
    tslice = 0
    if not opts_dict['zscoreonly']:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
            o_files, Var3d, Var2d, is_SE, False, opts_dict)
    if verbose:
        print("Finish calculating global means .....")

    # Calculate RMSZ scores
    if verbose:
        print("Calculating RMSZ scores .....")
    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = \
        pyEnsLib.calc_rmsz(o_files, Var3d, Var2d, is_SE, opts_dict)

    # Collect from all processors
    if opts_dict['mpi_enable']:
        # Gather the 3d variable results from all processors to the master processor
        # Gather global means 3d results
        if not opts_dict['zscoreonly']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(o_files)))
        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(
            zmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(o_files), nbin))
        ens_avg3d = pyEnsLib.gather_npArray_pop(
            ens_avg3d, me, (me.get_size(), len(Var3d), nlev, nlat, nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(
            ens_avg2d, me, (me.get_size(), len(Var2d), nlat, nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(
            ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, nlat, nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(
            ens_stddev2d, me, (me.get_size(), len(Var2d), nlat, nlon))

    # Assign to file:
    if me.get_rank() == 0:
        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]
        if not opts_dict['zscoreonly']:
            v_gm[:, :, :] = gmall[:, :, :]
        print("All done")
v_var2d[:] = eq_d2_var_names[:] # Time-invarient metadata if me.get_rank() == 0 and (verbose == True): print "Assigning time invariant metadata ....." lev_data = vars_dict["lev"] v_lev = lev_data # Form ensembles, each missing one member; compute RMSZs and global means #for each variable, we also do max norm also (currently done in pyStats) tslice = opts_dict['tslice'] if not opts_dict['cumul']: # Partition the var list var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True) var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True) else: var3_list_loc=d3_var_names var2_list_loc=d2_var_names # Calculate global means # if me.get_rank() == 0 and (verbose == True): print "Calculating global means ....." if not opts_dict['cumul']: gm3d,gm2d,var_list = pyEnsLib.generate_global_mean_for_summary(o_files,var3_list_loc,var2_list_loc , is_SE, False,opts_dict) if me.get_rank() == 0 and (verbose == True): print "Finish calculating global means ....." # Calculate RMSZ scores if (not opts_dict['gmonly']) | (opts_dict['cumul']):
def main(argv):
    """pyEnsSumPop driver: build a POP ensemble summary NetCDF file.

    Parses command-line options, partitions the ensemble member history
    files across MPI ranks (ideally one rank per month), computes RMSZ
    scores and ensemble averages / standard deviations, gathers the
    results on rank 0, and writes them to the summary file.

    :param argv: command-line argument list (sys.argv[1:])
    """
    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable mpi_disable nrand= rand seq= jsondir= esize='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_1_0'
    opts_dict['compset'] = 'G'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 1
    opts_dict['nmonth'] = 12
    opts_dict['esize'] = 40
    opts_dict['npert'] = 0  #for backwards compatible
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'T62_g17'
    opts_dict['sumfile'] = 'pop.ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = 'pop_ensemble.json'
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = True
    opts_dict['mpi_disable'] = False
    #opts_dict['zscoreonly'] = True
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = './'

    # This creates the dictionary of input arguments
    #print "before parseconfig"
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if opts_dict['mpi_disable']:
        opts_dict['mpi_enable'] = False

    #still have npert for backwards compatibility - check if it was set
    #and override esize
    if opts_dict['npert'] > 0:
        user_size = opts_dict['npert']
        print(
            'WARNING: User specified value for --npert will override --esize. Please consider using --esize instead of --npert in the future.'
        )
        opts_dict['esize'] = user_size

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(False)

    if opts_dict['jsonfile']:
        # Read in the included var list; str_size becomes the length of the
        # longest variable name (used for the NetCDF character dimension).
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        str_size = 0
        # NOTE(review): the loop variable shadows the builtin 'str' — kept as-is.
        for str in Var3d:
            if str_size < len(str):
                str_size = len(str)
        for str in Var2d:
            if str_size < len(str):
                str_size = len(str)

    if me.get_rank() == 0:
        print('STATUS: Running pyEnsSumPop!')
        if verbose:
            print("VERBOSE: opts_dict = ")
            print(opts_dict)

    in_files = []
    if (os.path.exists(input_dir)):
        # Pick up the 'nrand' random number of input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict,
                                                  opts_dict['nrand'])
        else:
            # Get the list of files
            in_files_temp = os.listdir(input_dir)
            in_files = sorted(in_files_temp)
        num_files = len(in_files)
    else:
        if me.get_rank() == 0:
            print('ERROR: Input directory: ', input_dir,
                  ' not found => EXITING....')
        sys.exit(2)

    #make sure we have enough files
    files_needed = opts_dict['nmonth'] * opts_dict['esize'] * opts_dict['nyear']
    if (num_files < files_needed):
        if me.get_rank() == 0:
            print(
                'ERROR: Input directory does not contain enough files (must be esize*nyear*nmonth = ',
                files_needed, ' ) and it has only ', num_files, ' files).')
        sys.exit(2)

    #Partition the input file list (ideally we have one processor per month)
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Check the files in the input directory
    full_in_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print('VERBOSE: Input files are:')

    for onefile in in_file_list:
        fname = input_dir + '/' + onefile
        if opts_dict['verbose']:
            print("my_rank = ", me.get_rank(), " ", fname)
        if (os.path.isfile(fname)):
            full_in_files.append(fname)
        else:
            print("ERROR: Could not locate file: " + fname + " => EXITING....")
            sys.exit()

    #open just the first file (all procs)
    first_file = nc.Dataset(full_in_files[0], "r")

    # Store dimensions of the input fields
    if (verbose == True) and me.get_rank() == 0:
        print("VERBOSE: Getting spatial dimensions")
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = first_file.dimensions
    ndims = len(input_dims)

    # Make sure all files have the same dimensions
    if (verbose == True) and me.get_rank() == 0:
        print("VERBOSE: Checking dimensions ...")
    for key in input_dims:
        if key == "z_t":
            nlev = len(input_dims["z_t"])
        elif key == "nlon":
            nlon = len(input_dims["nlon"])
        elif key == "nlat":
            nlat = len(input_dims["nlat"])

    # Rank 0: prepare new summary ensemble file
    this_sumfile = opts_dict["sumfile"]
    if (me.get_rank() == 0):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        if verbose:
            print("VERBOSE: Creating ", this_sumfile, " ...")
        nc_sumfile = nc.Dataset(this_sumfile, "w", format="NETCDF4_CLASSIC")

        # Set dimensions
        if verbose:
            print("VERBOSE: Setting dimensions .....")
        nc_sumfile.createDimension('nlat', nlat)
        nc_sumfile.createDimension('nlon', nlon)
        nc_sumfile.createDimension('nlev', nlev)
        nc_sumfile.createDimension('time', None)
        nc_sumfile.createDimension('ens_size', opts_dict['esize'])
        nc_sumfile.createDimension('nbin', opts_dict['nbin'])
        nc_sumfile.createDimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.createDimension('nvars3d', len(Var3d))
        nc_sumfile.createDimension('nvars2d', len(Var2d))
        nc_sumfile.createDimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("VERBOSE: Setting global attributes .....")
        nc_sumfile.creation_date = now
        nc_sumfile.title = 'POP verification ensemble summary file'
        nc_sumfile.tag = opts_dict["tag"]
        nc_sumfile.compset = opts_dict["compset"]
        nc_sumfile.resolution = opts_dict["res"]
        nc_sumfile.machine = opts_dict["mach"]

        # Create variables
        if verbose:
            print("VERBOSE: Creating variables .....")
        v_lev = nc_sumfile.createVariable("z_t", 'f', ('nlev', ))
        v_vars = nc_sumfile.createVariable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.createVariable("var3d", 'S1',
                                            ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.createVariable("var2d", 'S1',
                                            ('nvars2d', 'str_size'))
        v_time = nc_sumfile.createVariable("time", 'd', ('time', ))
        v_ens_avg3d = nc_sumfile.createVariable(
            "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.createVariable(
            "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.createVariable(
            "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.createVariable(
            "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_RMSZ = nc_sumfile.createVariable(
            "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))

        # Assign vars, var3d and var2d
        if verbose:
            print("VERBOSE: Assigning vars, var3d, and var2d .....")

        # Pad every variable name with blanks to str_size characters so the
        # names fit the fixed-width 'S1' character arrays.
        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        all_var_names = list(Var3d)
        all_var_names += Var2d
        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(Var3d)
        for i in range(l_eq):
            tt = list(Var3d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(Var2d)
        for i in range(l_eq):
            tt = list(Var2d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if verbose:
            print("VERBOSE: Assigning time invariant metadata .....")
        vars_dict = first_file.variables
        lev_data = vars_dict["z_t"]
        v_lev[:] = lev_data[:]
    #end of rank 0

    #All:
    # Time-varient metadata
    if verbose:
        if me.get_rank() == 0:
            print("VERBOSE: Assigning time variant metadata .....")
    vars_dict = first_file.variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(), ))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    #Assign zero values to first time slice of RMSZ and avg and stddev for 2d & 3d
    #in case of a calculation problem before finishing
    e_size = opts_dict['esize']
    b_size = opts_dict['nbin']
    z_ens_avg3d = np.zeros((len(Var3d), nlev, nlat, nlon), dtype=np.float32)
    z_ens_stddev3d = np.zeros((len(Var3d), nlev, nlat, nlon), dtype=np.float32)
    z_ens_avg2d = np.zeros((len(Var2d), nlat, nlon), dtype=np.float32)
    z_ens_stddev2d = np.zeros((len(Var2d), nlat, nlon), dtype=np.float32)
    z_RMSZ = np.zeros(((len(Var3d) + len(Var2d)), e_size, b_size),
                      dtype=np.float32)

    #rank 0 (put zero values in summary file)
    if me.get_rank() == 0:
        v_RMSZ[0, :, :, :] = z_RMSZ[:, :, :]
        v_ens_avg3d[0, :, :, :, :] = z_ens_avg3d[:, :, :, :]
        v_ens_stddev3d[0, :, :, :, :] = z_ens_stddev3d[:, :, :, :]
        v_ens_avg2d[0, :, :, :] = z_ens_avg2d[:, :, :]
        v_ens_stddev2d[0, :, :, :] = z_ens_stddev2d[:, :, :]

    #close file[0]
    first_file.close()

    # Calculate RMSZ scores
    if (verbose == True and me.get_rank() == 0):
        print("VERBOSE: Calculating RMSZ scores .....")
    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d = pyEnsLib.calc_rmsz(
        full_in_files, Var3d, Var2d, opts_dict)
    if (verbose == True and me.get_rank() == 0):
        print("VERBOSE: Finished with RMSZ scores .....")

    # Collect from all processors
    if opts_dict['mpi_enable']:
        # Gather the 3d variable results from all processors to the master processor
        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(
            zmall, me,
            (me.get_size(), len(Var3d) + len(Var2d), len(full_in_files), nbin))
        ens_avg3d = pyEnsLib.gather_npArray_pop(
            ens_avg3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(ens_avg2d, me,
                                                (me.get_size(), len(Var2d),
                                                 (nlat), nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(
            ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(ens_stddev2d, me,
                                                   (me.get_size(), len(Var2d),
                                                    (nlat), nlon))

    # Assign to summary file (only rank 0 holds the open NetCDF handle):
    if me.get_rank() == 0:
        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]
        print("STATUS: PyEnsSumPop has completed.")
        nc_sumfile.close()
def main(argv):
    """pyCECT driver (Python 2): validate new run(s) against an ensemble summary.

    For POP ensembles (--popens) computes and writes raw Z-scores; for CAM
    ensembles compares RMSZ scores, global means, and PCA scores against the
    distributions stored in the ensemble summary file.

    :param argv: command-line argument list (sys.argv[1:])
    """
    # Get command line stuff and store in a dictionary
    s = """verbose sumfile= indir= input_globs= tslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVarTest popens jsonfile= mpi_enable nbin= minrange= maxrange= outfile= casejson= npick= pepsi_gm pop_tol= web_enabled pop_threshold= prn_std_mean fIndex= lev= eet= json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['prn_std_mean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']

    #some mods for POP-ECT
    if popens == True:
        opts_dict['tslice'] = 0
        opts_dict['numRunFile'] = 1
        opts_dict['eet'] = 0
        opts_dict['mpi_enable'] = False
    #print opts_dict

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']
    if me.get_rank() == 0:
        print '--------pyCECT--------'
        print ' '
        print dt.strftime("%A, %d. %B %Y %I:%M%p")
        print ' '
        if not opts_dict['web_enabled']:
            print 'Ensemble summary file = ' + opts_dict['sumfile']
        print ' '
        print 'Testcase file directory = ' + opts_dict['indir']
        print ' '
        print ' '

    # Ensure sensible EET value
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    ifiles = []
    in_files = []
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
            in_files_first = result['not_pick_files']
            in_files = random.sample(in_files_first, opts_dict['npick'])
            print 'Testcase files:'
            print '\n'.join(in_files)
    elif opts_dict['json_case']:
        # Select input files by the case names listed in a json file.
        json_file = opts_dict['json_case']
        if (os.path.exists(json_file)):
            fd = open(json_file)
            metainfo = json.load(fd)
            if 'CaseName' in metainfo:
                casename = metainfo['CaseName']
                if (os.path.exists(opts_dict['indir'])):
                    for name in casename:
                        wildname = '*.' + name + '.*'
                        full_glob_str = os.path.join(opts_dict['indir'],
                                                     wildname)
                        glob_file = glob.glob(full_glob_str)
                        in_files.extend(glob_file)
        else:
            print "ERROR: " + opts_dict['json_case'] + " does not exist."
            sys.exit()
        print "in_files=", in_files
    else:
        wildname = '*' + str(opts_dict['input_globs']) + '*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            glob_files = glob.glob(full_glob_str)
            in_files.extend(glob_files)
            num_file = len(in_files)
            if num_file == 0:
                print "ERROR: no matching files for wildcard=" + wildname + " found in specified --indir"
                sys.exit()
            else:
                print "Found " + str(
                    num_file) + " matching files in specified --indir"
            if opts_dict['numRunFile'] > num_file:
                print "ERROR: more files needed (" + str(
                    opts_dict['numRunFile']
                ) + ") than available in the indir (" + str(num_file) + ")."
                sys.exit()

    #in_files_temp=os.listdir(opts_dict['indir'])
    in_files.sort()
    #print in_files

    if popens:
        #Partition the input file list
        in_files_list = me.partition(in_files, func=EqualStride(),
                                     involved=True)
    else:
        # Random pick non pop files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)
    #in_files_list=in_files

    # Open every selected test file.
    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(Nio.open_file(frun_temp, "r"))
        else:
            print "ERROR: COULD NOT LOCATE FILE " + frun_temp
            sys.exit()

    if opts_dict['web_enabled']:
        # Locate the matching ensemble summary file under the validation tree.
        if len(opts_dict['sumfile']) == 0:
            opts_dict[
                'sumfile'] = '/glade/p/cesmdata/cseg/inputdata/validation/'
        opts_dict['sumfile'], machineid, compiler = pyEnsLib.search_sumfile(
            opts_dict, ifiles)
        if len(machineid) != 0 and len(compiler) != 0:
            print ' '
            print 'Validation file : machineid = ' + machineid + ', compiler = ' + compiler
            print 'Found summary file : ' + opts_dict['sumfile']
            print ' '
        else:
            print 'Warning: machine and compiler are unknown'

    if popens:
        # Read in the included var list
        if not os.path.exists(opts_dict['jsonfile']):
            print "ERROR: POP-ECT requires the specification of a valid json file via --jsonfile."
            sys.exit()
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print ' '
        print 'Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol'])
        print 'ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold'])
        zmall, n_timeslice = pyEnsLib.pop_compare_raw_score(
            opts_dict, ifiles, me.get_rank(), Var3d, Var2d)
        #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0)
        np.set_printoptions(threshold=np.nan)
        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(
                zmall, me,
                (me.get_size(), len(Var3d) + len(Var2d), len(ifiles),
                 opts_dict['nbin']))
        if me.get_rank() == 0:
            fout = open(opts_dict['outfile'], "w")
            for i in range(me.get_size()):
                for j in zmall[i]:
                    np.savetxt(fout, j, fmt='%-7.2e')
    #cam
    else:
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum, std_gm = pyEnsLib.read_ensemble_summary(
            opts_dict['sumfile'])

        if len(ens_rmsz) == 0:
            gmonly = True

        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}
        if not gmonly:
            for k, v in ens_rmsz.iteritems():
                pyEnsLib.addvariables(variables, k, 'zscoreRange', v)
        for k, v in ens_gm.iteritems():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)

        # Get 3d variable name list and 2d variable name list separately
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

        if (is_SE ^ is_SE_sum):
            print 'Warning: please note the ensemble summary file is different from the testing files, they use different grids'

        # Compare the new run and the ensemble summary file to get rmsz score
        results = {}
        countzscore = np.zeros(len(ifiles), dtype=np.int32)
        countgm = np.zeros(len(ifiles), dtype=np.int32)
        if not gmonly:
            for fcount, fid in enumerate(ifiles):
                otimeSeries = fid.variables
                for var_name in ens_var_name:
                    orig = otimeSeries[var_name]
                    Zscore, has_zscore = pyEnsLib.calculate_raw_score(
                        var_name, orig[opts_dict['tslice']], npts3d, npts2d,
                        ens_avg, ens_stddev, is_SE, opts_dict, 0, 0, 0)
                    if has_zscore:
                        # Add the new run rmsz zscore to the dictionary "results"
                        pyEnsLib.addresults(results, 'zscore', Zscore,
                                            var_name, 'f' + str(fcount))

            # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
            for fcount, fid in enumerate(ifiles):
                countzscore[fcount] = pyEnsLib.evaluatestatus(
                    'zscore', 'zscoreRange', variables, 'ens', results,
                    'f' + str(fcount))

        # Calculate the new run global mean
        mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary(
            ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'],
            opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i],
                                    ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount, fid in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange',
                                                      variables, 'gm', results,
                                                      'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores, var_list, comp_std_gm = pyEnsLib.standardized(
            means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict,
            ens_avg, me)
        run_index, decision = pyEnsLib.comparePCAscores(
            ifiles, new_scores, sigma_scores_gm, opts_dict, me)

        # If there is failure, plot out standardized mean and compared standardized mean in box plots
        if opts_dict['prn_std_mean'] and decision == 'FAILED':
            import seaborn as sns
            # Bucket variables by how many of the run values fall outside the
            # ensemble 99% band (c/d counters) or one-sided quartile bands (p/q).
            category = {
                "all_outside99": [],
                "two_outside99": [],
                "one_outside99": [],
                "all_oneside_outside1QR": []
            }
            b = list(pyEnsLib.chunk(ens_var_name, 10))
            for f, alist in enumerate(b):
                for fc, avar in enumerate(alist):
                    dist_995 = np.percentile(std_gm[avar], 99.5)
                    dist_75 = np.percentile(std_gm[avar], 75)
                    dist_25 = np.percentile(std_gm[avar], 25)
                    dist_05 = np.percentile(std_gm[avar], 0.5)
                    c = 0
                    d = 0
                    p = 0
                    q = 0
                    for i in range(comp_std_gm[f + fc].size):
                        if comp_std_gm[f + fc][i] > dist_995:
                            c = c + 1
                        elif comp_std_gm[f + fc][i] < dist_05:
                            d = d + 1
                        elif (comp_std_gm[f + fc][i] < dist_995
                              and comp_std_gm[f + fc][i] > dist_75):
                            p = p + 1
                        elif (comp_std_gm[f + fc][i] > dist_05
                              and comp_std_gm[f + fc][i] < dist_25):
                            q = q + 1
                    if c == 3 or d == 3:
                        category["all_outside99"].append((avar, f + fc))
                    elif c == 2 or d == 2:
                        category["two_outside99"].append((avar, f + fc))
                    elif c == 1 or d == 1:
                        category["one_outside99"].append((avar, f + fc))
                    if p == 3 or q == 3:
                        category["all_oneside_outside1QR"].append(
                            (avar, f + fc))
            part_name = opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name = opts_dict['indir'].split('/')[-2]
            for key in sorted(category):
                list_array = []
                list_array2 = []
                list_var = []
                value = category[key]
                print "value len=", key, len(value)
                for each_var in value:
                    list_array.append(std_gm[each_var[0]])
                    list_array2.append(comp_std_gm[each_var[1]])
                    list_var.append(each_var[0])
                if len(value) != 0:
                    ax = sns.boxplot(data=list_array,
                                     whis=[0.5, 99.5],
                                     fliersize=0.0)
                    sns.stripplot(data=list_array2, jitter=True, color="r")
                    sns.plt.xticks(range(len(list_array)),
                                   list_var,
                                   fontsize=8,
                                   rotation=-45)
                    if decision == 'FAILED':
                        sns.plt.savefig(part_name + "_" + key + "_fail.png")
                    else:
                        sns.plt.savefig(part_name + "_" + key + "_pass.png")
                    sns.plt.clf()
        # NOTE(review): the block below is commented-out code kept as a
        # triple-quoted string by the original authors.
        '''
        if len(run_index)>0:
            json_file=opts_dict['json_case']
            if (os.path.exists(json_file)):
                fd=open(json_file)
                metainfo=json.load(fd)
                caseindex=metainfo['CaseIndex']
                enspath=str(metainfo['EnsPath'][0])
                #print caseindex
                if (os.path.exists(enspath)):
                    i=0
                    comp_file=[]
                    search = '\.[0-9]{3}\.'
                    for name in in_files_list:
                        s=re.search(search,name)
                        in_files_index=s.group(0)
                        if in_files_index[1:4] in caseindex:
                            ens_index=str(caseindex[in_files_index[1:4]])
                            wildname='*.'+ens_index+'.*'
                            full_glob_str=os.path.join(enspath,wildname)
                            glob_file=glob.glob(full_glob_str)
                            comp_file.extend(glob_file)
                    print "comp_file=",comp_file
                    pyEnsLib.plot_variable(in_files_list,comp_file,opts_dict,var_list,run_index,me)
        '''

        # Print out
        if opts_dict['printVarTest']:
            print '*********************************************** '
            print 'Variable-based testing (for reference only - not used to determine pass/fail)'
            print '*********************************************** '
            for fcount, fid in enumerate(ifiles):
                print ' '
                print 'Run ' + str(fcount + 1) + ":"
                print ' '
                if not gmonly:
                    print '***' + str(countzscore[fcount]), " of " + str(
                        len(ens_var_name)
                    ) + ' variables are outside of ensemble RMSZ distribution***'
                    pyEnsLib.printsummary(results, 'ens', 'zscore',
                                          'zscoreRange', (fcount), variables,
                                          'RMSZ')
                    print ' '
                print '***' + str(countgm[fcount]), " of " + str(
                    len(ens_var_name)
                ) + ' variables are outside of ensemble global mean distribution***'
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange',
                                      fcount, variables, 'global mean')
                print ' '
                print '----------------------------------------------------------------------------'

    if me.get_rank() == 0:
        print ' '
        print "Testing complete."
        print ' '
def main(argv):
    """Legacy pyCECT driver: compare test run(s) against an ensemble summary.

    NOTE(review): this chunk arrived with mangled statement order around the
    module-level ``__main__`` guard; the trailing "Testing complete." prints
    are placed at the end of main() and the guard after the function, which
    is the only syntactically coherent reading — confirm against upstream.

    :param argv: command-line argument list (sys.argv[1:])
    """
    # Get command line stuff and store in a dictionary
    s='verbose sumfile= indir= input_globs= tslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVarTest popens jsonfile= mpi_enable nbin= minrange= maxrange= outfile= casejson= npick= pepsi_gm test_failure pop_tol= pop_threshold='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv,"h",optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,caller,opts_dict)
    popens = opts_dict['popens']

    # Print out timestamp, input ensemble file and new run directory
    dt=datetime.now()
    verbose = opts_dict['verbose']
    print('--------pyCECT--------')
    print(' ')
    print(dt.strftime("%A, %d. %B %Y %I:%M%p"))
    print(' ')
    print('Ensemble summary file = '+opts_dict['sumfile'])
    print(' ')
    print('Testcase file directory = '+opts_dict['indir'] )
    print(' ')
    print(' ')

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])

    ifiles=[]
    in_files=[]
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result=json.load(fin)
            in_files_first=result['not_pick_files']
            in_files=random.sample(in_files_first,opts_dict['npick'])
            print('Testcase files:')
            print('\n'.join(in_files))
    else:
        wildname='*'+opts_dict['input_globs']+'*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str=os.path.join(opts_dict['indir'],wildname)
            glob_files=glob.glob(full_glob_str)
            in_files.extend(glob_files)

    #in_files_temp=os.listdir(opts_dict['indir'])
    in_files.sort()

    if popens:
        #Partition the input file list
        in_files_list=me.partition(in_files,func=EqualStride(),involved=True)
    else:
        # Random pick non pop files
        in_files_list=pyEnsLib.Random_pickup(in_files,opts_dict)

    # Open every selected test file.
    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp=frun_file
        else:
            frun_temp=opts_dict['indir']+'/'+frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(Nio.open_file(frun_temp,"r"))
        else:
            print("COULD NOT LOCATE FILE " +frun_temp+" EXISTING")
            sys.exit()

    if popens:
        # Read in the included var list
        Var2d,Var3d=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ESP')
        print(' ')
        print('Z-score tolerance = '+'{:3.2f}'.format(opts_dict['pop_tol']))
        print('ZPR = '+'{:.2%}'.format(opts_dict['pop_threshold']))
        zmall,n_timeslice=pyEnsLib.compare_raw_score(opts_dict,ifiles,me.get_rank(),Var3d,Var2d)
        #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0)
        np.set_printoptions(threshold=np.nan)
        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(zmall,me,(me.get_size(),len(Var3d)+len(Var2d),len(ifiles),opts_dict['nbin']))
        if me.get_rank()==0:
            fout = open(opts_dict['outfile'],"w")
            for i in range(me.get_size()):
                for j in zmall[i]:
                    np.savetxt(fout,j,fmt='%-7.2e')
    else:
        # Read all variables from the ensemble summary file
        ens_var_name,ens_avg,ens_stddev,ens_rmsz,ens_gm,num_3d,mu_gm,sigma_gm,loadings_gm,sigma_scores_gm,is_SE_sum=pyEnsLib.read_ensemble_summary(opts_dict['sumfile'])

        if len(ens_rmsz) == 0:
            gmonly = True

        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables={}
        if not gmonly:
            for k,v in ens_rmsz.iteritems():
                pyEnsLib.addvariables(variables,k,'zscoreRange',v)
        for k,v in ens_gm.iteritems():
            pyEnsLib.addvariables(variables,k,'gmRange',v)

        # Get 3d variable name list and 2d variable name list seperately
        var_name3d=[]
        var_name2d=[]
        for vcount,v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d,npts2d,is_SE=pyEnsLib.get_ncol_nlev(ifiles[0])

        if (is_SE ^ is_SE_sum):
            print('Warning: please note the ensemble summary file is different from the testing files, they use different grids')

        # Compare the new run and the ensemble summary file to get rmsz score
        results={}
        countzscore=np.zeros(len(ifiles),dtype=np.int32)
        countgm=np.zeros(len(ifiles),dtype=np.int32)
        if not gmonly:
            for fcount,fid in enumerate(ifiles):
                otimeSeries = fid.variables
                for var_name in ens_var_name:
                    orig=otimeSeries[var_name]
                    Zscore,has_zscore=pyEnsLib.calculate_raw_score(var_name,orig[opts_dict['tslice']],npts3d,npts2d,ens_avg,ens_stddev,is_SE,opts_dict,0,0,0)
                    if has_zscore:
                        # Add the new run rmsz zscore to the dictionary "results"
                        pyEnsLib.addresults(results,'zscore',Zscore,var_name,'f'+str(fcount))

            # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
            for fcount,fid in enumerate(ifiles):
                countzscore[fcount]=pyEnsLib.evaluatestatus('zscore','zscoreRange',variables,'ens',results,'f'+str(fcount))

        # Calculate the new run global mean
        mean3d,mean2d=pyEnsLib.generate_global_mean_for_summary(ifiles,var_name3d,var_name2d,is_SE,opts_dict['pepsi_gm'],opts_dict)
        means=np.concatenate((mean3d,mean2d),axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results,'means',means[j][i],ens_var_name[j],'f'+str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount,fid in enumerate(ifiles):
            countgm[fcount]=pyEnsLib.evaluatestatus('means','gmRange',variables,'gm',results,'f'+str(fcount))

        # Calculate the PCA scores of the new run
        new_scores=pyEnsLib.standardized(means,mu_gm,sigma_gm,loadings_gm)
        pyEnsLib.comparePCAscores(ifiles,new_scores,sigma_scores_gm,opts_dict)

        # Print out
        if opts_dict['printVarTest']:
            print('*********************************************** ')
            print('Variable-based testing (for reference only - not used to determine pass/fail)')
            print('*********************************************** ')
            for fcount,fid in enumerate(ifiles):
                print(' ')
                print('Run '+str(fcount+1)+":")
                print(' ')
                if not gmonly:
                    print('***'+str(countzscore[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble RMSZ distribution***')
                    pyEnsLib.printsummary(results,'ens','zscore','zscoreRange',(fcount),variables,'RMSZ')
                    print(' ')
                print('***'+str(countgm[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble global mean distribution***')
                pyEnsLib.printsummary(results,'gm','means','gmRange',fcount,variables,'global mean')
                print(' ')
                print('----------------------------------------------------------------------------')

    print(' ')
    print("Testing complete.")


if __name__ == "__main__":
    main(sys.argv[1:])
def testPartitionListInvolved(self):
    """Serial and parallel partition of a list with involved=True must agree."""
    values = range(5 + self.rank)
    serial_part = self.scomm.partition(values, func=EqualStride(), involved=True)
    parallel_part = self.pcomm.partition(values, func=EqualStride(), involved=True)
    self.assertEqual(serial_part, parallel_part)
def testPartitionList(self):
    """Serial and parallel partition of a list must produce the same result."""
    values = range(5 + self.rank)
    serial_part = self.scomm.partition(values, func=EqualStride())
    parallel_part = self.pcomm.partition(values, func=EqualStride())
    self.assertEqual(serial_part, parallel_part)
def main(argv):
    """pyCECT driver: compare new test run(s) against an ensemble summary file.

    Parses command-line options, locates the test-case files, then either runs
    the POP-ECT comparison (popens) or the CAM global-mean/PCA comparison.
    Exits via sys.exit() on any invalid input.  NOTE(review): indentation was
    reconstructed from a whitespace-mangled source; nesting follows the
    upstream PyCECT layout — confirm against the original file.
    """
    # Get command line stuff and store in a dictionary
    s = """verbose sumfile= indir= input_globs= tslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVars popens jsonfile= mpi_enable nbin= minrange= maxrange= outfile= casejson= npick= pepsi_gm pop_tol= web_enabled pop_threshold= printStdMean fIndex= lev= eet= saveResults json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVars'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['printStdMean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False
    opts_dict['saveResults'] = False

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']

    # some mods for POP-ECT: POP runs use slice 0, a single file, and no MPI
    if popens == True:
        opts_dict['tslice'] = 0
        opts_dict['numRunFile'] = 1
        opts_dict['eet'] = 0
        opts_dict['mpi_enable'] = False

    # Create a mpi simplecomm object (serial comm when mpi is disabled)
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']
    if me.get_rank() == 0:
        print(' ')
        print('--------pyCECT--------')
        print(' ')
        print(dt.strftime("%A, %d. %B %Y %I:%M%p"))
        print(' ')
        if not opts_dict['web_enabled']:
            print('Ensemble summary file = ' + opts_dict['sumfile'])
        print(' ')
        print('Testcase file directory = ' + opts_dict['indir'])
        print(' ')
        print(' ')

    # make sure these are valid
    if opts_dict['web_enabled'] == False and os.path.isfile(opts_dict['sumfile']) == False:
        print("ERROR: Summary file name is not valid.")
        sys.exit()
    if os.path.exists(opts_dict['indir']) == False:
        print("ERROR: --indir path is not valid.")
        sys.exit()

    # Ensure sensible EET value (cannot exceed the number of run files)
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    ifiles = []
    in_files = []
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
            in_files_first = result['not_pick_files']
            in_files = random.sample(in_files_first, opts_dict['npick'])
            print('Testcase files:')
            print('\n'.join(in_files))
    elif opts_dict['json_case']:
        # Case names come from a JSON metadata file; glob matching files per name
        json_file = opts_dict['json_case']
        if (os.path.exists(json_file)):
            fd = open(json_file)  # NOTE(review): fd is never closed — confirm intent
            metainfo = json.load(fd)
            if 'CaseName' in metainfo:
                casename = metainfo['CaseName']
                if (os.path.exists(opts_dict['indir'])):
                    for name in casename:
                        wildname = '*.' + name + '.*'
                        full_glob_str = os.path.join(opts_dict['indir'], wildname)
                        glob_file = glob.glob(full_glob_str)
                        in_files.extend(glob_file)
        else:
            print("ERROR: " + opts_dict['json_case'] + " does not exist.")
            sys.exit()
        print("in_files=", in_files)
    else:
        wildname = '*' + str(opts_dict['input_globs']) + '*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            glob_files = glob.glob(full_glob_str)
            in_files.extend(glob_files)
            num_file = len(in_files)
            if num_file == 0:
                print("ERROR: no matching files for wildcard=" + wildname + " found in specified --indir")
                sys.exit()
            else:
                print("Found " + str(num_file) + " matching files in specified --indir")
            if opts_dict['numRunFile'] > num_file:
                print("ERROR: more files needed (" + str(opts_dict['numRunFile']) + ") than available in the indir (" + str(num_file) + ").")
                sys.exit()

    in_files.sort()
    # print in_files

    if popens:
        # Partition the input file list across ranks
        in_files_list = me.partition(in_files, func=EqualStride(), involved=True)
    else:
        # Random pick cam files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)

    # Resolve each selected file to a full path and verify it exists
    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(frun_temp)
        else:
            print("ERROR: COULD NOT LOCATE FILE " + frun_temp)
            sys.exit()

    if opts_dict['web_enabled']:
        if len(opts_dict['sumfile']) == 0:
            # Default validation-file search root for web mode
            opts_dict['sumfile'] = '/glade/p/cesmdata/cseg/inputdata/validation/'
        # need to open ifiles
        opts_dict['sumfile'], machineid, compiler = pyEnsLib.search_sumfile(opts_dict, ifiles)
        if len(machineid) != 0 and len(compiler) != 0:
            print(' ')
            print('Validation file : machineid = ' + machineid + ', compiler = ' + compiler)
            print('Found summary file : ' + opts_dict['sumfile'])
            print(' ')
        else:
            print('Warning: machine and compiler are unknown')

    if popens:
        # ----- POP-ECT path -----
        # Read in the included var list
        if not os.path.exists(opts_dict['jsonfile']):
            print("ERROR: POP-ECT requires the specification of a valid json file via --jsonfile.")
            sys.exit()
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print(' ')
        print('Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol']))
        print('ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold']))
        zmall, n_timeslice = pyEnsLib.pop_compare_raw_score(opts_dict, ifiles, me.get_rank(), Var3d, Var2d)
        np.set_printoptions(threshold=sys.maxsize)
        if opts_dict['mpi_enable']:
            # Gather per-rank z-score histograms onto the manager and dump them
            zmall = pyEnsLib.gather_npArray_pop(zmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(ifiles), opts_dict['nbin']))
            if me.get_rank() == 0:
                fout = open(opts_dict['outfile'], "w")
                for i in range(me.get_size()):
                    for j in zmall[i]:
                        np.savetxt(fout, j, fmt='%-7.2e')
    # cam
    else:
        # ----- CAM-ECT path -----
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum, std_gm, std_gm_array, str_size = pyEnsLib.read_ensemble_summary(opts_dict['sumfile'])

        # Only doing gm
        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}
        for k, v in ens_gm.items():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)
        # Get 3d variable name list and 2d variable name list separately
        # (summary lists the 3d vars first)
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

        # XOR: warn when exactly one of summary/test files is spectral-element
        if (is_SE ^ is_SE_sum):
            print('Warning: please note the ensemble summary file is different from the testing files: they use different grids')

        # Compare the new run and the ensemble summary file
        results = {}
        countgm = np.zeros(len(ifiles), dtype=np.int32)

        # Calculate the new run global mean
        mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary(ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'], opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i], ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount, fid in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange', variables, 'gm', results, 'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores, var_list, comp_std_gm = pyEnsLib.standardized(means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict, ens_avg, me)
        run_index, decision = pyEnsLib.comparePCAscores(ifiles, new_scores, sigma_scores_gm, opts_dict, me)

        # If there is failure, plot out standardized mean and compared standardized mean in box plots
        # if opts_dict['printStdMean'] and decision == 'FAILED':
        if opts_dict['printStdMean']:
            # Plotting imports are deferred so normal runs need no plotting stack
            import seaborn as sns
            import matplotlib
            matplotlib.use('Agg')  # don't display figures
            import matplotlib.pyplot as plt

            print(" ")
            print('***************************************************************************** ')
            print('Test run variable standardized means (for reference only - not used to determine pass/fail)')
            print('***************************************************************************** ')
            print(" ")

            # Bucket variables by how many of the test runs fall outside the
            # ensemble distribution percentiles
            category = {"all_outside99": [], "two_outside99": [], "one_outside99": [], "all_oneside_outside1QR": []}
            b = list(pyEnsLib.chunk(ens_var_name, 10))
            for f, alist in enumerate(b):
                for fc, avar in enumerate(alist):
                    # Percentile bounds of the ensemble's standardized gm for this var
                    dist_995 = np.percentile(std_gm[avar], 99.5)
                    dist_75 = np.percentile(std_gm[avar], 75)
                    dist_25 = np.percentile(std_gm[avar], 25)
                    dist_05 = np.percentile(std_gm[avar], 0.5)
                    c = 0  # above 99.5th
                    d = 0  # below 0.5th
                    p = 0  # between 75th and 99.5th
                    q = 0  # between 0.5th and 25th
                    for i in range(comp_std_gm[f + fc].size):
                        if comp_std_gm[f + fc][i] > dist_995:
                            c = c + 1
                        elif comp_std_gm[f + fc][i] < dist_05:
                            d = d + 1
                        elif (comp_std_gm[f + fc][i] < dist_995 and comp_std_gm[f + fc][i] > dist_75):
                            p = p + 1
                        elif (comp_std_gm[f + fc][i] > dist_05 and comp_std_gm[f + fc][i] < dist_25):
                            q = q + 1
                    if c == 3 or d == 3:
                        category["all_outside99"].append((avar, f + fc))
                    elif c == 2 or d == 2:
                        category["two_outside99"].append((avar, f + fc))
                    elif c == 1 or d == 1:
                        category["one_outside99"].append((avar, f + fc))
                    if p == 3 or q == 3:
                        category["all_oneside_outside1QR"].append((avar, f + fc))

            # Use the last (or second-to-last) path component for plot file names
            part_name = opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name = opts_dict['indir'].split('/')[-2]

            for key in sorted(category):
                list_array = []
                list_array2 = []
                list_var = []
                value = category[key]
                if key == "all_outside99":
                    print("*** ", len(value), " variables have 3 test run global means outside of the 99th percentile.")
                elif key == "two_outside99":
                    print("*** ", len(value), " variables have 2 test run global means outside of the 99th percentile.")
                elif key == "one_outside99":
                    print("*** ", len(value), " variables have 1 test run global mean outside of the 99th percentile.")
                elif key == "all_oneside_outside1QR":
                    print("*** ", len(value), " variables have all test run global means outside of the first quartile (but not outside the 99th percentile).")

                if len(value) > 0:
                    print(" => generating plot ...")
                    if len(value) > 20:
                        print(" NOTE: truncating to only plot the first 20 variables.")
                        value = value[0:20]
                for each_var in value:
                    list_array.append(std_gm[each_var[0]])
                    list_array2.append(comp_std_gm[each_var[1]])
                    name = each_var[0]
                    if isinstance(name, str) == False:
                        name = name.decode("utf-8")  # bytes var name from the summary file
                    list_var.append(name)
                if len(value) != 0:
                    # Ensemble distribution as box plot, test-run values as red dots
                    ax = sns.boxplot(data=list_array, whis=[0.5, 99.5], fliersize=0.0)
                    sns.stripplot(data=list_array2, jitter=True, color="r")
                    plt.xticks(list(range(len(list_array))), list_var, fontsize=8, rotation=-45)
                    if decision == 'FAILED':
                        plt.savefig(part_name + "_" + key + "_fail.png")
                    else:
                        plt.savefig(part_name + "_" + key + "_pass.png")
                    plt.close()

        ##
        # Print file with info about new test runs....to a netcdf file
        ##
        if opts_dict['saveResults']:
            num_vars = comp_std_gm.shape[0]
            tsize = comp_std_gm.shape[1]
            esize = std_gm_array.shape[1]
            this_savefile = 'savefile.nc'
            if (verbose == True):
                print("VERBOSE: Creating ", this_savefile, " ...")
            if os.path.exists(this_savefile):
                os.unlink(this_savefile)
            nc_savefile = nc.Dataset(this_savefile, "w", format="NETCDF4_CLASSIC")
            nc_savefile.createDimension('ens_size', esize)
            nc_savefile.createDimension('test_size', tsize)
            nc_savefile.createDimension('nvars', num_vars)
            nc_savefile.createDimension('str_size', str_size)
            # Set global attributes
            now = time.strftime("%c")
            nc_savefile.creation_date = now
            nc_savefile.title = 'PyCECT compare results file'
            nc_savefile.summaryfile = opts_dict['sumfile']
            # nc_savefile.testfiles = in_files

            # variables
            v_vars = nc_savefile.createVariable("vars", 'S1', ('nvars', 'str_size'))
            v_std_gm = nc_savefile.createVariable("std_gm", 'f8', ('nvars', 'test_size'))
            v_scores = nc_savefile.createVariable("scores", 'f8', ('nvars', 'test_size'))
            v_ens_sigma_scores = nc_savefile.createVariable('ens_sigma_scores', 'f8', ('nvars', ))
            v_ens_std_gm = nc_savefile.createVariable("ens_std_gm", 'f8', ('nvars', 'ens_size'))

            # hard-coded size
            str_out = nc.stringtochar(np.array(ens_var_name, 'S10'))

            v_vars[:] = str_out
            v_std_gm[:, :] = comp_std_gm[:, :]
            v_scores[:, :] = new_scores[:, :]
            v_ens_sigma_scores[:] = sigma_scores_gm[:]
            v_ens_std_gm[:, :] = std_gm_array[:, :]
            nc_savefile.close()

        # Print variables (optional)
        if opts_dict['printVars']:
            print(" ")
            print('***************************************************************************** ')
            print('Variable global mean information (for reference only - not used to determine pass/fail)')
            print('***************************************************************************** ')
            for fcount, fid in enumerate(ifiles):
                print(' ')
                print('Run ' + str(fcount + 1) + ":")
                print(' ')
                print('***' + str(countgm[fcount]), " of " + str(len(ens_var_name)) + ' variables are outside of ensemble global mean distribution***')
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange', fcount, variables, 'global mean')
                print(' ')
                print('----------------------------------------------------------------------------')

    if me.get_rank() == 0:
        print(' ')
        print("Testing complete.")
        print(' ')
def main(argv):
    """pyEnsSum driver: build a CAM ensemble summary NetCDF file.

    Reads an ensemble of history files, classifies 2D/3D variables, computes
    global means (and optionally RMSZ scores / max norms), then writes the
    summary file with PCA loadings for later use by pyCECT.

    NOTE(review): this block is Python 2 code (print statements, iteritems),
    unlike the Python 3 pyCECT main elsewhere in this file; indentation was
    reconstructed from a whitespace-mangled source — confirm nesting against
    the upstream pyEnsSum layout.
    """
    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_0_beta08'
    opts_dict['compset'] = 'F2000'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)

    # NOTE(review): 'and' binds tighter than 'or', so a missing --res alone
    # does not trigger this error — confirm the intended logic
    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']):
        print 'Please specify --tag, --compset, --mach and --res options'
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist = []
    inc_varlist = []

    # Create a mpi simplecomm object (serial comm when mpi is disabled)
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        print 'Running pyEnsSum!'

    if me.get_rank() == 0 and (verbose == True):
        print opts_dict
        print 'Ensemble size for summary = ', esize

    exclude = False
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist = []
            # Read in the excluded or included var list
            # (read_jsonlist's flag tells us which kind the list is)
            ex_varlist, exclude = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ES')
            if exclude == False:
                inc_varlist = ex_varlist
                ex_varlist = []
            # Read in the included var list
            # inc_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')

    # Broadcast the excluded var list to each processor
    # if opts_dict['mpi_enable']:
    #    ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)
    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude = me.partition(exclude, func=Duplicate(), involved=True)
        if exclude:
            ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)
        else:
            inc_varlist = me.partition(inc_varlist, func=Duplicate(), involved=True)

    in_files = []
    if (os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank() == 0 and (verbose == True):
            print 'Number of files in input directory = ', num_files
        if (num_files < esize):
            if me.get_rank() == 0 and (verbose == True):
                print 'Number of files in input directory (', num_files,\
                    ') is less than specified ensemble size of ', esize
            sys.exit(2)
        if (num_files > esize):
            if me.get_rank() == 0 and (verbose == True):
                print 'NOTE: Number of files in ', input_dir, \
                    'is greater than specified ensemble size of ', esize,\
                    '\nwill just use the first ', esize, 'files'
    else:
        if me.get_rank() == 0:
            print 'Input directory: ', input_dir, ' not found'
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'], opts_dict['regx'])
        in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
        if me.get_rank() == 0 and (verbose == True):
            print 'in_files=', in_files

    # Open the files in the input directory
    o_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print 'Input files are: '
        print "\n".join(in_files)
        # for i in in_files:
        #    print "in_files =",i
    for onefile in in_files[0:esize]:
        if (os.path.isfile(input_dir + '/' + onefile)):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            if me.get_rank() == 0:
                print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...."
            sys.exit()

    # Store dimensions of the input fields
    if me.get_rank() == 0 and (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ilev":
            nilev = input_dims["ilev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key == "lon"):
            nlon = input_dims[key]
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey = key

    if (nlev == -1):
        if me.get_rank() == 0:
            print "COULD NOT LOCATE valid dimension lev => EXITING...."
        sys.exit()

    if ((ncol == -1) and ((nlat == -1) or (nlon == -1))):
        if me.get_rank() == 0:
            print "Need either lat/lon or ncol => EXITING...."
        sys.exit()

    # Check if this is SE or FV data: ncol present => spectral-element grid
    if (ncol != -1):
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if me.get_rank() == 0 and (verbose == True):
        print "Checking dimensions across files...."
        print 'lev = ', nlev
        if (is_SE == True):
            print 'ncol = ', ncol
        else:
            print 'nlat = ', nlat
            print 'nlon = ', nlon

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (is_SE == True):
            if (nlev != int(input_dims["lev"]) or (ncol != int(input_dims["ncol"]))):
                if me.get_rank() == 0:
                    # NOTE(review): prints in_files[0] twice; likely meant
                    # in_files[count] for the second operand — confirm
                    print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
                sys.exit()
        else:
            if (nlev != int(input_dims["lev"]) or (nlat != int(input_dims[latkey]))\
                    or (nlon != int(input_dims[lonkey]))):
                if me.get_rank() == 0:
                    print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    # print len(vars_dict_all)
    if exclude:
        vars_dict = vars_dict_all
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    # Given an included var list, remove all float var that are not on the list
    else:
        vars_dict = vars_dict_all.copy()
        for k, v in vars_dict_all.iteritems():
            if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
                # print vars_dict_all[k].typecode()
                # print k
                del vars_dict[k]

    num_vars = len(vars_dict)
    # print num_vars
    # if me.get_rank() == 0:
    #    for k,v in vars_dict.iteritems():
    #        print 'vars_dict',k,vars_dict[k].typecode()

    str_size = 0
    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.iteritems():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = v.rank  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True):  # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):
                is_2d = True
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev)):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):
                is_2d = True
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and (vs[1] == nlev or vs[1] == nilev))):
                is_3d = True
                num_3d += 1
        if (is_3d == True):
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif (is_2d == True):
            str_size = max(str_size, len(k))
            d2_var_names.append(k)
        # else:
        #    print 'var=',k

    if me.get_rank() == 0 and (verbose == True):
        print 'Number of variables found: ', num_3d + num_2d
        print '3D variables: ' + str(num_3d) + ', 2D variables: ' + str(num_2d)

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    if esize < num_2d + num_3d:
        if me.get_rank() == 0:
            print "************************************************************************************************************************************"
            print " Error: the total number of 3D and 2D variables " + str(num_2d + num_3d) + " is larger than the number of ensemble files " + str(esize)
            print " Cannot generate ensemble summary file, please remove more variables from your included variable list,"
            print " or add more varaibles in your excluded variable list!!!"
            print "************************************************************************************************************************************"
        sys.exit()

    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    # if me.get_rank() == 0 and (verbose == True):
    #    print 'num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")"

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if me.get_rank() == 0 and (verbose == True):
        print "Creating ", this_sumfile, " ..."
    # NOTE(review): '|' binds tighter than '==', so this is
    # me.get_rank() == (0 | opts_dict["popens"]) — works while popens is
    # False/0 but probably intended (rank == 0) or popens; confirm
    if (me.get_rank() == 0 | opts_dict["popens"]):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if me.get_rank() == 0 and (verbose == True):
            print "Setting dimensions ....."
        if (is_SE == True):
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if me.get_rank() == 0 and (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if me.get_rank() == 0 and (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev', ))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            if (is_SE == True):
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol'))
            else:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))
            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size'))
        v_standardized_gm = nc_sumfile.create_variable("standardized_gm", 'f', ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm', 'f', ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm', 'f', ('nvars', ))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm', 'f', ('nvars', ))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm', 'f', ('nvars', ))

        # Assign vars, var3d and var2d (names are space-padded to str_size
        # so they fit the fixed-width 'S1' character arrays)
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invarient metadata
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        # NOTE(review): this rebinds the local name rather than writing into
        # the netCDF variable (v_lev[:] = ...); confirm intent
        v_lev = lev_data

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable, we also do max norm also (currently done in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True)
        var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # Calculate global means #
    if me.get_rank() == 0 and (verbose == True):
        print "Calculating global means ....."
    if not opts_dict['cumul']:
        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    if (not opts_dict['gmonly']) | (opts_dict['cumul']):
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating RMSZ scores ....."
        zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict['mpi_enable'] & (not opts_dict['popens']):
        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index, (len(d3_var_names), len(o_files)))
            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d = gather_npArray(zscore3d, me, slice_index, (len(d3_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names), me.get_rank())
                ens_avg3d = gather_npArray(ens_avg3d, me, slice_index, shape_tuple3d)
                ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index, shape_tuple3d)

            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index, (len(d2_var_names), len(o_files)))
            var_list = gather_list(var_list, me)
            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d = gather_npArray(zscore2d, me, slice_index, (len(d2_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev2d results
                shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names), me.get_rank())
                ens_avg2d = gather_npArray(ens_avg2d, me, slice_index, shape_tuple2d)
                ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index, shape_tuple2d)
        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names)))

    # Assign to file:
    # NOTE(review): same precedence caveat — evaluates
    # me.get_rank() == (0 | opts_dict['popens']); confirm intended grouping
    if me.get_rank() == 0 | opts_dict['popens']:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            if not opts_dict['gmonly']:
                Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0)
                v_RMSZ[:, :] = Zscoreall[:, :]
            if not opts_dict['gmonly']:
                if (is_SE == True):
                    v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :]
                    v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :]
                    v_ens_avg2d[:, :] = ens_avg2d[:, :]
                    v_ens_stddev2d[:, :] = ens_stddev2d[:, :]
                else:
                    v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :]
                    v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :]
                    v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :]
                    v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :]
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(gmall, all_var_names, var_list, me)
        v_gm[:, :] = gmall[:, :]
        v_standardized_gm[:, :] = standardized_global_mean[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:].astype(np.float32)
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        if me.get_rank() == 0:
            print "All Done"
def _inspect_input_files(self):
    """
    Inspect the input data files themselves.

    We check the file contents here, which means opening and reading heading
    information from the files.

    Overall flow (collective over self._simplecomm):
      1. Manager opens the first input file, finds the unlimited dimension,
         and categorizes every variable (time-invariant metadata,
         time-variant metadata, or time-series variable).
      2. The categorization results are broadcast (Duplicate partition) to
         all ranks.
      3. The remaining files are inspected in parallel (EqualStride
         partition) for consistency and per-file time values.
      4. Missing-variable sets and time values are collected back on the
         manager, files are sorted by their first time value, and overlap
         between files' time spans raises ValueError.
      5. Time-series variables are partitioned across ranks by weight
         (WeightBalanced) into self._time_series_variables.

    Raises:
        LookupError: if the unlimited dimension (or its variable) is missing
            or not actually unlimited in any input file.
        ValueError: if the time spans of two input files overlap.
    """
    # Set the I/O backend according to what is specified
    iobackend.set_backend(self._backend)

    # Initialize the list of variable names for each category
    udim = None          # name of the unlimited (time) dimension
    timeta = []          # time-invariant metadata variable names
    xtra_timeta = []     # time-invariant vars found only in the metadata file
    tvmeta = []          # time-variant metadata variable names

    # Initialize the local dictionary of time-series variables and sizes
    # (maps variable name -> byte size of one full variable)
    all_tsvars = {}
    file_times = {}      # maps filename -> array of time values in that file

    #===== INSPECT FIRST INPUT FILE (ON MASTER PROCESS ONLY) =====

    # Open first file
    if self._simplecomm.is_manager():
        ifile = iobackend.NCFile(self._input_filenames[0])

        # Look for the 'unlimited' dimension
        try:
            udim = next(
                dim for dim in ifile.dimensions if ifile.unlimited(dim))
        except StopIteration:
            err_msg = 'Unlimited dimension not found.'
            raise LookupError(err_msg)

        # Get the first file's time values
        file_times[self._input_filenames[0]] = ifile.variables[udim][:]

        # Categorize each variable (only looking at first file).
        # A variable without the unlimited dimension is time-invariant
        # metadata (unless excluded); with it, it is either time-variant
        # metadata (explicitly named, or 1-D when _1d_metadata is set) or a
        # time-series variable.
        for var_name, var in ifile.variables.iteritems():
            if udim not in var.dimensions:
                if var_name not in self._exclude_list:
                    timeta.append(var_name)
            elif var_name in self._metadata_names or (self._1d_metadata and len(var.dimensions) == 1):
                tvmeta.append(var_name)
            elif self._time_series_names is None or var_name in self._time_series_names:
                # Record approximate byte size for load balancing later
                all_tsvars[var_name] = var.datatype.itemsize * var.size

        # Close the first file
        ifile.close()

        # Find variables only in the metadata file
        if self._metadata_filename is not None:
            ifile = iobackend.NCFile(self._metadata_filename)
            for var_name, var in ifile.variables.iteritems():
                if udim not in var.dimensions and var_name not in timeta:
                    xtra_timeta.append(var_name)
            ifile.close()

    self._simplecomm.sync()

    # Send information to worker processes (Duplicate partition acts as a
    # broadcast: every rank receives the manager's values)
    self._unlimited_dim = self._simplecomm.partition(
        udim, func=Duplicate(), involved=True)
    self._time_invariant_metadata = self._simplecomm.partition(
        timeta, func=Duplicate(), involved=True)
    self._time_invariant_metafile_vars = self._simplecomm.partition(
        xtra_timeta, func=Duplicate(), involved=True)
    self._time_variant_metadata = self._simplecomm.partition(
        tvmeta, func=Duplicate(), involved=True)
    all_tsvars = self._simplecomm.partition(
        all_tsvars, func=Duplicate(), involved=True)

    self._simplecomm.sync()
    if self._simplecomm.is_manager():
        self._vprint(' First input file inspected.', verbosity=2)

    #===== INSPECT REMAINING INPUT FILES (IN PARALLEL) =====

    # Get the list of variable names and missing variables
    # (py2 list concatenation: dict.keys() returns a list here)
    var_names = set(
        all_tsvars.keys() + self._time_invariant_metadata +
        self._time_invariant_metafile_vars + self._time_variant_metadata)
    missing_vars = set()

    # Partition the remaining filenames to inspect
    input_filenames = self._simplecomm.partition(
        self._input_filenames[1:], func=EqualStride(), involved=True)

    # Make a pass through remaining files and:
    # (1) Make sure it has the 'unlimited' dimension
    # (2) Make sure this dimension is truely 'unlimited'
    # (3) Check that this dimension has a corresponding variable
    # (4) Check if there are any missing variables
    # (5) Get the time values from the files
    for ifilename in input_filenames:
        ifile = iobackend.NCFile(ifilename)

        # Determine the unlimited dimension
        if self._unlimited_dim not in ifile.dimensions:
            err_msg = 'Unlimited dimension not found in file "{0}"'.format(
                ifilename)
            raise LookupError(err_msg)
        if not ifile.unlimited(self._unlimited_dim):
            err_msg = 'Dimension "{0}" not unlimited in file "{1}"'.format(
                self._unlimited_dim, ifilename)
            raise LookupError(err_msg)
        if self._unlimited_dim not in ifile.variables:
            err_msg = 'Unlimited dimension variable not found in file "{0}"'.format(
                ifilename)
            raise LookupError(err_msg)

        # Get the time values (list of NDArrays)
        file_times[ifilename] = ifile.variables[self._unlimited_dim][:]

        # Get the missing variables
        var_names_next = set(ifile.variables.keys())
        missing_vars.update(var_names - var_names_next)

        # Close the file
        ifile.close()

    self._simplecomm.sync()
    if self._simplecomm.is_manager():
        self._vprint(' Remaining input files inspected.', verbosity=2)

    #===== CHECK FOR MISSING VARIABLES =====

    # Gather all missing variables on the master process.
    # NOTE(review): each worker sends once and the manager collects once per
    # worker; the [1] index takes the payload from the (rank, data) pair —
    # this pattern assumes simplecomm.collect() returns such a pair.
    if self._simplecomm.get_size() > 1:
        if self._simplecomm.is_manager():
            for _ in range(1, self._simplecomm.get_size()):
                missing_vars.update(self._simplecomm.collect()[1])
        else:
            self._simplecomm.collect(missing_vars)
    self._simplecomm.sync()

    # Check for missing variables only on master process
    if self._simplecomm.is_manager():
        # Remove metafile variables from missing vars set
        # (they are expected to be absent from the input files)
        missing_vars -= set(self._time_invariant_metafile_vars)

        # Make sure that the list of variables in each file is the same
        if len(missing_vars) != 0:
            warning = ("WARNING: Some variables are not in all input files:{0} "
                       "{1}").format(linesep, ', '.join(sorted(missing_vars)))
            self._vprint(warning, header=False, verbosity=0)

        self._vprint(' Checked for missing variables.', verbosity=2)

    #===== SORT INPUT FILES BY TIME =====

    # Gather the file time values onto the master process
    if self._simplecomm.get_size() > 1:
        if self._simplecomm.is_manager():
            for _ in range(1, self._simplecomm.get_size()):
                file_times.update(self._simplecomm.collect()[1])
        else:
            self._simplecomm.collect(file_times)
    self._simplecomm.sync()

    # Check the order of the input files based on the time values
    if self._simplecomm.is_manager():

        # Determine the sort order based on the first time in the time
        # values
        old_order = range(len(self._input_filenames))
        new_order = sorted(
            old_order, key=lambda i: file_times[self._input_filenames[i]][0])

        # Re-order the list of input filenames and time values
        new_filenames = [self._input_filenames[i] for i in new_order]
        new_values = [file_times[self._input_filenames[i]]
                      for i in new_order]

        # Now, check that the largest time in each file is less than the smallest time
        # in the next file (so that the time spans of each file do not
        # overlap)
        for i in xrange(1, len(new_values)):
            if new_values[i - 1][-1] >= new_values[i][0]:
                err_msg = ('Times in input files {0} and {1} appear to '
                           'overlap').format(new_filenames[i - 1], new_filenames[i])
                raise ValueError(err_msg)

    else:
        new_filenames = None

    # Now that this is validated, save the time values and filename in the
    # new order (broadcast the sorted list from the manager to all ranks)
    self._input_filenames = self._simplecomm.partition(
        new_filenames, func=Duplicate(), involved=True)

    if self._simplecomm.is_manager():
        self._vprint(' Input files sorted by time.', verbosity=2)

    #===== FINALIZING OUTPUT =====
    self._simplecomm.sync()

    # Debug output
    if self._simplecomm.is_manager():
        self._vprint(' Time-Invariant Metadata: {0}'.format(
            ', '.join(self._time_invariant_metadata)), verbosity=1)
        if len(self._time_invariant_metafile_vars) > 0:
            self._vprint(' Additional Time-Invariant Metadata: {0}'.format(
                ', '.join(self._time_invariant_metafile_vars)), verbosity=1)
        self._vprint(' Time-Variant Metadata: {0}'.format(
            ', '.join(self._time_variant_metadata)), verbosity=1)
        self._vprint(
            ' Time-Series Variables: {0}'.format(', '.join(all_tsvars.keys())),
            verbosity=1)

    # Add 'once' variable if writing to a once file
    # NOTE: This is a "cheat"!  There is no 'once' variable.  It's just
    # a catch for all metadata IFF the 'once-file' is enabled.
    if self._use_once_file:
        all_tsvars['once'] = max(all_tsvars.values())

    # Partition the time-series variables across processors
    # (weighted by variable byte size so ranks get similar I/O loads)
    self._time_series_variables = self._simplecomm.partition(
        all_tsvars.items(), func=WeightBalanced(), involved=True)
def main(argv): print('Running pyEnsSum!') # Get command line stuff and store in a dictionary s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex=' optkeys = s.split() try: opts, args = getopt.getopt(argv, "h", optkeys) except getopt.GetoptError: pyEnsLib.EnsSum_usage() sys.exit(2) # Put command line options in a dictionary - also set defaults opts_dict={} # Defaults opts_dict['tag'] = '' opts_dict['compset'] = '' opts_dict['mach'] = '' opts_dict['esize'] = 151 opts_dict['tslice'] = 0 opts_dict['res'] = '' opts_dict['sumfile'] = 'ens.summary.nc' opts_dict['indir'] = './' opts_dict['sumfiledir'] = './' opts_dict['jsonfile'] = '' opts_dict['verbose'] = True opts_dict['mpi_enable'] = False opts_dict['maxnorm'] = False opts_dict['gmonly'] = False opts_dict['popens'] = False opts_dict['cumul'] = False opts_dict['regx'] = 'test' opts_dict['startMon'] = 1 opts_dict['endMon'] = 1 opts_dict['fIndex'] = 151 # This creates the dictionary of input arguments opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,'ES',opts_dict) verbose = opts_dict['verbose'] st = opts_dict['esize'] esize = int(st) if (verbose == True): print(opts_dict) print('Ensemble size for summary = ', esize) if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] or opts_dict['res']): print('Please specify --tag, --compset, --mach and --res options') sys.exit() # Now find file names in indir input_dir = opts_dict['indir'] # The var list that will be excluded ex_varlist=[] # Create a mpi simplecomm object if opts_dict['mpi_enable']: me=simplecomm.create_comm() else: me=simplecomm.create_comm(not opts_dict['mpi_enable']) if me.get_rank() == 0: if opts_dict['jsonfile']: # Read in the excluded var list ex_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES') # Broadcast the excluded var list to each processor if opts_dict['mpi_enable']: 
ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True) in_files=[] if(os.path.exists(input_dir)): # Get the list of files in_files_temp = os.listdir(input_dir) in_files=sorted(in_files_temp) #print in_files # Make sure we have enough num_files = len(in_files) if (verbose == True): print('Number of files in input directory = ', num_files) if (num_files < esize): print('Number of files in input directory (',num_files, ') is less than specified ensemble size of ', esize) sys.exit(2) if (num_files > esize): print('NOTE: Number of files in ', input_dir, 'is greater than specified ensemble size of ', esize, '\nwill just use the first ', esize, 'files') else: print('Input directory: ',input_dir,' not found') sys.exit(2) if opts_dict['cumul']: if opts_dict['regx']: in_files_list=get_cumul_filelist(opts_dict,opts_dict['indir'],opts_dict['regx']) in_files=me.partition(in_files_list,func=EqualLength(),involved=True) if me.get_rank()==0: print('in_files=',in_files) # Open the files in the input directory o_files=[] for onefile in in_files[0:esize]: if (os.path.isfile(input_dir+'/' + onefile)): o_files.append(Nio.open_file(input_dir+'/' + onefile,"r")) else: print("COULD NOT LOCATE FILE "+ input_dir + onefile + "! 
EXITING....") sys.exit() # Store dimensions of the input fields if (verbose == True): print("Getting spatial dimensions") nlev = -1 ncol = -1 nlat = -1 nlon = -1 lonkey='' latkey='' # Look at first file and get dims input_dims = o_files[0].dimensions ndims = len(input_dims) for key in input_dims: if key == "lev": nlev = input_dims["lev"] elif key == "ncol": ncol = input_dims["ncol"] elif (key == "nlon") or (key =="lon"): nlon = input_dims[key] lonkey=key elif (key == "nlat") or (key == "lat"): nlat = input_dims[key] latkey=key if (nlev == -1) : print("COULD NOT LOCATE valid dimension lev => EXITING....") sys.exit() if (( ncol == -1) and ((nlat == -1) or (nlon == -1))): print("Need either lat/lon or ncol => EXITING....") sys.exit() # Check if this is SE or FV data if (ncol != -1): is_SE = True else: is_SE = False # Make sure all files have the same dimensions if (verbose == True): print("Checking dimensions across files....") print('lev = ', nlev) if (is_SE == True): print('ncol = ', ncol) else: print('nlat = ', nlat) print('nlon = ', nlon) for count, this_file in enumerate(o_files): input_dims = this_file.dimensions if (is_SE == True): if ( nlev != int(input_dims["lev"]) or ( ncol != int(input_dims["ncol"]))): print("Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!') sys.exit() else: if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\ or ( nlon != int(input_dims[lonkey]))): print("Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!') sys.exit() # Get 2d vars, 3d vars and all vars (For now include all variables) vars_dict = o_files[0].variables # Remove the excluded variables (specified in json file) from variable dictionary if ex_varlist: for i in ex_varlist: if i in vars_dict: del vars_dict[i] num_vars = len(vars_dict) if (verbose == True): print('Number of variables (including metadata) found = ', num_vars) str_size = 0 d2_var_names = [] d3_var_names = [] num_2d = 0 num_3d = 0 # Which are 2d, which are 3d 
and max str_size for k,v in vars_dict.iteritems(): var = k vd = v.dimensions # all the variable's dimensions (names) vr = v.rank # num dimension vs = v.shape # dim values is_2d = False is_3d = False if (is_SE == True): # (time, lev, ncol) or (time, ncol) if ((vr == 2) and (vs[1] == ncol)): is_2d = True num_2d += 1 elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev )): is_3d = True num_3d += 1 else: # (time, lev, nlon, nlon) or (time, nlat, nlon) if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)): is_2d = True num_2d += 1 elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and vs[1] == nlev )): is_3d = True num_3d += 1 if (is_3d == True) : str_size = max(str_size, len(k)) d3_var_names.append(k) elif (is_2d == True): str_size = max(str_size, len(k)) d2_var_names.append(k) # Now sort these and combine (this sorts caps first, then lower case - # which is what we want) d2_var_names.sort() d3_var_names.sort() # All vars is 3d vars first (sorted), the 2d vars all_var_names = list(d3_var_names) all_var_names += d2_var_names n_all_var_names = len(all_var_names) if (verbose == True): print('num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")") # Create new summary ensemble file this_sumfile = opts_dict["sumfile"] if (verbose == True): print("Creating ", this_sumfile, " ...") if(me.get_rank() ==0 | opts_dict["popens"]): if os.path.exists(this_sumfile): os.unlink(this_sumfile) opt = Nio.options() opt.PreFill = False opt.Format = 'NetCDF4Classic' nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt) # Set dimensions if (verbose == True): print("Setting dimensions .....") if (is_SE == True): nc_sumfile.create_dimension('ncol', ncol) else: nc_sumfile.create_dimension('nlat', nlat) nc_sumfile.create_dimension('nlon', nlon) nc_sumfile.create_dimension('nlev', nlev) nc_sumfile.create_dimension('ens_size', esize) nc_sumfile.create_dimension('nvars', num_3d + num_2d) nc_sumfile.create_dimension('nvars3d', num_3d) nc_sumfile.create_dimension('nvars2d', 
num_2d) nc_sumfile.create_dimension('str_size', str_size) # Set global attributes now = time.strftime("%c") if (verbose == True): print("Setting global attributes .....") setattr(nc_sumfile, 'creation_date',now) setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file') setattr(nc_sumfile, 'tag', opts_dict["tag"]) setattr(nc_sumfile, 'compset', opts_dict["compset"]) setattr(nc_sumfile, 'resolution', opts_dict["res"]) setattr(nc_sumfile, 'machine', opts_dict["mach"]) # Create variables if (verbose == True): print("Creating variables .....") v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',)) v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size')) v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size')) v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size')) if not opts_dict['gmonly']: if (is_SE == True): v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol')) v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol')) v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol')) v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol')) else: v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon')) v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon')) v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon')) v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon')) v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size')) v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size')) v_loadings_gm = nc_sumfile.create_variable('loadings_gm','f',('nvars','nvars')) v_mu_gm = nc_sumfile.create_variable('mu_gm','f',('nvars',)) v_sigma_gm = nc_sumfile.create_variable('sigma_gm','f',('nvars',)) v_sigma_scores_gm 
= nc_sumfile.create_variable('sigma_scores_gm','f',('nvars',)) # Assign vars, var3d and var2d if (verbose == True): print("Assigning vars, var3d, and var2d .....") eq_all_var_names =[] eq_d3_var_names = [] eq_d2_var_names = [] l_eq = len(all_var_names) for i in range(l_eq): tt = list(all_var_names[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ')*(str_size - l_tt) tt.extend(extra) eq_all_var_names.append(tt) l_eq = len(d3_var_names) for i in range(l_eq): tt = list(d3_var_names[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ')*(str_size - l_tt) tt.extend(extra) eq_d3_var_names.append(tt) l_eq = len(d2_var_names) for i in range(l_eq): tt = list(d2_var_names[i]) l_tt = len(tt) if (l_tt < str_size): extra = list(' ')*(str_size - l_tt) tt.extend(extra) eq_d2_var_names.append(tt) v_vars[:] = eq_all_var_names[:] v_var3d[:] = eq_d3_var_names[:] v_var2d[:] = eq_d2_var_names[:] # Time-invarient metadata if (verbose == True): print("Assigning time invariant metadata .....") lev_data = vars_dict["lev"] v_lev = lev_data # Form ensembles, each missing one member; compute RMSZs and global means #for each variable, we also do max norm also (currently done in pyStats) tslice = opts_dict['tslice'] if not opts_dict['cumul']: # Partition the var list var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True) var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True) else: var3_list_loc=d3_var_names var2_list_loc=d2_var_names # Calculate global means # if (verbose == True): print("Calculating global means .....") if not opts_dict['cumul']: gm3d,gm2d = pyEnsLib.generate_global_mean_for_summary(o_files,var3_list_loc,var2_list_loc , is_SE, False,opts_dict) if (verbose == True): print("Finish calculating global means .....") # Calculate RMSZ scores if (verbose == True): print("Calculating RMSZ scores .....") if (not opts_dict['gmonly']) | (opts_dict['cumul']): 
zscore3d,zscore2d,ens_avg3d,ens_stddev3d,ens_avg2d,ens_stddev2d,temp1,temp2=pyEnsLib.calc_rmsz(o_files,var3_list_loc,var2_list_loc,is_SE,opts_dict) # Calculate max norm ensemble if opts_dict['maxnorm']: if (verbose == True): print("Calculating max norm of ensembles .....") pyEnsLib.calculate_maxnormens(opts_dict,var3_list_loc) pyEnsLib.calculate_maxnormens(opts_dict,var2_list_loc) if opts_dict['mpi_enable'] & ( not opts_dict['popens']): if not opts_dict['cumul']: # Gather the 3d variable results from all processors to the master processor slice_index=get_stride_list(len(d3_var_names),me) # Gather global means 3d results gm3d=gather_npArray(gm3d,me,slice_index,(len(d3_var_names),len(o_files))) if not opts_dict['gmonly']: # Gather zscore3d results zscore3d=gather_npArray(zscore3d,me,slice_index,(len(d3_var_names),len(o_files))) # Gather ens_avg3d and ens_stddev3d results shape_tuple3d=get_shape(ens_avg3d.shape,len(d3_var_names),me.get_rank()) ens_avg3d=gather_npArray(ens_avg3d,me,slice_index,shape_tuple3d) ens_stddev3d=gather_npArray(ens_stddev3d,me,slice_index,shape_tuple3d) # Gather 2d variable results from all processors to the master processor slice_index=get_stride_list(len(d2_var_names),me) # Gather global means 2d results gm2d=gather_npArray(gm2d,me,slice_index,(len(d2_var_names),len(o_files))) if not opts_dict['gmonly']: # Gather zscore2d results zscore2d=gather_npArray(zscore2d,me,slice_index,(len(d2_var_names),len(o_files))) # Gather ens_avg3d and ens_stddev2d results shape_tuple2d=get_shape(ens_avg2d.shape,len(d2_var_names),me.get_rank()) ens_avg2d=gather_npArray(ens_avg2d,me,slice_index,shape_tuple2d) ens_stddev2d=gather_npArray(ens_stddev2d,me,slice_index,shape_tuple2d) else: gmall=np.concatenate((temp1,temp2),axis=0) gmall=pyEnsLib.gather_npArray_pop(gmall,me,(me.get_size(),len(d3_var_names)+len(d2_var_names))) # Assign to file: if me.get_rank() == 0 | opts_dict['popens'] : if not opts_dict['cumul']: gmall=np.concatenate((gm3d,gm2d),axis=0) if not 
opts_dict['gmonly']: Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0) v_RMSZ[:,:]=Zscoreall[:,:] if not opts_dict['gmonly']: if (is_SE == True): v_ens_avg3d[:,:,:]=ens_avg3d[:,:,:] v_ens_stddev3d[:,:,:]=ens_stddev3d[:,:,:] v_ens_avg2d[:,:]=ens_avg2d[:,:] v_ens_stddev2d[:,:]=ens_stddev2d[:,:] else: v_ens_avg3d[:,:,:,:]=ens_avg3d[:,:,:,:] v_ens_stddev3d[:,:,:,:]=ens_stddev3d[:,:,:,:] v_ens_avg2d[:,:,:]=ens_avg2d[:,:,:] v_ens_stddev2d[:,:,:]=ens_stddev2d[:,:,:] else: gmall_temp=np.transpose(gmall[:,:]) gmall=gmall_temp mu_gm,sigma_gm,standardized_global_mean,loadings_gm,scores_gm=pyEnsLib.pre_PCA(gmall) v_gm[:,:]=gmall[:,:] v_mu_gm[:]=mu_gm[:] v_sigma_gm[:]=sigma_gm[:].astype(np.float32) v_loadings_gm[:,:]=loadings_gm[:,:] v_sigma_scores_gm[:]=scores_gm[:] print("All Done") def get_cumul_filelist(opts_dict,indir,regx): if not opts_dict['indir']: print('input dir is not specified') sys.exit(2) #regx='(pgi(.)*-(01|02))' regx_list=["mon","gnu","pgi"] all_files=[] for prefix in regx_list: for i in range(opts_dict['fIndex'],opts_dict['fIndex']+opts_dict['esize']/3): for j in range(opts_dict['startMon'],opts_dict['endMon']+1): mon_str=str(j).zfill(2) regx='(^'+prefix+'(.)*'+str(i)+'(.)*-('+mon_str+'))' print('regx=',regx) res=[f for f in os.listdir(indir) if re.search(regx,f)] in_files=sorted(res) all_files.extend(in_files) print("all_files=",all_files) #in_files=res return all_files # # Get the shape of all variable list in tuple for all processor # def get_shape(shape_tuple,shape1,rank): lst=list(shape_tuple) lst[0]=shape1 shape_tuple=tuple(lst) return shape_tuple # # Get the mpi partition list for each processor # def get_stride_list(len_of_list,me): slice_index=[] for i in range(me.get_size()): index_arr=np.arange(len_of_list) slice_index.append(index_arr[i::me.get_size()]) return slice_index # # Gather arrays from each processor by the var_list to the master processor and make it an array # def gather_npArray(npArray,me,slice_index,array_shape): 
the_array=np.zeros(array_shape,dtype=np.float32) if me.get_rank()==0: k=0 for j in slice_index[me.get_rank()]: the_array[j,:]=npArray[k,:] k=k+1 for i in range(1,me.get_size()): if me.get_rank() == 0: rank,npArray=me.collect() k=0 for j in slice_index[rank]: the_array[j,:]=npArray[k,:] k=k+1 if me.get_rank() != 0: message={"from_rank":me.get_rank(),"shape":npArray.shape} me.collect(npArray) me.sync() return the_array if __name__ == "__main__": main(sys.argv[1:])