Example #1
 def setUp(self):
     #print 'Creating temporary file: ', filename
     self.filename = tempfile.mktemp(prefix="test_", suffix=".nc")
     do_setup(self.filename)
     opt = Nio.options()
     opt.MaskedArrayMode = 'MaskedAlways'
     self.f = Nio.open_file(self.filename, options=opt)
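As a hedged aside on what this fixture's option does (the file and variable names below are placeholders, not from the test): with MaskedArrayMode set to 'MaskedAlways', every read from a file variable returns a numpy masked array, whether or not a _FillValue attribute is present.

    import numpy as np
    import Nio

    opt = Nio.options()
    opt.MaskedArrayMode = 'MaskedAlways'
    f = Nio.open_file('test.nc', options=opt)  # placeholder file name
    data = f.variables['some_var'][:]          # placeholder variable name
    assert isinstance(data, np.ma.MaskedArray)
    f.close()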
Example #2
    def create_file(self, new_file_name, ncformat, hist_string=None):
        """
        Create a NetCDF file for writing.

        Parameters:
            new_file_name (str): Name, including full path, of the new
                                 NetCDF file to create.
            ncformat (str): Type of NetCDF file to create.
                            Options:
                            'netcdf4c': NetCDF4 Classic with compression
                                        (a trailing digit, e.g. 'netcdf4c7',
                                        sets the compression level)
                            'netcdf4': NetCDF4 Classic
                            'netcdf': NetCDF3 Classic
                            'netcdfLarge': NetCDF 64bit Offset
            hist_string (str): Optional.  A string to append to the history attribute.

        Returns:
            new_file (NioFile): The newly created NioFile object.
        """
        # Set pyNIO netcdf file options
        opt = Nio.options()
        # The netcdf output format
        if "netcdf4c" in ncformat:
            opt.Format = "NetCDF4Classic"
            if ncformat[-1].isdigit():
                opt.CompressionLevel = int(ncformat[-1])  # coerce the trailing digit to an int
        elif ncformat == "netcdf4":
            opt.Format = "NetCDF4Classic"
        elif ncformat == "netcdf":
            opt.Format = "Classic"
        elif ncformat == "netcdfLarge":
            opt.Format = "64BitOffset"
        else:
            print(
                "WARNING: Selected netcdf file format (",
                ncformat,
                ") is not recognized.",
            )
            print("Defaulting to NetCDF4Classic format.")
            opt.Format = "NetCDF4Classic"
        opt.PreFill = False
        if hist_string is None:
            hist_string = "clim-convert" + new_file_name
        # Open new output file
        new_file = Nio.open_file(new_file_name, "w", options=opt, history=hist_string)

        return new_file
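A hypothetical call to the method above ('writer' stands in for an instance of the enclosing class; the path and history text are placeholders). The trailing digit in 'netcdf4c7' selects compression level 7:

    # 'writer' stands in for an instance of the enclosing class (hypothetical)
    new_file = writer.create_file('/tmp/example.nc', 'netcdf4c7',
                                  hist_string='converted by example driver')
    new_file.close()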
Example #3
    def test_large(self):
        #
        # Creating a file
        #
        #init_time = time.clock()
        opt = Nio.options()
        opt.Format = "LargeFile"
        opt.PreFill = False
        f = Nio.open_file(self.filename, 'w', options=opt)

        f.title = "Testing large files and dimensions"

        f.create_dimension('big', 2500000000)

        bigvar = f.create_variable('bigvar', "b", ('big',))
        #print("created bigvar")
        # note: it is incredibly slow to write a scalar to a large-file variable,
        # so create a temporary variable x that will get assigned in steps

        x = np.empty(1000000,dtype = 'int8')
        #print x
        x[:] = 42
        t = list(range(0,2500000000,1000000))
        ii = 0
        for i in t:
            if i == 0:
                continue
            #print(t[ii], i)
            bigvar[t[ii]:i] = x[:]
            ii += 1
        x[:] = 84
        bigvar[2499000000:2500000000] = x[:]

        bigvar[-1] = 84
        bigvar.units = "big var units"
        #print bigvar[-1]
        #print(bigvar.dimensions)

        # check unlimited status

        #print(f)
        nt.assert_equal(bigvar.dimensions, ('big',))
        nt.assert_equal(f.unlimited('big'), False)
        nt.assert_equal(f.attributes, {'title': 'Testing large files and dimensions'})
        nt.assert_equal(f.dimensions, {'big': 2500000000})
        nt.assert_equal(list(f.variables.keys()), ['bigvar'])
        #print("closing file")
        #print('elapsed time: ',time.clock() - init_time)
        f.close()
        #quit()
        #
        # Reading a file
        #
        #print('opening file for read')
        #print('elapsed time: ',time.clock() - init_time)
        f = Nio.open_file(self.filename, 'r')

        #print('file is open')
        #print('elapsed time: ',time.clock() - init_time)
        nt.assert_equal(f.attributes, {'title': 'Testing large files and dimensions'})
        nt.assert_equal(f.dimensions, {'big': 2500000000})
        nt.assert_equal(list(f.variables.keys()), ['bigvar'])
        #print(f.dimensions)
        #print(list(f.variables.keys()))
        #print(f)
        #print("reading variable")
        #print('elapsed time: ',time.clock() - init_time)
        x = f.variables['bigvar']
        #print(x[0],x[1000000],x[249000000],x[2499999999])
        nt.assert_equal((x[0],x[1000000],x[249000000],x[2499999999]), (42, 42, 42, 84))
        #print("max and min")
        min = x[:].min()
        max = x[:].max()
        nt.assert_equal((x[:].min(), x[:].max()), (42, 84))

        # check unlimited status
        nt.assert_equal(f.variables['bigvar'].dimensions, ('big',))
        nt.assert_equal(f.unlimited('big'), False)

        #print("closing file")
        #print('elapsed time: ',time.clock() - init_time)
        f.close()
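Distilled from the test above, a minimal sketch of the large-file pattern (the file name and single chunk write are placeholders): request the 'LargeFile' format before opening, then assign in large slices rather than scalar by scalar.

    import numpy as np
    import Nio

    opt = Nio.options()
    opt.Format = 'LargeFile'   # 64-bit offsets, for variables beyond the 2 GB limit
    opt.PreFill = False
    f = Nio.open_file('big_sketch.nc', 'w', options=opt)
    f.create_dimension('big', 2500000000)
    bigvar = f.create_variable('bigvar', 'b', ('big',))
    chunk = np.empty(1000000, dtype='int8')
    chunk[:] = 42
    bigvar[0:1000000] = chunk[:]   # slice assignment; per-scalar writes are very slow
    f.close()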
Example #4
    def __init__(self, specifier, serial=False, verbosity=1,
                 skip_existing=False, overwrite=False,
                 once=False, simplecomm=None):
        """
        Constructor

        Parameters:
            specifier (Specifier): An instance of the Specifier class, 
                defining the input specification for this reshaper operation.

        Keyword Arguments:
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel
                (False).  The default is to assume parallel operation
                (but serial will be chosen if mpi4py cannot be found
                when trying to initialize the decomposition).
            verbosity (int): Level of printed output (stdout).  A value of 0
                means no output, and a higher value means more output.  The
                default value is 1.
            skip_existing (bool): Flag specifying whether to skip the generation
                of time-series for variables with time-series files that already
                exist.  Default is False.
            overwrite (bool): Flag specifying whether to forcefully overwrite
                output files if they already exist.  Default is False.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm object to handle the parallel 
                communication, if necessary
        """

        # Type checking (or double-checking)
        if not isinstance(specifier, Specifier):
            err_msg = "Input must be given in the form of a Specifier object"
            raise TypeError(err_msg)
        if type(serial) is not bool:
            err_msg = "Serial indicator must be True or False."
            raise TypeError(err_msg)
        if type(verbosity) is not int:
            err_msg = "Verbosity level must be an integer."
            raise TypeError(err_msg)
        if type(skip_existing) is not bool:
            err_msg = "Skip_existing flag must be True or False."
            raise TypeError(err_msg)
        if type(once) is not bool:
            err_msg = "Once-file indicator must be True or False."
            raise TypeError(err_msg)
        if simplecomm is not None:
            if not isinstance(simplecomm, (SimpleComm, SimpleCommMPI)):
                err_msg = ("Simple communicator object is not a SimpleComm or "
                           "SimpleCommMPI")
                raise TypeError(err_msg)

        # Whether to write a once file
        self._use_once_file = once

        # Internal timer data
        self._timer = TimeKeeper()

        # Dictionary storing read/write data amounts
        self.assumed_block_size = float(4 * 1024 * 1024)
        self._byte_counts = {}

        self._timer.start('Initializing Simple Communicator')
        if simplecomm is None:
            simplecomm = create_comm(serial=serial)
        # Reference to the simple communicator
        self._simplecomm = simplecomm
        self._timer.stop('Initializing Simple Communicator')

        # Construct the print header
        header = ''.join(['[', str(self._simplecomm.get_rank()),
                          '/', str(self._simplecomm.get_size()), '] '])

        # Reference to the verbose printer tool
        self._vprint = VPrinter(header=header, verbosity=verbosity)

        # Debug output starting
        if self._simplecomm.is_manager():
            self._vprint('Initializing Reshaper', verbosity=1)

        # Validate the user input data
        self._timer.start('Specifier Validation')
        specifier.validate()
        self._timer.stop('Specifier Validation')
        if self._simplecomm.is_manager():
            self._vprint('Specifier validated', verbosity=1)

        # Setup PyNIO options (including disabling the default PreFill option)
        opt = Nio.options()
        opt.PreFill = False

        # Determine the Format and CompressionLevel options
        # from the NetCDF format string in the Specifier
        if specifier.netcdf_format == 'netcdf':
            opt.Format = 'Classic'
        elif specifier.netcdf_format == 'netcdf4':
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = 0
        elif specifier.netcdf_format == 'netcdf4c':
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = specifier.netcdf_deflate
            if self._simplecomm.is_manager():
                self._vprint('PyNIO compression level: {0}'.format(\
                    specifier.netcdf_deflate), verbosity=2)

        self._nio_options = opt
        if self._simplecomm.is_manager():
            self._vprint('PyNIO options set', verbosity=2)

        # Open all of the input files
        self._timer.start('Open Input Files')
        self._input_files = []
        for filename in specifier.input_file_list:
            self._input_files.append(Nio.open_file(filename, "r"))
        self._timer.stop('Open Input Files')
        if self._simplecomm.is_manager():
            self._vprint('Input files opened', verbosity=2)

        # Validate the input files themselves
        self._timer.start('Input File Validation')
        self._validate_input_files(specifier)
        self._timer.stop('Input File Validation')
        if self._simplecomm.is_manager():
            self._vprint('Input files validated', verbosity=2)

        # Sort the input files by time
        self._timer.start('Sort Input Files')
        self._sort_input_files_by_time(specifier)
        self._timer.stop('Sort Input Files')
        if self._simplecomm.is_manager():
            self._vprint('Input files sorted', verbosity=2)

        # Retrieve and sort the variables in each time-slice file
        # (To determine if it is time-invariant metadata, time-variant
        # metadata, or if it is a time-series variable)
        self._timer.start('Sort Variables')
        self._sort_variables(specifier)
        self._timer.stop('Sort Variables')
        if self._simplecomm.is_manager():
            self._vprint('Variables sorted', verbosity=2)

        # Validate the output files
        self._timer.start('Output File Validation')
        self._validate_output_files(specifier, skip_existing, overwrite)
        self._timer.stop('Output File Validation')
        if self._simplecomm.is_manager():
            self._vprint('Output files validated', verbosity=2)

        # Helpful debugging message
        if self._simplecomm.is_manager():
            self._vprint('Reshaper initialized.', verbosity=1)

        # Sync before continuing...
        self._simplecomm.sync()
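For orientation, a hedged sketch of driving this constructor. The Specifier attribute names (input_file_list, netcdf_format, netcdf_deflate) mirror those the body above reads, but the default construction of Specifier and the file names are assumptions:

    spec = Specifier()                        # assumes a default-constructible Specifier
    spec.input_file_list = ['slice.0001.nc', 'slice.0002.nc']   # placeholder paths
    spec.netcdf_format = 'netcdf4c'
    spec.netcdf_deflate = 1                   # compression level handed to PyNIO
    reshaper = Reshaper(spec, serial=True, verbosity=2)   # class name per its own log messages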
Example #5
 def setUp(self):
     do_setup(filename)
     opt = Nio.options()
     opt.UseAxisAttribute = True
     self.f = Nio.open_file(filename, options=opt)
Example #6
 def setUp(self):
     # print 'Creating temporary file: ', filename
     do_setup(filename)
     opt = Nio.options()
     opt.MaskedArrayMode = "MaskedAlways"
     self.f = Nio.open_file(filename, options=opt)
Example #7
 def setUp(self):
     do_setup(filename)
     opt = Nio.options()
     opt.MaskedArrayMode = "MaskedExplicit"
     self.f = Nio.open_file(filename, options=opt)
Example #8
 def setUp(self):
     self.filename = tempfile.mktemp(prefix="test_", suffix=".nc")
     do_setup(self.filename)
     opt = Nio.options()
     opt.MaskedArrayMode = 'MaskedExplicit'
     self.f = Nio.open_file(self.filename, options=opt)
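In contrast to the 'MaskedAlways' fixtures above, a hedged sketch of 'MaskedExplicit' as I read the PyNIO options (placeholder names again): masking is not applied until it is explicitly enabled, so a plain read returns an ordinary ndarray.

    import numpy as np
    import Nio

    opt = Nio.options()
    opt.MaskedArrayMode = 'MaskedExplicit'
    f = Nio.open_file('test.nc', options=opt)      # placeholder file name
    data = f.variables['some_var'][:]              # placeholder variable name
    assert not isinstance(data, np.ma.MaskedArray)
    f.close()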
Example #9
    def open_new_file(self,
                      file_name,
                      z_values=numpy.arange(10),
                      z_units='m',
                      var_names=['X'],
                      long_names=[None],
                      units_names=['None'],
                      dtypes=['float64'],
                      time_units='minutes',
                      comment=''):

        #----------------------------------------------------
        # Notes: It might be okay to have "nz" be an
        #        unlimited dimension, like "time".  This
        #        would mean replacing "int(profile_length)"
        #        with "None".
        #----------------------------------------------------

        #--------------------------------------------------
        # Try to import the Nio module from PyNIO package
        #--------------------------------------------------
        Nio = self.import_nio()
        if not (Nio): return False

        #----------------------------
        # Does file already exist ?
        #----------------------------
        file_name = file_utils.check_overwrite(file_name)
        self.file_name = file_name

        #---------------------------------------
        # Check and store the time series info
        #---------------------------------------
        self.format = 'ncps'
        self.file_name = file_name
        self.time_index = 0
        if (long_names[0] is None):
            long_names = var_names
        #-------------------------------------------
        self.z_values = z_values
        self.z_units = z_units
        nz = numpy.size(z_values)
        #-------------------------------------------
        # We may not need to save these in self.
        # I don't think they're used anywhere yet.
        #-------------------------------------------
        self.var_names = var_names
        self.long_names = long_names
        self.units_names = units_names
        self.dtypes = dtypes

        #---------------------------------------------
        # Create array of Nio type codes from dtypes
        #---------------------------------------------
        nio_type_map = self.get_nio_type_map()
        nio_type_codes = []
        if (len(dtypes) == len(var_names)):
            for dtype in dtypes:
                nio_type_code = nio_type_map[dtype.lower()]
                nio_type_codes.append(nio_type_code)
        else:
            dtype = dtypes[0]
            nio_type_code = nio_type_map[dtype.lower()]
            for k in xrange(len(var_names)):
                nio_type_codes.append(nio_type_code)
        self.nio_type_codes = nio_type_codes

        #-------------------------------------
        # Open a new netCDF file for writing
        #-------------------------------------
        # Sample output from time.asctime():
        #     "Thu Oct  8 17:10:18 2009"
        #-------------------------------------
        opt = Nio.options()
        opt.PreFill = False  # (for efficiency)
        opt.HeaderReserveSpace = 4000  # (4000 bytes, for efficiency)
        history = "Created using PyNIO " + Nio.__version__ + " on "
        history = history + time.asctime() + ". "
        history = history + comment

        try:
            ncps_unit = Nio.open_file(file_name,
                                      mode="w",
                                      options=opt,
                                      history=history)
            OK = True
        except Exception:
            OK = False
            return OK

        #------------------------------------------------
        # Create an unlimited time dimension (via None)
        #------------------------------------------------
        # Without using "int()" here, we get this:
        #     TypeError: size must be None or integer
        #------------------------------------------------
        ncps_unit.create_dimension("nz", int(nz))
        ncps_unit.create_dimension("time", None)

        #-------------------------
        # Create a time variable
        #---------------------------------------------------
        #('f' = float32; must match in add_values_at_IDs()
        #---------------------------------------------------
        # NB! Can't use "time" vs. "tvar" here unless we
        #     add "import time" inside this function.
        #---------------------------------------------------
        tvar = ncps_unit.create_variable('time', 'd', ("time", ))
        ncps_unit.variables['time'].units = time_units

        #--------------------------------------
        # Create a distance/depth variable, z
        #--------------------------------------
        zvar = ncps_unit.create_variable('z', 'd', ("nz", ))
        zvar[:] = z_values  # (store the z-values)
        ncps_unit.variables['z'].units = z_units

        #-----------------------------------
        # Create variables using var_names
        #-----------------------------------
        # Returns "var" as a PyNIO object
        #---------------------------------------------------
        # NB! The 3rd argument here (dimension), must be a
        #     tuple.  If there is only one dimension, then
        #     we need to add a comma, as shown.
        #---------------------------------------------------
        for k in xrange(len(var_names)):
            var_name = var_names[k]
            var = ncps_unit.create_variable(var_name, nio_type_codes[k],
                                            ("time", "nz"))

            #------------------------------------
            # Create attributes of the variable
            #------------------------------------
            ncps_unit.variables[var_name].long_name = long_names[k]
            ncps_unit.variables[var_name].units = units_names[k]

            #----------------------------------
            # Specify a "nodata" fill value ?
            #----------------------------------
            var._FillValue = -9999.0  ## Does this jibe with PreFill above ??

        self.ncps_unit = ncps_unit
        return OK
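A hypothetical driver for the method above ('profile_writer' stands in for an instance of the enclosing class; the variable metadata is made up):

    import numpy

    # 'profile_writer' is assumed to be an instance of the enclosing class
    ok = profile_writer.open_new_file('profiles.nc',
                                      z_values=numpy.arange(10),
                                      z_units='m',
                                      var_names=['T'],
                                      long_names=['temperature'],
                                      units_names=['deg_C'],
                                      dtypes=['float64'],
                                      time_units='minutes')
    if ok:
        profile_writer.ncps_unit.close()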
Example #10
def var_nc2d(parameters=['AREA', 'HEFF'],
             ofile='MIT_output_2d',
             bswap=1,
             sstart_date="seconds since 2002-10-01 07:00",
             deltaT=1800,
             FillValue=-1.0e+23,
             dump='no'):
    '''
    Convert 2d fields produced by MITgcm to netCDF format using the Nio module.
    Variable names should be given as a list, even if there is only one variable.

    I put everything on the C grid!

    You must have the following files in the directory where you run your code:

    XC.data
    XC.meta
    YC.data
    YC.meta
    maskCtrlC.data
    maskCtrlC.meta

    Input:
        parameters      - list with names of the variables (like AREA or AREAtave).
        ofile           - name of the output file.
        bswap           - do we need a byte swap? Yes (1) or no (0) [default 1]
        sstart_date     - should be "seconds since" [default "seconds since 2002-10-01 07:00"]
        deltaT          - time step in seconds
        FillValue       - missing value
        dump            - if dump='yes', returns a numpy array with the data
    '''
    lon = mitbin2('XC.data', bswap)[0, 0, :, :]
    lat = mitbin2('YC.data', bswap)[0, 0, :, :]
    lsmask = mitbin2('maskCtrlC.data', bswap)[:, 0, :, :]
    fileList = glob.glob(parameters[0] + "*.data")

    if os.path.exists(ofile + ".nc") == True:
        os.system("rm " + ofile + ".nc")

    ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
        fileList[0][:-4] + "meta")

    ttime = numpy.zeros((len(fileList)))
    #ttime[0] = timeStepNumber*deltaT

    opt = Nio.options()
    opt.PreFill = False
    opt.HeaderReserveSpace = 4000
    f = Nio.open_file(ofile + ".nc", "w", opt)

    f.title = "MITgcm variables in netCDF format"
    f.create_dimension('x', xdim)
    f.create_dimension('y', ydim)
    f.create_dimension('time', ttime.shape[0])

    f.create_variable('time', 'd', ('time', ))
    f.variables['time'].units = sstart_date

    f.create_variable('latitude', 'd', ('x', 'y'))
    f.variables['latitude'].long_name = "latitude"
    f.variables['latitude'].units = "degrees_north"
    f.variables['latitude'].standard_name = "grid_latitude"
    f.variables['latitude'][:] = lat[:]

    f.create_variable('longitude', 'd', ('x', 'y'))
    f.variables['longitude'].long_name = "longitude"
    f.variables['longitude'].units = "degrees_east"
    f.variables['longitude'].standard_name = "grid_longitude"
    f.variables['longitude'][:] = lon[:]

    for parameter in parameters:

        f.create_variable(parameter, 'd', ('time', 'x', 'y'))

        f.variables[parameter].long_name = gatrib(parameter)[0]
        f.variables[parameter].units = gatrib(parameter)[1]
        f.variables[parameter]._FillValue = FillValue
        f.variables[parameter].missing_value = FillValue

        adatemp_final = numpy.zeros((len(fileList), xdim, ydim))

        iterator = 0
        for fileName in fileList:

            adatemp = mitbin2(parameter + fileName[-16:], bswap=bswap)[0,
                                                                       0, :, :]
            ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
                fileName[:-4] + "meta")
            adatemp = numpy.where(adatemp[:] < -1.0e+20, FillValue, adatemp[:])
            adatemp = numpy.where(lsmask[:] == 0, FillValue, adatemp[:])
            adatemp_final[iterator, :, :] = adatemp
            ttime[iterator] = timeStepNumber * deltaT
            iterator = iterator + 1

        f.variables[parameter][:] = adatemp_final

    f.variables['time'][:] = ttime
    f.close()
    if dump == 'yes':
        return adatemp
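A hypothetical invocation, assuming the XC/YC/maskCtrlC grid files and the AREA*/HEFF* .data/.meta pairs sit in the working directory:

    data = var_nc2d(parameters=['AREA', 'HEFF'],
                    ofile='MIT_output_2d',
                    bswap=1,
                    deltaT=1800,
                    dump='yes')   # returns the last numpy array read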
Example #11
def nc3d(parameters=['adxx_atemp'],
         ofile='adxx',
         iteration='0',
         bswap=1,
         sstart_date="seconds since 2002-10-01 07:00",
         deltaT=1200,
         xx_period=240000.0,
         FillValue=-1.0e+23,
         meta=None,
         dump="no"):
    '''
    Convert 3d fields from adxx* and xx* files to netCDF format using the Nio module.
    Variable names should be given as a list, even if there is only one variable.

    I put everything on the C grid!

    You must have the following files in the directory where you run your code:

    XC.data
    XC.meta
    YC.data
    YC.meta
    DRC.data
    DRC.meta
    maskCtrlC.data
    maskCtrlC.meta

    Input:
        parameters      - list with names of the variables.
        ofile           - name of the output file.
        iteration       - iteration of optimisation; should be a STRING!
        bswap           - do we need a byte swap? Yes (1) or no (0) [default 1]
        sstart_date     - should be "seconds since" [default "seconds since 2002-10-01 07:00"]
        deltaT          - time step in seconds
        xx_period       - xx_*period
        FillValue       - missing value
        meta            - flag to fix a problem with wrong adxx*.meta files.
                          If meta='xx', use the .meta file from the xx files.
        dump            - if dump='yes', returns a numpy array with the data
    '''
    lon = mitbin2('XC.data', bswap)[0, 0, :, :]
    lat = mitbin2('YC.data', bswap)[0, 0, :, :]
    lev = mitbin2('DRC.data', bswap)[0, :, 0, 0]
    lev = numpy.cumsum(lev)
    lsmask = mitbin2('maskCtrlC.data', bswap)[:, :, :, :]

    if os.path.exists(ofile + ".nc") == True:
        os.system("rm " + ofile + ".nc")

    if meta == None:
        ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
            parameters[0] + "." + iteration.zfill(10) + ".meta")
    elif meta == 'xx':
        ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
            parameters[0][2:] + "." + iteration.zfill(10) + ".meta")
    if nrecords == 1:
        ttime = numpy.zeros((nrecords))
        ttime[0] = timeStepNumber * deltaT
    elif nrecords > 1:
        ttime = numpy.zeros((nrecords))
        for i in range(nrecords):
            ttime[i] = xx_period * i

    opt = Nio.options()
    opt.PreFill = False
    opt.HeaderReserveSpace = 4000
    f = Nio.open_file(ofile + ".nc", "w", opt)

    f.title = "MITgcm variables in netCDF format"
    f.create_dimension('x', xdim)
    f.create_dimension('y', ydim)
    f.create_dimension('z', zdim)
    f.create_dimension('time', ttime.shape[0])

    f.create_variable('time', 'd', ('time', ))
    f.variables['time'].units = sstart_date
    f.variables['time'][:] = ttime

    f.create_variable('z', 'd', ('z', ))
    f.variables['z'].units = "meters"
    f.variables['z'][:] = lev[:]

    f.create_variable('latitude', 'd', ('x', 'y'))
    f.variables['latitude'].long_name = "latitude"
    f.variables['latitude'].units = "degrees_north"
    f.variables['latitude'].standard_name = "grid_latitude"
    f.variables['latitude'][:] = lat[:]

    f.create_variable('longitude', 'd', ('x', 'y'))
    f.variables['longitude'].long_name = "longitude"
    f.variables['longitude'].units = "degrees_east"
    f.variables['longitude'].standard_name = "grid_longitude"
    f.variables['longitude'][:] = lon[:]

    #vvariables = ["atemp","aqh", "uwind", "vwind", ]
    #vvariables = ["atemp"]

    for parameter in parameters:
        adatemp = mitbin2(parameter + "." + iteration.zfill(10) + ".data",
                          bswap=bswap,
                          meta=meta)[:, :, :, :]

        #	adatemp = numpy.where(adatemp[:] > 1.0e+12, 0, adatemp[:])
        adatemp = numpy.where(adatemp[:] < -1.0e+20, FillValue, adatemp[:])
        adatemp = numpy.where(lsmask[:] == 0, FillValue, adatemp[:])
        f.create_variable(parameter, 'd', ('time', 'z', 'x', 'y'))

        nname, unit, grid = gatrib(parameter)

        f.variables[parameter].long_name = nname
        f.variables[parameter].units = unit
        f.variables[parameter].grid = grid
        f.variables[parameter]._FillValue = FillValue
        #print(adatemp.shape())

        f.variables[parameter][:] = adatemp

    f.close()
    if dump == 'yes':
        return adatemp
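A hypothetical call for one adjoint sensitivity field (per the docstring, iteration must be a string, and meta='xx' works around broken adxx*.meta files):

    nc3d(parameters=['adxx_atemp'],
         ofile='adxx_atemp',
         iteration='0',
         bswap=1,
         xx_period=240000.0,
         meta='xx')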
Example #12
def var_nc3d(parameters=['Ttave'],
             ofile='MIT_output_3d',
             bswap=1,
             sstart_date="seconds since 2002-10-01 07:00",
             deltaT=1200,
             FillValue=-1.0e+23,
             dump="no"):
    '''
    Convert 3d fields produced by MITgcm to netCDF format using the Nio module.
    Variable names should be given as a list, even if there is only one variable.

    I put everything on the C grid!

    You must have the following files in the directory where you run your code:

    XC.data
    XC.meta
    YC.data
    YC.meta
    DRC.data
    DRC.meta
    maskCtrlC.data
    maskCtrlC.meta

    Input:
        parameters      - list with names of the variables.
        ofile           - name of the output file.
        bswap           - do we need a byte swap? Yes (1) or no (0) [default 1]
        sstart_date     - should be "seconds since" [default "seconds since 2002-10-01 07:00"]
        deltaT          - time step in seconds
        FillValue       - missing value
        dump            - if dump='yes', returns a numpy array with the data
    '''
    lon = mitbin2('XC.data', bswap)[0, 0, :, :]
    lat = mitbin2('YC.data', bswap)[0, 0, :, :]
    lev = mitbin2('DRC.data', bswap)[0, :, 0, 0]
    lev = numpy.cumsum(lev)
    lsmask = mitbin2('maskCtrlC.data', bswap)[:, :, :, :]

    fileList = glob.glob(parameters[0] + "*.data")

    if os.path.exists(ofile + ".nc") == True:
        os.system("rm " + ofile + ".nc")

    ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
        fileList[0][:-4] + "meta")

    ttime = numpy.zeros((len(fileList)))

    opt = Nio.options()
    opt.PreFill = False
    opt.HeaderReserveSpace = 4000
    f = Nio.open_file(ofile + ".nc", "w", opt)

    f.title = "MITgcm variables in netCDF format"
    f.create_dimension('x', xdim)
    f.create_dimension('y', ydim)
    f.create_dimension('z', zdim)
    f.create_dimension('time', ttime.shape[0])

    f.create_variable('time', 'd', ('time', ))
    f.variables['time'].units = sstart_date

    f.create_variable('z', 'd', ('z', ))
    f.variables['z'].units = "meters"
    f.variables['z'][:] = lev[:]

    f.create_variable('latitude', 'd', ('x', 'y'))
    f.variables['latitude'].long_name = "latitude"
    f.variables['latitude'].units = "degrees_north"
    f.variables['latitude'].standard_name = "grid_latitude"
    f.variables['latitude'][:] = lat[:]

    f.create_variable('longitude', 'd', ('x', 'y'))
    f.variables['longitude'].long_name = "longitude"
    f.variables['longitude'].units = "degrees_east"
    f.variables['longitude'].standard_name = "grid_longitude"
    f.variables['longitude'][:] = lon[:]

    #vvariables = ["atemp","aqh", "uwind", "vwind", ]
    #vvariables = ["atemp"]

    for parameter in parameters:
        f.create_variable(parameter, 'd', ('time', 'z', 'x', 'y'))

        f.variables[parameter].long_name = gatrib(parameter)[0]
        f.variables[parameter].units = gatrib(parameter)[1]
        f.variables[parameter]._FillValue = FillValue
        f.variables[parameter].missing_value = FillValue

        adatemp_final = numpy.zeros((len(fileList), zdim, xdim, ydim))

        for ind, fileName in enumerate(fileList):
            adatemp = mitbin2(parameter + fileName[-16:], bswap=bswap)[:, :, :, :]
            ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
                fileName[:-4] + "meta")
            # adatemp = numpy.where(adatemp[:] > 1.0e+12, 0, adatemp[:])
            adatemp = numpy.where(adatemp[:] < -1.0e+20, FillValue, adatemp[:])
            adatemp = numpy.where(lsmask[:] == 0, FillValue, adatemp[:])
            adatemp_final[ind, :, :, :] = adatemp
            ttime[ind] = timeStepNumber * deltaT

        # write the full stack of records, not just the last file read
        f.variables[parameter][:] = adatemp_final

    # write the time values (mirrors the 2d converter)
    f.variables['time'][:] = ttime
    f.close()
    if dump == 'yes':
        return adatemp
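To spot-check output from the converters above, a short hedged read-back (the file and parameter names assume the defaults of var_nc3d):

    import Nio

    f = Nio.open_file('MIT_output_3d.nc', 'r')
    print(f.dimensions)                     # expect 'x', 'y', 'z', 'time'
    print(f.variables['Ttave'].long_name)   # attribute written by var_nc3d above
    f.close()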
Example #13
def main(argv):

    print('Running pyEnsSum!')

    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict={}

    # Defaults
    opts_dict['tag'] = ''
    opts_dict['compset'] = ''
    opts_dict['mach'] = ''
    opts_dict['esize'] = 151
    opts_dict['tslice'] = 0
    opts_dict['res'] = ''
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = False
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,'ES',opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)

    if (verbose == True):
        print(opts_dict)
        print('Ensemble size for summary = ', esize)

    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] and opts_dict['res']):
        print('Please specify --tag, --compset, --mach and --res options')
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist=[]

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])


    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            # Read in the excluded var list
            ex_varlist = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ES')

    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)

    in_files=[]
    if(os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files=sorted(in_files_temp)
        #print in_files
        # Make sure we have enough
        num_files = len(in_files)
        if (verbose == True):
            print('Number of files in input directory = ', num_files)
        if (num_files < esize):
            print('Number of files in input directory (',num_files,
                ') is less than specified ensemble size of ', esize)
            sys.exit(2)
        if (num_files > esize):
            print('NOTE: Number of files in ', input_dir,
                'is greater than specified ensemble size of ', esize,
                '\nwill just use the first ',  esize, 'files')
    else:
        print('Input directory: ',input_dir,' not found')
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'], opts_dict['regx'])
        in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
        if me.get_rank() == 0:
            print('in_files=', in_files)

    # Open the files in the input directory
    o_files=[]
    for onefile in in_files[0:esize]:
        if (os.path.isfile(input_dir+'/' + onefile)):
            o_files.append(Nio.open_file(input_dir+'/' + onefile,"r"))
        else:
            print("COULD NOT LOCATE FILE "+ input_dir + onefile + "! EXITING....")
            sys.exit()

    # Store dimensions of the input fields
    if (verbose == True):
        print("Getting spatial dimensions")
    nlev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey=''
    latkey=''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key =="lon"):
            nlon = input_dims[key]
            lonkey=key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey=key

    if (nlev == -1) :
        print("COULD NOT LOCATE valid dimension lev => EXITING....")
        sys.exit()

    if (( ncol == -1) and ((nlat == -1) or (nlon == -1))):
        print("Need either lat/lon or ncol  => EXITING....")
        sys.exit()

    # Check if this is SE or FV data
    if (ncol != -1):
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if (verbose == True):
        print("Checking dimensions across files....")
        print('lev = ', nlev)
        if (is_SE == True):
            print('ncol = ', ncol)
        else:
            print('nlat = ', nlat)
            print('nlon = ', nlon)

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (is_SE == True):
            if (nlev != int(input_dims["lev"]) or (ncol != int(input_dims["ncol"]))):
                print("Dimension mismatch between ", in_files[0], 'and', in_files[count], '!!!')
                sys.exit()
        else:
            if (nlev != int(input_dims["lev"]) or (nlat != int(input_dims[latkey]))
                    or (nlon != int(input_dims[lonkey]))):
                print("Dimension mismatch between ", in_files[0], 'and', in_files[count], '!!!')
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    if ex_varlist:
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    num_vars = len(vars_dict)
    if (verbose == True):
        print('Number of variables (including metadata) found =  ', num_vars)
    str_size = 0

    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.iteritems():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = v.rank  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True):  # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):
                is_2d = True
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev)):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlat, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):
                is_2d = True
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and vs[1] == nlev)):
                is_3d = True
                num_3d += 1
        if (is_3d == True):
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif (is_2d == True):
            str_size = max(str_size, len(k))
            d2_var_names.append(k)


    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()


    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    if (verbose == True):
        print('num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")")

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if (verbose == True):
        print("Creating ", this_sumfile, "  ...")
    if (me.get_rank() == 0) or opts_dict["popens"]:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if (verbose == True):
            print("Setting dimensions .....")
        if (is_SE == True):
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if (verbose == True):
            print("Setting global attributes .....")
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if (verbose == True):
            print("Creating variables .....")
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            if (is_SE == True):
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol'))
            else:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))

            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm', 'f', ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm', 'f', ('nvars',))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm', 'f', ('nvars',))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm', 'f', ('nvars',))

        # Assign vars, var3d and var2d
        if (verbose == True):
            print("Assigning vars, var3d, and var2d .....")

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if (verbose == True):
            print("Assigning time invariant metadata .....")
        lev_data = vars_dict["lev"]
        # write the values into the file variable (rebinding the name would be a no-op)
        v_lev[:] = lev_data[:]

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable (the max norm is also done, currently in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True)
        var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True)
    else:
        var3_list_loc=d3_var_names
        var2_list_loc=d2_var_names

    # Calculate global means #
    if (verbose == True):
        print("Calculating global means .....")
    if not opts_dict['cumul']:
        gm3d,gm2d = pyEnsLib.generate_global_mean_for_summary(o_files,var3_list_loc,var2_list_loc , is_SE, False,opts_dict)
    if (verbose == True):
        print("Finish calculating global means .....")

    # Calculate RMSZ scores
    if (verbose == True):
        print("Calculating RMSZ scores .....")
    if (not opts_dict['gmonly']) or opts_dict['cumul']:
        zscore3d,zscore2d,ens_avg3d,ens_stddev3d,ens_avg2d,ens_stddev2d,temp1,temp2=pyEnsLib.calc_rmsz(o_files,var3_list_loc,var2_list_loc,is_SE,opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if (verbose == True):
            print("Calculating max norm of ensembles .....")
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict['mpi_enable'] & ( not opts_dict['popens']):

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index=get_stride_list(len(d3_var_names),me)

            # Gather global means 3d results
            gm3d=gather_npArray(gm3d,me,slice_index,(len(d3_var_names),len(o_files)))

            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d=gather_npArray(zscore3d,me,slice_index,(len(d3_var_names),len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d=get_shape(ens_avg3d.shape,len(d3_var_names),me.get_rank())
                ens_avg3d=gather_npArray(ens_avg3d,me,slice_index,shape_tuple3d)
                ens_stddev3d=gather_npArray(ens_stddev3d,me,slice_index,shape_tuple3d)

            # Gather 2d variable results from all processors to the master processor
            slice_index=get_stride_list(len(d2_var_names),me)

            # Gather global means 2d results
            gm2d=gather_npArray(gm2d,me,slice_index,(len(d2_var_names),len(o_files)))

            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d=gather_npArray(zscore2d,me,slice_index,(len(d2_var_names),len(o_files)))

                # Gather ens_avg2d and ens_stddev2d results
                shape_tuple2d=get_shape(ens_avg2d.shape,len(d2_var_names),me.get_rank())
                ens_avg2d=gather_npArray(ens_avg2d,me,slice_index,shape_tuple2d)
                ens_stddev2d=gather_npArray(ens_stddev2d,me,slice_index,shape_tuple2d)

        else:
            gmall=np.concatenate((temp1,temp2),axis=0)
            gmall=pyEnsLib.gather_npArray_pop(gmall,me,(me.get_size(),len(d3_var_names)+len(d2_var_names)))
    # Assign to file:
    if (me.get_rank() == 0) or opts_dict['popens']:
        if not opts_dict['cumul']:
            gmall=np.concatenate((gm3d,gm2d),axis=0)
            if not opts_dict['gmonly']:
                Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0)
                v_RMSZ[:,:]=Zscoreall[:,:]
            if not opts_dict['gmonly']:
                if (is_SE == True):
                    v_ens_avg3d[:,:,:]=ens_avg3d[:,:,:]
                    v_ens_stddev3d[:,:,:]=ens_stddev3d[:,:,:]
                    v_ens_avg2d[:,:]=ens_avg2d[:,:]
                    v_ens_stddev2d[:,:]=ens_stddev2d[:,:]
                else:
                    v_ens_avg3d[:,:,:,:]=ens_avg3d[:,:,:,:]
                    v_ens_stddev3d[:,:,:,:]=ens_stddev3d[:,:,:,:]
                    v_ens_avg2d[:,:,:]=ens_avg2d[:,:,:]
                    v_ens_stddev2d[:,:,:]=ens_stddev2d[:,:,:]
        else:
            gmall_temp=np.transpose(gmall[:,:])
            gmall=gmall_temp
        mu_gm,sigma_gm,standardized_global_mean,loadings_gm,scores_gm=pyEnsLib.pre_PCA(gmall)
        v_gm[:,:]=gmall[:,:]
        v_mu_gm[:]=mu_gm[:]
        v_sigma_gm[:]=sigma_gm[:].astype(np.float32)
        v_loadings_gm[:,:]=loadings_gm[:,:]
        v_sigma_scores_gm[:]=scores_gm[:]

    print("All Done")

def get_cumul_filelist(opts_dict,indir,regx):
   if not opts_dict['indir']:
      print('input dir is not specified')
      sys.exit(2)
   #regx='(pgi(.)*-(01|02))'
   regx_list=["mon","gnu","pgi"]
   all_files=[]
   for prefix in regx_list:
       for i in range(opts_dict['fIndex'],opts_dict['fIndex']+opts_dict['esize']//3):
           for j in range(opts_dict['startMon'],opts_dict['endMon']+1):
               mon_str=str(j).zfill(2)
               regx='(^'+prefix+'(.)*'+str(i)+'(.)*-('+mon_str+'))'
               print('regx=',regx)
               res=[f for f in os.listdir(indir) if re.search(regx,f)]
               in_files=sorted(res)
               all_files.extend(in_files)
   print("all_files=",all_files)
   #in_files=res
   return all_files
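
# A hedged illustration (not part of the original script) of the kind of
# pattern the loop above builds and what it matches; the file names below are
# invented. 're' is already imported by the full script (it is used above).
def _demo_cumul_regx():
    regx = '(^pgi(.)*151(.)*-(01))'   # as built for prefix='pgi', i=151, mon_str='01'
    names = ['pgi_ens151-01.nc', 'pgi_ens152-01.nc', 'gnu_ens151-01.nc']
    return [f for f in names if re.search(regx, f)]   # -> ['pgi_ens151-01.nc']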





#
# Get the shape tuple for the gathered array: replace the first dimension of
# the local array's shape with the full variable-list length (shape1)
#
def get_shape(shape_tuple,shape1,rank):
    lst=list(shape_tuple)
    lst[0]=shape1
    shape_tuple=tuple(lst)
    return shape_tuple
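
# A hedged worked example (not called anywhere): with 40 3D variables in total
# and a local slab of shape (14, 30, 48602), the gathered array on the master
# processor gets shape (40, 30, 48602) -- only the first dimension changes:
#
#   get_shape((14, 30, 48602), 40, 0)   # -> (40, 30, 48602)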

#
# Get the MPI partition (stride) index list for each processor
#
def get_stride_list(len_of_list,me):
    slice_index=[]
    for i in range(me.get_size()):
        index_arr=np.arange(len_of_list)
        slice_index.append(index_arr[i::me.get_size()])
    return slice_index
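
# A hedged worked example (not called anywhere): for 7 variables on a
# hypothetical 3-rank communicator, index_arr[i::size] deals the variable
# indices out round-robin:
#
#   get_stride_list(7, me)   # -> [array([0, 3, 6]), array([1, 4]), array([2, 5])]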

#
# Gather the partitioned arrays from every processor onto the master processor
# and reassemble them into a single array in the original variable order
#
def gather_npArray(npArray,me,slice_index,array_shape):
    the_array=np.zeros(array_shape,dtype=np.float32)
    if me.get_rank()==0:
        k=0
        for j in slice_index[me.get_rank()]:
            the_array[j,:]=npArray[k,:]
            k=k+1
    for i in range(1,me.get_size()):
        if me.get_rank() == 0:
            rank,npArray=me.collect()
            k=0
            for j in slice_index[rank]:
                the_array[j,:]=npArray[k,:]
                k=k+1
    if me.get_rank() != 0:
        message={"from_rank":me.get_rank(),"shape":npArray.shape}
        me.collect(npArray)
    me.sync()
    return the_array
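
# A hedged single-process sketch (no MPI) of the reassembly step above: rows
# dealt out by stride go back into their global slots. All names and shapes
# here are made up for illustration.
def _demo_gather_reassembly():
    full = np.zeros((5, 2), dtype=np.float32)
    slice_index = [np.arange(5)[0::2], np.arange(5)[1::2]]   # two fake "ranks"
    pieces = {0: np.ones((3, 2)), 1: 2 * np.ones((2, 2))}    # per-rank local rows
    for rank in pieces:
        for k, j in enumerate(slice_index[rank]):
            full[j, :] = pieces[rank][k, :]
    return full   # rows 0, 2, 4 -> 1.0 and rows 1, 3 -> 2.0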

if __name__ == "__main__":
    main(sys.argv[1:])
Ejemplo n.º 17
0
def main(argv):
    print 'Running pyEnsSumPop!'

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable zscoreonly nrand= rand seq= jsondir='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm1_2_0'
    opts_dict['compset'] = 'FC5'
    opts_dict['mach'] = 'yellowstone'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 3
    opts_dict['nmonth'] = 12
    opts_dict['npert'] = 40
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'ne30_ne30'
    opts_dict['sumfile'] = 'ens.pop.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['zscoreonly'] = False
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = '/glade/scratch/haiyingx/'

    # This creates the dictionary of input arguments
    print "before parseconfig"
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if verbose:
        print opts_dict

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])
    if opts_dict['jsonfile']:
        # Read in the included var list
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        str_size = 0
        for vname in Var3d:
            if str_size < len(vname):
                str_size = len(vname)
        for vname in Var2d:
            if str_size < len(vname):
                str_size = len(vname)

    in_files = []
    if (os.path.exists(input_dir)):
        # Pick up the 'nrand' random number of input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict,
                                                  opts_dict['nrand'])
        else:
            # Get the list of files
            in_files_temp = os.listdir(input_dir)
            in_files = sorted(in_files_temp)
        # Make sure we have enough
        num_files = len(in_files)
    else:
        print 'Input directory: ', input_dir, ' not found'
        sys.exit(2)

    #Partition the input file list
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Open the files in the input directory
    o_files = []
    for onefile in in_file_list:
        if (os.path.isfile(input_dir + '/' + onefile)):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...."
            sys.exit()

    print in_file_list

    # Store dimensions of the input fields
    if (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    # Make sure all files have the same dimensions
    for key in input_dims:
        if key == "z_t":
            nlev = input_dims["z_t"]
        elif key == "nlon":
            nlon = input_dims["nlon"]
        elif key == "nlat":
            nlat = input_dims["nlat"]

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if ( nlev != int(input_dims["z_t"]) or ( nlat != int(input_dims["nlat"]))\
              or ( nlon != int(input_dims["nlon"]))):
            print "Dimension mismatch between ", in_file_list[
                0], 'and', in_file_list[count], '!!!'
            sys.exit()

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if verbose:
        print "Creating ", this_sumfile, "  ..."
    if (me.get_rank() == 0):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'

        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if (verbose == True):
            print "Setting dimensions ....."
        nc_sumfile.create_dimension('nlat', nlat)
        nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('time', None)
        nc_sumfile.create_dimension('ens_size', opts_dict['npert'])
        nc_sumfile.create_dimension('nbin', opts_dict['nbin'])
        nc_sumfile.create_dimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.create_dimension('nvars3d', len(Var3d))
        nc_sumfile.create_dimension('nvars2d', len(Var2d))
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'POP verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev', ))
        v_vars = nc_sumfile.create_variable("vars", 'S1',
                                            ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1',
                                             ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1',
                                             ('nvars2d', 'str_size'))
        v_time = nc_sumfile.create_variable("time", 'd', ('time', ))
        v_ens_avg3d = nc_sumfile.create_variable(
            "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.create_variable(
            "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.create_variable(
            "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.create_variable(
            "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))

        v_RMSZ = nc_sumfile.create_variable(
            "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))
        if not opts_dict['zscoreonly']:
            v_gm = nc_sumfile.create_variable("global_mean", 'f',
                                              ('time', 'nvars', 'ens_size'))

        # Assign vars, var3d and var2d
        if (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        all_var_names = list(Var3d)
        all_var_names += Var2d
        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(Var3d)
        for i in range(l_eq):
            tt = list(Var3d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(Var2d)
        for i in range(l_eq):
            tt = list(Var2d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if (verbose == True):
            print "Assigning time invariant metadata ....."
        vars_dict = o_files[0].variables
        lev_data = vars_dict["z_t"]
        v_lev[:] = lev_data[:]

    # Time-varying metadata
    if verbose:
        print "Assigning time variant metadata ....."
    vars_dict = o_files[0].variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(), ))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    # Calculate global mean, average, standard deviation
    if verbose:
        print "Calculating global means ....."
    is_SE = False
    tslice = 0
    if not opts_dict['zscoreonly']:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
            o_files, Var3d, Var2d, is_SE, False, opts_dict)
    if verbose:
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    if (verbose == True):
        print "Calculating RMSZ scores ....."
    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
        o_files, Var3d, Var2d, is_SE, opts_dict)

    # Collect from all processors
    if opts_dict['mpi_enable']:
        # Gather the 3d variable results from all processors to the master processor
        # Gather global means 3d results
        if not opts_dict['zscoreonly']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            #print "before gather, gmall.shape=",gmall.shape
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(Var3d) + len(Var2d), len(o_files)))
        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(
            zmall, me,
            (me.get_size(), len(Var3d) + len(Var2d), len(o_files), nbin))
        #print 'zmall=',zmall

        #print "after gather, gmall.shape=",gmall.shape
        ens_avg3d = pyEnsLib.gather_npArray_pop(
            ens_avg3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(ens_avg2d, me,
                                                (me.get_size(), len(Var2d),
                                                 (nlat), nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(
            ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(ens_stddev2d, me,
                                                   (me.get_size(), len(Var2d),
                                                    (nlat), nlon))

    # Assign to file:
    if me.get_rank() == 0:
        #Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0)
        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]
        if not opts_dict['zscoreonly']:
            v_gm[:, :, :] = gmall[:, :, :]
        print "All done"
Ejemplo n.º 18
0
def var_nc3d(parameters=['Ttave'],
             ofile='MIT_output_3d',
             bswap=1,
             sstart_date="seconds since 2002-10-01 07:00",
             deltaT=1200,
             FillValue=-1.0e+23,
             dump="no"):
    '''
	Convert 3d fields produced by MITgcm to netCDF format using the Nio module.
	Variable names must be given as a list, even if there is only one variable.

	Everything is put on the C grid!

	The following files must be present in the directory where you run your code:

	XC.data
	XC.meta
	YC.data
	YC.meta
	DRC.data
	DRC.meta
	maskCtrlC.data
	maskCtrlC.meta

	Input:
	    parameters		- list with the names of the variables.
	    ofile		- name of the output file.
	    bswap		- do we need a byte swap? Yes (1) or no (0) [default 1]
	    sstart_date		- should be "seconds since ..." [default "seconds since 2002-10-01 07:00"]
	    deltaT		- time step in seconds
	    FillValue		- missing value
	    dump		- if dump='yes', return a numpy array with the data
	'''
    lon = mitbin2('XC.data', bswap)[0, 0, :, :]
    lat = mitbin2('YC.data', bswap)[0, 0, :, :]
    lev = mitbin2('DRC.data', bswap)[0, :, 0, 0]
    lev = numpy.cumsum(lev)
    lsmask = mitbin2('maskCtrlC.data', bswap)[:, :, :, :]

    fileList = glob.glob(parameters[0] + "*.data")

    if os.path.exists(ofile + ".nc") == True:
        os.system("rm " + ofile + ".nc")

    ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
        fileList[0][:-4] + "meta")

    ttime = numpy.zeros((len(fileList)))

    opt = Nio.options()
    opt.PreFill = False
    opt.HeaderReserveSpace = 4000
    f = Nio.open_file(ofile + ".nc", "w", opt)

    f.title = "MITgcm variables in netCDF format"
    f.create_dimension('x', xdim)
    f.create_dimension('y', ydim)
    f.create_dimension('z', zdim)
    f.create_dimension('time', ttime.shape[0])

    f.create_variable('time', 'd', ('time', ))
    f.variables['time'].units = sstart_date

    f.create_variable('z', 'd', ('z', ))
    f.variables['z'].units = "meters"
    f.variables['z'][:] = lev[:]

    f.create_variable('latitude', 'd', ('x', 'y'))
    f.variables['latitude'].long_name = "latitude"
    f.variables['latitude'].units = "degrees_north"
    f.variables['latitude'].standard_name = "grid_latitude"
    f.variables['latitude'][:] = lat[:]

    f.create_variable('longitude', 'd', ('x', 'y'))
    f.variables['longitude'].long_name = "longitude"
    f.variables['longitude'].units = "degrees_east"
    f.variables['longitude'].standard_name = "grid_longitude"
    f.variables['longitude'][:] = lon[:]

    #vvariables = ["atemp","aqh", "uwind", "vwind", ]
    #vvariables = ["atemp"]

    for parameter in parameters:
        f.create_variable(parameter, 'd', ('time', 'z', 'x', 'y'))

        f.variables[parameter].long_name = gatrib(parameter)[0]
        f.variables[parameter].units = gatrib(parameter)[1]
        f.variables[parameter]._FillValue = FillValue
        f.variables[parameter].missing_value = FillValue

        adatemp_final = numpy.zeros((len(fileList), zdim, xdim, ydim))

        for ind, fileName in enumerate(fileList):
            adatemp = mitbin2(parameter + fileName[-16:],
                              bswap=bswap)[:, :, :, :]
            ndim, xdim, ydim, zdim, datatype, nrecords, timeStepNumber = rmeta(
                fileName[:-4] + "meta")

            #	adatemp = numpy.where(adatemp[:] > 1.0e+12, 0, adatemp[:])
            adatemp = numpy.where(adatemp[:] < -1.0e+20, FillValue, adatemp[:])
            adatemp = numpy.where(lsmask[:] == 0, FillValue, adatemp[:])
            adatemp_final[ind, :, :, :] = adatemp
            ttime[ind] = timeStepNumber * deltaT

        f.variables[parameter][:] = adatemp_final

    # store the time axis computed from the model time steps
    f.variables['time'][:] = ttime[:]
    f.close()
    if dump == 'yes':
        return adatemp_final
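
# A hedged usage sketch: 'Stave' is a hypothetical second field, and the
# *.data/*.meta grid files listed in the docstring must sit in the working
# directory:
#
#   var_nc3d(parameters=['Ttave', 'Stave'], ofile='MIT_output_3d', bswap=1)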
Ejemplo n.º 19
0
import numpy as np
import Nio

fn = "MSG3-SEVI-MSG15-0100-NA-20130521001244.164000000Z-1074164.h5"
opt = Nio.options()
opt.FileStructure = "advanced"
f = Nio.open_file(fn, "r", options=opt)
# f = Nio.open_file(fn)
print f.variables.keys()

# print f.groups
# n = 0
# for key in f.groups.keys():
#    n += 1
#    print "groun %d: <%s>" %(n, key)

# g = f.groups['/U_MARF/MSG/Level1_5/DATA/Channel_07']
g = f.groups["U-MARF/MSG/Level1.5/DATA/Channel 07"]
print g

palette = g.variables["Palette"]
print palette

print "\nLineSideInfo_DESCR:"
lsid = g.variables["LineSideInfo_DESCR"][:]
print lsid[:]
dims = lsid.shape
for n in xrange(dims[0]):
    name = str(lsid[:][n][0])
    value = str(lsid[:][n][1])
    print name, "=", value
Ejemplo n.º 20
0
def create_ave_file(my_file,
                    outfile,
                    hist_string,
                    ncformat,
                    years,
                    collapse_dim=''):
    '''
    Opens up/Creates a new file to put the computed averages into.

    @param my_file       A sampled input file pointer.

    @param outfile       Filename of the new output/average file.

    @param hist_string   A string that contains the file history for the history attribute

    @param ncformat      Format to write the NetCDF file out as.

    @param years         Year range of the average; stored in the 'yrs_averaged' attribute.

    @param collapse_dim  Dimension to collapse across

    @return new_file     Returns a file pointer to the newly opened file.
    '''
    dims = my_file.dimensions
    attr = my_file.attributes
    vars = {}

    new_file_name = outfile
    # Set pyNIO netcdf file options
    opt = Nio.options()
    # The netcdf output format
    if (ncformat == 'netcdf4c'):
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = 1
    elif (ncformat == 'netcdf4'):
        opt.Format = 'NetCDF4Classic'
    elif (ncformat == 'netcdf'):
        opt.Format = 'Classic'
    elif (ncformat == 'netcdfLarge'):
        opt.Format = '64BitOffset'
    else:
        print "WARNING: Seltected netcdf file format (", ncformat, ") is not recongnized."
        print "Defaulting to netcdf3Classic format."
        opt.Format = 'Classic'
    opt.PreFill = False
    new_file = Nio.open_file(new_file_name,
                             "w",
                             options=opt,
                             history=hist_string)
    #setattr(new_file,'yrs_averaged',years)

    # Create attributes, dimensions, and variables
    for n, v in attr.items():
        if n == 'history':
            v = hist_string + '\n' + v
        setattr(new_file, n, v)
    for var_d, l in dims.items():
        if var_d == "time":
            if "time" not in collapse_dim:
                new_file.create_dimension(var_d, None)
        else:
            if var_d not in collapse_dim:
                new_file.create_dimension(var_d, l)
    setattr(new_file, 'yrs_averaged', years)
    return new_file
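
# A hedged usage sketch (the input file name, history string, and year range
# are made up):
#
#   my_file = Nio.open_file('my_input.nc', 'r')
#   avg_file = create_ave_file(my_file, 'my_avg.nc', 'created by averager',
#                              'netcdf4c', '0001-0010', collapse_dim='time')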
Ejemplo n.º 21
0
 def setUp(self):
     do_setup(filename)
     opt = Nio.options()
     opt.MaskedArrayMode = 'MaskedExplicit'
     self.f = Nio.open_file(filename, options=opt)
Ejemplo n.º 22
0
def main(argv):


    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try: 
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict={}
    
    # Defaults
    opts_dict['tag'] = 'cesm2_0_beta08'
    opts_dict['compset'] = 'F2000'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments 
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,'ES',opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)


    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] and opts_dict['res']):
       print 'Please specify --tag, --compset, --mach and --res options'
       sys.exit()
       
    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist=[]
    inc_varlist=[]

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])
    
    if me.get_rank() == 0:
       print 'Running pyEnsSum!'

    if me.get_rank() ==0 and (verbose == True):
        print opts_dict
        print 'Ensemble size for summary = ', esize

    exclude=False
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist=[]
            # Read in the excluded or included var list
            ex_varlist,exclude=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')
            if exclude == False:
               inc_varlist=ex_varlist
               ex_varlist=[]
            # Read in the included var list
            #inc_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')

    # Broadcast the excluded var list to each processor
    #if opts_dict['mpi_enable']:
    #   ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)
    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude=me.partition(exclude,func=Duplicate(),involved=True)
        if exclude:
           ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)
        else:
           inc_varlist=me.partition(inc_varlist,func=Duplicate(),involved=True)
        
    in_files=[]
    if(os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files=sorted(in_files_temp)

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank()==0 and (verbose == True):
            print 'Number of files in input directory = ', num_files
        if (num_files < esize):
            if me.get_rank()==0 and (verbose == True):
               print 'Number of files in input directory (',num_files,\
                ') is less than specified ensemble size of ', esize
            sys.exit(2)
        if (num_files > esize):
            if me.get_rank()==0 and (verbose == True):
               print 'NOTE: Number of files in ', input_dir, \
                'is greater than specified ensemble size of ', esize ,\
                '\nwill just use the first ',  esize, 'files'
    else:
        if me.get_rank()==0:
           print 'Input directory: ',input_dir,' not found'
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
           in_files_list=get_cumul_filelist(opts_dict,opts_dict['indir'],opts_dict['regx'])
        in_files=me.partition(in_files_list,func=EqualLength(),involved=True)
        if me.get_rank()==0 and (verbose == True):
           print 'in_files=',in_files

    # Open the files in the input directory
    o_files=[]
    if me.get_rank() == 0 and opts_dict['verbose']:
       print 'Input files are: '
       print "\n".join(in_files)
       #for i in in_files:
       #    print "in_files =",i
    for onefile in in_files[0:esize]:
        if (os.path.isfile(input_dir+'/' + onefile)):
            o_files.append(Nio.open_file(input_dir+'/' + onefile,"r"))
        else:
            if me.get_rank()==0:
               print "COULD NOT LOCATE FILE "+ input_dir + onefile + "! EXITING...."
            sys.exit() 

    # Store dimensions of the input fields
    if me.get_rank()==0 and (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey=''
    latkey=''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ilev":
            nilev = input_dims["ilev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key =="lon"):
            nlon = input_dims[key]
            lonkey=key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey=key
        
    if (nlev == -1) : 
        if me.get_rank()==0: 
           print "COULD NOT LOCATE valid dimension lev => EXITING...."
        sys.exit() 

    if (( ncol == -1) and ((nlat == -1) or (nlon == -1))):
        if me.get_rank()==0: 
           print "Need either lat/lon or ncol  => EXITING...."
        sys.exit()            

    # Check if this is SE or FV data
    if (ncol != -1):
        is_SE = True 
    else:
        is_SE = False    

    # Make sure all files have the same dimensions
    if me.get_rank()==0 and (verbose == True):
        print "Checking dimensions across files...."
        print 'lev = ', nlev
        if (is_SE == True):
            print 'ncol = ', ncol
        else:
            print 'nlat = ', nlat
            print 'nlon = ', nlon

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions     
        if (is_SE == True):
            if ( nlev != int(input_dims["lev"]) or ( ncol != int(input_dims["ncol"]))):
                if me.get_rank() == 0:
                   print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
                sys.exit() 
        else:
            if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\
                  or ( nlon != int(input_dims[lonkey]))): 
                if me.get_rank() == 0:
                   print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
                sys.exit() 

    # Get 2d vars, 3d vars and all vars (For now include all variables) 
    vars_dict_all = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    #print len(vars_dict_all)
    if exclude:
        vars_dict=vars_dict_all
        for i in ex_varlist:
          if i in vars_dict:
            del vars_dict[i]
    #Given an included var list, remove all float var that are not on the list
    else:
        vars_dict=vars_dict_all.copy()
        for k,v in vars_dict_all.iteritems():
           if (k not in inc_varlist) and (vars_dict_all[k].typecode()=='f'):
            #print vars_dict_all[k].typecode()
            #print k
            del vars_dict[k]
 
    num_vars = len(vars_dict)
    #print num_vars
    #if me.get_rank() == 0:
    #   for k,v in vars_dict.iteritems():
    #       print 'vars_dict',k,vars_dict[k].typecode()

    str_size = 0

    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size 
    for k,v in vars_dict.iteritems():  
        var = k
        vd = v.dimensions # all the variable's dimensions (names)
        vr = v.rank # num dimension
        vs = v.shape # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True): # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):  
                is_2d = True 
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev )):  
                is_3d = True 
                num_3d += 1
        else: # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):  
                is_2d = True 
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and (vs[1] == nlev or vs[1]==nilev ))):  
                is_3d = True 
                num_3d += 1
                    
        if (is_3d == True) :
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif  (is_2d == True):    
            str_size = max(str_size, len(k))
            d2_var_names.append(k)
        #else:
        #    print 'var=',k

    if me.get_rank() == 0 and (verbose == True):
        print 'Number of variables found:  ', num_3d+num_2d
        print '3D variables: '+str(num_3d)+', 2D variables: '+str(num_2d)

    # Now sort these and combine (this sorts caps first, then lower case - 
    # which is what we want)
    d2_var_names.sort()       
    d3_var_names.sort()

    if esize<num_2d+num_3d:
       if me.get_rank()==0:
          print "************************************************************************************************************************************"
          print "  Error: the total number of 3D and 2D variables "+str(num_2d+num_3d)+" is larger than the number of ensemble files "+str(esize)
          print "  Cannot generate ensemble summary file, please remove more variables from your included variable list,"
          print "  or add more varaibles in your excluded variable list!!!"
          print "************************************************************************************************************************************"
       sys.exit()
    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    #if me.get_rank() == 0 and (verbose == True):
    #    print 'num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")"

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if me.get_rank() == 0 and (verbose == True):
        print "Creating ", this_sumfile, "  ..."
    if(me.get_rank() ==0 | opts_dict["popens"]):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if me.get_rank() == 0 and (verbose == True):
            print "Setting dimensions ....."
        if (is_SE == True):
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if me.get_rank() == 0 and (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date',now)
        setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"]) 
        setattr(nc_sumfile, 'compset', opts_dict["compset"]) 
        setattr(nc_sumfile, 'resolution', opts_dict["res"]) 
        setattr(nc_sumfile, 'machine', opts_dict["mach"]) 

        # Create variables
        if me.get_rank() == 0 and (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            if (is_SE == True):
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol'))
            else:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))

            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size'))
        v_standardized_gm=nc_sumfile.create_variable("standardized_gm",'f',('nvars','ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm','f',('nvars','nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm','f',('nvars',))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm','f',('nvars',))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm','f',('nvars',))


        # Assign vars, var3d and var2d
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names =[]
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ')*(str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ')*(str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ')*(str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        v_lev[:] = lev_data[:]

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable; the max norm is also computed (currently done in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list
        
        var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True)
        var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True)
    else:
        var3_list_loc=d3_var_names
        var2_list_loc=d2_var_names

    # Calculate global means #
    if me.get_rank() == 0 and (verbose == True):
        print "Calculating global means ....."
    if not opts_dict['cumul']:
        gm3d,gm2d,var_list = pyEnsLib.generate_global_mean_for_summary(o_files,var3_list_loc,var2_list_loc , is_SE, False,opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print "Finish calculating global means ....."

    # Calculate RMSZ scores  
    if (not opts_dict['gmonly']) | (opts_dict['cumul']):
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating RMSZ scores ....."
        zscore3d,zscore2d,ens_avg3d,ens_stddev3d,ens_avg2d,ens_stddev2d,temp1,temp2=pyEnsLib.calc_rmsz(o_files,var3_list_loc,var2_list_loc,is_SE,opts_dict)    

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict,var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict,var2_list_loc)

    if opts_dict['mpi_enable'] & ( not opts_dict['popens']):

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index=get_stride_list(len(d3_var_names),me)
         
            # Gather global means 3d results
            gm3d=gather_npArray(gm3d,me,slice_index,(len(d3_var_names),len(o_files)))
            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d=gather_npArray(zscore3d,me,slice_index,(len(d3_var_names),len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d=get_shape(ens_avg3d.shape,len(d3_var_names),me.get_rank())
                ens_avg3d=gather_npArray(ens_avg3d,me,slice_index,shape_tuple3d) 
                ens_stddev3d=gather_npArray(ens_stddev3d,me,slice_index,shape_tuple3d) 

            # Gather 2d variable results from all processors to the master processor
            slice_index=get_stride_list(len(d2_var_names),me)

            # Gather global means 2d results
            gm2d=gather_npArray(gm2d,me,slice_index,(len(d2_var_names),len(o_files)))

            var_list=gather_list(var_list,me)

            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d=gather_npArray(zscore2d,me,slice_index,(len(d2_var_names),len(o_files)))

                # Gather ens_avg2d and ens_stddev2d results
                shape_tuple2d=get_shape(ens_avg2d.shape,len(d2_var_names),me.get_rank())
                ens_avg2d=gather_npArray(ens_avg2d,me,slice_index,shape_tuple2d) 
                ens_stddev2d=gather_npArray(ens_stddev2d,me,slice_index,shape_tuple2d) 

        else:
            gmall=np.concatenate((temp1,temp2),axis=0)
            gmall=pyEnsLib.gather_npArray_pop(gmall,me,(me.get_size(),len(d3_var_names)+len(d2_var_names)))
    # Assign to file:
    if (me.get_rank() == 0) or opts_dict['popens']:
        if not opts_dict['cumul']:
            gmall=np.concatenate((gm3d,gm2d),axis=0)
            if not opts_dict['gmonly']:
                Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0)
                v_RMSZ[:,:]=Zscoreall[:,:]
            if not opts_dict['gmonly']:
                if (is_SE == True):
                    v_ens_avg3d[:,:,:]=ens_avg3d[:,:,:]
                    v_ens_stddev3d[:,:,:]=ens_stddev3d[:,:,:]
                    v_ens_avg2d[:,:]=ens_avg2d[:,:]
                    v_ens_stddev2d[:,:]=ens_stddev2d[:,:]
                else:
                    v_ens_avg3d[:,:,:,:]=ens_avg3d[:,:,:,:]
                    v_ens_stddev3d[:,:,:,:]=ens_stddev3d[:,:,:,:]
                    v_ens_avg2d[:,:,:]=ens_avg2d[:,:,:]
                    v_ens_stddev2d[:,:,:]=ens_stddev2d[:,:,:]
        else:
            gmall_temp=np.transpose(gmall[:,:])
            gmall=gmall_temp
        mu_gm,sigma_gm,standardized_global_mean,loadings_gm,scores_gm=pyEnsLib.pre_PCA(gmall,all_var_names,var_list,me)
        v_gm[:,:]=gmall[:,:]
        v_standardized_gm[:,:]=standardized_global_mean[:,:]
        v_mu_gm[:]=mu_gm[:]
        v_sigma_gm[:]=sigma_gm[:].astype(np.float32)
        v_loadings_gm[:,:]=loadings_gm[:,:]
        v_sigma_scores_gm[:]=scores_gm[:]

        if me.get_rank() == 0:
           print "All Done"
Ejemplo n.º 23
0
def main(argv):

    print "Running pyEnsSum!"

    # Get command line stuff and store in a dictionary
    s = "tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex="
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict["tag"] = ""
    opts_dict["compset"] = ""
    opts_dict["mach"] = ""
    opts_dict["esize"] = 151
    opts_dict["tslice"] = 0
    opts_dict["res"] = ""
    opts_dict["sumfile"] = "ens.summary.nc"
    opts_dict["indir"] = "./"
    opts_dict["sumfiledir"] = "./"
    opts_dict["jsonfile"] = ""
    opts_dict["verbose"] = True
    opts_dict["mpi_enable"] = False
    opts_dict["maxnorm"] = False
    opts_dict["gmonly"] = False
    opts_dict["popens"] = False
    opts_dict["cumul"] = False
    opts_dict["regx"] = "test"
    opts_dict["startMon"] = 1
    opts_dict["endMon"] = 1
    opts_dict["fIndex"] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, "ES", opts_dict)

    verbose = opts_dict["verbose"]

    st = opts_dict["esize"]
    esize = int(st)

    if verbose == True:
        print opts_dict
        print "Ensemble size for summary = ", esize

    if not (opts_dict["tag"] and opts_dict["compset"] and opts_dict["mach"] or opts_dict["res"]):
        print "Please specify --tag, --compset, --mach and --res options"
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict["indir"]
    # The var list that will be excluded
    ex_varlist = []

    # Create a mpi simplecomm object
    if opts_dict["mpi_enable"]:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict["mpi_enable"])

    if me.get_rank() == 0:
        if opts_dict["jsonfile"]:
            # Read in the excluded var list
            ex_varlist = pyEnsLib.read_jsonlist(opts_dict["jsonfile"], "ES")

    # Broadcast the excluded var list to each processor
    if opts_dict["mpi_enable"]:
        ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)

    in_files = []
    if os.path.exists(input_dir):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)
        # Make sure we have enough
        num_files = len(in_files)
        if verbose == True:
            print "Number of files in input directory = ", num_files
        if num_files < esize:
            print "Number of files in input directory (", num_files, ") is less than specified ensemble size of ", esize
            sys.exit(2)
        if num_files > esize:
            print "NOTE: Number of files in ", input_dir, "is greater than specified ensemble size of ", esize, "\nwill just use the first ", esize, "files"
    else:
        print "Input directory: ", input_dir, " not found"
        sys.exit(2)

    if opts_dict["cumul"]:
        if opts_dict["regx"]:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict["indir"], opts_dict["regx"])
        in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
        if me.get_rank() == 0:
            print "in_files=", in_files

    # Open the files in the input directory
    o_files = []
    for onefile in in_files[0:esize]:
        if os.path.isfile(input_dir + "/" + onefile):
            o_files.append(Nio.open_file(input_dir + "/" + onefile, "r"))
        else:
            print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...."
            sys.exit()

    # Store dimensions of the input fields
    if verbose == True:
        print "Getting spatial dimensions"
    nlev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ""
    latkey = ""
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key == "lon"):
            nlon = input_dims[key]
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey = key

    if nlev == -1:
        print "COULD NOT LOCATE valid dimension lev => EXITING...."
        sys.exit()

    if (ncol == -1) and ((nlat == -1) or (nlon == -1)):
        print "Need either lat/lon or ncol  => EXITING...."
        sys.exit()

    # Check if this is SE or FV data
    if ncol != -1:
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if verbose == True:
        print "Checking dimensions across files...."
        print "lev = ", nlev
        if is_SE == True:
            print "ncol = ", ncol
        else:
            print "nlat = ", nlat
            print "nlon = ", nlon

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if is_SE == True:
            if nlev != int(input_dims["lev"]) or (ncol != int(input_dims["ncol"])):
                print "Dimension mismatch between ", in_files[0], "and", in_files[0], "!!!"
                sys.exit()
        else:
            if nlev != int(input_dims["lev"]) or (nlat != int(input_dims[latkey])) or (nlon != int(input_dims[lonkey])):
                print "Dimension mismatch between ", in_files[0], "and", in_files[0], "!!!"
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    if ex_varlist:
        for i in ex_varlist:
            del vars_dict[i]
    num_vars = len(vars_dict)
    if verbose == True:
        print "Number of variables (including metadata) found =  ", num_vars
    str_size = 0

    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.iteritems():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = v.rank  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if is_SE == True:  # (time, lev, ncol) or (time, ncol)
            if (vr == 2) and (vs[1] == ncol):
                is_2d = True
                num_2d += 1
            elif (vr == 3) and (vs[2] == ncol and vs[1] == nlev):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if (vr == 3) and (vs[1] == nlat and vs[2] == nlon):
                is_2d = True
                num_2d += 1
            elif (vr == 4) and (vs[2] == nlat and vs[3] == nlon and vs[1] == nlev):
                is_3d = True
                num_3d += 1
        if is_3d == True:
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif is_2d == True:
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    if verbose == True:
        print "num vars = ", n_all_var_names, "(3d = ", num_3d, " and 2d = ", num_2d, ")"

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if verbose == True:
        print "Creating ", this_sumfile, "  ..."
    if me.get_rank() == 0 | opts_dict["popens"]:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        opt = Nio.options()
        opt.PreFill = False
        opt.Format = "NetCDF4Classic"
        nc_sumfile = Nio.open_file(this_sumfile, "w", options=opt)

        # Set dimensions
        if verbose == True:
            print "Setting dimensions ....."
        if is_SE == True:
            nc_sumfile.create_dimension("ncol", ncol)
        else:
            nc_sumfile.create_dimension("nlat", nlat)
            nc_sumfile.create_dimension("nlon", nlon)
        nc_sumfile.create_dimension("nlev", nlev)
        nc_sumfile.create_dimension("ens_size", esize)
        nc_sumfile.create_dimension("nvars", num_3d + num_2d)
        nc_sumfile.create_dimension("nvars3d", num_3d)
        nc_sumfile.create_dimension("nvars2d", num_2d)
        nc_sumfile.create_dimension("str_size", str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose == True:
            print "Setting global attributes ....."
        setattr(nc_sumfile, "creation_date", now)
        setattr(nc_sumfile, "title", "CAM verification ensemble summary file")
        setattr(nc_sumfile, "tag", opts_dict["tag"])
        setattr(nc_sumfile, "compset", opts_dict["compset"])
        setattr(nc_sumfile, "resolution", opts_dict["res"])
        setattr(nc_sumfile, "machine", opts_dict["mach"])

        # Create variables
        if verbose == True:
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", "f", ("nlev",))
        v_vars = nc_sumfile.create_variable("vars", "S1", ("nvars", "str_size"))
        v_var3d = nc_sumfile.create_variable("var3d", "S1", ("nvars3d", "str_size"))
        v_var2d = nc_sumfile.create_variable("var2d", "S1", ("nvars2d", "str_size"))
        if not opts_dict["gmonly"]:
            if is_SE == True:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", "f", ("nvars3d", "nlev", "ncol"))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", "f", ("nvars3d", "nlev", "ncol"))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", "f", ("nvars2d", "ncol"))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", "f", ("nvars2d", "ncol"))
            else:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", "f", ("nvars3d", "nlev", "nlat", "nlon"))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", "f", ("nvars3d", "nlev", "nlat", "nlon"))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", "f", ("nvars2d", "nlat", "nlon"))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", "f", ("nvars2d", "nlat", "nlon"))

            v_RMSZ = nc_sumfile.create_variable("RMSZ", "f", ("nvars", "ens_size"))
        v_gm = nc_sumfile.create_variable("global_mean", "f", ("nvars", "ens_size"))
        v_loadings_gm = nc_sumfile.create_variable("loadings_gm", "f", ("nvars", "nvars"))
        v_mu_gm = nc_sumfile.create_variable("mu_gm", "f", ("nvars",))
        v_sigma_gm = nc_sumfile.create_variable("sigma_gm", "f", ("nvars",))
        v_sigma_scores_gm = nc_sumfile.create_variable("sigma_scores_gm", "f", ("nvars",))

        # Assign vars, var3d and var2d
        if verbose == True:
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(" ") * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(" ") * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(" ") * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if verbose == True:
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        v_lev[:] = lev_data[:]

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable; we also compute the max norm (currently done in pyStats)
    tslice = opts_dict["tslice"]

    if not opts_dict["cumul"]:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True)
        var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # Calculate global means #
    if verbose == True:
        print "Calculating global means ....."
    if not opts_dict["cumul"]:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
            o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict
        )
    if verbose == True:
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    if verbose == True:
        print "Calculating RMSZ scores ....."
    if (not opts_dict["gmonly"]) | (opts_dict["cumul"]):
        zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
            o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict
        )

    # Calculate max norm ensemble
    if opts_dict["maxnorm"]:
        if verbose == True:
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict["mpi_enable"] & (not opts_dict["popens"]):

        if not opts_dict["cumul"]:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index, (len(d3_var_names), len(o_files)))

            if not opts_dict["gmonly"]:
                # Gather zscore3d results
                zscore3d = gather_npArray(zscore3d, me, slice_index, (len(d3_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names), me.get_rank())
                ens_avg3d = gather_npArray(ens_avg3d, me, slice_index, shape_tuple3d)
                ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index, shape_tuple3d)

            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index, (len(d2_var_names), len(o_files)))

            if not opts_dict["gmonly"]:
                # Gather zscore2d results
                zscore2d = gather_npArray(zscore2d, me, slice_index, (len(d2_var_names), len(o_files)))

                # Gather ens_avg2d and ens_stddev2d results
                shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names), me.get_rank())
                ens_avg2d = gather_npArray(ens_avg2d, me, slice_index, shape_tuple2d)
                ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index, shape_tuple2d)

        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names)))
    # Assign to file:
    if me.get_rank() == 0 | opts_dict["popens"]:
        if not opts_dict["cumul"]:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            if not opts_dict["gmonly"]:
                Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0)
                v_RMSZ[:, :] = Zscoreall[:, :]
            if not opts_dict["gmonly"]:
                if is_SE == True:
                    v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :]
                    v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :]
                    v_ens_avg2d[:, :] = ens_avg2d[:, :]
                    v_ens_stddev2d[:, :] = ens_stddev2d[:, :]
                else:
                    v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :]
                    v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :]
                    v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :]
                    v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :]
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(gmall)
        v_gm[:, :] = gmall[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:].astype(np.float32)
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        print "All Done"
Ejemplo n.º 24
0
def combine_met_data(rootdir, date, ofilepath):

    #
    # Set the dirs based on date
    #
    day = timedelta(days=1)
    date0 = date - day

    dir0 = rootdir + "/" + date0.strftime("Y%Y/M%m/D%d")
    dir1 = rootdir + "/" + date.strftime("Y%Y/M%m/D%d")

    #
    # Set the PreFill option to False to improve writing performance
    #
    opt = Nio.options()
    opt.PreFill = False

    #
    # Options for writing NetCDF4 "classic" file.
    #
    # If Nio wasn't built with netcdf 4 support, you will get a
    # warning here, and the code will use netcdf 3 instead.
    #
    opt.Format = "netcdf4classic"
    #opt.Format = "LargeFile"

    vrt_file = Nio.open_file(
        '/glade/p/acom/acom-climate/fvitt/GEOS/GEOS5_orig_res_20180715.nc',
        mode='r')

    # define vertical coordinate

    yyyymmdd = date.strftime("%Y%m%d")
    os.system("/bin/rm -f " + ofilepath)
    now = datetime.now()
    hist_str = 'created by combine_met_data.py : ' + now.strftime(
        "%a %d %b %Y %H:%M:%S")
    out_file = Nio.open_file(ofilepath,
                             mode='c',
                             options=opt,
                             history=hist_str)

    # vertical dimension ...

    # define dimensions and ALL variables before writing the data ....

    length = vrt_file.dimensions["lev"]
    out_file.create_dimension("lev", length)

    length = vrt_file.dimensions["ilev"]
    out_file.create_dimension("ilev", length)

    # define horizontal coordinates

    hrz_file = Nio.open_file(
        '/glade/p/acom/acom-climate/fvitt/GEOS/GEOS.fp.asm.const_2d_asm_Nx.00000000_0000.V01.nc4'
    )

    length = hrz_file.dimensions["lat"]
    out_file.create_dimension("lat", length)

    length = hrz_file.dimensions["lon"]
    out_file.create_dimension("lon", length)

    # time dimension ...

    out_file.create_dimension("time", None)

    refdate = datetime(1900, 1, 1)

    dims = ('time', )
    out_file.create_variable("time", 'd', dims)
    setattr(out_file.variables['time'], 'units', 'days')
    setattr(out_file.variables['time'], 'long_name',
            'days since ' + refdate.strftime("%d %b %Y %H:%M:%S"))

    out_file.create_variable("date", 'i', dims)
    setattr(out_file.variables["date"], 'units', 'current date (YYYYMMDD)')
    setattr(out_file.variables["date"], 'long_name', 'current date (YYYYMMDD)')

    out_file.create_variable("datesec", 'i', dims)
    setattr(out_file.variables["datesec"], 'units', 'seconds')
    setattr(out_file.variables["datesec"], 'long_name',
            'current seconds of current date')

    vrt_vars = ["lev", "ilev", "hyam", "hybm", "hyai", "hybi"]
    for var in vrt_vars:
        type = vrt_file.variables[var].typecode()
        vdims = vrt_file.variables[var].dimensions
        out_file.create_variable(var, type, vdims)
        varatts = vrt_file.variables[var].__dict__.keys()
        for att in varatts:
            val = getattr(vrt_file.variables[var], att)
            setattr(out_file.variables[var], att, val)

    hrz_vars = ["lon", "lat", "PHIS"]
    for var in hrz_vars:
        type = hrz_file.variables[var].typecode()
        vdims = hrz_file.variables[var].dimensions
        out_file.create_variable(var, type, vdims)
        varatts = hrz_file.variables[var].__dict__.keys()
        for att in varatts:
            val = getattr(hrz_file.variables[var], att)
            setattr(out_file.variables[var], att, val)

    type = hrz_file.variables["FRLAND"].typecode()
    vdims = hrz_file.variables["FRLAND"].dimensions
    out_file.create_variable("ORO", type, vdims)
    varatts = hrz_file.variables["FRLAND"].__dict__.keys()
    for att in varatts:
        val = getattr(hrz_file.variables["FRLAND"], att)
        setattr(out_file.variables["ORO"], att, val)

    tavg_flx_vars = {
        'HFLUX': 'SHFLX',
        'TAUX': 'TAUX',
        'TAUY': 'TAUY',
        'EVAP': 'QFLX'
    }  # flx
    tavg_flx_filem = glob.glob(dir0 +
                               '/GEOS.fp.asm.tavg1_2d_flx_Nx.*_2330.V01.nc4')
    define_flds(tavg_flx_vars, tavg_flx_filem, out_file)

    tavg_rad_vars = {'ALBEDO': 'ALB', 'TS': 'TS', 'SWGDN': 'FSDS'}  # rad
    tavg_rad_filem = glob.glob(dir0 +
                               '/GEOS.fp.asm.tavg1_2d_rad_Nx.*_2330.V01.nc4')
    define_flds(tavg_rad_vars, tavg_rad_filem, out_file)

    tavg_lnd_vars = {'GWETTOP': 'SOILW', 'SNOMAS': 'SNOWH'}  # lnd
    tavg_lnd_filem = glob.glob(dir0 +
                               '/GEOS.fp.asm.tavg1_2d_lnd_Nx.*_2330.V01.nc4')
    define_flds(tavg_lnd_vars, tavg_lnd_filem, out_file)

    inst_vars = {'PS': 'PS', 'T': 'T', 'U': 'U', 'V': 'V', 'QV': 'Q'}
    inst_files = glob.glob(dir1 + '/GEOS.fp.asm.inst3_3d_asm_Nv.*.nc4')
    inst_files.sort()
    define_flds(inst_vars, inst_files, out_file)

    # definitions should be done at this point

    # Write coordinate dimension variables first

    for var in vrt_vars:
        if vrt_file.dimensions.keys().count(var) > 0:
            v = vrt_file.variables[var].get_value()
            out_file.variables[var].assign_value(v)

    for var in vrt_vars:
        if vrt_file.dimensions.keys().count(var) == 0:
            v = vrt_file.variables[var].get_value()
            out_file.variables[var].assign_value(v)

    vrt_file.close()

    # set time/date data ...

    times = [i * 3 for i in range(8)]  # hours
    days = list()
    datesecs = list()

    for hr in times:
        d = datetime(date.year, date.month, date.day, hr, 0, 0)
        dd = d - refdate
        days.append(dd.days + (dd.seconds / 86400.0))
        datesecs.append(dd.seconds)

    out_file.variables['time'].assign_value(days)

    out_file.variables['date'].assign_value(int(yyyymmdd))

    out_file.variables['datesec'].assign_value(datesecs)

    var = "lat"
    v = hrz_file.variables[var].get_value()
    out_file.variables[var].assign_value(v)

    var = "lon"
    v = hrz_file.variables[var].get_value()

    # want longitudes from 0 to 360 (rather than -180 to 180)
    neglons = numpy.where(v < 0.0)
    nroll = neglons[0][-1] + 1
    lons = numpy.roll(v, nroll)
    lons = numpy.where(lons < 0., lons + 360., lons)
    lons = numpy.where(lons < 1.e-3, 0.,
                       lons)  # GEOS data has a small value rather than zero
    out_file.variables[var].assign_value(lons)

    for var in hrz_vars:
        if hrz_file.dimensions.keys().count(var) == 0:
            v = hrz_file.variables[var].get_value()
            v = numpy.roll(v, nroll, axis=2)
            v = numpy.tile(v, (8, 1, 1))
            out_file.variables[var].assign_value(v)

    files = glob.glob(dir1 + '/GEOS.fp.asm.tavg1_2d_flx_Nx.*.nc4')
    files.sort()
    filepaths = tavg_flx_filem + files
    write_tavg_flds(tavg_flx_vars, filepaths, nroll, out_file)

    # special code for ORO
    ivar = 'FRSEAICE'
    for n in range(1, 24, 3):
        filem = Nio.open_file(filepaths[n - 1])
        filep = Nio.open_file(filepaths[n])

        valm = filem.variables[ivar].get_value()
        valp = filep.variables[ivar].get_value()
        ndims = filep.variables[ivar].rank
        vala = 0.5 * (valm + valp)
        vala = numpy.roll(vala, nroll, ndims - 1)
        if n > 1:
            val = numpy.append(val, vala, axis=0)
        else:
            val = vala

    seaice = val

    v = hrz_file.variables["FRLAND"].get_value()
    v = numpy.roll(v, nroll, axis=2)
    v = numpy.tile(v, (8, 1, 1))
    #v = numpy.where(v==2, 1, v)
    v = numpy.where(seaice > 0.5, 2, v)
    out_file.variables["ORO"].assign_value(v)

    hrz_file.close()

    files = glob.glob(dir1 + '/GEOS.fp.asm.tavg1_2d_rad_Nx.*.nc4')
    files.sort()
    filepaths = tavg_rad_filem + files
    write_tavg_flds(tavg_rad_vars, filepaths, nroll, out_file)

    files = glob.glob(dir1 + '/GEOS.fp.asm.tavg1_2d_lnd_Nx.*.nc4')
    files.sort()
    filepaths = tavg_lnd_filem + files
    write_tavg_flds(tavg_lnd_vars, filepaths, nroll, out_file)

    # instantaneous fields ....
    write_inst_flds(inst_vars, inst_files, nroll, out_file)

    out_file.close()
    return True
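combine_met_data calls three helpers that are not shown here (define_flds, write_tavg_flds, write_inst_flds). As a hedged sketch only: define_flds plausibly declares each output variable and copies its attributes from the first input file, along these lines (the body is our guess, not the original implementation):

def define_flds(var_map, filepaths, out_file):
    # var_map maps a GEOS-5 variable name to its CAM output name;
    # only the first file is needed to learn types, dims and attributes
    src = Nio.open_file(filepaths[0])
    for gname, cname in var_map.iteritems():
        v = src.variables[gname]
        out_file.create_variable(cname, v.typecode(), v.dimensions)
        for att in v.__dict__.keys():
            setattr(out_file.variables[cname], att, getattr(v, att))
    src.close()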
Ejemplo n.º 25
0
 def setUp(self):
     do_setup(filename)
     opt = Nio.options()
     opt.MaskedArrayMode = "MaskedIfFillAttAndValue"
     self.f = Nio.open_file(filename, options=opt)
Ejemplo n.º 26
0
    def open_new_file(self, file_name, info=None,
                      var_name='X',
                      long_name=None,
                      units_name='None',
                      dtype='float64',
                      ### dtype='float64'
                      time_units='minutes',
                      comment='',
                      shape=(1,1,1),
                      res=(1.,1.,1.),
                      MAKE_RTI=True, MAKE_BOV=False):
            
        #--------------------------------------------------
        # Try to import the Nio module from PyNIO package
        #--------------------------------------------------
        Nio = self.import_nio ()
        if not Nio:
            return False

        #----------------------------
        # Does file already exist ?
        #----------------------------
        file_name = file_utils.check_overwrite( file_name )
        self.file_name = file_name
        
        #---------------------------------------
        # Check and store the grid information
        #---------------------------------------
        self.format     = 'nccs'
        self.file_name  = file_name
        self.time_index = 0
        self.var_name   = var_name
        self.shape      = shape
        self.res        = res
        
        if (long_name is None):
            long_name = var_name
        self.long_name  = long_name
        self.units_name = units_name
        self.dtype      = dtype

        #-----------------------------------
        # Get Nio type code for this dtype
        #------------------------------------
        nio_type_map  = self.get_nio_type_map()        
        nio_type_code = nio_type_map[ dtype.lower() ]        
        self.nio_type_code = nio_type_code
        
        #-------------------------------------
        # Open a new netCDF file for writing
        #-------------------------------------
        # Sample output from time.asctime():
        #     "Thu Oct  8 17:10:18 2009"
        #-------------------------------------
        opt = Nio.options()
        opt.PreFill = False            # (for efficiency)
        opt.HeaderReserveSpace = 4000  # (4000 bytes, for efficiency)
        history = "Created using PyNIO " + Nio.__version__ + " on "
        history = history + time.asctime() + ". " 
        history = history + comment
        # print 'MADE IT PAST history BLOCK'
        
        try:
            nccs_unit = Nio.open_file (file_name, mode="w",
                                       options=opt, history=history)
            OK = True
        except:
            OK = False
            return OK

        #----------------------------------------------
        # Create grid dimensions nx and ny, plus time
        #----------------------------------------------
        # Without using "int()" here, we get this:
        #     TypeError: size must be None or integer
        #----------------------------------------------
        nccs_unit.create_dimension("nz", self.shape[0])
        nccs_unit.create_dimension("ny", self.shape[1])
        nccs_unit.create_dimension("nx", self.shape[2])
        nccs_unit.create_dimension("time", None)   # (unlimited dimension)
        # print 'MADE IT PAST create_dimension CALLS.'
        
        #-------------------------
        # Create a time variable
        #------------------------------------------
        #('d' = float64; must match in add_cube()
        #------------------------------------------
        tvar = nccs_unit.create_variable ('time', 'd', ("time",))
        nccs_unit.variables['time'].units = time_units
        
        #--------------------------------
        # Create a variable in the file
        #----------------------------------
        # Returns "var" as a PyNIO object
        #----------------------------------
        var = nccs_unit.create_variable (var_name, nio_type_code,
                                         ("time", "nz", "ny", "nx"))

        #----------------------------------
        # Specify a "nodata" fill value ?
        #----------------------------------
        var._FillValue = -9999.0    ## Does this jive with Prefill above ??
        
        #------------------------------------
        # Create attributes of the variable
        #------------------------------------
        nccs_unit.variables[var_name].long_name = long_name
        nccs_unit.variables[var_name].units = units_name
        nccs_unit.variables[var_name].dz = self.res[0]
        nccs_unit.variables[var_name].dy = self.res[1]
        nccs_unit.variables[var_name].dx = self.res[2]
        nccs_unit.variables[var_name].y_south_edge = 0.
        nccs_unit.variables[var_name].y_north_edge = self.res[1]*self.shape[1]
        nccs_unit.variables[var_name].x_west_edge = 0.
        nccs_unit.variables[var_name].x_east_edge = self.res[2]*self.shape[2]
        nccs_unit.variables[var_name].z_bottom_edge = 0.
        nccs_unit.variables[var_name].z_top_edge = self.res[0]*self.shape[0]
        
        self.nccs_unit = nccs_unit
        return OK
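The comment on the 'time' variable above refers to an add_cube() method that is not shown. A minimal sketch of how values might be appended along the unlimited time dimension (our reconstruction, assuming only what open_new_file sets up):

    def add_cube(self, grid, time):
        # Write the time stamp and the 3-D grid at the current unlimited index
        n = self.time_index
        self.nccs_unit.variables['time'][n] = time   # float64 ('d'), as created above
        self.nccs_unit.variables[self.var_name][n, :, :, :] = grid
        self.time_index += 1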
Ejemplo n.º 27
0
 def setUp(self):
     # print 'Creating temporary file'
     do_setup(filename)
     opt = Nio.options()
     opt.MaskedArrayMode = "MaskedIfFillAtt"
     self.f = Nio.open_file(filename, options=opt)
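This fixture differs from the MaskedArrayMode = "MaskedIfFillAttAndValue" setUp above only in the mode string. A small hedged check that works under either mode ('foo' is a placeholder variable name, not one guaranteed to exist in the test file):

 def test_mask_mode(self):
     import numpy as np
     data = self.f.variables['foo'][:]
     if np.ma.isMaskedArray(data):
         print 'masked points:', np.ma.count_masked(data)
     else:
         print 'plain ndarray; no fill-value masking applied'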
Ejemplo n.º 28
0
def main(argv):

    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_0_beta08'
    opts_dict['compset'] = 'F2000'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)

    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach']
            and opts_dict['res']):
        print 'Please specify --tag, --compset, --mach and --res options'
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist = []
    inc_varlist = []

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        print 'Running pyEnsSum!'

    if me.get_rank() == 0 and (verbose == True):
        print opts_dict
        print 'Ensemble size for summary = ', esize

    exclude = False
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist = []
            # Read in the excluded or included var list
            ex_varlist, exclude = pyEnsLib.read_jsonlist(
                opts_dict['jsonfile'], 'ES')
            if exclude == False:
                inc_varlist = ex_varlist
                ex_varlist = []
            # Read in the included var list
            #inc_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')

    # Broadcast the excluded var list to each processor
    #if opts_dict['mpi_enable']:
    #   ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)
    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude = me.partition(exclude, func=Duplicate(), involved=True)
        if exclude:
            ex_varlist = me.partition(ex_varlist,
                                      func=Duplicate(),
                                      involved=True)
        else:
            inc_varlist = me.partition(inc_varlist,
                                       func=Duplicate(),
                                       involved=True)

    in_files = []
    if (os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank() == 0 and (verbose == True):
            print 'Number of files in input directory = ', num_files
        if (num_files < esize):
            if me.get_rank() == 0 and (verbose == True):
                print 'Number of files in input directory (',num_files,\
                 ') is less than specified ensemble size of ', esize
            sys.exit(2)
        if (num_files > esize):
            if me.get_rank() == 0 and (verbose == True):
                print 'NOTE: Number of files in ', input_dir, \
                 'is greater than specified ensemble size of ', esize ,\
                 '\nwill just use the first ',  esize, 'files'
    else:
        if me.get_rank() == 0:
            print 'Input directory: ', input_dir, ' not found'
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'],
                                               opts_dict['regx'])
        in_files = me.partition(in_files_list,
                                func=EqualLength(),
                                involved=True)
        if me.get_rank() == 0 and (verbose == True):
            print 'in_files=', in_files

    # Open the files in the input directory
    o_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print 'Input files are: '
        print "\n".join(in_files)
        #for i in in_files:
        #    print "in_files =",i
    for onefile in in_files[0:esize]:
        if (os.path.isfile(input_dir + '/' + onefile)):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            if me.get_rank() == 0:
                print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...."
            sys.exit()

    # Store dimensions of the input fields
    if me.get_rank() == 0 and (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ilev":
            nilev = input_dims["ilev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key == "lon"):
            nlon = input_dims[key]
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey = key

    if (nlev == -1):
        if me.get_rank() == 0:
            print "COULD NOT LOCATE valid dimension lev => EXITING...."
        sys.exit()

    if ((ncol == -1) and ((nlat == -1) or (nlon == -1))):
        if me.get_rank() == 0:
            print "Need either lat/lon or ncol  => EXITING...."
        sys.exit()

    # Check if this is SE or FV data
    if (ncol != -1):
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if me.get_rank() == 0 and (verbose == True):
        print "Checking dimensions across files...."
        print 'lev = ', nlev
        if (is_SE == True):
            print 'ncol = ', ncol
        else:
            print 'nlat = ', nlat
            print 'nlon = ', nlon

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (is_SE == True):
            if (nlev != int(input_dims["lev"])
                    or (ncol != int(input_dims["ncol"]))):
                if me.get_rank() == 0:
                    print "Dimension mismatch between ", in_files[
                        0], 'and', in_files[0], '!!!'
                sys.exit()
        else:
            if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\
                  or ( nlon != int(input_dims[lonkey]))):
                if me.get_rank() == 0:
                    print "Dimension mismatch between ", in_files[
                        0], 'and', in_files[0], '!!!'
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    #print len(vars_dict_all)
    if exclude:
        vars_dict = vars_dict_all.copy()
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    #Given an included var list, remove all float var that are not on the list
    else:
        vars_dict = vars_dict_all.copy()
        for k, v in vars_dict_all.iteritems():
            if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
                #print vars_dict_all[k].typecode()
                #print k
                del vars_dict[k]

    num_vars = len(vars_dict)
    #print num_vars
    #if me.get_rank() == 0:
    #   for k,v in vars_dict.iteritems():
    #       print 'vars_dict',k,vars_dict[k].typecode()

    str_size = 0

    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.iteritems():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = v.rank  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True):  # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):
                is_2d = True
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev)):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):
                is_2d = True
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and
                                 (vs[1] == nlev or vs[1] == nilev))):
                is_3d = True
                num_3d += 1

        if (is_3d == True):
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif (is_2d == True):
            str_size = max(str_size, len(k))
            d2_var_names.append(k)
        #else:
        #    print 'var=',k

    if me.get_rank() == 0 and (verbose == True):
        print 'Number of variables found:  ', num_3d + num_2d
        print '3D variables: ' + str(num_3d) + ', 2D variables: ' + str(num_2d)

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    if esize < num_2d + num_3d:
        if me.get_rank() == 0:
            print "************************************************************************************************************************************"
            print "  Error: the total number of 3D and 2D variables " + str(
                num_2d + num_3d
            ) + " is larger than the number of ensemble files " + str(esize)
            print "  Cannot generate ensemble summary file, please remove more variables from your included variable list,"
            print "  or add more varaibles in your excluded variable list!!!"
            print "************************************************************************************************************************************"
        sys.exit()
    # All vars is 3d vars first (sorted), the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    #if me.get_rank() == 0 and (verbose == True):
    #    print 'num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")"

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if me.get_rank() == 0 and (verbose == True):
        print "Creating ", this_sumfile, "  ..."
    if (me.get_rank() == 0 | opts_dict["popens"]):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if me.get_rank() == 0 and (verbose == True):
            print "Setting dimensions ....."
        if (is_SE == True):
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if me.get_rank() == 0 and (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if me.get_rank() == 0 and (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev', ))
        v_vars = nc_sumfile.create_variable("vars", 'S1',
                                            ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1',
                                             ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1',
                                             ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            if (is_SE == True):
                v_ens_avg3d = nc_sumfile.create_variable(
                    "ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_stddev3d = nc_sumfile.create_variable(
                    "ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_avg2d = nc_sumfile.create_variable(
                    "ens_avg2d", 'f', ('nvars2d', 'ncol'))
                v_ens_stddev2d = nc_sumfile.create_variable(
                    "ens_stddev2d", 'f', ('nvars2d', 'ncol'))
            else:
                v_ens_avg3d = nc_sumfile.create_variable(
                    "ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_stddev3d = nc_sumfile.create_variable(
                    "ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_avg2d = nc_sumfile.create_variable(
                    "ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
                v_ens_stddev2d = nc_sumfile.create_variable(
                    "ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))

            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f',
                                                ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f',
                                          ('nvars', 'ens_size'))
        v_standardized_gm = nc_sumfile.create_variable("standardized_gm", 'f',
                                                       ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm', 'f',
                                                   ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm', 'f', ('nvars', ))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm', 'f', ('nvars', ))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm', 'f',
                                                       ('nvars', ))

        # Assign vars, var3d and var2d
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        v_lev[:] = lev_data[:]

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable; we also compute the max norm (currently done in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list

        var3_list_loc = me.partition(d3_var_names,
                                     func=EqualStride(),
                                     involved=True)
        var2_list_loc = me.partition(d2_var_names,
                                     func=EqualStride(),
                                     involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    # Calculate global means #
    if me.get_rank() == 0 and (verbose == True):
        print "Calculating global means ....."
    if not opts_dict['cumul']:
        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(
            o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    if (not opts_dict['gmonly']) or (opts_dict['cumul']):
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating RMSZ scores ....."
        zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
            o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict['mpi_enable'] & (not opts_dict['popens']):

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index,
                                  (len(d3_var_names), len(o_files)))
            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d = gather_npArray(zscore3d, me, slice_index,
                                          (len(d3_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names),
                                          me.get_rank())
                ens_avg3d = gather_npArray(ens_avg3d, me, slice_index,
                                           shape_tuple3d)
                ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index,
                                              shape_tuple3d)

            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index,
                                  (len(d2_var_names), len(o_files)))

            var_list = gather_list(var_list, me)

            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d = gather_npArray(zscore2d, me, slice_index,
                                          (len(d2_var_names), len(o_files)))

                # Gather ens_avg2d and ens_stddev2d results
                shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names),
                                          me.get_rank())
                ens_avg2d = gather_npArray(ens_avg2d, me, slice_index,
                                           shape_tuple2d)
                ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index,
                                              shape_tuple2d)

        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(d3_var_names) + len(d2_var_names)))
    # Assign to file:
    if (me.get_rank() == 0) or opts_dict['popens']:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            if not opts_dict['gmonly']:
                Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0)
                v_RMSZ[:, :] = Zscoreall[:, :]
            if not opts_dict['gmonly']:
                if (is_SE == True):
                    v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :]
                    v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :]
                    v_ens_avg2d[:, :] = ens_avg2d[:, :]
                    v_ens_stddev2d[:, :] = ens_stddev2d[:, :]
                else:
                    v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :]
                    v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :]
                    v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :]
                    v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :]
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(
            gmall, all_var_names, var_list, me)
        v_gm[:, :] = gmall[:, :]
        v_standardized_gm[:, :] = standardized_global_mean[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:].astype(np.float32)
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        if me.get_rank() == 0:
            print "All Done"
Ejemplo n.º 29
0
def main(argv):
    print 'Running pyEnsSumPop!'

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable zscoreonly nrand= rand seq= jsondir='
    optkeys = s.split()
    try: 
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict={}

    # Defaults
    opts_dict['tag'] = 'cesm1_2_0'
    opts_dict['compset'] = 'FC5'
    opts_dict['mach'] = 'yellowstone'
    opts_dict['tslice'] = 0 
    opts_dict['nyear'] = 3
    opts_dict['nmonth'] = 12
    opts_dict['npert'] = 40
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'ne30_ne30'
    opts_dict['sumfile'] = 'ens.pop.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['zscoreonly'] = False
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40 
    opts_dict['rand'] = False
    opts_dict['seq'] = 0 
    opts_dict['jsondir'] = '/glade/scratch/haiyingx/' 

    # This creates the dictionary of input arguments 
    print "before parseconfig"
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,'ESP',opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if verbose:
       print opts_dict
       
    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])
    if opts_dict['jsonfile']:
        # Read in the included var list
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        str_size = 0
        for vname in Var3d:
            if str_size < len(vname):
                str_size = len(vname)
        for vname in Var2d:
            if str_size < len(vname):
                str_size = len(vname)


    in_files=[]
    if(os.path.exists(input_dir)):
        # Pick up the 'nrand' random number of input files to generate summary files
        if opts_dict['rand']:
           in_files=pyEnsLib.Random_pickup_pop(input_dir,opts_dict,opts_dict['nrand'])
        else:    
           # Get the list of files
           in_files_temp = os.listdir(input_dir)
           in_files=sorted(in_files_temp)
        # Make sure we have enough
        num_files = len(in_files)
    else:
        print 'Input directory: ',input_dir,' not found'
        sys.exit(2)

    #Partition the input file list 
    in_file_list=me.partition(in_files,func=EqualStride(),involved=True)

    
    # Open the files in the input directory
    o_files=[]
    for onefile in in_file_list:
        if (os.path.isfile(input_dir+'/' + onefile)):
            o_files.append(Nio.open_file(input_dir+'/' + onefile,"r"))
        else:
            print "COULD NOT LOCATE FILE "+ input_dir + onefile + "! EXITING...."
            sys.exit() 


    print in_file_list

    # Store dimensions of the input fields
    if (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    # Get the dimension sizes from the first file
    for key in input_dims:
        if key == "z_t":
            nlev = input_dims["z_t"]
        elif key == "nlon":
            nlon = input_dims["nlon"]
        elif key == "nlat":
            nlat = input_dims["nlat"]

    # Make sure all files have the same dimensions
    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if ( nlev != int(input_dims["z_t"]) or ( nlat != int(input_dims["nlat"]))\
              or ( nlon != int(input_dims["nlon"]))):
            print "Dimension mismatch between ", in_file_list[0], 'and', in_file_list[count], '!!!'
            sys.exit()


    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if verbose:
       print "Creating ", this_sumfile, "  ..."
    if (me.get_rank() == 0 ):
       if os.path.exists(this_sumfile):
           os.unlink(this_sumfile)
       opt = Nio.options()
       opt.PreFill = False
       opt.Format = 'NetCDF4Classic'

       nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

       # Set dimensions
       if (verbose == True):
	   print "Setting dimensions ....."
       nc_sumfile.create_dimension('nlat', nlat)
       nc_sumfile.create_dimension('nlon', nlon)
       nc_sumfile.create_dimension('nlev', nlev)
       nc_sumfile.create_dimension('time',None)
       nc_sumfile.create_dimension('ens_size', opts_dict['npert'])
       nc_sumfile.create_dimension('nbin', opts_dict['nbin'])
       nc_sumfile.create_dimension('nvars', len(Var3d) + len(Var2d))
       nc_sumfile.create_dimension('nvars3d', len(Var3d))
       nc_sumfile.create_dimension('nvars2d', len(Var2d))
       nc_sumfile.create_dimension('str_size', str_size)

       # Set global attributes
       now = time.strftime("%c")
       if (verbose == True):
	   print "Setting global attributes ....."
       setattr(nc_sumfile, 'creation_date',now)
       setattr(nc_sumfile, 'title', 'POP verification ensemble summary file')
       setattr(nc_sumfile, 'tag', opts_dict["tag"]) 
       setattr(nc_sumfile, 'compset', opts_dict["compset"]) 
       setattr(nc_sumfile, 'resolution', opts_dict["res"]) 
       setattr(nc_sumfile, 'machine', opts_dict["mach"]) 

       # Create variables
       if (verbose == True):
	   print "Creating variables ....."
       v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
       v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
       v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
       v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
       v_time = nc_sumfile.create_variable("time",'d',('time',))
       v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('time','nvars3d', 'nlev', 'nlat', 'nlon'))
       v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('time','nvars3d', 'nlev', 'nlat', 'nlon'))
       v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('time','nvars2d', 'nlat', 'nlon'))
       v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('time','nvars2d', 'nlat', 'nlon'))

       v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('time','nvars', 'ens_size','nbin'))
       if not opts_dict['zscoreonly']:
          v_gm = nc_sumfile.create_variable("global_mean", 'f', ('time','nvars', 'ens_size'))


       # Assign vars, var3d and var2d
       if (verbose == True):
	   print "Assigning vars, var3d, and var2d ....."

       eq_all_var_names =[]
       eq_d3_var_names = []
       eq_d2_var_names = []
       all_var_names = list(Var3d)
       all_var_names += Var2d
       l_eq = len(all_var_names)
       for i in range(l_eq):
           tt = list(all_var_names[i])
           l_tt = len(tt)
           if (l_tt < str_size):
               extra = list(' ')*(str_size - l_tt)
               tt.extend(extra)
           eq_all_var_names.append(tt)

       l_eq = len(Var3d)
       for i in range(l_eq):
           tt = list(Var3d[i])
           l_tt = len(tt)
           if (l_tt < str_size):
               extra = list(' ')*(str_size - l_tt)
               tt.extend(extra)
           eq_d3_var_names.append(tt)

       l_eq = len(Var2d)
       for i in range(l_eq):
           tt = list(Var2d[i])
           l_tt = len(tt)
           if (l_tt < str_size):
               extra = list(' ')*(str_size - l_tt)
               tt.extend(extra)
           eq_d2_var_names.append(tt)

       v_vars[:] = eq_all_var_names[:]
       v_var3d[:] = eq_d3_var_names[:]
       v_var2d[:] = eq_d2_var_names[:]

       # Time-invariant metadata
       if (verbose == True):
           print "Assigning time invariant metadata ....."
       vars_dict = o_files[0].variables
       lev_data = vars_dict["z_t"]
       v_lev[:] = lev_data[:]
       
    # Time-variant metadata
    if verbose:
       print "Assigning time variant metadata ....."
    vars_dict = o_files[0].variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array,me,(me.get_size(),))
    if me.get_rank() == 0:
       v_time[:]=time_array[:]

    # Calculate global mean, average, standard deviation 
    if verbose:
       print "Calculating global means ....."
    is_SE = False
    tslice=0
    if not opts_dict['zscoreonly']:
       gm3d,gm2d = pyEnsLib.generate_global_mean_for_summary(o_files,Var3d,Var2d, is_SE,False,opts_dict)
    if verbose:
       print "Finish calculating global means ....."

    # Calculate RMSZ scores  
    if (verbose == True):
       print "Calculating RMSZ scores ....."
    zscore3d,zscore2d,ens_avg3d,ens_stddev3d,ens_avg2d,ens_stddev2d,temp1,temp2=pyEnsLib.calc_rmsz(o_files,Var3d,Var2d,is_SE,opts_dict)    

    # Collect from all processors
    if opts_dict['mpi_enable'] :
        # Gather the 3d variable results from all processors to the master processor
        # Gather global means 3d results
        if not opts_dict['zscoreonly']:
           gmall=np.concatenate((gm3d,gm2d),axis=0)
           #print "before gather, gmall.shape=",gmall.shape
           gmall=pyEnsLib.gather_npArray_pop(gmall,me,(me.get_size(),len(Var3d)+len(Var2d),len(o_files)))
        zmall=np.concatenate((zscore3d,zscore2d),axis=0)
        zmall=pyEnsLib.gather_npArray_pop(zmall,me,(me.get_size(),len(Var3d)+len(Var2d),len(o_files),nbin))
        #print 'zmall=',zmall
        
        #print "after gather, gmall.shape=",gmall.shape
        ens_avg3d=pyEnsLib.gather_npArray_pop(ens_avg3d,me,(me.get_size(),len(Var3d),nlev,(nlat),nlon))
        ens_avg2d=pyEnsLib.gather_npArray_pop(ens_avg2d,me,(me.get_size(),len(Var2d),(nlat),nlon))
        ens_stddev3d=pyEnsLib.gather_npArray_pop(ens_stddev3d,me,(me.get_size(),len(Var3d),nlev,(nlat),nlon))
        ens_stddev2d=pyEnsLib.gather_npArray_pop(ens_stddev2d,me,(me.get_size(),len(Var2d),(nlat),nlon))

    # Assign to file:
    if me.get_rank() == 0 :
        #Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0)
        v_RMSZ[:,:,:,:]=zmall[:,:,:,:]
        v_ens_avg3d[:,:,:,:,:]=ens_avg3d[:,:,:,:,:]
        v_ens_stddev3d[:,:,:,:,:]=ens_stddev3d[:,:,:,:,:]
        v_ens_avg2d[:,:,:,:]=ens_avg2d[:,:,:,:]
        v_ens_stddev2d[:,:,:,:]=ens_stddev2d[:,:,:,:]
        if not opts_dict['zscoreonly']:
           v_gm[:,:,:]=gmall[:,:,:]
        print "All done"
Ejemplo n.º 30
0
    def __init__(self, spec, serial=False, verbosity=1, once=False):
        '''
        Constructor

        @param spec  An instance of the Specifier class, defining the
                          input specification for this reshaper operation.

        @param serial     True or False, indicating whether the operation
                          should be performed in serial (True) or parallel
                          (False).  The default is to assume parallel operation
                          (but serial will be chosen if mpi4py cannot be
                          found when trying to initialize the decomposition).

        @param verbosity  Level of printed output (stdout).  A value of 0 means
                          no output, and a higher value means more output.  The
                          default value is 1.

        @param once       True or False, indicating whether the Reshaper should
                          write all metadata to a 'once' file (separately).
        '''
        # Type checking (or double-checking)
        if (not isinstance(spec, Slice2SeriesSpecifier)):
            err_msg = "Slice2SeriesReshaper requires a Slice2SeriesSpecifier" \
                    + " as input."
            raise TypeError(err_msg)

        # Call the base-class constructor
        super(Slice2SeriesReshaper, self).__init__(spec,
                                                   serial=serial,
                                                   verbosity=verbosity,
                                                   once=once)

        # Setup PyNIO options (including disabling the default PreFill option)
        opt = Nio.options()
        opt.PreFill = False

        # Determine the Format and CompressionLevel options
        # from the NetCDF format string in the Specifier
        if (self._specifier.netcdf_format == 'netcdf'):
            opt.Format = 'Classic'
        elif (self._specifier.netcdf_format == 'netcdf4'):
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = 0
        elif (self._specifier.netcdf_format == 'netcdf4c'):
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = 1
        self._nio_options = opt
        self._messenger.print_once('PyNIO options set', vlevel=2)

        # Open all of the input files
        self._timer.start('Open Input Files')
        self._input_files = []
        for filename in self._specifier.input_file_list:
            self._input_files.append(Nio.open_file(filename, "r"))
        self._timer.stop('Open Input Files')
        self._messenger.print_once('Input files opened', vlevel=2)

        # Validate the input files themselves
        self._timer.start('Input File Validation')
        self._validate_input_files()
        self._timer.stop('Input File Validation')
        self._messenger.print_once('Input files validated', vlevel=2)

        # Sort the input files by time
        self._timer.start('Sort Input Files')
        self._sort_input_files_by_time()
        self._timer.stop('Sort Input Files')
        self._messenger.print_once('Input files sorted', vlevel=2)

        # Retrieve and sort the variables in each time-slice file
        # (To determine if it is time-invariant metadata, time-variant
        # metadata, or if it is a time-series variable)
        self._timer.start('Sort Variables')
        self._sort_variables()
        self._timer.stop('Sort Variables')
        self._messenger.print_once('Variables sorted', vlevel=2)

        # Helpful debugging message
        self._messenger.print_once('Reshaper initialized.', vlevel=1)

        # Sync before continuing..
        self._messenger.sync()
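The Format/CompressionLevel branch above is easy to factor out for reuse; one possible helper (the function name is ours, not part of the reshaper):

import Nio

def nio_options_for(netcdf_format):
    # Map the Specifier's format string to PyNIO options,
    # mirroring the constructor's branch logic
    opt = Nio.options()
    opt.PreFill = False
    if netcdf_format == 'netcdf':
        opt.Format = 'Classic'
    elif netcdf_format == 'netcdf4':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = 0
    elif netcdf_format == 'netcdf4c':
        opt.Format = 'NetCDF4Classic'
        opt.CompressionLevel = 1
    return opt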
Ejemplo n.º 31
0
    def setUp(self):
        do_setup(filename)
        opt = Nio.options()
        opt.UseAxisAttribute = True
        self.f = Nio.open_file(filename, options=opt)
Ejemplo n.º 32
0
    def __init__(self, spec, serial=False, verbosity=1, once=False):
        '''
        Constructor

        @param spec  An instance of the Specifier class, defining the
                          input specification for this reshaper operation.

        @param serial     True or False, indicating whether the operation
                          should be performed in serial (True) or parallel
                          (False).  The default is to assume parallel operation
                          (but serial will be chosen if the mpi4py cannot be
                          found when trying to initialize decomposition.

        @param verbosity  Level of printed output (stdout).  A value of 0 means
                          no output, and a higher value means more output.  The
                          default value is 1.

        @param once       True or False, indicating whether the Reshaper should
                          write all metadata to a 'once' file (separately).
        '''
        # Type checking (or double-checking)
        if (not isinstance(spec, Slice2SeriesSpecifier)):
            err_msg = "Slice2SeriesReshaper requires a Slice2SeriesSpecifier" \
                    + " as input."
            raise TypeError(err_msg)

        # Call the base-class constructor
        super(Slice2SeriesReshaper, self).__init__(spec,
                                                   serial=serial,
                                                   verbosity=verbosity,
                                                   once=once)

        # Setup PyNIO options (including disabling the default PreFill option)
        opt = Nio.options()
        opt.PreFill = False

        # Determine the Format and CompressionLevel options
        # from the NetCDF format string in the Specifier
        if (self._specifier.netcdf_format == 'netcdf'):
            opt.Format = 'Classic'
        elif (self._specifier.netcdf_format == 'netcdf4'):
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = 0
        elif (self._specifier.netcdf_format == 'netcdf4c'):
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = 1
        self._nio_options = opt
        self._messenger.print_once('PyNIO options set', vlevel=2)

        # Open all of the input files
        self._timer.start('Open Input Files')
        self._input_files = []
        for filename in self._specifier.input_file_list:
            self._input_files.append(Nio.open_file(filename, "r"))
        self._timer.stop('Open Input Files')
        self._messenger.print_once('Input files opened', vlevel=2)

        # Validate the input files themselves
        self._timer.start('Input File Validation')
        self._validate_input_files()
        self._timer.stop('Input File Validation')
        self._messenger.print_once('Input files validated', vlevel=2)

        # Sort the input files by time
        self._timer.start('Sort Input Files')
        self._sort_input_files_by_time()
        self._timer.stop('Sort Input Files')
        self._messenger.print_once('Input files sorted', vlevel=2)

        # Retrieve and sort the variables in each time-slice file
        # (To determine if it is time-invariant metadata, time-variant
        # metadata, or if it is a time-series variable)
        self._timer.start('Sort Variables')
        self._sort_variables()
        self._timer.stop('Sort Variables')
        self._messenger.print_once('Variables sorted', vlevel=2)

        # Helpful debugging message
        self._messenger.print_once('Reshaper initialized.', vlevel=1)

        # Sync before continuing...
        self._messenger.sync()
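A hedged usage sketch; the Slice2SeriesSpecifier attributes shown are
assumptions inferred from how they are read above, not a confirmed
construction API:

spec = Slice2SeriesSpecifier()    # assumed default-constructible
spec.input_file_list = ['slice.0001.nc', 'slice.0002.nc']
spec.netcdf_format = 'netcdf4c'
reshaper = Slice2SeriesReshaper(spec, serial=True, verbosity=2)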
Example No. 33
    def open_new_file(
            self,
            file_name,
            info=None,
            var_name='X',
            long_name=None,
            units_name='None',
            dtype='float64',
            time_units='minutes',
            comment='',
            shape=(1, 1, 1),
            res=(1., 1., 1.),
            MAKE_RTI=True,
            MAKE_BOV=False):

        #--------------------------------------------------
        # Try to import the Nio module from PyNIO package
        #--------------------------------------------------
        Nio = self.import_nio()
        if not Nio:
            return False

        #----------------------------
        # Does file already exist ?
        #----------------------------
        file_name = file_utils.check_overwrite(file_name)
        self.file_name = file_name

        #---------------------------------------
        # Check and store the grid information
        #---------------------------------------
        self.format = 'nccs'
        self.file_name = file_name
        self.time_index = 0
        self.var_name = var_name
        self.shape = shape
        self.res = res

        if (long_name is None):
            long_name = var_name
        self.long_name = long_name
        self.units_name = units_name
        self.dtype = dtype

        #-----------------------------------
        # Get Nio type code for this dtype
        #------------------------------------
        nio_type_map = self.get_nio_type_map()
        nio_type_code = nio_type_map[dtype.lower()]
        self.nio_type_code = nio_type_code

        #-------------------------------------
        # Open a new netCDF file for writing
        #-------------------------------------
        # Sample output from time.asctime():
        #     "Thu Oct  8 17:10:18 2009"
        #-------------------------------------
        opt = Nio.options()
        opt.PreFill = False  # (for efficiency)
        opt.HeaderReserveSpace = 4000  # (4000 bytes, for efficiency)
        history = "Created using PyNIO " + Nio.__version__ + " on "
        history = history + time.asctime() + ". "
        history = history + comment

        try:
            nccs_unit = Nio.open_file(file_name,
                                      mode="w",
                                      options=opt,
                                      history=history)
            OK = True
        except Exception:
            OK = False
            return OK

        #----------------------------------------------
        # Create grid dimensions nx and ny, plus time
        #----------------------------------------------
        # Without using "int()" here, we get this:
        #     TypeError: size must be None or integer
        #----------------------------------------------
        nccs_unit.create_dimension("nz", self.shape[0])
        nccs_unit.create_dimension("ny", self.shape[1])
        nccs_unit.create_dimension("nx", self.shape[2])
        nccs_unit.create_dimension("time", None)  # (unlimited dimension)

        #-------------------------
        # Create a time variable
        #------------------------------------------
        # ('d' = float64; must match in add_cube())
        #------------------------------------------
        tvar = nccs_unit.create_variable('time', 'd', ("time", ))
        nccs_unit.variables['time'].units = time_units

        #--------------------------------
        # Create a variable in the file
        #----------------------------------
        # Returns "var" as a PyNIO object
        #----------------------------------
        var = nccs_unit.create_variable(var_name, nio_type_code,
                                        ("time", "nz", "ny", "nx"))

        #----------------------------------
        # Specify a "nodata" fill value ?
        #----------------------------------
        var._FillValue = -9999.0  ## Does this jibe with PreFill above?

        #------------------------------------
        # Create attributes of the variable
        #------------------------------------
        nccs_unit.variables[var_name].long_name = long_name
        nccs_unit.variables[var_name].units = units_name
        nccs_unit.variables[var_name].dz = self.res[0]
        nccs_unit.variables[var_name].dy = self.res[1]
        nccs_unit.variables[var_name].dx = self.res[2]
        nccs_unit.variables[var_name].y_south_edge = 0.
        nccs_unit.variables[var_name].y_north_edge = \
            self.res[1] * self.shape[1]
        nccs_unit.variables[var_name].x_west_edge = 0.
        nccs_unit.variables[var_name].x_east_edge = self.res[2] * self.shape[2]
        nccs_unit.variables[var_name].z_bottom_edge = 0.
        nccs_unit.variables[var_name].z_top_edge = self.res[0] * self.shape[0]

        self.nccs_unit = nccs_unit
        return OK
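A hypothetical call, assuming a writer class (NCCSWriter is an invented
name) that provides this method:

writer = NCCSWriter()    # hypothetical class
ok = writer.open_new_file('discharge_3d.nc', var_name='Q',
                          long_name='discharge', units_name='m3 s-1',
                          shape=(10, 50, 80), res=(1., 100., 100.))
if not ok:
    print('Could not create file.')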
Example No. 34
    def open_new_file(self, file_name, info=None,
                      var_name='X',
                      long_name=None,
                      units_name='None',
                      dtype='float32',
                      ### dtype='float64'
                      time_units='minutes',
                      comment='',
                      MAKE_RTI=True, MAKE_BOV=False):
     
        #--------------------------------------------------
        # Try to import the Nio module from PyNIO package
        #--------------------------------------------------
        Nio = self.import_nio()
        if not Nio:
            return False

        #----------------------------
        # Does file already exist ?
        #----------------------------
        file_name = file_utils.check_overwrite( file_name )
        self.file_name = file_name
        
        #---------------------------------------
        # Check and store the grid information
        #---------------------------------------
        self.check_and_store_info( file_name, info, var_name,
                                   dtype, MAKE_RTI, MAKE_BOV )
        if long_name is None:
            long_name = var_name
        self.long_name  = long_name
        self.units_name = units_name
        self.dtype      = dtype

        #-------------------------
        # Save the Nio type code
        #-------------------------
        nio_type_map  = self.get_nio_type_map()
        nio_type_code = nio_type_map[ dtype.lower() ]
        self.nio_type_code = nio_type_code
        
        #-------------------------------------
        # Open a new netCDF file for writing
        #-------------------------------------
        # Sample output from time.asctime():
        #     "Thu Oct  8 17:10:18 2009"
        #-------------------------------------
        opt = Nio.options()
        opt.PreFill = False            # (for efficiency)
        opt.HeaderReserveSpace = 4000  # (4000 bytes, for efficiency)
        history = "Created using PyNIO " + Nio.__version__ + " on "
        history = history + time.asctime() + ". " 
        history = history + comment
        
        try:
            ncgs_unit = Nio.open_file(file_name, mode="w",
                                      options=opt, history=history )
            OK = True
        except Exception:
            OK = False
            return OK

        
        #----------------------------------------------
        # Create grid dimensions nx and ny, plus time
        #----------------------------------------------
        # Without using "int()" here, we get this:
        #     TypeError: size must be None or integer
        #----------------------------------------------
        ncgs_unit.create_dimension("nx", int(self.info.ncols))
        ncgs_unit.create_dimension("ny", int(self.info.nrows))
        ncgs_unit.create_dimension("time", None)   # (unlimited dimension)
        
        #-------------------------
        # Create a time variable
        #------------------------------------------
        # ('d' = float64; must match in add_grid())
        #------------------------------------------
        tvar = ncgs_unit.create_variable('time', 'd', ("time",))
        ncgs_unit.variables['time'].units = time_units
        
        #--------------------------------
        # Create a variable in the file
        #----------------------------------
        # Returns "var" as a PyNIO object
        #----------------------------------
        var = ncgs_unit.create_variable(var_name, nio_type_code,
                                        ("time", "ny", "nx"))
        ## var = nc_unit.create_variable(var_name, nio_type_code,
        ##            ("time", "nx", "ny"))

        #-------------------------------------------
        # Create a separate, scalar "time stamp" ?
        #-------------------------------------------
        # t = nc_unit.create_variable("time", nio_type_code, ("time"))
        
        #----------------------------------
        # Specify a "nodata" fill value ?
        #----------------------------------
        var._FillValue = -9999.0    ## Does this jibe with PreFill above?
        
        #------------------------------------
        # Create attributes of the variable
        #------------------------------------
        ncgs_unit.variables[var_name].long_name = long_name
        ncgs_unit.variables[var_name].units     = units_name
        ncgs_unit.variables[var_name].dx        = self.info.xres
        ncgs_unit.variables[var_name].dy        = self.info.yres  ### (12/2/09)
        ncgs_unit.variables[var_name].y_south_edge = self.info.y_south_edge
        ncgs_unit.variables[var_name].y_north_edge = self.info.y_north_edge
        ncgs_unit.variables[var_name].x_west_edge  = self.info.x_west_edge
        ncgs_unit.variables[var_name].x_east_edge  = self.info.x_east_edge        
        
        self.ncgs_unit = ncgs_unit
        return OK
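The comment above notes that the float64 ('d') time variable must match
what add_grid() writes later; a minimal add_grid() sketch under that
assumption (self.time_index is presumed to start at 0, as in the previous
example):

    def add_grid(self, grid, time_value):
        # Append one 2-D grid along the unlimited time dimension.
        n = self.time_index
        self.ncgs_unit.variables['time'][n] = time_value   # float64 ('d')
        self.ncgs_unit.variables[self.var_name][n, :, :] = grid
        self.time_index += 1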
Example No. 35
 def setUp(self):
     self.filename = tempfile.mktemp(prefix="test_", suffix=".nc")
     do_setup(self.filename)
     opt = Nio.options()
     opt.UseAxisAttribute = True
     self.f = Nio.open_file(self.filename, options=opt)
Example No. 36
 def setUp(self):
     #print 'Creating temporary file'
     do_setup(filename)
     opt = Nio.options()
     opt.MaskedArrayMode = 'MaskedIfFillAtt'
     self.f = Nio.open_file(filename, options=opt)
Example No. 37
f = Nio.open_file(dirc + fname)

#
# Print the input file contents
#
# print f

#
# If the output file already exists, remove it
#
if os.path.exists(fname + ".nc"):
    os.remove(fname + ".nc")

#
# Set the PreFill option to False to improve writing performance
#
opt = Nio.options()
opt.PreFill = False

#
# Options for writing a NetCDF-4 "classic" file.
#
# If Nio wasn't built with NetCDF-4 support, you will get a
# warning here, and the code will fall back to NetCDF-3.
#
opt.Format = "netcdf4classic"
opt.CompressionLevel = 5  # Can go up to 9

#
# Set the history attribute
#
hatt = "Converted from GRIB2: " + time.ctime(time.time())
Example No. 38
 def setUp(self):
     #print 'Creating temporary file: ', filename
     do_setup(filename)
     opt = Nio.options()
     opt.MaskedArrayMode = 'MaskedAlways'
     self.f = Nio.open_file(filename, options=opt)
Example No. 39
    def __init__(self,
                 specifier,
                 serial=False,
                 verbosity=1,
                 skip_existing=False,
                 overwrite=False,
                 once=False,
                 simplecomm=None):
        """
        Constructor

        Parameters:
            specifier (Specifier): An instance of the Specifier class, 
                defining the input specification for this reshaper operation.

        Keyword Arguments:
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel
                (False).  The default is to assume parallel operation
                (but serial will be chosen if mpi4py cannot be
                found when trying to initialize the decomposition).
            verbosity (int): Level of printed output (stdout).  A value of 0
                means no output, and a higher value means more output.  The
                default value is 1.
            skip_existing (bool): Flag specifying whether to skip the generation
                of time-series for variables with time-series files that already
                exist.  Default is False.
            overwrite (bool): Flag specifying whether to forcefully overwrite
                output files if they already exist.  Default is False.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm object to handle the parallel 
                communication, if necessary
        """

        # Type checking (or double-checking)
        if not isinstance(specifier, Specifier):
            err_msg = "Input must be given in the form of a Specifier object"
            raise TypeError(err_msg)
        if type(serial) is not bool:
            err_msg = "Serial indicator must be True or False."
            raise TypeError(err_msg)
        if type(verbosity) is not int:
            err_msg = "Verbosity level must be an integer."
            raise TypeError(err_msg)
        if type(skip_existing) is not bool:
            err_msg = "Skip_existing flag must be True or False."
            raise TypeError(err_msg)
        if type(once) is not bool:
            err_msg = "Once-file indicator must be True or False."
            raise TypeError(err_msg)
        if simplecomm is not None:
            if not isinstance(simplecomm, (SimpleComm, SimpleCommMPI)):
                err_msg = ("Simple communicator object is not a SimpleComm "
                           "or SimpleCommMPI")
                raise TypeError(err_msg)

        # Whether to write a once file
        self._use_once_file = once

        # Internal timer data
        self._timer = TimeKeeper()

        # Assumed I/O block size, in bytes
        self.assumed_block_size = float(4 * 1024 * 1024)

        # Dictionary storing read/write data amounts
        self._byte_counts = {}

        self._timer.start('Initializing Simple Communicator')
        if simplecomm is None:
            simplecomm = create_comm(serial=serial)
        # Reference to the simple communicator
        self._simplecomm = simplecomm
        self._timer.stop('Initializing Simple Communicator')

        # Construct the print header
        header = ''.join([
            '[',
            str(self._simplecomm.get_rank()), '/',
            str(self._simplecomm.get_size()), '] '
        ])

        # Reference to the verbose printer tool
        self._vprint = VPrinter(header=header, verbosity=verbosity)

        # Debug output starting
        if self._simplecomm.is_manager():
            self._vprint('Initializing Reshaper', verbosity=1)

        # Validate the user input data
        self._timer.start('Specifier Validation')
        specifier.validate()
        self._timer.stop('Specifier Validation')
        if self._simplecomm.is_manager():
            self._vprint('Specifier validated', verbosity=1)

        # Setup PyNIO options (including disabling the default PreFill option)
        opt = Nio.options()
        opt.PreFill = False

        # Determine the Format and CompressionLevel options
        # from the NetCDF format string in the Specifier
        if specifier.netcdf_format == 'netcdf':
            opt.Format = 'Classic'
        elif specifier.netcdf_format == 'netcdf4':
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = 0
        elif specifier.netcdf_format == 'netcdf4c':
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = specifier.netcdf_deflate
            if self._simplecomm.is_manager():
                self._vprint('PyNIO compression level: {0}'.format(
                    specifier.netcdf_deflate), verbosity=2)

        self._nio_options = opt
        if self._simplecomm.is_manager():
            self._vprint('PyNIO options set', verbosity=2)

        # Open all of the input files
        self._timer.start('Open Input Files')
        self._input_files = []
        for filename in specifier.input_file_list:
            self._input_files.append(Nio.open_file(filename, "r"))
        self._timer.stop('Open Input Files')
        if self._simplecomm.is_manager():
            self._vprint('Input files opened', verbosity=2)

        # Validate the input files themselves
        self._timer.start('Input File Validation')
        self._validate_input_files(specifier)
        self._timer.stop('Input File Validation')
        if self._simplecomm.is_manager():
            self._vprint('Input files validated', verbosity=2)

        # Sort the input files by time
        self._timer.start('Sort Input Files')
        self._sort_input_files_by_time(specifier)
        self._timer.stop('Sort Input Files')
        if self._simplecomm.is_manager():
            self._vprint('Input files sorted', verbosity=2)

        # Retrieve and sort the variables in each time-slice file
        # (To determine if it is time-invariant metadata, time-variant
        # metadata, or if it is a time-series variable)
        self._timer.start('Sort Variables')
        self._sort_variables(specifier)
        self._timer.stop('Sort Variables')
        if self._simplecomm.is_manager():
            self._vprint('Variables sorted', verbosity=2)

        # Validate the output files
        self._timer.start('Output File Validation')
        self._validate_output_files(specifier, skip_existing, overwrite)
        self._timer.stop('Output File Validation')
        if self._simplecomm.is_manager():
            self._vprint('Output files validated', verbosity=2)

        # Helpful debugging message
        if self._simplecomm.is_manager():
            self._vprint('Reshaper initialized.', verbosity=1)

        # Sync before continuing...
        self._simplecomm.sync()
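A hedged construction sketch with an explicit serial communicator (the
class name Reshaper and the prepared spec object are assumptions):

simplecomm = create_comm(serial=True)
reshaper = Reshaper(spec, verbosity=2, skip_existing=True,
                    simplecomm=simplecomm)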
Example No. 40
 def setUp(self):
     do_setup(filename)
     opt = Nio.options()
     opt.MaskedArrayMode = 'MaskedIfFillAttAndValue'
     self.f = Nio.open_file(filename, options=opt)
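With any of the MaskedArrayMode settings used in these fixtures, reads can
come back as numpy masked arrays; a minimal test sketch, assuming the file
created by do_setup() defines a variable 'T' with a _FillValue attribute:

    def test_masked_read(self):
        import numpy.ma as ma
        data = self.f.variables['T'][:]
        self.assertTrue(ma.isMaskedArray(data))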