Example #1
    def __init__(self, path="/RECH/skynet1_rech3/huziy/cru_data/CRUTS3.1/cru_ts_3_10.1901.2009.tmp.dat.nc",
                 var_name="tmp", lazy=False):

        self.times = None
        self.var_data = None

        self.times_var = None
        self.kdtree = None
        self.times_num = None
        self.lons2d, self.lats2d = None, None

        self.lazy = lazy
        self.var_name = var_name



        try:
            with Dataset(path) as ds:
                self._init_fields(ds)

            # Cannot use a with-block here, since the dataset has to stay open
            self.nc_dataset = Dataset(path)

        except OSError as oserr:
            with MFDataset(path) as ds:
                self._init_fields(ds)

            # Cannot use a with-block here, since the dataset has to stay open
            self.nc_dataset = MFDataset(path)


        self.nc_vars = self.nc_dataset.variables
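For reference, the open-or-fallback pattern above (try a single-file Dataset, fall back to MFDataset on OSError) can be isolated into a small helper. This is only a hedged sketch; the wildcard path in the usage comment is hypothetical:

from netCDF4 import Dataset, MFDataset

def open_netcdf(path):
    # A single file opens fine with Dataset; a glob or list of files needs MFDataset
    try:
        return Dataset(path)
    except OSError:
        return MFDataset(path)

# hypothetical usage:
# nc = open_netcdf("/path/to/cru_ts_*.nc")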
Example #2
def ice_comp_model_to_osi(pathToModel, modelYear, modelIteration, boundLat, pathToOSI, param = 'area', threshold=0.15):
    '''
    Plot sea ice area from satellite data and several model iterations
    '''
    fsat = MFDataset(pathToOSI+'/OSI'+modelYear+'??.nc')
    osi_lat = fsat.variables['lat'][:]
    osi_lon = fsat.variables['lon'][:]

    osi_ice = fsat.variables['ice_conc'][0,:,:]
    area_osi = np.ones(osi_ice.shape)*100

    osi_area = []
    for mm in range(12):
        if param == 'area':
            osi_area.append(calc_area(fsat.variables['ice_conc'][mm,:,:]/100,\
                                      area_osi, osi_lat, blat=boundLat, threshold=threshold)/1e6)
        elif param == 'extent':
            osi_area.append(calc_extent(fsat.variables['ice_conc'][mm,:,:]/100,\
                            area_osi, osi_lat, blat=boundLat, threshold=threshold)/1e6)
        
    
    g  = Dataset('./grid.cdf')
    dxc = g.variables['dxc'][0,:,:]
    dyc = g.variables['dyc'][0,:,:]
    lat = g.variables['yc'][0,:,:]

    dxcXdyc = dxc*dyc

    area_model=np.zeros((len(modelIteration), 12))

    for (it, iteration) in enumerate(modelIteration):
        fm = MFDataset(pathToModel+'/'+modelYear+'/'+'it'+str(iteration)+'/fw/*.cdf')
        for mm in range(12):
            if param == 'area':
                area_model[it,mm] = calc_area(fm.variables['area'][mm,:,:],\
                                              dxcXdyc, lat, blat=boundLat,threshold=threshold)/10e11
            elif param == 'extent':
                area_model[it,mm] = calc_extent(fm.variables['area'][mm,:,:],\
                                              dxcXdyc, lat, blat=boundLat,threshold=threshold)/10e11
    
        fm.close()
    
    dates = pd.date_range(modelYear+'-01', str(int(modelYear)+1)+'-01', freq='M')
    dd = pd.DataFrame(index=dates)

    dd['Satellite']=osi_area

    for (it , iteration) in enumerate(modelIteration):
        dd['it'+str(iteration)]=area_model[it,:]

    return dd.plot(figsize=(10,5), lw = 3)
Example #3
def ice_comp_model_to_osi_table(pathToModel, modelYears, modelIteration, boundLat, pathToOSI, param = 'area', threshold=0.15):

    diff_array = numpy.zeros((len(modelYears), 12))

    for (nnum, yyear) in enumerate(modelYears):
    
        fsat = MFDataset(pathToOSI+'/OSI'+yyear+'??.nc')
        osi_lat = fsat.variables['lat'][:]
        osi_lon = fsat.variables['lon'][:]

        osi_ice = fsat.variables['ice_conc'][0,:,:]
        area_osi = np.ones(osi_ice.shape)*100

        osi_area = []
        for mm in range(12):
            if param == 'area':
                osi_area.append(calc_area(fsat.variables['ice_conc'][mm,:,:]/100,\
                                      area_osi, osi_lat, blat=boundLat, threshold=threshold)/1e6)
            elif param == 'extent':
                osi_area.append(calc_extent(fsat.variables['ice_conc'][mm,:,:]/100,\
                            area_osi, osi_lat, blat=boundLat, threshold=threshold)/1e6)
        
    
        g  = Dataset('./grid.cdf')
        dxc = g.variables['dxc'][0,:,:]
        dyc = g.variables['dyc'][0,:,:]
        lat = g.variables['yc'][0,:,:]
        dxcXdyc = dxc*dyc

        area_model=np.zeros((len(modelIteration), 12))
    
        if modelIteration[0] == 'last':
            gg = glob.glob(pathToModel+'/'+yyear+'/'+'it*')
            gg.sort()
            lastit = [int(gg[-1].split('/')[-1].split('t')[-1])]
        else:
            lastit = modelIteration

        for (it, iteration) in enumerate(lastit):
            fm = MFDataset(pathToModel+'/'+yyear+'/'+'it'+str(iteration)+'/fw/*.cdf')
            for mm in range(12):
                if param == 'area':
                    area_model[it,mm] = calc_area(fm.variables['area'][mm,:,:],\
                                              dxcXdyc, lat, blat=boundLat)/10e11
                elif param == 'extent':
                    area_model[it,mm] = calc_extent(fm.variables['area'][mm,:,:],\
                                              dxcXdyc, lat, blat=boundLat)/10e11
    
            fm.close()
        diff_array[nnum,:] = area_model[0,:]-osi_area[:]
    return diff_array
Example #4
File: manager.py Project: guziy/RPN
    def get_climatologic_field(self,  varname = "mrro", gcm = "", rcm = "",
                                         start_year = None, end_year = None,
                                         months = None
                                         ):
        """
             for time t: start_year <= t <= end_year
        """

        mfds = MFDataset("{0}/{1}-{2}/current/{3}_*.nc".format(self.folder_with_nc_data, gcm, rcm, varname))

        self.lon2d = mfds.variables[self.lon_name][:].transpose()
        self.lat2d = mfds.variables[self.lat_name][:].transpose()
        self._init_kd_tree()



        cache_file = self._get_clim_cache_file_path(varname = varname, gcm=gcm, rcm = rcm,
            start_year=start_year, end_year=end_year, months=months)


        cache_file = os.path.join(self.cache_files_folder, cache_file)

        if os.path.isfile(cache_file):
            mfds.close()
            with open(cache_file, "rb") as f:
                return pickle.load(f)




        t = mfds.variables["time"]
        t_units = t.units
        t_calendar = t.calendar

        t_start = date2num(datetime(start_year, 1,1), t_units, calendar=t_calendar)
        t_end = date2num(datetime(end_year+1, 1,1), t_units, calendar=t_calendar)

        t = t[:]
        t_sel = t[(t_start <= t) & (t < t_end)]
        dates_sel = num2date(t_sel, t_units, calendar=t_calendar)

        bool_vect = np.array([x.month in months for x in dates_sel], dtype=bool)
        data_sel = mfds.variables[varname][ np.where( (t_start <= t) & (t < t_end) )[0],:,:]


        # save the result to a cache file for reuse
        result = data_sel[bool_vect, :, :].mean(axis=0).transpose()
        with open(cache_file, "wb") as f:
            pickle.dump(result, f)
        mfds.close()
        return result  # transposed because in the file the axes are inverted
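The compute-once-then-pickle caching idiom used above can be separated from the netCDF handling. A minimal hedged sketch of just that pattern, with hypothetical names:

import os
import pickle

def cached(cache_file, compute):
    # Return a previously pickled result if present, otherwise compute and cache it
    if os.path.isfile(cache_file):
        with open(cache_file, "rb") as f:
            return pickle.load(f)
    result = compute()
    with open(cache_file, "wb") as f:
        pickle.dump(result, f)
    return result

# hypothetical usage:
# clim = cached("mrro_clim.bin", lambda: expensive_climatology())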
Example #5
def getCruData(month, dimLon, dimLat, cruPath):
  
  from netCDF4 import MFDataset
  import numpy as np
  
  #dimLon = np.array((-30.25, 50.25))
  #dimLat = np.array((30.25, 70.25))

  # account for Python's 0-based indexing
  month = int(month) - 1
  
  # open netCDF file
  nc = MFDataset(cruPath)
  
  # map lat lon data to file cru ncdf indexing schema
  lonStart = np.where(nc.variables['lon'][:] == snapToGrid(dimLon[0]))[0][0]+1
  lonEnd = np.where(nc.variables['lon'][:] == snapToGrid(dimLon[1]))[0][0]-1
  lonDim = lonEnd - lonStart

  latStart = np.where(nc.variables['lat'][:] == snapToGrid(dimLat[0]))[0][0]+1
  latEnd = np.where(nc.variables['lat'][:] == snapToGrid(dimLat[1]))[0][0]-1
  latDim = latEnd - latStart
     
  # get number of available years for every month
  yearsAvailable = len(nc.dimensions['time']) // 12

  # preallocate the month array, shape (years, lat, lon), e.g. (100, 81, 161)
  monthData = np.empty((yearsAvailable, latDim, lonDim))
  monthData[:] = np.nan

  for i in range(0, yearsAvailable):
    # read data from file for given month
    monthData[i,:,:] = nc.variables['tmp'][month,latStart:latEnd,lonStart:lonEnd]
    month = month + 12
    
  # to stay consistent with the old array schema
  monthData = monthData.T
 
  # get vector of lat lon data
  longitude = nc.variables['lon'][lonStart:lonEnd] 
  latitude = nc.variables['lat'][latStart:latEnd] 
  
  nc.close()

  # set values flagged as missing (> 100) to NaN
  monthData[monthData > 100] = np.nan

  return monthData, longitude, latitude
Example #6
class EcoFOCI_mfnetCDF(object):

    def __init__(self, file_name=None, aggdim=None):
        """Initialize opening of multiple netcdf files along
        same dimension (aggdim) in same path.

        Parameters
        ----------
        file_name : str
            full path to files on disk (with wildcards)
        aggdim : str
            dimension name to aggregate along. The slowest varying
            dimension or the unlimited dimension is chosen if
            no option is passed.

        """

        self.nchandle = MFDataset(file_name,'a',aggdim=aggdim)
        self.file_name = file_name
        

    def get_global_atts(self):

        g_atts = {}
        att_names = self.nchandle.ncattrs()
        
        for name in att_names:
            g_atts[name] = self.nchandle.getncattr(name)
            
        return g_atts

    def get_vars(self):
        self.variables = self.nchandle.variables
        return self.nchandle.variables

    def ncreadfile_dic(self):

        data = {}
        for j, v in enumerate(self.nchandle.variables):
            if v in self.nchandle.variables.keys():  # check for nc variable
                data[v] = self.nchandle.variables[v][:]
            else:  # if the parameter does not exist, leave it as None
                data[v] = None
        return data

    def close(self):
        self.nchandle.close()
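A hedged usage sketch of the class above, assuming a set of files matching the hypothetical path '/data/mooring/*.nc' that share the same record dimension:

ncdf = EcoFOCI_mfnetCDF(file_name='/data/mooring/*.nc', aggdim='time')
print(ncdf.get_global_atts())   # global attributes merged by MFDataset
data = ncdf.ncreadfile_dic()    # dict of variable name -> aggregated array
ncdf.close()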
Example #7
 def runTest(self):
     """testing multi-file dataset access"""
     f = MFDataset(self.files,check=True)
     assert f.history == 'created today'
     assert_array_equal(np.arange(0,nx),f.variables['x'][:])
     varin = f.variables['data']
     datin = varin[:]
     assert_array_equal(datin.mask,data.mask)
     varin.set_auto_maskandscale(False)
     data2 = data.filled()
     assert varin.long_name == 'phony data'
     assert len(varin) == nx
     assert varin.shape == (nx,ydim,zdim)
     assert varin.dimensions == ('x','y','z')
     assert_array_equal(varin[4:-4:4,3:5,2:8],data2[4:-4:4,3:5,2:8])
     assert varin[0,0,0] == data2[0,0,0]
     assert_array_equal(varin[:],data2)
     assert getattr(varin,'nonexistantatt',None) == None
     f.close()
Example #8
def get_timerange(resource):
    """
    returns from/to timestamps of the given netCDF file(s).

    :param resource: path or list of paths to netCDF file(s)

    :returns netcdf.datetime.datetime: start, end
    """
    start = end = None

    if type(resource) != list:
        resource = [resource]
        print(resource)

    try:
        if len(resource) > 1:
            ds = MFDataset(resource)
            time = ds.variables["time"]
        else:
            ds = Dataset(resource[0])
            time = ds.variables["time"]

        if hasattr(time, "units") and hasattr(time, "calendar"):
            s = num2date(time[0], time.units, time.calendar)
            e = num2date(time[-1], time.units, time.calendar)
        elif hasattr(time, "units"):
            s = num2date(time[0], time.units)
            e = num2date(time[-1], time.units)
        else:
            s = num2date(time[0])
            e = num2date(time[-1])

        ##TODO: include frequency
        start = "%s%s%s" % (s.year, str(s.month).zfill(2), str(s.day).zfill(2))
        end = "%s%s%s" % (e.year, str(e.month).zfill(2), str(e.day).zfill(2))
        ds.close()
    except Exception as e:
        msg = "failed to get time range: %s " % e
        logger.exception(msg)
        raise Exception(msg)

    return start, end
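A hedged usage sketch (the file paths are hypothetical); a single path is opened with Dataset, a list of paths with MFDataset:

# start, end = get_timerange('/data/tas_day_2000.nc')
# start, end = get_timerange(['/data/tas_day_2000.nc', '/data/tas_day_2001.nc'])
# print(start, end)   # e.g. ('20000101', '20011231')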
Example #9
def get_time(resource, format=None):
    """
    returns all timestamps of given netcdf file as datetime list.

    :param resource: NetCDF file(s)
    :param format: if a format is provided (e.g format='%Y%d%m'), values will be converted to string
    :return : list of timesteps
    """
    if type(resource) != list:
        resource = [resource]

    try:
        if len(resource) > 1:
            ds = MFDataset(resource)
            time = ds.variables["time"]
        else:
            ds = Dataset(resource[0])
            time = ds.variables["time"]
    except:
        msg = "failed to get time"
        logger.exception(msg)
        raise Exception(msg)

    try:
        if hasattr(time, "units") and hasattr(time, "calendar"):
            timestamps = num2date(time[:], time.units, time.calendar)
        elif hasattr(time, "units"):
            timestamps = num2date(time[:], time.units)
        else:
            timestamps = num2date(time[:])
        ds.close()
        try:
            if format is not None:
                timestamps = [t.strftime(format=format) for t in timestamps]
        except:
            msg = "failed to convert times to string"
            print(msg)
            logger.debug(msg)
    except:
        msg = "failed to convert time"
        logger.exception(msg)
        raise Exception(msg)
    return timestamps
Example #10
def getCruData(NCFILE, month, dimLon, dimLat):
  from netCDF4 import MFDataset
  import numpy as np

  # account for Python's 0-based indexing
  month = month - 1

  # open netCDF file
  nc = MFDataset(NCFILE)

  # map lat lon data to file cru ncdf indexing schema
  lonStart = np.where(nc.variables['lon'][:] == dimLon[0])[0][0]
  lonEnd = np.where(nc.variables['lon'][:] == dimLon[1])[0][0]
  lonDim = (lonEnd - lonStart)

  latStart = np.where(nc.variables['lat'][:] == dimLat[0])[0][0]
  latEnd = np.where(nc.variables['lat'][:] == dimLat[1])[0][0]
  latDim = (latEnd - latStart)

  # get number of available years for every month
  yearsAvailable = len(nc.dimensions['time']) // 12

  # preallocate the month array, shape (years, lat, lon), e.g. (100, 81, 161)
  monthData = np.empty((yearsAvailable, latDim, lonDim))
  monthData[:] = np.nan

  for i in range(0, yearsAvailable):
    # read data from file for given month
    monthData[i,:,:] = nc.variables['tmp'][month,latStart:latEnd,lonStart:lonEnd]
    month = month + 12

  # set values flagged as missing (> 100) to NaN
  monthData[monthData > 100] = np.nan

  # get vector of lat lon data
  longitude = nc.variables['lon'][lonStart:lonEnd]
  latitude = nc.variables['lat'][latStart:latEnd]

  nc.close()

  return monthData, longitude, latitude
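A hedged usage sketch of getCruData above; the file path is hypothetical and the corner coordinates must match grid-cell centres that actually exist in the file's 'lon'/'lat' variables:

import numpy as np

# tmp, lon, lat = getCruData('/data/cru_ts_tmp.nc', month=7,
#                            dimLon=np.array((-30.25, 50.25)),
#                            dimLat=np.array((30.25, 70.25)))
# print(tmp.shape)   # (years, lat, lon)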
Example #11
def ice_comp_model_to_sat_table_rm(pathToModel, modelYears, modelIteration,\
                               boundLat, pathToOSI, param):


    diff_array = numpy.zeros((len(modelYears), 12))

    for (nnum, yyear) in enumerate(modelYears):
        
        g  = Dataset('./grid.cdf')
        dxc = g.variables['dxc'][0,:,:]
        dyc = g.variables['dyc'][0,:,:]
        lat = g.variables['yc'][0,:,:]
        topo = g.variables['topo'][0,:,:]
        dxcXdyc = dxc*dyc

        
        #area_model=np.zeros((len(modelIteration), 12))
    
        if modelIteration[0] == 'last':
            gg = glob.glob(pathToModel+'/'+yyear+'/'+'it*')
            gg.sort()
            lastit = [int(gg[-1].split('/')[-1].split('t')[-1])]
        else:
            lastit = modelIteration

        for (it, iteration) in enumerate(lastit):
            fm = MFDataset(pathToModel+'/'+yyear+'/'+'it'+str(iteration)+'/fw/*.cdf')
            fsat = MFDataset(pathToOSI+yyear+'??.nc')
            for mm in range(12):
                    if param == 'area':

                        aa_model = np.ma.filled(fm.variables['area'][mm,:,:], 0) * dxcXdyc
                        bb_satel = (fsat.variables['ice'][mm,:,:]) * dxcXdyc
                        cc_diff  = aa_model - bb_satel
                        diff_array[nnum,mm] = np.sqrt(cc_diff**2).sum()
                    
                    if param == 'extent':

                        dmodel = np.ma.filled(fm.variables['area'][mm,:,:], 0)
                        dmodel[dmodel<0.15] = 0
                        dmodel[dmodel>=0.15] = 1
                        aa_model = dmodel * dxcXdyc

                        dsat = fsat.variables['ice'][mm,:,:]
                        dsat[dsat<0.15] = 0
                        dsat[dsat>=0.15] = 1
                        bb_satel = dsat * dxcXdyc
                        cc_diff  = aa_model - bb_satel
                        diff_array[nnum,mm] = np.sqrt(cc_diff**2).sum()


    
            fm.close()
            fsat.close()

    return diff_array
Example #13
def ncinfo(files, hidedims, ignoretime, units, vars=None):

    if isinstance(files, list):
        try:
            ncobj = MFDataset(files)
        except Exception as e:
            warn("Could not aggregate datasets, python library returned: " +
                 str(e))
            return
    else:
        print()
        print(files)
        ncobj = Dataset(files, 'r')

    varnames = ncobj.variables.keys()
    varname_maxlen = len(max(varnames, key=len))

    pr_varnames = []
    pr_dimensions = []
    pr_longnames = []

    for varname in varnames:

        var = ncobj.variables[varname]

        if "time" == varname.lower():
            if not var.ndim == 1:
                warn("I don't understand two dimensional time dimensions")
                continue
            # Get our time axis
            nsteps = len(var)
            try:
                unit = var.__getattribute__("units").partition(' ')[0]
            except AttributeError:
                unit = 'None'
            if nsteps > 1:
                print("Time steps: ", nsteps, " x ", var[1] - var[0], unit)
            elif nsteps == 1:
                print("Time : ", var[0], unit)
            continue

        if ignoretime and "time" in varname.lower():
            continue

        if vars is not None:
            if varname not in vars: continue

        if var.ndim == 1:
            dims = ncobj.variables[varname].dimensions
            if hidedims and dims[0] == varname:
                # This is a dimension variable, ignore
                continue
            if ignoretime and dims[0] == "time":
                # Time bounds stuff also ignore
                continue
        # fmt = '{0:{1}} ::  {2:<22}  :: {3}'

        try:
            long_name = var.__getattribute__("long_name")
        except AttributeError:
            long_name = ''

        if units:
            try:
                unit = "(" + var.__getattribute__("units") + ")"
            except AttributeError:
                unit = ''
            long_name = " ".join([long_name, unit])

        pr_varnames.append(str(varname))
        pr_dimensions.append(str(var.shape))
        pr_longnames.append(str(long_name))

    fmt = '{0:{1}} :: {2:{3}} :: {4}'
    pr_varnames_maxlen = len(max(pr_varnames, key=len))
    pr_dimensions_maxlen = len(max(pr_dimensions, key=len))
    for varstr, dimstr, namestr in zip(pr_varnames, pr_dimensions,
                                       pr_longnames):
        print(
            fmt.format(varstr, pr_varnames_maxlen, dimstr,
                       pr_dimensions_maxlen, namestr))
Example #14
 def runTest(self):
     """testing multi-file dataset access"""
     f = MFDataset(self.files,check=True)
     f.set_auto_maskandscale(True) # issue570
     f.set_always_mask(False)
     assert f.history == 'created today'
     assert_array_equal(np.arange(0,nx),f.variables['x'][:])
     varin = f.variables['data']
     datin = varin[:]
     assert_array_equal(datin.mask,data.mask)
     varin.set_auto_maskandscale(False)
     data2 = data.filled()
     assert varin.long_name == 'phony data'
     assert len(varin) == nx
     assert varin.shape == (nx,ydim,zdim)
     assert varin.dimensions == ('x','y','z')
     assert_array_equal(varin[4:-4:4,3:5,2:8],data2[4:-4:4,3:5,2:8])
     assert varin[0,0,0] == data2[0,0,0]
     assert_array_equal(varin[:],data2)
     assert getattr(varin,'nonexistantatt',None) == None
     f.close()
     # test master_file kwarg (issue #835).
     f = MFDataset(self.files,master_file=self.files[-1],check=True)
     assert_array_equal(np.arange(0,nx),f.variables['x'][:])
     varin = f.variables['data']
     assert_array_equal(varin[4:-4:4,3:5,2:8],data2[4:-4:4,3:5,2:8])
     f.close()
     # testing multi-file get_variables_by_attributes
     f = MFDataset(self.files,check=True)
     assert f.get_variables_by_attributes(axis='T') == []
     f.get_variables_by_attributes(units='zlotys')[0] == f['x']
     f.close()
Example #15
 def __init__(self, files, thkth=1.0, **kwargs):
     MFDataset.__init__(self, files, **kwargs)
     self.__dict__['thkth'] = thkth
Example #16
 def test_get_by_mfdataset(self):
     """testing multi-file get_variables_by_attributes."""
     f = MFDataset(self.files,check=True)
     assert f.get_variables_by_attributes(axis='T') == []
     f.get_variables_by_attributes(units='zlotys')[0] == f['x']
     f.close()
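For context, get_variables_by_attributes (exercised in the test above) also works on a plain single-file Dataset; a hedged sketch assuming a hypothetical file 'example.nc' with CF-style attributes:

from netCDF4 import Dataset

nc = Dataset('example.nc')
# variables whose 'units' attribute is exactly 'K'
kelvin_vars = nc.get_variables_by_attributes(units='K')
# a callable matches any variable that has a 'standard_name' attribute at all
named_vars = nc.get_variables_by_attributes(standard_name=lambda v: v is not None)
nc.close()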
Example #17
def test_tutorial():
    # 2 unlimited dimensions.
    #temp = rootgrp.createVariable('temp','f4',('time','level','lat','lon',))
    # this makes the compression 'lossy' (preserving a precision of 1/1000)
    # try it and see how much smaller the file gets.
    temp = rootgrp.createVariable('temp',
                                  'f4', (
                                      'time',
                                      'level',
                                      'lat',
                                      'lon',
                                  ),
                                  least_significant_digit=3)
    # attributes.
    import time
    rootgrp.description = 'bogus example script'
    rootgrp.history = 'Created ' + time.ctime(time.time())
    rootgrp.source = 'netCDF4 python module tutorial'
    latitudes.units = 'degrees north'
    longitudes.units = 'degrees east'
    levels.units = 'hPa'
    temp.units = 'K'
    times.units = 'hours since 0001-01-01 00:00:00.0'
    times.calendar = 'gregorian'
    for name in rootgrp.ncattrs():
        print('Global attr', name, '=', getattr(rootgrp, name))
    print(rootgrp)
    print(rootgrp.__dict__)
    print(rootgrp.variables)
    print(rootgrp.variables['temp'])
    import numpy
    # no unlimited dimension, just assign to slice.
    lats = numpy.arange(-90, 91, 2.5)
    lons = numpy.arange(-180, 180, 2.5)
    latitudes[:] = lats
    longitudes[:] = lons
    print('latitudes =\n', latitudes[:])
    print('longitudes =\n', longitudes[:])
    # append along two unlimited dimensions by assigning to slice.
    nlats = len(rootgrp.dimensions['lat'])
    nlons = len(rootgrp.dimensions['lon'])
    print('temp shape before adding data = ', temp.shape)
    from numpy.random.mtrand import uniform  # random number generator.
    temp[0:5, 0:10, :, :] = uniform(size=(5, 10, nlats, nlons))
    print('temp shape after adding data = ', temp.shape)
    # levels have grown, but no values yet assigned.
    print('levels shape after adding pressure data = ', levels.shape)
    # assign values to levels dimension variable.
    levels[:] = [1000., 850., 700., 500., 300., 250., 200., 150., 100., 50.]
    # fancy slicing
    tempdat = temp[::2, [1, 3, 6], lats > 0, lons > 0]
    print('shape of fancy temp slice = ', tempdat.shape)
    print(temp[0, 0, [0, 1, 2, 3], [0, 1, 2, 3]].shape)
    # fill in times.
    from datetime import datetime, timedelta
    from netCDF4 import num2date, date2num, date2index
    dates = [
        datetime(2001, 3, 1) + n * timedelta(hours=12)
        for n in range(temp.shape[0])
    ]
    times[:] = date2num(dates, units=times.units, calendar=times.calendar)
    print('time values (in units %s): ' % times.units + '\n', times[:])
    dates = num2date(times[:], units=times.units, calendar=times.calendar)
    print('dates corresponding to time values:\n', dates)
    rootgrp.close()
    # create a series of netCDF files with a variable sharing
    # the same unlimited dimension.
    for nfile in range(10):
        f = Dataset('mftest' + repr(nfile) + '.nc',
                    'w',
                    format='NETCDF4_CLASSIC')
        f.createDimension('x', None)
        x = f.createVariable('x', 'i', ('x', ))
        x[0:10] = numpy.arange(nfile * 10, 10 * (nfile + 1))
    f.close()
    # now read all those files in at once, in one Dataset.
    from netCDF4 import MFDataset
    f = MFDataset('mftest*nc')
    print(f.variables['x'][:])
    # example showing how to save numpy complex arrays using compound types.
    f = Dataset('complex.nc', 'w')
    size = 3  # length of 1-d complex array
    # create sample complex data.
    datac = numpy.exp(1j * (1. + numpy.linspace(0, numpy.pi, size)))
    print(datac.dtype)
    # create complex128 compound data type.
    complex128 = numpy.dtype([('real', numpy.float64),
                              ('imag', numpy.float64)])
    complex128_t = f.createCompoundType(complex128, 'complex128')
    # create a variable with this data type, write some data to it.
    f.createDimension('x_dim', None)
    v = f.createVariable('cmplx_var', complex128_t, 'x_dim')
    data = numpy.empty(size, complex128)  # numpy structured array
    data['real'] = datac.real
    data['imag'] = datac.imag
    v[:] = data
    # close and reopen the file, check the contents.
    f.close()
    f = Dataset('complex.nc')
    print(f)
    print(f.variables['cmplx_var'])
    print(f.cmptypes)
    print(f.cmptypes['complex128'])
    v = f.variables['cmplx_var']
    print(v.shape)
    datain = v[:]  # read in all the data into a numpy structured array
    # create an empty numpy complex array
    datac2 = numpy.empty(datain.shape, numpy.complex128)
    # .. fill it with contents of structured array.
    datac2.real = datain['real']
    datac2.imag = datain['imag']
    print(datac.dtype, datac)
    print(datac2.dtype, datac2)
    # more complex compound type example.
    from netCDF4 import chartostring, stringtoarr
    f = Dataset('compound_example.nc', 'w')  # create a new dataset.
    # create an unlimited dimension call 'station'
    f.createDimension('station', None)
    # define a compound data type (can contain arrays, or nested compound types).
    NUMCHARS = 80  # number of characters to use in fixed-length strings.
    winddtype = numpy.dtype([('speed', 'f4'), ('direction', 'i4')])
    statdtype = numpy.dtype([('latitude', 'f4'), ('longitude', 'f4'),
                             ('surface_wind', winddtype),
                             ('temp_sounding', 'f4', 10),
                             ('press_sounding', 'i4', 10),
                             ('location_name', 'S1', NUMCHARS)])
    # use this data type definitions to create a compound data types
    # called using the createCompoundType Dataset method.
    # create a compound type for vector wind which will be nested inside
    # the station data type. This must be done first!
    wind_data_t = f.createCompoundType(winddtype, 'wind_data')
    # now that wind_data_t is defined, create the station data type.
    station_data_t = f.createCompoundType(statdtype, 'station_data')
    # create nested compound data types to hold the units variable attribute.
    winddtype_units = numpy.dtype([('speed', 'S1', NUMCHARS),
                                   ('direction', 'S1', NUMCHARS)])
    statdtype_units = numpy.dtype([('latitude', 'S1', NUMCHARS),
                                   ('longitude', 'S1', NUMCHARS),
                                   ('surface_wind', winddtype_units),
                                   ('temp_sounding', 'S1', NUMCHARS),
                                   ('location_name', 'S1', NUMCHARS),
                                   ('press_sounding', 'S1', NUMCHARS)])
    # create the wind_data_units type first, since it will nested inside
    # the station_data_units data type.
    wind_data_units_t = f.createCompoundType(winddtype_units,
                                             'wind_data_units')
    station_data_units_t =\
        f.createCompoundType(statdtype_units,'station_data_units')
    # create a variable of of type 'station_data_t'
    statdat = f.createVariable('station_obs', station_data_t, ('station', ))
    # create a numpy structured array, assign data to it.
    data = numpy.empty(1, station_data_t)
    data['latitude'] = 40.
    data['longitude'] = -105.
    data['surface_wind']['speed'] = 12.5
    data['surface_wind']['direction'] = 270
    data['temp_sounding'] = (280.3, 272., 270., 269., 266., 258., 254.1, 250.,
                             245.5, 240.)
    data['press_sounding'] = range(800, 300, -50)
    # variable-length string datatypes are not supported inside compound types, so
    # to store strings in a compound data type, each string must be
    # stored as fixed-size (in this case 80) array of characters.
    data['location_name'] = stringtoarr('Boulder, Colorado, USA', NUMCHARS)
    # assign structured array to variable slice.
    statdat[0] = data
    # or just assign a tuple of values to variable slice
    # (will automatically be converted to a structured array).
    statdat[1] = (40.78, -73.99, (-12.5, 90), (290.2, 282.5, 279., 277.9, 276.,
                                               266., 264.1, 260., 255.5, 243.),
                  range(900, 400,
                        -50), stringtoarr('New York, New York, USA', NUMCHARS))
    print(f.cmptypes)
    windunits = numpy.empty(1, winddtype_units)
    stationobs_units = numpy.empty(1, statdtype_units)
    windunits['speed'] = stringtoarr('m/s', NUMCHARS)
    windunits['direction'] = stringtoarr('degrees', NUMCHARS)
    stationobs_units['latitude'] = stringtoarr('degrees north', NUMCHARS)
    stationobs_units['longitude'] = stringtoarr('degrees west', NUMCHARS)
    stationobs_units['surface_wind'] = windunits
    stationobs_units['location_name'] = stringtoarr('None', NUMCHARS)
    stationobs_units['temp_sounding'] = stringtoarr('Kelvin', NUMCHARS)
    stationobs_units['press_sounding'] = stringtoarr('hPa', NUMCHARS)
    statdat.units = stationobs_units
    # close and reopen the file.
    f.close()
    f = Dataset('compound_example.nc')
    print(f)
    statdat = f.variables['station_obs']
    print(statdat)
    # print out data in variable.
    print('data in a variable of compound type:')
    print('----')
    for data in statdat[:]:
        for name in statdat.dtype.names:
            if data[name].dtype.kind == 'S':  # a string
                # convert array of characters back to a string for display.
                units = chartostring(statdat.units[name])
                print(name,': value =',chartostring(data[name]),\
                          ': units=',units)
            elif data[name].dtype.kind == 'V':  # a nested compound type
                units_list = [
                    chartostring(s) for s in tuple(statdat.units[name])
                ]
                print(name,data[name].dtype.names,': value=',data[name],': units=',\
                          units_list)
            else:  # a numeric type.
                units = chartostring(statdat.units[name])
                print(name, ': value=', data[name], ': units=', units)
                print('----')
    f.close()
    f = Dataset('tst_vlen.nc', 'w')
    vlen_t = f.createVLType(numpy.int32, 'phony_vlen')
    x = f.createDimension('x', 3)
    y = f.createDimension('y', 4)
    vlvar = f.createVariable('phony_vlen_var', vlen_t, ('y', 'x'))
    import random
    data = numpy.empty(len(y) * len(x), object)
    for n in range(len(y) * len(x)):
        data[n] = numpy.arange(random.randint(1, 10), dtype='int32') + 1
    data = numpy.reshape(data, (len(y), len(x)))
    vlvar[:] = data
    print(vlvar)
    print('vlen variable =\n', vlvar[:])
    print(f)
    print(f.variables['phony_vlen_var'])
    print(f.vltypes['phony_vlen'])
    z = f.createDimension('z', 10)
    strvar = f.createVariable('strvar', str, 'z')
    chars = '1234567890aabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    data = numpy.empty(10, object)
    for n in range(10):
        stringlen = random.randint(2, 12)
        data[n] = ''.join([random.choice(chars) for i in range(stringlen)])
    strvar[:] = data
    print('variable-length string variable:\n', strvar[:])
    print(f)
    print(f.variables['strvar'])
    f.close()
Example #18
def readEnsemble(wrfinit, timerange=None, fields=None, debug=False):
    ''' Reads in desired fields and returns 2-D arrays of data for each field (fill/contour/barb) '''
    if debug: print(fields)

    datadict = {}
    file_list, missing_list = makeEnsembleList(wrfinit, timerange) #construct list of files
 
    # loop through fill field, contour field, barb field and retrieve required data
    for f in ['fill', 'contour', 'barb']:
        if not fields[f].keys(): continue
        if debug: print('Reading field:', fields[f]['name'], 'from', fields[f]['filename'])
        
        # save some variables for use in this function
        filename = fields[f]['filename']
        arrays = fields[f]['arrayname']
        fieldtype = fields[f]['ensprod']
        if fieldtype in ['prob', 'neprob']: thresh = fields[f]['thresh']
        if fieldtype[0:3]=='mem': member = int(fieldtype[3:])
        
        # open multi-file netcdf dataset
        if debug: print(file_list[filename])
        fh = MFDataset(file_list[filename])
       
        # loop through each field, wind fields will have two fields that need to be read
        datalist = []
        for array in arrays:
            # read in 3D array (times*members,ny,nx) from file object 
            if 'arraylevel' in fields[f]:
                if fields[f]['arraylevel'] != 'max': data = fh.variables[array][:,0,fields[f]['arraylevel'],:,:]
                else: 				     data = np.amax(fh.variables[array][:,0,:,:,:], axis=1)
#GSR            else:                                    data = fh.variables[array][:,0,:,:]
#            elif 'sfclevel' in fields[f]:            data = fh.variables[array][:,:,:]
            else:                                    data = fh.variables[array][:,0,:,:]
#            else:                                    data = fh.variables[array][:,:,:]
            
            # change units for certain fields
            if array in ['U_GRID_PRS', 'V_GRID_PRS', 'UBSHR6','VBSHR6','U10','V10', 'U_COMP_STM', 'V_COMP_STM','S_PL']:  data = data*1.93 # m/s > kt
            if array in ['mean_V10_d01','mean_U10_d01']:  data = data*1.93*10.0 # m/s > .1 kt
            if array in ['MSL_PRES']:  data = data/100. # mb
            if array in ['P_WAT']:  data = data*0.0393701 # mb
            elif array in ['DEWPOINT_2M', 'T2', 'AFWA_WCHILL', 'AFWA_HEATIDX']:   data = (data - 273.15)*1.8 + 32.0 # K > F 
            elif array in ['PREC_ACC_NC', 'PREC_ACC_C', 'AFWA_PWAT', 'PWAT', 'AFWA_SNOWFALL', 'AFWA_SNOW', 'AFWA_ICE', 'AFWA_FZRA']:   data = data*0.0393701 # mm > in 
            elif array in ['RAINNC', 'GRPL_MAX', 'SNOW_ACC_NC']:  data = data*0.0393701 # mm > in 
            elif array in ['TEMP_PRS', 'DEWPOINT_PRS', 'SFC_LI']:            data = data - 273.15 # K > C
            elif array in ['ABS_VORT_PRS']:                       data = data*100000.0
            elif array in ['AFWA_MSLP', 'MSLP']:                  data = data*0.01 # Pa > hPa
            elif array in ['ECHOTOP']:                            data = data*0.001  # m > km
            elif array in ['SBCINH', 'MLCINH', 'W_DN_MAX']:       data = data*-1.0 # make cin positive
            elif array in ['PVORT_320K']:                         data = data*1000000 # multiply by 1e6
            elif array in ['SBT123_GDS3_NTAT']:                   data = data -273.15 # K -> C
            elif array in ['SBT124_GDS3_NTAT']:                   data = data -273.15 # K -> C
            elif array in ['HAIL_MAXK1', 'HAIL_MAX2D']:           data = data*39.3701 #  m -> inches
            elif array in ['mean_T2_d01']:                        data = data*1.8 # C->F
            elif array in ['T_LEV1']:                             data = data*1.8 + 32.0 # C->F

            # perform mean/max/variance/etc to reduce 3D array to 2D
            if (fieldtype == 'mean'):  data = np.mean(data, axis=0)
            elif (fieldtype == 'pmm'): data = compute_pmm(data)
            elif (fieldtype == 'max'): data = np.amax(data, axis=0)
            elif (fieldtype == 'var'): data = np.std(data, axis=0)
            elif (fieldtype == 'summean'):
                for i in missing_list[filename]: data = np.insert(data, i, np.nan, axis=0) #insert nan for missing files
                data = np.reshape(data, (data.shape[0]//10, 10, data.shape[1], data.shape[2]))
                data = np.nansum(data, axis=0)
                data = np.nanmean(data, axis=0)
            elif (fieldtype[0:3] == 'mem'):
                for i in missing_list[filename]: data = np.insert(data, i, np.nan, axis=0) #insert nan for missing files
                data = np.reshape(data, (data.shape[0]//10, 10, data.shape[1], data.shape[2]))
                data = np.nanmax(data, axis=0)
                data = data[member-1,:]
            elif (fieldtype in ['prob', 'neprob']):
                data = (data>=thresh).astype('float')
                for i in missing_list[filename]: data = np.insert(data, i, np.nan, axis=0) #insert nan for missing files
                data = np.reshape(data, (data.shape[0]//10, 10, data.shape[1], data.shape[2]))
                data = np.nanmax(data, axis=0)
                if (fieldtype == 'neprob'): data = compute_neprob(data, roi=14, sigma=40) #nw=neighborhood width
                else: data = np.nanmean(data, axis=0) 
                data = data+0.001 #hack to ensure that plot displays discrete prob values

            if debug: print('Returning', array, 'with shape', data.shape, 'max', data.max(), 'min', data.min())

            datalist.append(data)

        # attach data arrays for each type of field (e.g. { 'fill':[data], 'barb':[data,data] })
        datadict[f] = datalist
        fh.close()

    # these are derived fields, we don't have in any of the input files but we can compute
    if 'name' in fields['fill']:
      if fields['fill']['name'] in ['shr06mag', 'shr01mag', 'bunkmag']: datadict['fill'] = [np.sqrt(datadict['fill'][0]**2 + datadict['fill'][1]**2)]
      if fields['fill']['name'] in ['iso300', 'iso500', 'iso700', 'iso850']: datadict['fill'] = [np.sqrt(datadict['fill'][0]**2 + datadict['fill'][1]**2)]
      elif fields['fill']['name'] == 'stp': datadict['fill'] = computestp(datadict['fill'])

    return (datadict, missing_list['amem'])
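The per-field reductions above hinge on reshaping the stacked (ntimes*nmembers, ny, nx) array read from MFDataset into (ntimes, nmembers, ny, nx) before taking means, maxima or exceedance probabilities. A hedged standalone sketch of that pattern, assuming 10 members and time-major stacking as in the reshape calls above:

import numpy as np

nmem, ny, nx = 10, 5, 5
stacked = np.random.rand(3 * nmem, ny, nx)               # 3 times x 10 members stacked
cube = stacked.reshape(stacked.shape[0] // nmem, nmem, ny, nx)
ens_mean = cube.mean(axis=1)                             # mean over members, per time
ens_max = cube.max(axis=1)                               # member maximum, per time
prob = (cube >= 0.5).mean(axis=1)                        # fraction of members above a threshold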
Example #19
##############################################################################

print(sys.argv[1])
if sys.argv[1] == '-h' or sys.argv[1] == '-help':
    help()
    sys.exit()

# GPU SET UP - bus connection
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

# GPU SET UP - visible gpu in this script
os.environ["CUDA_VISIBLE_DEVICES"] = str(sys.argv[2])
num_gpu = str(sys.argv[2]).count(',') + 1

#LOADING DATA
ncfile_r = MFDataset(
    '/home/aidl/git/data/data_final/long_train_10_years/clt/*.nc')
maps = ncfile_r.variables['clt']
Nt, Ny, Nx = maps.shape
print('{} maps ready to be loaded'.format(Nt))

##############################################################################
##############################################################################
##############################################################################

# Parameters to play with.

# BATCH DEFINITION
Nframes = 1  # Number of frames within each sequences to be used during training
batch_size = 128  # Number of sequences to included within each batch during training
epochs = 500
Example #20
def ice_comp_model_to_sat(pathToModel, modelYear, modelIteration, \
                          boundLat, pathToOSI, param = 'area', threshold=0.15, coast_exp=False):
    '''
    Plot sea ice area from satellite data and several model iterations
    '''
    g  = Dataset('./grid.cdf')
    dxc = g.variables['dxc'][0,:,:]
    dyc = g.variables['dyc'][0,:,:]
    lat = g.variables['yc'][0,:,:]
    topo = g.variables['topo'][0,:,:]
    dxcXdyc = dxc*dyc
    if coast_exp==True:
        topo2 = expand_coast(topo)

    area_model=np.zeros((len(modelIteration), 12))

    for (it, iteration) in enumerate(modelIteration):
        fm = MFDataset(pathToModel+'/'+modelYear+'/'+'it'+str(iteration)+'/fw/*.cdf')
        for mm in range(12):
            if param == 'area':
                if coast_exp:
                    temp_area = fm.variables['area'][mm,:,:]
                    temp_area = np.ma.masked_array(temp_area, mask = topo2.mask)
                    area_model[it,mm] = calc_area(temp_area,\
                                                  dxcXdyc, lat, blat=boundLat, threshold=threshold)/10e11
                else:
                    area_model[it,mm] = calc_area(fm.variables['area'][mm,:,:],\
                                              dxcXdyc, lat, blat=boundLat, threshold=threshold)/10e11
            elif param == 'extent':
                if coast_exp:
                    temp_area = fm.variables['area'][mm,:,:]
                    temp_area = np.ma.masked_array(temp_area, mask = topo2.mask)
                    area_model[it,mm] = calc_extent(temp_area,\
                                              dxcXdyc, lat, blat=boundLat, threshold=threshold)/10e11
                else:
                    area_model[it,mm] = calc_extent(fm.variables['area'][mm,:,:],\
                                              dxcXdyc, lat, blat=boundLat, threshold=threshold)/10e11
    
        fm.close()

    fsat = MFDataset(pathToOSI+modelYear+'??.nc')
    
    #osi_ice = fsat.variables['ice'][0,:,:]
    
    osi_area = []
    for mm in range(12):
        if param == 'area':
            area_temp = fsat.variables['ice'][mm,:,:]
            if coast_exp==True:
                area_temp = np.ma.masked_array(area_temp, mask = topo2.mask)
            else:
                area_temp = np.ma.masked_array(area_temp, mask = topo.mask)
            
            area_temp = np.ma.masked_less_equal(area_temp, threshold)
            osi_area.append(calc_area(np.ma.filled(area_temp,0),\
                            dxcXdyc, lat, blat=boundLat, threshold=threshold)/10e11)
            
        elif param == 'extent':
            area_temp = fsat.variables['ice'][mm,:,:]
            if coast_exp==True:
                area_temp = np.ma.masked_array(area_temp, mask = topo2.mask)
            else:
                area_temp = np.ma.masked_array(area_temp, mask = topo.mask)
            
            area_temp = np.ma.masked_less_equal(area_temp, threshold)
            osi_area.append(calc_extent(np.ma.filled(area_temp,0),\
                            dxcXdyc, lat, blat=boundLat, threshold=threshold)/10e11)
        
    dates = pd.date_range(modelYear+'-01', str(int(modelYear)+1)+'-01', freq='M')
    dd = pd.DataFrame(index=dates)

    dd['Satellite']=osi_area

    for (it , iteration) in enumerate(modelIteration):
        dd['it'+str(iteration)]=area_model[it,:]

    return dd.plot(figsize=(10,5))
Example #21
0
 def update(self, FileName):
     #point to a new nc file or url without reinitializing everything
     if isinstance(FileName, list):
         self.Dataset = MFDataset(FileName)
     else:
         self.Dataset = Dataset(FileName)
Example #22
    def __init__(self, filename=None, name=None, gridfile=None):

        if filename is None:
            raise ValueError('Need filename as argument to constructor')

        # Map ROMS variable names to CF standard_name
        self.ROMS_variable_mapping = {
            # Removing (temporarily) land_binary_mask from ROMS-variables,
            # as this leads to trouble with linearNDFast interpolation
            'mask_rho': 'land_binary_mask',
            'mask_psi': 'land_binary_mask',
            'h': 'sea_floor_depth_below_sea_level',
            'zeta': 'sea_surface_height',
            'u': 'x_sea_water_velocity',
            'v': 'y_sea_water_velocity',
            'w': 'upward_sea_water_velocity',
            'temp': 'sea_water_temperature',
            'salt': 'sea_water_salinity',
            'uice': 'sea_ice_x_velocity',
            'vice': 'sea_ice_y_velocity',
            'aice': 'sea_ice_area_fraction',
            'hice': 'sea_ice_thickness',
            'gls': 'turbulent_generic_length_scale',
            'tke': 'turbulent_kinetic_energy',
            'AKs': 'ocean_vertical_diffusivity',
            'sustr': 'surface_downward_x_stress',
            'svstr': 'surface_downward_y_stress',
            'Uwind': 'x_wind',
            'Vwind': 'y_wind'
        }

        # z-levels to which sigma-layers may be interpolated
        self.zlevels = np.array([
            0, -.5, -1, -3, -5, -10, -25, -50, -75, -100, -150, -200, -250,
            -300, -400, -500, -600, -700, -800, -900, -1000, -1500, -2000,
            -2500, -3000, -3500, -4000, -4500, -5000, -5500, -6000, -6500,
            -7000, -7500, -8000
        ])

        gls_param = ['gls_cmu0', 'gls_p', 'gls_m', 'gls_n']

        filestr = str(filename)
        if name is None:
            self.name = filestr
        else:
            self.name = name

        try:
            # Open file, check that everything is ok
            self.logger.info('Opening dataset: ' + filestr)
            if ('*' in filestr) or ('?' in filestr) or ('[' in filestr):
                self.logger.info('Opening files with MFDataset')

                def drop_non_essential_vars_pop(ds):
                    dropvars = [
                        v for v in ds.variables
                        if v not in list(self.ROMS_variable_mapping.keys()) +
                        gls_param +
                        ['ocean_time', 's_rho', 'Cs_r', 'hc', 'angle']
                        and v[0:3] not in ['lon', 'lat', 'mas']
                    ]
                    self.logger.debug('Dropping variables: %s' % dropvars)
                    ds = ds.drop(dropvars)
                    return ds

                if has_xarray is True:
                    self.Dataset = xr.open_mfdataset(
                        filename,
                        chunks={'ocean_time': 1},
                        concat_dim='ocean_time',
                        preprocess=drop_non_essential_vars_pop,
                        data_vars='minimal',
                        coords='minimal')
                else:
                    self.Dataset = MFDataset(filename)
            else:
                self.logger.info('Opening file with Dataset')
                if has_xarray is True:
                    self.Dataset = xr.open_dataset(filename)
                else:
                    self.Dataset = Dataset(filename, 'r')
        except Exception as e:
            raise ValueError(e)

        if 's_rho' not in self.Dataset.variables:
            dimensions = 2
        else:
            dimensions = 3

        if dimensions == 3:
            # Read sigma-coordinate values
            try:
                self.sigma = self.Dataset.variables['s_rho'][:]
            except:
                num_sigma = len(self.Dataset.dimensions['s_rho'])
                self.logger.warning(
                    's_rho not available in dataset, constructing from'
                    ' number of layers (%s).' % num_sigma)
                self.sigma = (np.arange(num_sigma) + .5 -
                              num_sigma) / num_sigma

            # Read sigma-coordinate transform parameters
            try:
                self.Dataset.variables['Cs_r'].set_auto_mask(False)
            except:
                pass
            self.Cs_r = self.Dataset.variables['Cs_r'][:]
            try:
                self.hc = self.Dataset.variables['hc'][:]
            except:
                if has_xarray is True:
                    self.hc = self.Dataset.variables['hc'].data  # scalar
                else:
                    self.hc = self.Dataset.variables['hc'][0]

            self.num_layers = len(self.sigma)
        else:
            self.num_layers = 1
            self.ROMS_variable_mapping['ubar'] = 'x_sea_water_velocity'
            self.ROMS_variable_mapping['vbar'] = 'y_sea_water_velocity'
            del self.ROMS_variable_mapping['u']
            del self.ROMS_variable_mapping['v']

        if 'lat_rho' in self.Dataset.variables:
            # Horizontal coordinates and directions
            self.lat = self.Dataset.variables['lat_rho'][:]
            self.lon = self.Dataset.variables['lon_rho'][:]
        else:
            if gridfile is None:
                raise ValueError(filename + ' does not contain lon/lat '
                                 'arrays, please supply a grid-file '
                                 '"gridfile=<grid_file>"')
            else:
                gf = Dataset(gridfile)
                self.lat = gf.variables['lat_rho'][:]
                self.lon = gf.variables['lon_rho'][:]

        try:  # Check for GLS parameters (diffusivity)
            self.gls_parameters = {}
            for gls_par in gls_param:
                self.gls_parameters[gls_par] = \
                    self.Dataset.variables[gls_par][()]
            self.logger.info('Read GLS parameters from file.')
        except Exception as e:
            self.logger.info(e)
            self.logger.info('Did not find complete set of GLS parameters')

        # Get time coverage
        try:
            ocean_time = self.Dataset.variables['ocean_time']
        except:
            ocean_time = self.Dataset.variables['time']
        if has_xarray:
            self.times = [
                datetime.utcfromtimestamp(
                    (OT - np.datetime64('1970-01-01T00:00:00Z')) /
                    np.timedelta64(1, 's')) for OT in ocean_time.data
            ]
        else:
            time_units = ocean_time.__dict__['units']
            if time_units == 'second':
                self.logger.info(
                    'Ocean time given as seconds relative to start. '
                    'Setting artificial start time of 1 Jan 2000.')
                time_units = 'seconds since 2000-01-01 00:00:00'
            self.times = num2date(ocean_time[:], time_units)
        self.start_time = self.times[0]
        self.end_time = self.times[-1]
        if len(self.times) > 1:
            self.time_step = self.times[1] - self.times[0]
        else:
            self.time_step = None

        # x and y are rows and columns for unprojected datasets
        self.xmin = 0.
        self.delta_x = 1.
        self.ymin = 0.
        self.delta_y = 1.
        if has_xarray:
            self.xmax = self.Dataset['xi_rho'].shape[0] - 1.
            self.ymax = self.Dataset['eta_rho'].shape[0] - 1.
            self.lon = self.lon.data  # Extract, could be avoided downstream
            self.lat = self.lat.data
            self.sigma = self.sigma.data
        else:
            self.xmax = float(len(self.Dataset.dimensions['xi_rho'])) - 1
            self.ymax = float(len(self.Dataset.dimensions['eta_rho'])) - 1

        self.name = 'roms native'

        self.precalculate_s2z_coefficients = True

        # Find all variables having standard_name
        self.variables = []
        for var_name in self.Dataset.variables:
            if var_name in self.ROMS_variable_mapping.keys():
                var = self.Dataset.variables[var_name]
                self.variables.append(self.ROMS_variable_mapping[var_name])

        # Run constructor of parent Reader class
        super(Reader, self).__init__()
Example #23
    def get_tz_crosssection_for_the_point(self,
                                          lon=None,
                                          lat=None,
                                          zlist=None,
                                          var_name="",
                                          start_date=None,
                                          end_date=None):
        """
        get t-z cross section matrix for the point on the zlist levels
        Note: if zlist is None, the profiles are returned on model levels
        :param lon:
        :param lat:
        :param zlist:
        :param var_name:
        :param start_date:
        :param end_date:
        """
        if self.model_kdtree is None:
            xs, ys, zs = lat_lon.lon_lat_to_cartesian(self.lons.flatten(),
                                                      self.lats.flatten())
            self.model_kdtree = cKDTree(list(zip(xs, ys, zs)))

        xt, yt, zt = lat_lon.lon_lat_to_cartesian(lon, lat)

        start_year = start_date.year
        end_year = end_date.year

        # Get the nearest neighbour(s) for interpolation (k=1 here)
        dists_from, inds_from = self.model_kdtree.query([
            (xt, yt, zt),
        ], k=1)

        # Calculate the inverse of the square of the distance for the weighted average
        weights = 1.0 / dists_from**2
        weights /= weights.sum()

        inds_from = inds_from.squeeze()
        weights = weights.squeeze()
        if len(weights.shape) == 0:
            weights = [
                weights,
            ]

        neighbor_lons = self.lons.flatten()[inds_from]
        neighbor_lats = self.lats.flatten()[inds_from]

        i_list, j_list = [], []

        if dists_from.ndim > 1:
            for the_lon, the_lat in zip(neighbor_lons, neighbor_lats):
                i, j = np.where((self.lons == the_lon)
                                & (self.lats == the_lat))
                i_list.append(i[0])
                j_list.append(j[0])
        else:
            i, j = np.where((self.lons == neighbor_lons)
                            & (self.lats == neighbor_lats))
            i_list.append(i[0])
            j_list.append(j[0])

        profiles = []
        dates = []
        ztarget = np.asarray(zlist) if zlist is not None else None
        vert_kdtree = None
        for the_year in range(start_year, end_year + 1):

            # cube dimensions (t, z, y, x)

            print("treating the following files: {}".format(", ".join(
                self.year_to_path[the_year])))

            with MFDataset(self.year_to_path[the_year]) as ds:
                data = ds.variables[var_name]

                time_var = ds.variables["time_counter"]

                time_data = time_var[:]

                assert np.all(
                    time_data == np.array(sorted(time_data))
                ), "Time data is not sorted: {}".format(time_data)

                if end_date.hour == 0:
                    end_date += timedelta(days=1)

                d1 = date2num(start_date, time_var.units)
                d2 = date2num(end_date, time_var.units)

                current_dates = num2date(
                    [t for t in time_var[:] if d1 <= t <= d2],
                    units=time_var.units)
                data = data[np.where((d1 <= time_var[:])
                                     & (time_var[:] <= d2))[0], :, :, :]

                # Use inverse squared distances to interpolate in horizontal
                prof = data[:, :, j_list[0], i_list[0]] * weights[0]
                for i, j, weight in zip(i_list[1:], j_list[1:], weights[1:]):
                    prof += data[:, :, j, i] * weight

                # Linear interpolation in vertical

                if "deptht" in ds.variables:
                    zsource = ds.variables["deptht"][:]
                elif "depthu" in ds.variables:
                    zsource = ds.variables["depthu"][:]
                elif "depthv" in ds.variables:
                    zsource = ds.variables["depthv"][:]
                elif "depthw" in ds.variables:
                    zsource = ds.variables["depthw"][:]
                else:
                    raise Exception("Could not find vertical coordinate")

                if vert_kdtree is None:
                    vert_kdtree = cKDTree([[
                        z,
                    ] for z in zsource])

                # No interpolation if the vertical levels are not supplied
                ztarget = zsource if ztarget is None else ztarget

                zdists, zinds = vert_kdtree.query([[
                    z,
                ] for z in ztarget],
                                                  k=2)
                zdists = zdists.squeeze()
                zinds = zinds.squeeze()

                zweights = zdists / zdists.sum(
                    axis=1)[:, np.newaxis]  # weight1 = d2/(d1 + d2)

                prof = prof[:, zinds[:, 0]] * zweights[
                    np.newaxis, :,
                    1] + prof[:, zinds[:, 1]] * zweights[np.newaxis, :, 0]
                profiles.extend(prof)

                print("Selected data for the time range: ", current_dates[0],
                      current_dates[-1])
                print("The limits are ", start_date, end_date)

                dates.extend(current_dates)

        # Calculate model bottom
        bottom = 0
        for i, j in zip(i_list, j_list):
            bottom += self.bathymetry[i, j]
        bottom /= float(len(i_list))

        dates_num = mdates.date2num(dates)

        profiles = np.asarray(profiles)
        # mask everything below the model bottom
        # (note: this block is currently disabled by the "and False" guard)
        if zlist is None and False:
            profiles = profiles[:, np.where(ztarget <= bottom)]
            profiles = profiles.squeeze()
            ztarget = ztarget[ztarget <= bottom]

        zz, tt = np.meshgrid(ztarget, dates_num)

        # print("nemo tt-ranges: ", tt.min(), tt.max())
        # profiles = np.ma.masked_where(zz > bottom, profiles)

        # plot for debug
        #
        # plt.figure()
        # ax = plt.gca()
        #
        # im = ax.contourf(profiles, levels=np.arange(4, 30, 1))
        #
        #
        #
        # xlimits = ax.get_xlim()
        # ax.plot(xlimits, [bottom, bottom], "k-", lw=2)
        #
        #
        # ax.invert_yaxis()
        # # ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y\n%b\n%d"))
        #
        # plt.colorbar(im)
        # plt.show()
        #
        # if True:
        #     raise Exception()

        return tt, zz, profiles
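
The two interpolation steps used above (inverse-squared-distance weighting of the nearest horizontal neighbours, then two-point linear interpolation between model levels) can be illustrated on toy arrays. A minimal sketch with made-up values, independent of the class above:

import numpy as np
from scipy.spatial import cKDTree

# Horizontal step: inverse-squared-distance weights for 3 neighbouring columns
dists = np.array([1.0, 2.0, 4.0])            # distances to the neighbours
w = 1.0 / dists**2
w /= w.sum()                                  # weights sum to 1
columns = np.array([[10.0, 12.0, 20.0],       # toy profiles, shape (level, neighbour)
                    [11.0, 13.0, 21.0],
                    [12.0, 14.0, 22.0]])
prof = (columns * w).sum(axis=1)              # weighted horizontal average per level

# Vertical step: linear interpolation between the two nearest source levels
zsource = np.array([0.5, 1.5, 3.0])
ztarget = np.array([1.0, 2.0])
vert_kdtree = cKDTree(zsource[:, np.newaxis])
zdists, zinds = vert_kdtree.query(ztarget[:, np.newaxis], k=2)
zweights = zdists / zdists.sum(axis=1)[:, np.newaxis]   # a level's weight = normalized distance to the other level
prof_target = prof[zinds[:, 0]] * zweights[:, 1] + prof[zinds[:, 1]] * zweights[:, 0]
print(prof_target)
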
Example #24
0
    def get_seasonal_clim_field(self,
                                start_year=None,
                                end_year=None,
                                season_to_months=None,
                                varname="sosstsst",
                                level_index=0):
        """
        Get seasonal mean climatology for a field
        :param start_year:
        :param end_year:
        :param season_to_months:
        :param varname:
        """
        if start_year is None:
            start_year = min(self.year_to_path.keys())

        if end_year is None:
            end_year = max(self.year_to_path.keys())

        # Set up month to season relation
        month_to_season = defaultdict(lambda: "no-season")
        for m in range(1, 13):
            for s, months in season_to_months.items():
                if m in months:
                    month_to_season[m] = s
                    break

        season_to_field_list = defaultdict(list)
        for y in range(start_year, end_year + 1):
            fpath = self.year_to_path[y]

            with MFDataset(fpath) as ds:

                data_var = ds.variables[varname]

                if len(data_var.shape) == 3:
                    nt, ny, nx = data_var.shape
                    data = data_var[:]
                elif len(data_var.shape) == 4:
                    nt, nz, ny, nx = data_var.shape
                    data = data_var[:, level_index, :, :]
                else:
                    raise Exception(
                        "Do not know how to handle {}-dimensional fields".
                        format(len(data_var.shape)))

                time_var = ds.variables["time_counter"]

                dates = num2date(time_var[:], time_var.units)

                panel = pd.Panel(data=data,
                                 items=dates,
                                 major_axis=range(ny),
                                 minor_axis=range(nx))

                seas_mean = panel.groupby(lambda d: month_to_season[d.month],
                                          axis="items").mean()

                for the_season in seas_mean:
                    season_to_field_list[the_season].append(
                        seas_mean[the_season].values)

        result = {}
        for the_season, field_list in season_to_field_list.items():
            mean_field = np.mean(field_list, axis=0).transpose()
            print(mean_field.shape)

            result[the_season] = np.ma.masked_where(~self.lake_mask,
                                                    mean_field)

        return result
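
The month-to-season lookup built at the top of the method inverts the season_to_months mapping and falls back to "no-season" for months that belong to no season. A standalone illustration with an assumed two-season mapping:

from collections import defaultdict

season_to_months = {"DJF": (12, 1, 2), "JJA": (6, 7, 8)}

month_to_season = defaultdict(lambda: "no-season")
for m in range(1, 13):
    for s, months in season_to_months.items():
        if m in months:
            month_to_season[m] = s
            break

print(month_to_season[1], month_to_season[4])   # DJF no-season
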
Example #25
0
    def get_seasonal_clim_field_for_dates(
            self,
            start_year=None,
            end_year=None,
            season_to_months=None,
            varname="sosstsst",
            level_index=0,
            season_to_selected_dates: dict = None):
        """

        :param start_year: 
        :param end_year: 
        :param season_to_months: 
        :param varname: 
        :param level_index: 
        :param season_to_selected_dates: 
        :return: {season: (clim, std, nobs)} 
        """

        # presort selected dates
        for season in season_to_selected_dates.keys():
            season_to_selected_dates[season] = sorted(
                season_to_selected_dates[season])

        def __check_if_date_isinlist(d1, dlist):
            """

            :param d1: 
            :param dlist: (should be sorted ascending) 
            :return: 
            """

            if len(dlist) >= 2:
                if d1 < dlist[0] or d1 > dlist[-1]:
                    return False

            return datetime(d1.year, d1.month, d1.day) in dlist

        if start_year is None:
            start_year = min(self.year_to_path.keys())

        if end_year is None:
            end_year = max(self.year_to_path.keys())

        # Set up month to season relation
        month_to_season = defaultdict(lambda: "no-season")
        for m in range(1, 13):
            for s, months in season_to_months.items():
                if m in months:
                    month_to_season[m] = s
                    break

        # selection of the dates of interest for a season
        def __get_selected_dates_for_month(month):
            aseason = month_to_season[month]
            if aseason in season_to_selected_dates:
                return season_to_selected_dates[aseason]
            return []

        season_to_field_list = defaultdict(list)
        for y in range(start_year, end_year + 1):
            fpath = self.year_to_path[y]

            with MFDataset(fpath) as ds:

                data_var = ds.variables[varname]

                if len(data_var.shape) == 3:
                    nt, ny, nx = data_var.shape
                    data = data_var[:]
                elif len(data_var.shape) == 4:
                    nt, nz, ny, nx = data_var.shape
                    data = data_var[:, level_index, :, :]
                else:
                    raise Exception(
                        "Do not know how to handle {}-dimensional fields".
                        format(len(data_var.shape)))

                time_var = ds.variables["time_counter"]

                dates = num2date(time_var[:], time_var.units)

                panel = pd.Panel(data=data,
                                 items=dates,
                                 major_axis=range(ny),
                                 minor_axis=range(nx))

                seas_mean = panel.groupby(
                    lambda d: month_to_season[d.month]
                    if __check_if_date_isinlist(
                        d, __get_selected_dates_for_month(d.month)
                    ) else "no-season",
                    axis="items").mean()

                print(seas_mean)

                for the_season in seas_mean:
                    season_to_field_list[the_season].append(
                        seas_mean[the_season].values)

        result = {}
        for the_season, field_list in season_to_field_list.items():
            mean_field = np.mean(field_list, axis=0).transpose()
            std_field = np.std(field_list, axis=0).transpose()
            nobs = len(field_list)

            print(mean_field.shape)

            result[the_season] = (np.ma.masked_where(~self.lake_mask,
                                                     mean_field), std_field,
                                  nobs)

        return result
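
The season_to_selected_dates argument is expected to map each season name to a sorted list of datetime objects; the membership check normalizes every candidate date to midnight of its day, so the listed dates should be day-resolution. A hypothetical example of such a mapping:

from datetime import datetime

season_to_selected_dates = {
    "DJF": sorted([datetime(1995, 12, 5), datetime(1996, 1, 17), datetime(1996, 2, 3)]),
    "JJA": sorted([datetime(1996, 6, 21), datetime(1996, 7, 4)]),
}
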
Example #26
0
    def get_seasonal_clim_cross_section_with_ttest_data(
            self,
            start_year=None,
            end_year=None,
            season_to_months=None,
            varname="votemper",
            start_point=None,
            end_point=None):
        """

        :param start_year:
        :param end_year:
        :param season_to_months:
        :param varname:
        :param start_point:
        :param end_point:
        """

        if start_year is None:
            start_year = min(self.year_to_path.keys())

        if end_year is None:
            end_year = max(self.year_to_path.keys())

        # Set up month to season relation
        month_to_season = defaultdict(lambda: "no-season")
        for m in range(1, 13):
            for s, months in season_to_months.items():
                if m in months:
                    month_to_season[m] = s
                    break

        season_to_field_list = defaultdict(list)
        for y in range(start_year, end_year + 1):
            fpath = self.year_to_path[y]

            with MFDataset(fpath) as ds:

                data_var = ds.variables[varname]

                assert data_var.ndim == 4

                data = data_var[:]  # (t, z, y, x)

                nt, nz, ny, nx = data.shape

                time_var = ds.variables["time_counter"]

                dates = num2date(time_var[:], time_var.units)

                panel = pd.Panel4D(data=data,
                                   labels=dates,
                                   items=range(nz),
                                   major_axis=range(ny),
                                   minor_axis=range(nx))

                seas_mean = panel.groupby(lambda d: month_to_season[d.month],
                                          axis="labels").mean()

                print(seas_mean)

                for the_season in seas_mean:
                    season_to_field_list[the_season].append(
                        seas_mean[the_season].values)

        result = {}
        for the_season, field_list in season_to_field_list.items():
            mean_field = np.mean(field_list, axis=0).transpose((0, 2, 1))
            std_field = np.std(field_list, axis=0).transpose((0, 2, 1))
            nobs = len(field_list)

            print(mean_field.shape)

            result[the_season] = (np.ma.masked_where(~self.lake_mask,
                                                     mean_field), std_field,
                                  nobs)

        return result
    def test_get_by_mfdataset(self):
        """testing multi-file get_variables_by_attributes."""
        f = MFDataset(self.files, check=True)
        assert f.get_variables_by_attributes(axis='T') == []
        assert f.get_variables_by_attributes(units='zlotys')[0] == f['x']
        f.close()
# Ocean heat capacity (ocean_core/ocean_parameters.F90)
cp_ocean = 3992.10322329649

# Read 'descriptor' and 'years' from external file
f = open("files.txt")
for line in f.readlines():
  exec(line.lstrip())
f.close()
model_label = "%s (%s)" % (descriptor,years)

# TMPDIR where input files are located
tmpdir = "./"

# Open input files
#fstatic = Dataset(tmpdir+'19000101.ocean_geometry.nc', 'r')
fstatic = MFDataset(tmpdir+'*.ocean_static.nc')
ftemp   = MFDataset(tmpdir+'*.ocean_annual.nc')


# Time info
time = ftemp.variables["time"]
ntimes = len(time[:])
date = num2date(time,time.units,time.calendar.lower())
year = [d.year for d in date]
time_days = date2num(date,'days since 01-01-0001',time.calendar.lower())

# Grid info
#area = fstatic.variables["Ah"][:]
area = fstatic.variables["area_t"][:]

z = ftemp.variables["zl"][:]
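
The files.txt parsing above exec()s each line and therefore assumes the file contains plain Python assignments such as descriptor = "..." and years = "...". A safer sketch for the same assumed key = value format, using ast.literal_eval so that only constants are accepted:

import ast

params = {}
with open("files.txt") as f:
    for line in f:
        line = line.strip()
        if not line or "=" not in line:
            continue
        key, _, value = line.partition("=")
        # literal_eval parses quoted strings/numbers only; arbitrary code is rejected
        params[key.strip()] = ast.literal_eval(value.strip())

model_label = "%s (%s)" % (params["descriptor"], params["years"])
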
Example #29
0
class HighResDataManager(object):
    def __init__(self,
                 path="",
                 vname="",
                 characteristic_scale_deg=0.01,
                 chunks=(5, 500, 500)):

        self.chunks = chunks

        try:
            self.__ds = Dataset(path)
            self.data = da.from_array(Dataset(path).variables[vname],
                                      self.chunks,
                                      lock=True)
        except OSError as err:

            import glob

            if isinstance(path, str):
                path_list = glob.glob(path)
            else:
                path_list = path

            path_list = sorted(path_list)

            self.data = [
                da.from_array(Dataset(p).variables[vname],
                              self.chunks,
                              lock=True) for p in path_list
            ]
            self.data = da.concatenate(self.data)

            try:
                self.__ds = MFDataset(path_list)
            except ValueError as verr:
                print(
                    "Warning: Could not use MFDataset from netCDF4, trying xarray"
                )

                self.__ds = xarray.concat([
                    xarray.open_dataset(p, chunks={"time": 100})
                    for p in sorted(path_list)
                ],
                                          data_vars="minimal",
                                          dim="time")

        self.missing_value = None

        if hasattr(self.__ds.variables[vname], "missing_value"):
            self.missing_value = self.__ds.variables[vname].missing_value
        else:
            self.missing_value = np.nan

        self.vname = vname

        #
        # self.data = biggus.OrthoArrayAdapter(self.ds.variables[vname])

        self.lons = None
        self.lats = None
        self.time = None

        self.time_to_index = None

        self.characteristic_scale_deg = characteristic_scale_deg

        self.__read_coordinates_and_time()
        self.__ds.close()

    def get_data_aggregated_in_space(self, chunk_size):
        return self.data.rechunk(chunks=chunk_size).map_blocks()

    def get_annual_max_with_ttest_stats_lazy(self,
                                             data,
                                             start_year=-np.Inf,
                                             end_year=np.Inf):
        """
        Get the maximum for each year, then calculate the climatological mean and standard deviation so they can be used in a t-test
        :param data:
        :param start_year:
        :param end_year:
        :return (mean of ann max, std of ann max, nyears), mask
        """

        data_sel, time_sel = self.__sel_period(start_year=start_year,
                                               end_year=end_year,
                                               arr=data)

        data_sel = data_sel.rechunk((len(time_sel), ) + tuple(self.chunks[1:]))

        mask = np.abs(data_sel[0, :, :] - self.missing_value) < 1.0e-6

        def annual_max(block):
            tmp = block.reshape((len(time_sel), -1))

            df = pd.DataFrame(index=time_sel, data=tmp)

            return df.groupby(lambda d: d.year,
                              sort=True).max().values.reshape((-1, ) +
                                                              block.shape[1:])

        ann_max_arr = data_sel.map_blocks(annual_max)

        # get climatology and standard deviations
        ann_max_mean_clim = ann_max_arr.mean(axis=0)
        ann_max_std = ann_max_arr.std(axis=0)

        return ann_max_mean_clim, ann_max_std, ann_max_arr.shape[0], mask
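
The annual_max block function above flattens each spatial chunk, groups the rows by year and takes the maximum. The same operation on a tiny in-memory array (toy values, no dask) looks like this:

import numpy as np
import pandas as pd

time_sel = pd.to_datetime(["2001-06-01", "2001-07-01", "2002-06-01", "2002-07-01"])
block = np.arange(4 * 2 * 2, dtype=float).reshape(4, 2, 2)        # (t, y, x)

tmp = block.reshape((len(time_sel), -1))
df = pd.DataFrame(index=time_sel, data=tmp)
ann_max = df.groupby(lambda d: d.year, sort=True).max().values.reshape((-1,) + block.shape[1:])
print(ann_max.shape)                                              # (2, 2, 2): one field per year
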

    def get_daily_percenile_fields_interpolated_to(
            self,
            lons_target,
            lats_target,
            start_year=-np.Inf,
            end_year=np.Inf,
            percentile=0.5,
            rolling_mean_window_days=None):
        target_scale_deg = (lons_target[1, 1] - lons_target[0, 0] +
                            lats_target[1, 1] - lats_target[0, 0]) / 2.0

        coarsening = int(target_scale_deg / self.characteristic_scale_deg +
                         0.5)
        print("source_scale: {}\ntarget_scale: {}\ncoarsening coefficient: {}".
              format(self.characteristic_scale_deg, target_scale_deg,
                     coarsening))

        def coarsening_func(x, axis=None):
            _mask = np.less(np.abs(x - self.missing_value), 1.0e-6)

            if np.all(_mask):
                return self.missing_value * np.ma.ones(
                    _mask.shape).mean(axis=axis)

            y = np.ma.masked_where(_mask, x)

            return y.mean(axis=axis)

        # aggregate the data
        trim_excess = True
        data = da.coarsen(coarsening_func,
                          self.data,
                          axes={
                              1: coarsening,
                              2: coarsening
                          },
                          trim_excess=trim_excess)
        lons_s = da.coarsen(np.mean,
                            da.from_array(self.lons, self.chunks[1:]),
                            axes={
                                0: coarsening,
                                1: coarsening
                            },
                            trim_excess=trim_excess).compute()
        lats_s = da.coarsen(np.mean,
                            da.from_array(self.lats, self.chunks[1:]),
                            axes={
                                0: coarsening,
                                1: coarsening
                            },
                            trim_excess=trim_excess).compute()

        source_grid = list(
            zip(*lat_lon.lon_lat_to_cartesian(lons_s.flatten(),
                                              lats_s.flatten())))
        print(np.shape(source_grid))
        ktree = KDTree(source_grid)

        dists, inds = ktree.query(
            list(
                zip(*lat_lon.lon_lat_to_cartesian(lons_target.flatten(),
                                                  lats_target.flatten()))))

        perc_daily, mask = self.get_daily_percenile_fields_lazy(
            data,
            start_year=start_year,
            end_year=end_year,
            percentile=percentile,
            rolling_mean_window_days=rolling_mean_window_days)

        print("perc_daily.shape=", perc_daily.shape)

        # do the interpolation for each day
        perc_daily_interpolated = []
        for perc_field in perc_daily:
            print(perc_field.shape)
            field = np.ma.masked_where(
                mask, perc_field.compute()).flatten()[inds].reshape(
                    lons_target.shape)
            perc_daily_interpolated.append(field)

        return np.array(perc_daily_interpolated)
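
The coarsening coefficient used above is just the ratio of the target grid spacing to the source characteristic scale, rounded to the nearest integer. For example, with the constructor default of 0.01 degrees and an assumed ~0.1 degree target grid:

characteristic_scale_deg = 0.01    # source grid scale, the constructor default above
target_scale_deg = 0.1             # assumed target grid spacing
coarsening = int(target_scale_deg / characteristic_scale_deg + 0.5)
print(coarsening)                  # 10: every 10x10 block of source cells is averaged
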

    # @profile
    def get_daily_percenile_fields_lazy(self,
                                        data,
                                        start_year=-np.Inf,
                                        end_year=np.Inf,
                                        percentile=0.5,
                                        rolling_mean_window_days=None):
        """
        calculate the percentile for each day of year for the specified period
        :param rolling_mean_window_days: if None (default), no rolling mean is applied; if set to N, an N-day rolling mean is applied before computing the percentile
        :param percentile: ranges from 0 to 1.0
        :param data: (time, lon, lat) dask array
        :param start_year:
        :param end_year:
        :return: 365 fields (one for each day of year) of the requested percentile, and the mask

        """
        assert isinstance(data, da.Array)

        msg = "The first dimension of data should be time, but data.shape[0]={} and len(self.time)={}".format(
            data.shape[0], len(self.time))
        assert data.shape[0] == len(self.time), msg

        # mask the resulting fields
        epsilon = 1.0e-5

        print("missing_value = {}, isnan(..) = {}".format(
            self.missing_value, np.isnan(self.missing_value)))
        if not np.isnan(self.missing_value):
            mask = np.less_equal(np.abs(data[0, :, :] - self.missing_value),
                                 epsilon)
        else:
            mask = np.isnan(data[0, :, :].compute())

        data_sel, time_sel = data, self.time

        assert np.all(
            np.equal(sorted(time_sel),
                     time_sel)), "Time vector does not appear to be sorted"

        print("start rechunking")

        initial_chunks = tuple(data_sel.chunks)
        data_sel = data_sel.rechunk((len(time_sel), ) + data_sel.chunks[1:])
        print("finish rechunking: {} ---> {}".format(initial_chunks,
                                                     data_sel.chunks))

        perc = data_sel.map_blocks(
            clim_day_percentile_calculator,
            time_sel,
            dtype=np.float32,
            rolling_mean_window_days=rolling_mean_window_days,
            percentile=percentile,
            start_year=start_year,
            end_year=end_year,
            missing_value=self.missing_value)

        return perc, mask
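
clim_day_percentile_calculator is not defined in this listing; its call signature is only implied by the map_blocks call above (the block first, then time_sel, then the keyword arguments). Purely as an assumption about what such a block function might do, not the author's implementation, a rough stand-in could be:

import numpy as np
import pandas as pd

def clim_day_percentile_calculator_sketch(block, time_sel, rolling_mean_window_days=None,
                                          percentile=0.5, start_year=-np.inf, end_year=np.inf,
                                          missing_value=np.nan):
    """Hypothetical stand-in: per-day-of-year percentile over the selected years."""
    nt = block.shape[0]
    df = pd.DataFrame(index=pd.to_datetime(time_sel), data=block.reshape((nt, -1)))
    df = df[(df.index.year >= start_year) & (df.index.year <= end_year)]
    if rolling_mean_window_days is not None:
        df = df.rolling(rolling_mean_window_days, min_periods=1).mean()
    perc = df.groupby(df.index.dayofyear).quantile(percentile)
    return perc.values.reshape((-1,) + block.shape[1:])
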

    def get_seasonal_means_with_ttest_stats_interpolated_to(
            self,
            lons_target,
            lats_target,
            season_to_monthperiod=None,
            start_year=-np.Inf,
            end_year=np.Inf,
            convert_monthly_accumulators_to_daily=False):
        """

        :param lons_target, lats_target: 2d arrays of target longitudes and latitudes
        :param season_to_monthperiod:
        :param start_year:
        :param end_year:
        :param convert_monthly_accumulators_to_daily: if True, converts monthly accumulators to daily values
        :return dict(season: [mean, std, nobs])


        Coarsens the data and coordinates to the target scale and interpolates using nearest neighbours.
        """

        target_scale_deg = (lons_target[1, 1] - lons_target[0, 0] +
                            lats_target[1, 1] - lats_target[0, 0]) / 2.0

        coarsening = int(target_scale_deg / self.characteristic_scale_deg +
                         0.5)
        print("source_scale: {}\ntarget_scale: {}\ncoarsening coefficient: {}".
              format(self.characteristic_scale_deg, target_scale_deg,
                     coarsening))

        def coarsening_func(x, axis=None):
            _mask = np.less(np.abs(x - self.missing_value), 1.0e-6)

            if np.all(_mask):
                return self.missing_value * np.ma.ones(
                    _mask.shape).mean(axis=axis)

            y = np.ma.masked_where(_mask, x)

            return y.mean(axis=axis)

        # aggregate the data
        trim_excess = True
        data = da.coarsen(coarsening_func,
                          self.data,
                          axes={
                              1: coarsening,
                              2: coarsening
                          },
                          trim_excess=trim_excess)
        lons_s = da.coarsen(np.mean,
                            da.from_array(self.lons, self.chunks[1:]),
                            axes={
                                0: coarsening,
                                1: coarsening
                            },
                            trim_excess=trim_excess).compute()
        lats_s = da.coarsen(np.mean,
                            da.from_array(self.lats, self.chunks[1:]),
                            axes={
                                0: coarsening,
                                1: coarsening
                            },
                            trim_excess=trim_excess).compute()

        source_grid = list(
            zip(*lat_lon.lon_lat_to_cartesian(lons_s.flatten(),
                                              lats_s.flatten())))
        print(np.shape(source_grid))
        ktree = KDTree(source_grid)

        dists, inds = ktree.query(
            list(
                zip(*lat_lon.lon_lat_to_cartesian(lons_target.flatten(),
                                                  lats_target.flatten()))))

        print("data.shape = ", data.shape)
        result, mask = self.__get_seasonal_means_with_ttest_stats_dask_lazy(
            data,
            season_to_monthperiod=season_to_monthperiod,
            start_year=start_year,
            end_year=end_year,
            convert_monthly_accumulators_to_daily=
            convert_monthly_accumulators_to_daily)

        # invoke the computations and interpolate the result
        for season in result:
            print("Computing for {}".format(season))
            for i in range(len(result[season]) - 1):

                result[season][i] = np.ma.masked_where(
                    mask, result[season][i].compute()).flatten()[inds].reshape(
                        lons_target.shape)

        return result

    def __read_coordinates_and_time(self):

        coord_name_tokens = ["lon", "lat", "time"]

        for nc_vname, nc_var in self.__ds.variables.items():
            vname_lc = nc_vname.lower()

            print(vname_lc, type(vname_lc))

            print(nc_var)

            skip = False
            # avoid loading large variables
            if nc_var.ndim > 2:
                skip = True

            print(nc_var.ndim, nc_var.shape)

            # avoid variables which do not contain lon, lat or time
            if not skip:
                present = False
                for t in coord_name_tokens:
                    present = present or (t in vname_lc)

                skip = not present

            if skip:
                print("Skipping {}".format(vname_lc))
                continue

            # make sure that this is really a numpy array
            data = nc_var[:]
            if hasattr(data, "values"):
                data = data.values

            if "lon" in vname_lc:
                self.lons = data
            elif "lat" in vname_lc:
                self.lats = data

            elif "time" in vname_lc and "bnds" not in vname_lc:
                # check if the time data are already in some kind of date objects
                if isinstance(nc_var, xarray.IndexVariable):
                    self.time = data
                else:
                    if not hasattr(nc_var, "calendar"):
                        self.time = num2date(data, nc_var.units)
                    else:
                        print(
                            "Found the calendar attribute, using calendar={}".
                            format(nc_var.calendar))
                        self.time = num2date(data,
                                             nc_var.units,
                                             calendar=nc_var.calendar)

        if self.lons.ndim == 1:
            self.lats, self.lons = np.meshgrid(self.lats, self.lons)

        if self.lons.shape != self.data.shape[1:]:
            print(
                "Transposing data, since self.lons.shape={} and self.data.shape={}"
                .format(self.lons.shape, self.data.shape))

            print(type(self.data))
            self.data = self.data.transpose(0, 2, 1)

    def get_seasonal_means_with_ttest_stats(
            self,
            season_to_monthperiod=None,
            start_year=None,
            end_year=None,
            convert_monthly_accumulators_to_daily=False):
        """

        :param season_to_monthperiod:
        :param start_year:
        :param end_year:
        :param convert_monthly_accumulators_to_daily: if True, converts monthly accumulators to daily values
        :return dict(season: [mean, std, nobs])
        """

        if True:
            raise NotImplementedError(
                "Biggus way of calculation is not implemented, use the dask version of the method"
            )

        # select the interval of interest
        timesel = [
            i for i, d in enumerate(self.time)
            if start_year <= d.year <= end_year
        ]
        data = self.data[timesel, :, :]
        times = [self.time[i] for i in timesel]

        if convert_monthly_accumulators_to_daily:
            ndays = np.array(
                [calendar.monthrange(d.year, d.month)[1] for d in times])

            data = biggus.divide(data, ndays[:, np.newaxis, np.newaxis])

        else:
            data = self.data

        year_month_to_index_arr = defaultdict(list)
        for i, t in enumerate(times):
            year_month_to_index_arr[t.year, t.month].append(i)

        # calculate monthly means
        monthly_data = {}
        for y in range(start_year, end_year + 1):
            for m in range(1, 13):
                aslice = slice(year_month_to_index_arr[y, m][0],
                               year_month_to_index_arr[y, m][-1] + 1)
                monthly_data[y, m] = biggus.mean(
                    data[aslice.start:aslice.stop, :, :], axis=0)

        result = {}
        for season, month_period in season_to_monthperiod.items():
            assert isinstance(month_period, MonthPeriod)

            seasonal_means = []
            ndays_per_season = []

            for p in month_period.get_season_periods(start_year=start_year,
                                                     end_year=end_year):
                lmos = biggus.ArrayStack([
                    monthly_data[start.year, start.month]
                    for start in p.range("months")
                ])
                ndays_per_month = np.array([
                    calendar.monthrange(start.year, start.month)[1]
                    for start in p.range("months")
                ])

                seasonal_mean = biggus.sum(biggus.multiply(
                    lmos, ndays_per_month[:, np.newaxis, np.newaxis]),
                                           axis=0)
                seasonal_mean = biggus.divide(seasonal_mean,
                                              ndays_per_month.sum())

                seasonal_means.append(seasonal_mean)
                ndays_per_season.append(ndays_per_month.sum())

            seasonal_means = biggus.ArrayStack(seasonal_means)
            ndays_per_season = np.array(ndays_per_season)

            print(seasonal_means.shape, ndays_per_season.shape)

            assert seasonal_means.shape[0] == ndays_per_season.shape[0]

            clim_mean = biggus.sum(biggus.multiply(
                seasonal_means, ndays_per_season[:, np.newaxis, np.newaxis]),
                                   axis=0) / ndays_per_season.sum()

            diff = biggus.subtract(seasonal_means,
                                   clim_mean.masked_array()[np.newaxis, :, :])
            sq_mean = biggus.sum(biggus.multiply(
                diff**2, ndays_per_season[:, np.newaxis, np.newaxis]),
                                 axis=0) / ndays_per_season.sum()
            clim_std = biggus.power(sq_mean, 0.5)

            clim_mean = clim_mean.masked_array()
            print("calculated mean")
            clim_std = clim_std.masked_array()
            print("calculated std")

            result[season] = [clim_mean, clim_std, ndays_per_season.shape[0]]

        return result

    def __sel_period(self, start_year, end_year, arr):
        timesel = [
            i for i, d in enumerate(self.time)
            if start_year <= d.year <= end_year
        ]
        data = arr[timesel]
        times = [self.time[i] for i in timesel]
        return data, times

    def __get_seasmean_cache_file(self,
                                  season_to_month_period,
                                  start_year=-np.Inf,
                                  end_year=np.Inf):
        seas_tok = "_".join(season_to_month_period)
        year_tok = "{}-{}".format(start_year, end_year)

        return "DAYMET_seas__{}__{}.bin".format(seas_tok, year_tok)

    def __get_seasonal_means_with_ttest_stats_dask_lazy(
            self,
            data,
            season_to_monthperiod=None,
            start_year=-np.Inf,
            end_year=np.Inf,
            convert_monthly_accumulators_to_daily=False):

        # mask the resulting fields
        epsilon = 1.0e-5
        mask = np.less_equal(np.abs(data[0, :, :] - self.missing_value),
                             epsilon)

        print("data.shape = ", data.shape)

        data_sel, times_sel = data, self.time

        # select the interval of interest

        if convert_monthly_accumulators_to_daily:
            ndays = da.from_array(
                np.array([
                    calendar.monthrange(d.year, d.month)[1] for d in times_sel
                ]), (100, ))
            ndays = da.transpose(da.broadcast_to(
                da.from_array(ndays, ndays.shape),
                data_sel.shape[1:] + ndays.shape),
                                 axes=(2, 0, 1))

            data_sel = data_sel / ndays

        year_month_to_index_arr = defaultdict(list)
        for i, t in enumerate(times_sel):
            year_month_to_index_arr[t.year, t.month].append(i)

        # calculate monthly means
        monthly_data = {}
        for y in range(start_year, end_year + 1):
            for m in range(1, 13):
                aslice = slice(year_month_to_index_arr[y, m][0],
                               year_month_to_index_arr[y, m][-1] + 1)
                print(aslice, data_sel.shape)
                monthly_data[y, m] = data_sel[aslice, :, :].mean(axis=0)

        result = OrderedDict()
        for season, month_period in season_to_monthperiod.items():
            assert isinstance(month_period, MonthPeriod)

            seasonal_means = []
            ndays_per_season = []

            for p in month_period.get_season_periods(start_year=start_year,
                                                     end_year=end_year):
                lmos = da.stack([
                    monthly_data[start.year, start.month]
                    for start in p.range("months")
                ])
                ndays_per_month = np.array([
                    calendar.monthrange(start.year, start.month)[1]
                    for start in p.range("months")
                ])
                ndays_per_month = da.from_array(ndays_per_month,
                                                ndays_per_month.shape)

                print(p)
                print(lmos.shape, ndays_per_month.shape, ndays_per_month.sum())
                seasonal_mean = da.tensordot(
                    lmos, ndays_per_month, axes=([
                        0,
                    ], [
                        0,
                    ])) / ndays_per_month.sum()

                seasonal_means.append(seasonal_mean)
                ndays_per_season.append(ndays_per_month.sum())

            seasonal_means = da.stack(seasonal_means)
            ndays_per_season = np.array(ndays_per_season)
            ndays_per_season = da.from_array(ndays_per_season,
                                             ndays_per_season.shape)

            print(seasonal_means.shape, ndays_per_season.shape)

            assert seasonal_means.shape[0] == ndays_per_season.shape[0]

            clim_mean = da.tensordot(
                seasonal_means, ndays_per_season, axes=([
                    0,
                ], [
                    0,
                ])) / ndays_per_season.sum()

            clim_std = ((seasonal_means -
                         da.broadcast_to(clim_mean, seasonal_means.shape))**2 *
                        ndays_per_season[:, np.newaxis, np.newaxis]).sum(
                            axis=0) / ndays_per_season.sum()

            clim_std = clim_std**0.5

            result[season] = [clim_mean, clim_std, ndays_per_season.shape[0]]

        return result, mask
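
The tensordot calls above compute a day-weighted mean over the seasonal fields, and the std is the day-weighted standard deviation around that mean. On small plain numpy arrays the same arithmetic reads:

import numpy as np

seasonal_means = np.array([[[1.0]], [[3.0]]])     # two seasons of a 1x1 field
ndays = np.array([31.0, 28.0])                    # days in each season

clim_mean = np.tensordot(seasonal_means, ndays, axes=([0], [0])) / ndays.sum()
clim_std = (((seasonal_means - clim_mean) ** 2 * ndays[:, np.newaxis, np.newaxis]).sum(axis=0)
            / ndays.sum()) ** 0.5
print(clim_mean, clim_std)                        # weighted mean ~1.95, weighted std ~1.0
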

    def get_seasonal_means_with_ttest_stats_dask(
            self,
            season_to_monthperiod=None,
            start_year=-np.Inf,
            end_year=np.Inf,
            convert_monthly_accumulators_to_daily=False):
        """

        :param season_to_monthperiod:
        :param start_year:
        :param end_year:
        :param convert_monthly_accumulators_to_daily: if True, converts monthly accumulators to daily values
        :return dict(season: [mean, std, nobs])
        """

        result, mask = self.__get_seasonal_means_with_ttest_stats_dask_lazy(
            self.data,
            season_to_monthperiod=season_to_monthperiod,
            start_year=start_year,
            end_year=end_year,
            convert_monthly_accumulators_to_daily=
            convert_monthly_accumulators_to_daily)

        for season in result:
            print("Computing for {}".format(season))
            # -1 because the last element is the number of observations,
            # which is a plain integer and does not need compute()
            for i in range(len(result[season]) - 1):
                result[season][i] = np.ma.masked_where(
                    mask, result[season][i].compute())

        return result

    def close(self):
        del self
Example #30
0
def ice_comp_model_to_sat_table(pathToModel, modelYears, modelIteration,\
                               boundLat, pathToOSI, param = 'area', threshold=0.15, coast_exp=False):

    diff_array = numpy.zeros((len(modelYears), 12))

    for (nnum, yyear) in enumerate(modelYears):
        
        g  = Dataset('./grid.cdf')
        dxc = g.variables['dxc'][0,:,:]
        dyc = g.variables['dyc'][0,:,:]
        lat = g.variables['yc'][0,:,:]
        topo = g.variables['topo'][0,:,:]
        dxcXdyc = dxc*dyc
        if coast_exp==True:
            topo2 = expand_coast(topo)
        
        area_model=np.zeros((len(modelIteration), 12))
    
        if modelIteration[0] == 'last':
            gg = glob.glob(pathToModel+'/'+yyear+'/'+'it*')
            gg.sort()
            lastit = [int(gg[-1].split('/')[-1].split('t')[-1])]
        else:
            lastit = modelIteration

        for (it, iteration) in enumerate(lastit):
            fm = MFDataset(pathToModel+'/'+yyear+'/'+'it'+str(iteration)+'/fw/*.cdf')
            for mm in range(12):
                if param == 'area':
                    if coast_exp==True:
                        temp_area = fm.variables['area'][mm,:,:]
                        temp_area = np.ma.masked_array(temp_area, mask = topo2.mask)
                        area_model[it,mm] = calc_area(temp_area,\
                                                  dxcXdyc, lat, blat=boundLat, threshold=threshold)/10e11
                    else:
                        area_model[it,mm] = calc_area(fm.variables['area'][mm,:,:],\
                                              dxcXdyc, lat, blat=boundLat, threshold=threshold)/10e11
                elif param == 'extent':
                    if coast_exp==True:
                        temp_area = fm.variables['area'][mm,:,:]
                        temp_area = np.ma.masked_array(temp_area, mask = topo2.mask)
                        area_model[it,mm] = calc_extent(temp_area,\
                                              dxcXdyc, lat, blat=boundLat, threshold=threshold)/10e11
                    else:
                        area_model[it,mm] = calc_extent(fm.variables['area'][mm,:,:],\
                                              dxcXdyc, lat, blat=boundLat, threshold=threshold)/10e11
    
            fm.close()

        fsat = MFDataset(pathToOSI+yyear+'??.nc')
        
        osi_area = []
        for mm in range(12):
            if param == 'area':
                area_temp = fsat.variables['ice'][mm,:,:]
                if coast_exp==True:
                    area_temp = np.ma.masked_array(area_temp, mask = topo2.mask)
                else:
                    area_temp = np.ma.masked_array(area_temp, mask = topo.mask)
            
                area_temp = np.ma.masked_less_equal(area_temp, threshold)
                osi_area.append(calc_area(np.ma.filled(area_temp,0),\
                            dxcXdyc, lat, blat=boundLat, threshold=threshold)/10e11)
            
            elif param == 'extent':
                area_temp = fsat.variables['ice'][mm,:,:]
                if coast_exp==True:
                    area_temp = np.ma.masked_array(area_temp, mask = topo2.mask)
                else:
                    area_temp = np.ma.masked_array(area_temp, mask = topo.mask)
            
                area_temp = np.ma.masked_less_equal(area_temp, threshold)
                osi_area.append(calc_extent(np.ma.filled(area_temp,0),\
                            dxcXdyc, lat, blat=boundLat, threshold=threshold)/10e11)
        
    
        
        diff_array[nnum,:] = area_model[0,:]-osi_area[:]
    return diff_array
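
A call to the function might look like the following sketch; the paths, years and iteration list are placeholders, and the year strings are concatenated directly into the file patterns:

diff = ice_comp_model_to_sat_table(pathToModel='/path/to/experiments',
                                   modelYears=['2005', '2006'],
                                   modelIteration=['last'],
                                   boundLat=60.0,
                                   pathToOSI='/path/to/osi/OSI',
                                   param='extent',
                                   threshold=0.15,
                                   coast_exp=True)
# diff has shape (len(modelYears), 12): model minus satellite, per year and month
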
Example #32
0
dates = num2date(times[:], units=times.units, calendar=times.calendar)
print('dates corresponding to time values:\n', dates)

rootgrp.close()

# create a series of netCDF files with a variable sharing
# the same unlimited dimension.
for nfile in range(10):
    f = Dataset('mftest' + repr(nfile) + '.nc', 'w', format='NETCDF4_CLASSIC')
    f.createDimension('x', None)
    x = f.createVariable('x', 'i', ('x', ))
    x[0:10] = numpy.arange(nfile * 10, 10 * (nfile + 1))
    f.close()
# now read all those files in at once, in one Dataset.
from netCDF4 import MFDataset
f = MFDataset('mftest*nc')
print(f.variables['x'][:])

# example showing how to save numpy complex arrays using compound types.
f = Dataset('complex.nc', 'w')
size = 3  # length of 1-d complex array
# create sample complex data.
datac = numpy.exp(1j * (1. + numpy.linspace(0, numpy.pi, size)))
print(datac.dtype)
# create complex128 compound data type.
complex128 = numpy.dtype([('real', numpy.float64), ('imag', numpy.float64)])
complex128_t = f.createCompoundType(complex128, 'complex128')
# create a variable with this data type, write some data to it.
f.createDimension('x_dim', None)
v = f.createVariable('cmplx_var', complex128_t, 'x_dim')
data = numpy.empty(size, complex128)  # numpy structured array
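
The snippet ends before the structured array is populated and written; a plausible continuation, using only the names already defined above, would be:

data['real'] = datac.real
data['imag'] = datac.imag
v[:] = data                        # write the structured array to the compound variable
print(f.variables['cmplx_var'][:])
f.close()
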
Example #33
0
        compressedFile.write(ftpf.read())
        compressedFile.flush()
    else:
        if args.verbose > 0: print('using cache ' + cachedpath)
        compressedFile = open(cachedpath, 'r+b')
    compressedFile.seek(0)
    decompressedFile = gzip.GzipFile(fileobj=compressedFile, mode='rb')
    diskf = tempfile.NamedTemporaryFile('w+b')  
    infiles.append(diskf)  
    diskf.write(decompressedFile.read())
    diskf.flush()
    if args.verbose > 0:
        print(time, 'end')


ncff = MFDataset([inf.name for inf in infiles], 'r')
lat = ncff.variables['latitude'][:]
lon = ncff.variables['longitude'][:]
points = zip(lon, lat)
found_point_ids = []
for pi, point in enumerate(points):
    isin = prep_bounds.contains(Point(*point))
    if isin:
        found_point_ids.append(pi)
        if args.verbose > 1:
            print(point, isin)
    elif args.verbose > 2:
        print(point, isin)

varkeys = ['temperature', 'windDir', 'windSpeed', 'dewpoint', 'altimeter']
vardds = [k + 'DD' for k in varkeys]
    def __init__(self, filename=None, name=None):

        if filename is None:
            raise ValueError('Need filename as argument to constructor')
        filestr = str(filename)
        if name is None:
            self.name = filestr
        else:
            self.name = name

        # Due to misspelled standard_name in
        # some (Akvaplan-NIVA) FVCOM files
        variable_aliases = {
            'eastward_sea_water_velocity': 'x_sea_water_velocity',
            'Northward_sea_water_velocity': 'y_sea_water_velocity',
            'eastward wind': 'x_wind',
            'northward wind': 'y_wind'
        }

        # Mapping FVCOM variable names to CF standard_name
        fvcom_mapping = {
            'um': 'x_sea_water_velocity',
            'vm': 'y_sea_water_velocity'
        }

        self.return_block = True

        try:
            # Open file, check that everything is ok
            logging.info('Opening dataset: ' + filestr)
            if ('*' in filestr) or ('?' in filestr) or ('[' in filestr):
                logging.info('Opening files with MFDataset')
                self.Dataset = MFDataset(filename)
            else:
                logging.info('Opening file with Dataset')
                self.Dataset = Dataset(filename, 'r')
        except Exception as e:
            raise ValueError(e)

        # We are reading and using lon/lat arrays,
        # and not any projected coordinates
        self.proj4 = '+proj=latlong'

        logging.debug('Finding coordinate variables.')
        # Find x, y and z coordinates
        for var_name in self.Dataset.variables:
            var = self.Dataset.variables[var_name]
            if var.ndim > 1:
                continue  # Coordinates must be 1D-array
            attributes = var.ncattrs()
            standard_name = ''
            long_name = ''
            axis = ''
            units = ''
            CoordinateAxisType = ''
            if 'standard_name' in attributes:
                standard_name = var.__dict__['standard_name']
            if 'long_name' in attributes:
                long_name = var.__dict__['long_name']
            if 'axis' in attributes:
                axis = var.__dict__['axis']
            if 'units' in attributes:
                units = var.__dict__['units']
            if '_CoordinateAxisType' in attributes:
                CoordinateAxisType = var.__dict__['_CoordinateAxisType']
            if standard_name == 'longitude' or \
                    long_name == 'longitude' or \
                    var_name == 'longitude' or \
                    axis == 'X' or \
                    CoordinateAxisType == 'Lon' or \
                    standard_name == 'projection_x_coordinate':
                self.xname = var_name
                # Fix for units; should ideally use udunits package
                if units == 'km':
                    unitfactor = 1000
                else:
                    unitfactor = 1
                x = var[:] * unitfactor
                self.unitfactor = unitfactor
                self.numx = var.shape[0]
            if standard_name == 'latitude' or \
                    long_name == 'latitude' or \
                    var_name == 'latitude' or \
                    axis == 'Y' or \
                    CoordinateAxisType == 'Lat' or \
                    standard_name == 'projection_y_coordinate':
                self.yname = var_name
                # Fix for units; should ideally use udunits package
                if units == 'km':
                    unitfactor = 1000
                else:
                    unitfactor = 1
                y = var[:] * unitfactor
                self.numy = var.shape[0]
            if standard_name == 'depth' or axis == 'Z':
                if 'positive' not in var.ncattrs() or \
                        var.__dict__['positive'] == 'up':
                    self.z = var[:]
                else:
                    self.z = -var[:]
            if standard_name == 'time' or axis == 'T' or var_name == 'time':
                # Read and store time coverage (of this particular file)
                time = var[:]
                time_units = units
                self.times = num2date(time, time_units)
                self.start_time = self.times[0]
                self.end_time = self.times[-1]
                if len(self.times) > 1:
                    self.time_step = self.times[1] - self.times[0]
                else:
                    self.time_step = None

        if 'x' not in locals():
            raise ValueError('Did not find x-coordinate variable')
        if 'y' not in locals():
            raise ValueError('Did not find y-coordinate variable')

        self.lon = x
        self.lat = y

        # Find all variables having standard_name
        self.variable_mapping = {}
        for var_name in self.Dataset.variables:
            if var_name in [self.xname, self.yname, 'depth']:
                continue  # Skip coordinate variables
            var = self.Dataset.variables[var_name]
            attributes = var.ncattrs()
            if 'standard_name' in attributes:
                standard_name = str(var.__dict__['standard_name'])
                if standard_name in variable_aliases:  # Mapping if needed
                    standard_name = variable_aliases[standard_name]
                self.variable_mapping[standard_name] = str(var_name)
            elif var_name in fvcom_mapping:
                self.variable_mapping[fvcom_mapping[var_name]] = \
                    str(var_name)

        self.variables = self.variable_mapping.keys()

        self.xmin = self.lon.min()
        self.xmax = self.lon.max()
        self.ymin = self.lat.min()
        self.ymax = self.lat.max()

        # Run constructor of parent Reader class
        super(Reader, self).__init__()
Example #35
0
    def _read_tcoord(self):
        """ Read time coordinate information from netcdf file(s) """
        nc = MFDataset(self.f)
        t = nc.variables[self.tcoord]
        self.dates = num2date(MFTime(t)[:], calendar=t.calendar, units=t.units)
keeptmax = False
if options.tx90pc or options.tx90pcd:
    keeptmax = True
keeptmin = False
if options.tn90pc or options.tn90pcd:
    keeptmin = True
keeptave = True
if options.noehf:
    keeptave = False


if options.verbose:
    print "Loading data"
# Load time data
try:
    tmaxnc = MFDataset(options.tmaxfile, "r")
except IndexError:
    tmaxnc = Dataset(options.tmaxfile, "r")
nctime = tmaxnc.variables[options.timevname]
try:
    nctime = MFTime(nctime)
except AttributeError:
    pass
except ValueError:
    pass
calendar = nctime.calendar
if not calendar:
    print "Unrecognized calendar. Using gregorian."
    calendar = "gregorian"
elif calendar == "360_day":
    daysinyear = 360
Example #37
0
class CRUDataManager:
    def __init__(self, path="/RECH/skynet1_rech3/huziy/cru_data/CRUTS3.1/cru_ts_3_10.1901.2009.tmp.dat.nc",
                 var_name="tmp", lazy=False):

        self.times = None
        self.var_data = None

        self.times_var = None
        self.kdtree = None
        self.times_num = None
        self.lons2d, self.lats2d = None, None

        self.lazy = lazy
        self.var_name = var_name



        try:
            with Dataset(path) as ds:
                self._init_fields(ds)

            # Cannot go into with, since it needs to be open
            self.nc_dataset = Dataset(path)

        except OSError as oserr:
            with MFDataset(path) as ds:
                self._init_fields(ds)

            # Cannot go into with, since it needs to be open
            self.nc_dataset = MFDataset(path)


        self.nc_vars = ds.variables


    def close(self):
        self.nc_vars = None
        self.nc_dataset.close()
        del self


    def _init_fields(self, nc_dataset):
        nc_vars = nc_dataset.variables
        lons = nc_vars["lon"][:]
        lats = nc_vars["lat"][:]


        if lons.ndim == 1:
            lats2d, lons2d = np.meshgrid(lats, lons)
        elif lons.ndim == 2:
            lats2d, lons2d = lats, lons
        else:
            raise NotImplementedError("Cannot handle {}-dimensional coordinates".format(lons.ndim))


        self.lons2d, self.lats2d = lons2d, lats2d

        self.times_var = nc_vars["time"]
        self.times_num = nc_vars["time"][:]

        if hasattr(self.times_var, "calendar"):
            self.times = num2date(self.times_num, self.times_var.units, self.times_var.calendar)
        else:
            self.times = num2date(self.times_num, self.times_var.units)


        if not self.lazy:

            self.var_data = nc_vars[self.var_name][:]
            if nc_vars[self.var_name].shape[1:] != self.lons2d.shape:
                print("nc_vars[self.var_name].shape = {}".format(nc_vars[self.var_name].shape))
                self.var_data = np.transpose(self.var_data, axes=[0, 2, 1])


        x_in, y_in, z_in = lat_lon.lon_lat_to_cartesian(self.lons2d.flatten(), self.lats2d.flatten())
        self.kdtree = cKDTree(list(zip(x_in, y_in, z_in)))








    def get_seasonal_means_with_ttest_stats_interp_to(self, lons2d=None, lats2d=None,
                                                      season_to_monthperiod=None, start_year=None, end_year=None):

        #TODO: implement
        pass





    def get_seasonal_means_with_ttest_stats(self, season_to_monthperiod=None, start_year=None, end_year=None):
        """
        Note: the periods of different seasons should not overlap.


        Precipitation is converted to mm/day before the mean and std calculations.

        :param season_to_monthperiod: 
        :param start_year: 
        :param end_year:
        :return dict(season: [mean, std, nobs])
        """

        nt, nx, ny = self.var_data.shape
        panel = pandas.DataFrame(data=self.var_data.reshape(nt, -1), index=self.times)
        panel = panel[(panel.index.year >= start_year) & (panel.index.year <= end_year)]

        # Calculate monthly means, convert precip to mm/day
        if self.var_name.lower() in ["pre"]:
            monthly_panel = panel.groupby([panel.index.year, panel.index.month]).sum()

            monthly_panel = monthly_panel / monthly_panel.index.map(lambda ym: calendar.monthrange(*ym)[1])[:, np.newaxis]

        else:
            monthly_panel = panel.groupby([panel.index.year, panel.index.month]).mean()



        print("monthly panel:")
        print(monthly_panel.describe())

        season_to_res = OrderedDict()

        for season, month_period in season_to_monthperiod.items():
            assert isinstance(month_period, MonthPeriod)

            print("{} ------- (months: {}) ".format(season, month_period.months))

            ym_to_period = month_period.get_year_month_to_period_map(start_year=start_year, end_year=end_year)
            # print(ym_to_period)

            # select data for the seasons of interest
            monthly_panel_tmp = monthly_panel.select(lambda ym: (ym[1] in month_period.months) and (ym in ym_to_period))

            # print("monthly_panel_tmp, afterselect: {}".format(monthly_panel_tmp))

            days_per_month = monthly_panel_tmp.index.map(lambda ym: calendar.monthrange(*ym)[1])


            monthly_panel_tmp = monthly_panel_tmp * days_per_month[:, np.newaxis]

            seasonal_groups = monthly_panel_tmp.groupby(lambda ym: (ym_to_period[ym].start,  ym_to_period[ym].end))

            nobs = len(seasonal_groups)




            seasonal_means = []
            days_per_season = []


            for kv, gv in seasonal_groups:
                # print(kv, "---->", gv)

                # calculate seasonal mean for each year
                ndays = (Pendulum.instance(kv[1]).add(microseconds=1) - Pendulum.instance(kv[0])).total_days()  # because the end of each period is 1 microsecond before midnight
                seas_mean = gv.sum(axis=0) / ndays

                seasonal_means.append(seas_mean.values)
                days_per_season.append(ndays)



            seasonal_means = np.array(seasonal_means)
            days_per_season = np.array(days_per_season)

            # calculate climatological mean
            clim_mean = (seasonal_means * days_per_season[:, np.newaxis]).sum(axis=0) / days_per_season.sum()



            # calculate interannual std
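            # (day-weighted population std of the per-season means around the climatological mean)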
            clim_std = (((seasonal_means - clim_mean) ** 2 * days_per_season[:, np.newaxis]).sum(axis=0) / days_per_season.sum()) ** 0.5


            # reshape back to the 2d field
            clim_mean = clim_mean.reshape(nx, ny)
            clim_std = clim_std.reshape(nx, ny)

            spatial_mask = (self.var_data[0] > 1e10) | np.isnan(self.var_data[0])

            if hasattr(self.var_data, "mask"):
                spatial_mask = spatial_mask | self.var_data[0].mask


            clim_mean = np.ma.masked_where(spatial_mask, clim_mean)
            clim_std = np.ma.masked_where(spatial_mask, clim_std)


            print(season)
            print("clim_mean.shape={}".format(clim_mean.shape))
            print("clim_std.shape={}".format(clim_std.shape))


            season_to_res[season] = [clim_mean, clim_std, nobs]

        return season_to_res




    def get_seasonal_means(self, season_name_to_months=None, start_year=None, end_year=None):
        if season_name_to_months is None:
            season_name_to_months = OrderedDict([
                ("Winter", (1, 2, 12)),
                ("Spring", list(range(3, 6))),
                ("Summer", list(range(6, 9))),
                ("Fall", list(range(9, 12)))])

        season_name_to_coef = {}
        for sname, months in season_name_to_months.items():
            season_name_to_coef[sname] = 1

            if self.var_name.lower() in ["pre", "precip"]:
                days = sum([calendar.monthrange(y, m)[1] for m in months for y in range(start_year, end_year + 1)])
                season_name_to_coef[sname] = 1.0 / float(days)

        month_to_season = collections.defaultdict(lambda: "no_season")
        for sname, mlist in season_name_to_months.items():
            for m in mlist:
                month_to_season[m] = sname

        if self.var_data is None:
            self.var_data = self.nc_dataset.variables[self.var_name][:]
            if self.var_name.lower() not in ["swe"]:
                if self.var_data.shape[1:] != self.lons2d.shape:
                    self.var_data = np.transpose(self.var_data, axes=[0, 2, 1])

        nt, nx, ny = self.var_data.shape
        panel = pandas.Panel(data=self.var_data, items=self.times, major_axis=list(range(nx)),
                             minor_axis=list(range(ny)))
        panel = panel.select(lambda d: start_year <= d.year <= end_year)

        if self.var_name in ["pre", "precip"]:
            panel_seasonal = panel.groupby(lambda d: month_to_season[d.month], axis="items").sum()
        else:
            panel_seasonal = panel.groupby(lambda d: month_to_season[d.month], axis="items").mean()

        season_to_mean = OrderedDict()
        for sname, _ in season_name_to_months.items():
            season_to_mean[sname] = panel_seasonal[sname].values * season_name_to_coef[sname]
            if hasattr(self.var_data[0], "mask"):
                season_to_mean[sname] = np.ma.masked_where(self.var_data[0].mask, season_to_mean[sname])

        return season_to_mean


    def get_mean(self, start_year, end_year, months=None):
        """
        returns the mean for the period [start_year, end_year], over the months
        :type months: list
        months = list of month numbers over which the averaging is done
        """

        if months is None:
            months = list(range(1, 13))

        start_date = datetime(start_year, 1, 1)
        end_date = datetime(end_year + 1, 1, 1)

        start_date_num = date2num(start_date, self.times_var.units)
        end_date_num = date2num(end_date, self.times_var.units)

        sel_query = (self.times_num >= start_date_num) & (self.times_num < end_date_num)
        sel_dates = self.times_num[sel_query]
        sel_data = np.transpose(self.nc_vars[self.var_name][sel_query, :, :], axes=[0, 2, 1])

        sel_dates = num2date(sel_dates, self.times_var.units)

        ind_vector = np.where([(x.month in months) for x in sel_dates])[0]
        return np.mean(sel_data[ind_vector, :, :], axis=0)


    def get_daily_climatology_dataframe(self, start_year, end_year, stamp_year=2001):
        """
        returns a pandas dataframe (365, nx, ny) with daily climatological means
        """
        nt, nx, ny = self.var_data.shape
        data_panel = pandas.Panel(data=self.var_data, items=self.times, major_axis=list(range(nx)),
                                  minor_axis=list(range(ny)))
        data_panel = data_panel.select(
            lambda d: (start_year <= d.year <= end_year) and not (d.day == 29 and d.month == 2))

        data_panel = data_panel.groupby(lambda d: datetime(stamp_year, d.month, d.day), axis="items").mean()
        assert isinstance(data_panel, pandas.Panel)
        data_panel = data_panel.sort_index()
        print(data_panel.values.shape)
        return data_panel


    def get_daily_climatology(self, start_year, end_year, stamp_year=2001):
        """
        returns a numpy array of shape (365, nx, ny) with daily climatological means
        """
        return self.get_daily_climatology_dataframe(**locals()).values


    def interpolate_daily_climatology_to(self, clim_data, lons2d_target=None, lats2d_target=None):
        # expects clim_data to have the following shape (365, nx, ny)
        #        lons2d_target: (nx, ny)
        #        lats2d_target: (nx, ny)


        x, y, z = lat_lon.lon_lat_to_cartesian(lons2d_target.flatten(), lats2d_target.flatten())

        nt = clim_data.shape[0]
        data_help = np.reshape(clim_data, (nt, -1))

        dists, inds = self.kdtree.query(list(zip(x, y, z)))

        return data_help[:, inds].reshape((nt,) + lons2d_target.shape)


    def get_thawing_index_from_climatology(self, daily_temps_clim, t0=0.0):

        nt, nx, ny = daily_temps_clim.shape
        result = np.zeros((nx, ny))

        for t in range(nt):
            tfield = daily_temps_clim[t, :, :]
            result += tfield * np.array(tfield >= t0).astype(int)
        return result


    def create_monthly_means_file(self, start_year, end_year):
        fname = "{0}_monthly_means.nc".format(self.var_name)
        year_range = list(range(start_year, end_year + 1))
        dsm = Dataset(fname, "w", format="NETCDF3_CLASSIC")
        dsm.createDimension('year', len(year_range))
        dsm.createDimension("month", 12)
        dsm.createDimension('lon', self.lons2d.shape[0])
        dsm.createDimension('lat', self.lons2d.shape[1])

        lonVariable = dsm.createVariable('longitude', 'f4', ('lon', 'lat'))
        latVariable = dsm.createVariable('latitude', 'f4', ('lon', 'lat'))
        yearVariable = dsm.createVariable("year", "i4", ("year",))

        variable = dsm.createVariable(self.var_name, "f4", ('year', "month", 'lon', 'lat'))
        for i, the_year in enumerate(year_range):
            print(the_year)
            for j, the_month in enumerate(range(1, 13)):
                variable[i, j, :, :] = self.get_mean(the_year, the_year, months=[the_month])

        lonVariable[:] = self.lons2d
        latVariable[:] = self.lats2d
        yearVariable[:] = np.array(year_range)
        dsm.close()


    def _interp_and_sum(self, data1d, mults_1d, x, y, z, nneighbors=1):
        data_interp = self.interpolate_data_to_cartesian(data1d, x, y, z, nneighbours=nneighbors)
        return np.sum(mults_1d * data_interp)

    def get_monthly_timeseries_using_mask(self, mask, lons2d_target, lats2d_target, multipliers_2d, start_date=None,
                                          end_date=None):
        """
        multipliers_2d used to multiply the values when aggregating into a single timeseries
        sum(mi * vi) - in space
        """

        bool_vect = np.array([start_date <= t <= end_date for t in self.times])

        new_times = list(filter(lambda t: start_date <= t <= end_date, self.times))
        new_vals = self.var_data[bool_vect, :, :]
        x_out, y_out, z_out = lat_lon.lon_lat_to_cartesian(lons2d_target.flatten(), lats2d_target.flatten())

        print(len(new_times))
        flat_mask = mask.flatten()
        x_out = x_out[flat_mask == 1]
        y_out = y_out[flat_mask == 1]
        z_out = z_out[flat_mask == 1]
        mults = multipliers_2d.flatten()[flat_mask == 1]

        data_interp = [self._interp_and_sum(new_vals[t, :, :].flatten(), mults, x_out, y_out, z_out) for t in
                       range(len(new_times))]

        print("Interpolated data", data_interp)

        print("Interpolated all")
        return TimeSeries(time=new_times, data=data_interp).get_ts_of_monthly_means()


    def get_mean_upstream_timeseries_monthly(self, model_point, data_manager):
        """
        get mean swe upstream of the model_point

        year range for selection is in model_point.continuous_data_years() ..
        """
        assert isinstance(model_point, ModelPoint)
        assert isinstance(data_manager, Crcm5ModelDataManager)



        # create the mask of points over which the averaging is going to be done
        lons_targ = data_manager.lons2D[model_point.flow_in_mask == 1]
        lats_targ = data_manager.lats2D[model_point.flow_in_mask == 1]

        xt, yt, zt = lat_lon.lon_lat_to_cartesian(lons_targ, lats_targ)

        nxs, nys = self.lons2d.shape
        i_source, j_source = list(range(nxs)), list(range(nys))

        j_source, i_source = np.meshgrid(j_source, i_source)

        i_source = i_source.flatten()
        j_source = j_source.flatten()

        dists, inds = self.kdtree.query(list(zip(xt, yt, zt)), k=1)
        ixsel = i_source[inds]
        jysel = j_source[inds]

        print("Calculating spatial mean")
        #calculate spatial mean
        #calculate spatial mean
        if self.lazy:
            theVar = self.nc_vars[self.var_name]

            data_series = []
            for i, j in zip(ixsel, jysel):
                data_series.append(theVar[:, j, i])

            data_series = np.mean(data_series, axis=0)
        else:
            data_series = np.mean(self.var_data[:, ixsel, jysel], axis=1)

        print("Finished calculating spatial mean")

        #calculate daily climatology
        df = pandas.DataFrame(data=data_series, index=self.times, columns=["values"])

        df["year"] = df.index.map(lambda d: d.year)

        df = df[df["year"].isin(model_point.continuous_data_years)]
        monthly_clim = df.groupby(by=lambda d: d.month).mean()

        month_dates = [datetime(1985, m, 15) for m in range(1, 13)]
        vals = [monthly_clim.ix[d.month, "values"] for d in month_dates]

        return pandas.TimeSeries(data=vals, index=month_dates)


    def get_mean_upstream_timeseries_daily(self, model_point, dm, stamp_dates=None):
        """
        get mean swe upstream of the model_point
        """
        assert isinstance(model_point, ModelPoint)

        assert isinstance(dm, Crcm5ModelDataManager)



        # create the mask of points over which the averaging is going to be done
        lons_targ = dm.lons2D[model_point.flow_in_mask == 1]
        lats_targ = dm.lats2D[model_point.flow_in_mask == 1]

        xt, yt, zt = lat_lon.lon_lat_to_cartesian(lons_targ, lats_targ)

        nxs, nys = self.lons2d.shape
        i_source, j_source = list(range(nxs)), list(range(nys))

        j_source, i_source = np.meshgrid(j_source, i_source)

        i_source = i_source.flatten()
        j_source = j_source.flatten()

        dists, inds = self.kdtree.query(list(zip(xt, yt, zt)), k=1)
        ixsel = i_source[inds]
        jysel = j_source[inds]

        df_empty = pandas.DataFrame(index=self.times)
        df_empty["year"] = df_empty.index.map(lambda d: d.year)

        # calculate spatial mean
        sel_date_indices = np.where(df_empty["year"].isin(model_point.continuous_data_years))[0]
        if self.lazy:
            the_var = self.nc_vars[self.var_name]
            data_series = np.mean([the_var[sel_date_indices, j, i] for i, j in zip(ixsel, jysel)], axis=0)
        else:
            data_series = np.mean(self.var_data[:, ixsel, jysel], axis=1)


        # calculate daily climatology
        df = pandas.DataFrame(data=data_series, index=self.times, columns=["values"])

        df["year"] = df.index.map(lambda d: d.year)
        df = df[df["year"].isin(model_point.continuous_data_years)]
        daily_clim = df.groupby(by=lambda d: (d.month, d.day)).mean()

        vals = [daily_clim.ix[(d.month, d.day), "values"] for d in stamp_dates]
        return pandas.TimeSeries(data=vals, index=stamp_dates)


    def get_daily_timeseries_using_mask(self, mask, lons2d_target, lats2d_target, multipliers_2d, start_date=None,
                                        end_date=None):
        """
        multipliers_2d used to multiply the values when aggregating into a single timeseries
        sum(mi * vi) - in space
        """

        bool_vect = np.array([start_date <= t <= end_date for t in self.times])

        new_times = list(filter(lambda t: start_date <= t <= end_date, self.times))
        new_vals = self.var_data[bool_vect, :, :]
        x_out, y_out, z_out = lat_lon.lon_lat_to_cartesian(lons2d_target.flatten(), lats2d_target.flatten())

        print(len(new_times))

        flat_mask = mask.flatten()
        x_out = x_out[flat_mask == 1]
        y_out = y_out[flat_mask == 1]
        z_out = z_out[flat_mask == 1]
        mults = multipliers_2d.flatten()[flat_mask == 1]
        data_interp = [self._interp_and_sum(new_vals[t, :, :].flatten(), mults, x_out, y_out, z_out) for t in
                       range(len(new_times))]

        print("Interpolated all")
        return TimeSeries(time=new_times, data=data_interp).get_ts_of_daily_means()


    def interpolate_data_to_cartesian(self, data_in_flat, x, y, z, nneighbours=4):
        """
        len(data_in_flat) , len(x) == len(y) == len(z) == len(data_out_flat) - all 1D
        """
        print("start query")
        dst, ind = self.kdtree.query(list(zip(x, y, z)), k=nneighbours)
        print("end query")

        inverse_square = 1.0 / dst ** 2
        if len(dst.shape) > 1:
            norm = np.sum(inverse_square, axis=1)
            norm = np.array([norm] * dst.shape[1]).transpose()
            coefs = inverse_square / norm

            data_out_flat = np.sum(coefs * data_in_flat[ind], axis=1)
        elif len(dst.shape) == 1:
            data_out_flat = data_in_flat[ind]
        else:
            raise Exception("Could not find neighbor points")
        return data_out_flat


    def interpolate_data_to(self, data_in, lons2d, lats2d, nneighbours=4):
        """
        Interpolates data_in to the grid defined by (lons2d, lats2d)
        assuming that the data_in field is on the initial CRU grid

        interpolate using 4 nearest neighbors and inverse of squared distance
        """

        x_out, y_out, z_out = lat_lon.lon_lat_to_cartesian(lons2d.flatten(), lats2d.flatten())
        dst, ind = self.kdtree.query(list(zip(x_out, y_out, z_out)), k=nneighbours)

        data_in_flat = data_in.flatten()

        inverse_square = 1.0 / dst ** 2
        if len(dst.shape) > 1:
            norm = np.sum(inverse_square, axis=1)
            norm = np.array([norm] * dst.shape[1]).transpose()
            coefs = inverse_square / norm

            data_out_flat = np.sum(coefs * data_in_flat[ind], axis=1)
        elif len(dst.shape) == 1:
            data_out_flat = data_in_flat[ind]
        else:
            raise Exception("Could not find neighbor points")
        return np.reshape(data_out_flat, lons2d.shape)
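A minimal standalone sketch of the inverse-distance-squared weighting used in interpolate_data_to above, with hypothetical toy points (only numpy and scipy are assumed); the weights are normalised so they sum to one for every target point:

import numpy as np
from scipy.spatial import cKDTree

# hypothetical source points with one value each
src_pts = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
src_vals = np.array([10.0, 20.0, 30.0, 40.0])

tree = cKDTree(src_pts)
target_pts = np.array([[0.25, 0.25], [0.9, 0.1]])

# distances and indices of the 4 nearest source points for each target point
dst, ind = tree.query(target_pts, k=4)

inverse_square = 1.0 / dst ** 2
coefs = inverse_square / inverse_square.sum(axis=1, keepdims=True)  # weights sum to 1 per target

interp_vals = (coefs * src_vals[ind]).sum(axis=1)
print(interp_vals)  # one inverse-distance-weighted value per target point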
Example #38
0
 def runTest(self):
     """testing multi-file dataset access"""
     f = MFDataset(self.files, check=True)
     f.set_auto_maskandscale(True)  # issue570
     assert f.history == 'created today'
     assert_array_equal(np.arange(0, nx), f.variables['x'][:])
     varin = f.variables['data']
     datin = varin[:]
     assert_array_equal(datin.mask, data.mask)
     varin.set_auto_maskandscale(False)
     data2 = data.filled()
     assert varin.long_name == 'phony data'
     assert len(varin) == nx
     assert varin.shape == (nx, ydim, zdim)
     assert varin.dimensions == ('x', 'y', 'z')
     assert_array_equal(varin[4:-4:4, 3:5, 2:8], data2[4:-4:4, 3:5, 2:8])
     assert varin[0, 0, 0] == data2[0, 0, 0]
     assert_array_equal(varin[:], data2)
     assert getattr(varin, 'nonexistantatt', None) == None
     f.close()
     # test master_file kwarg (issue #835).
     f = MFDataset(self.files, master_file=self.files[-1], check=True)
     assert_array_equal(np.arange(0, nx), f.variables['x'][:])
     varin = f.variables['data']
     assert_array_equal(varin[4:-4:4, 3:5, 2:8], data2[4:-4:4, 3:5, 2:8])
     f.close()
     # testing multi-file get_variables_by_attributes
     f = MFDataset(self.files, check=True)
     assert f.get_variables_by_attributes(axis='T') == []
     assert f.get_variables_by_attributes(units='zlotys')[0] == f['x']
     f.close()
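The test above relies on MFDataset stitching files along a shared unlimited dimension. A minimal sketch of that behaviour with hypothetical file names written to the current directory (only numpy and netCDF4 are assumed):

import numpy as np
from netCDF4 import Dataset, MFDataset

# write two small files that share an unlimited 'time' dimension
for i, fname in enumerate(["part_000.nc", "part_001.nc"]):
    with Dataset(fname, "w", format="NETCDF4_CLASSIC") as ds:
        ds.createDimension("time", None)  # unlimited, so MFDataset can aggregate along it
        v = ds.createVariable("t", "f4", ("time",))
        v[0:5] = np.arange(5, dtype="f4") + 5 * i

# read the two files back as one virtual dataset
mf = MFDataset("part_*.nc")
print(mf.variables["t"][:])  # values 0..9, concatenated along 'time'
mf.close()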
Example #39
0
def readEnsemble(wrfinit, timerange=None, fields=None, debug=False):
    ''' Reads in desired fields and returns 2-D arrays of data for each field (barb/contour/field) '''
    if debug: print fields

    datadict = {}
    file_list, missing_list = makeEnsembleList(
        wrfinit, timerange)  #construct list of files

    # loop through fill field, contour field, barb field and retrieve required data
    for f in ['fill', 'contour', 'barb']:
        if not fields[f].keys(): continue
        if debug:
            print 'Reading field:', fields[f]['name'], 'from', fields[f][
                'filename']

        # save some variables for use in this function
        filename = fields[f]['filename']
        arrays = fields[f]['arrayname']
        fieldtype = fields[f]['ensprod']
        fieldname = fields[f]['name']
        if fieldtype in ['prob', 'neprob']: thresh = fields[f]['thresh']
        if fieldtype[0:3] == 'mem': member = int(fieldtype[3:])

        # open Multi-file netcdf dataset
        if debug: print file_list[filename]
        fh = MFDataset(file_list[filename])

        # loop through each field, wind fields will have two fields that need to be read
        datalist = []
        for n, array in enumerate(arrays):
            if debug: print 'Reading', array

            #read in 3D array (times*members,ny,nx) from file object
            if 'arraylevel' in fields[f]:
                if isinstance(fields[f]['arraylevel'], list):
                    level = fields[f]['arraylevel'][n]
                else:
                    level = fields[f]['arraylevel']
            else:
                level = None

            if level == 'max':
                data = np.amax(fh.variables[array][:, :, :, :], axis=1)
            elif level is None:
                data = fh.variables[array][:, :, :]
            else:
                data = fh.variables[array][:, level, :, :]

            # change units for certain fields
            if array in [
                    'U_PL', 'V_PL', 'UBSHR6', 'VBSHR6', 'U10', 'V10',
                    'U_COMP_STM', 'V_COMP_STM', 'S_PL'
            ]:
                data = data * 1.93  # m/s > kt
            elif array in ['DEWPOINT_2M', 'T2', 'AFWA_WCHILL', 'AFWA_HEATIDX']:
                data = (data - 273.15) * 1.8 + 32.0  # K > F
            elif array in [
                    'PREC_ACC_NC', 'PREC_ACC_C', 'AFWA_PWAT', 'PWAT',
                    'AFWA_SNOWFALL', 'AFWA_SNOW', 'AFWA_ICE', 'AFWA_FZRA'
            ]:
                data = data * 0.0393701  # mm > in
                #hcl elif array in ['AFWA_PWAT', 'PWAT', 'AFWA_SNOWFALL', 'AFWA_SNOW', 'AFWA_ICE', 'AFWA_FZRA']:   data = data*0.0393701 # mm > in
            elif array in [
                    'RAINNC', 'RAINC', 'GRPL_MAX', 'SNOW_ACC_NC', 'AFWA_HAIL'
            ]:
                data = data * 0.0393701  # mm > in
            elif array in ['T_PL', 'TD_PL', 'SFC_LI']:
                data = data - 273.15  # K > C
            elif array in ['AFWA_MSLP', 'MSLP']:
                data = data * 0.01  # Pa > hPa
            elif array in ['ECHOTOP']:
                data = data * 3.28084  # m > ft
            elif array in ['AFWA_VIS']:
                data = (data * 0.001) / 1.61  # m > mi
            elif array in ['SBCINH', 'MLCINH', 'W_DN_MAX']:
                data = data * -1.0  # make cin positive
            elif array in ['PVORT_320K']:
                data = data * 1000000  # multiply by 1e6
            elif array in ['SBT123_GDS3_NTAT', 'SBT124_GDS3_NTAT']:
                data = data - 273.15  # K -> C
            elif array in ['HAIL_MAXK1', 'HAIL_MAX2D']:
                data = data * 39.3701  #  m -> inches
            elif array in ['PBMIN', 'PBMIN_SFC']:
                data = data * 0.01  #  Pa -> hPa
            #            elif array in ['LTG1_MAX1', 'LTG2_MAX', 'LTG3_MAX']:   data = data*0.20 #  scale down excess values

            datalist.append(data)

        # these are derived fields that are not in any of the input files but that we can compute
        if 'name' in fields[f]:
            if fieldname in ['shr06mag', 'shr01mag', 'bunkmag', 'speed10m']:
                datalist = [np.sqrt(datalist[0]**2 + datalist[1]**2)]
            elif fieldname == 'stp':
                datalist = [computestp(datalist)]
                # GSR in fields are T(K), mixing ratio (kg/kg), and surface pressure (Pa)
            elif fieldname == 'thetae':
                datalist = [compute_thetae(datalist)]
            elif fieldname == 'pbmin':
                datalist = [datalist[1] - datalist[0][:, 0, :]]
            #elif fieldname in ['precip', 'precipacc']: datalist = [ datalist[0]+datalist[1] ]

        datadict[f] = []
        for data in datalist:
            # perform mean/max/variance/etc to reduce 3D array to 2D
            if (fieldtype == 'mean'): data = np.mean(data, axis=0)
            elif (fieldtype == 'pmm'): data = compute_pmm(data)
            elif (fieldtype == 'max'): data = np.amax(data, axis=0)
            elif (fieldtype == 'var'): data = np.std(data, axis=0)
            elif (fieldtype == 'summean'):
                for i in missing_list[filename]:
                    data = np.insert(data, i, np.nan,
                                     axis=0)  #insert nan for missing files
                #hcl data = np.reshape(data, (data.shape[0]/10,10,data.shape[1],data.shape[2]))
                data = np.nansum(data, axis=0)
                #hcl data = np.nanmean(data, axis=0)
            elif (fieldtype == 'summax'):
                for i in missing_list[filename]:
                    data = np.insert(data, i, np.nan,
                                     axis=0)  #insert nan for missing files
                data = np.reshape(
                    data,
                    (data.shape[0] / 10, 10, data.shape[1], data.shape[2]))
                data = np.nansum(data, axis=0)
                data = np.nanmax(data, axis=0)
            elif (fieldtype[0:3] == 'mem'):
                for i in missing_list[filename]:
                    data = np.insert(data, i, np.nan,
                                     axis=0)  #insert nan for missing files
                data = np.reshape(
                    data,
                    (data.shape[0] / 10, 10, data.shape[1], data.shape[2]))
                data = np.nanmax(data, axis=0)
                data = data[member - 1, :]
            elif (fieldtype in ['prob', 'neprob']):
                data = (data >= thresh).astype('float')
                for i in missing_list[filename]:
                    data = np.insert(data, i, np.nan,
                                     axis=0)  #insert nan for missing files
                data = np.reshape(
                    data,
                    (data.shape[0] / 10, 10, data.shape[1], data.shape[2]))
                data = np.nanmax(data, axis=0)
                if (fieldtype == 'neprob'):
                    data = compute_neprob(data,
                                          roi=14,
                                          sigma=float(fields['sigma']),
                                          type='gaussian')
                else:
                    data = np.nanmean(data, axis=0)
                data = data + 0.001  #hack to ensure that plot displays discrete prob values
            if debug:
                print 'field', fieldname, 'has shape', data.shape, 'max', data.max(), 'min', data.min()

            # attach data arrays for each type of field (e.g. { 'fill':[data], 'barb':[data,data] })
            datadict[f].append(data)

        fh.close()

    return (datadict, missing_list)
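The 'prob'/'neprob' branch above reduces the member dimension to an exceedance probability. A toy sketch of that reduction with plain numpy (hypothetical threshold and ensemble data, ignoring the missing-member and neighbourhood handling):

import numpy as np

nmembers, ny, nx = 10, 4, 5
ens = np.random.rand(nmembers, ny, nx) * 50.0  # hypothetical ensemble field
thresh = 25.0

binary = (ens >= thresh).astype('float')  # 1 where a member exceeds the threshold
prob = np.nanmean(binary, axis=0)         # fraction of members exceeding it, per grid point
print(prob.shape)                         # (4, 5), values between 0 and 1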
Example #40
0
print '\n +++ READING THE DATA +++'
pcpaccaux = np.zeros((30, 64, 128))
pcpaccaux[:] = np.nan

for i, ano in enumerate(range(1982, 2012)):

    # print ano

    nc1 = 'nc/pcp-daily-echam46-amip-{0}01.nc'.format(ano)
    nc2 = 'nc/pcp-daily-echam46-amip-{0}02.nc'.format(ano)
    nc3 = 'nc/pcp-daily-echam46-amip-{0}03.nc'.format(ano)

    netcdfs = [nc1, nc2, nc3]

    data     = MFDataset(netcdfs)
    pcp      = data.variables['pcp'][:]
    lons_360 = data.variables['longitude'][:]
    lats     = data.variables['latitude'][:]
    data.close()

    a = np.nansum(pcp, axis=0)

    pcpaccaux[i, :, :] = np.nansum(pcp, axis=0)

print pcpaccaux.shape

pcpacc, lons = shiftgrid(180., pcpaccaux, lons_360, start=False)

print '\n +++ INTERPOLATION +++'
newlats = np.linspace(-90, 90, 181)
Example #41
0
for fhr in range(0, 49):
    print 'forecast hour', fhr
    if fhr in fhdone:
        fha.write(str(fhr) + "\n")
        continue

    RUN_DIR = '/glade/scratch/hclin/CONUS/wrfda/postdir/soundings'
    files = []
    sound = '%s/%s/sound_wrfda_Fhr_%03d.nc' % (RUN_DIR, yyyymmddhh, fhr)
    print sound
    if os.path.exists(sound): files.append(sound)
    if len(files) < 1: continue

    print time.ctime(time.time()), ':', 'Reading data'
    numens = len(files)
    fh = MFDataset(files)
    numstations = len(fh.dimensions['stations'])
    numlevels = len(fh.dimensions['bottom_top'])
    tmpc = fh.variables['TEMP_MODLEV'][:].reshape(
        (numens, numlevels, numstations))
    dwpc = fh.variables['DEWPOINT_MODLEV'][:].reshape(
        (numens, numlevels, numstations))
    hght = fh.variables['HEIGHT_MODLEV'][:].reshape(
        (numens, numlevels, numstations))
    pres = fh.variables['PRESSURE_MODLEV'][:].reshape(
        (numens, numlevels, numstations))
    ugrd = fh.variables['U_GRID_MODLEV'][:].reshape(
        (numens, numlevels, numstations))
    vgrd = fh.variables['V_GRID_MODLEV'][:].reshape(
        (numens, numlevels, numstations))
    stns = chartostring(fh.variables['stn'][:, 0:3])
Example #42
0
def smart_reader(fNcdf,var_list,suppress_warning=False):
    """
    Smarter alternative to using var=fNcdf.variables['var'][:] when handling PROCESSED files; it also checks the
    matching XXXXX.atmos_average.nc (or daily...) and XXXXX.fixed.nc files
    
    Args:
        fNcdf: Netcdf file object (i.e. already opened with Dataset or MFDataset)
        var_list: variable or list of variables, e.g. 'areo' or ['pk','bk','areo']
        suppress_warning: suppress the debug statement; useful if the variable is not expected to be found in the file anyway
    Returns:
        out_list: the variables' content, as a singleton or as values to unpack
        
    -------    
    Example: 
    
    from netCDF4 import Dataset
    
    fNcdf=Dataset('/u/akling/FV3/00668.atmos_average_pstd.nc','r') 
     
    ucomp= fNcdf.variables['ucomp'][:]   # << this is the regular way
    vcomp= smart_reader(fNcdf,'vcomp')   # << this is exactly equivalent
    pk,bk,areo= smart_reader(fNcdf,['pk','bk','areo'])  # this will get 'areo' from 00668.atmos_average.nc if it is not available in the original _pstd.nc file
                                                        # if pk and bk are absent from 00668.atmos_average.nc, it will also check 00668.fixed.nc
    *** NOTE ***
        -Only the variables' content is returned, not the attributes
    """  
    
    #This out_list is for the variable
    out_list=[]
    one_element=False
    file_is_MF=False
    
    Ncdf_path= get_Ncdf_path(fNcdf) #Return string (Dataset) or list (MFDataset)
    if type(Ncdf_path)==list:file_is_MF=True
    
    #For generality convert to list if only one variable is provided, e.g 'areo'>['areo']
    if type(var_list)==str:
        one_element=True
        var_list=[var_list]

    for ivar in var_list:
    #First try to read in the original file
        if ivar in fNcdf.variables.keys():
            out_list.append(fNcdf.variables[ivar][:])
        else:
            full_path_try=alt_FV3path(Ncdf_path,alt='raw',test_exist=True) 
            if file_is_MF:
                f_tmp=MFDataset(full_path_try,'r')
            else:    
                f_tmp=Dataset(full_path_try,'r')
                
            if ivar in f_tmp.variables.keys():
                out_list.append(f_tmp.variables[ivar][:])
                if not suppress_warning: print('**Warning*** Using variable %s in %s instead of original file(s)'%(ivar,full_path_try))
                f_tmp.close()
            else:    
                f_tmp.close()
                full_path_try=alt_FV3path(Ncdf_path,alt='fixed',test_exist=True) 
                if file_is_MF:full_path_try=full_path_try[0]
                
                f_tmp=Dataset(full_path_try,'r')
                if ivar in f_tmp.variables.keys():
                    out_list.append(f_tmp.variables[ivar][:])
                    f_tmp.close()
                    if not suppress_warning: print('**Warning*** Using variable %s in %s instead of original file(s)'%(ivar,full_path_try))
                else: 
                    print('***ERROR*** Variable %s not found in %s, NOR in raw output or fixed file'%(ivar,full_path_try))
                    print('            >>> Assigning  %s  to NaN'%(ivar))
                    f_tmp.close()
                    out_list.append(np.NaN)
    if one_element:out_list=out_list[0]
    return out_list
Example #43
0
 def merge_files_from_list(self, Ncfilename_list):
     Mf_IN = MFDataset(Ncfilename_list, 'r')
     self.copy_all_dims_from_Ncfile(Mf_IN)
     self.copy_all_vars_from_Ncfile(Mf_IN)
     Mf_IN.close()
Example #44
0
@author: deborahkhider

Opening a dataset contained in multiple netCDF files
"""

from netCDF4 import MFDataset
# Just get a list of netCDF files.
root = "/Volumes/Data HD/Documents/MINT/Climate/netCDFTutorial"
files = ["Oct2010.nc", "Nov2010.nc", "Dec2010.nc"]

file_names = []
for name in files:
    file_names.append(root + "/" + name)

#Open the file and get the keys for this example
nc_fid = MFDataset(file_names)
keys = []
nc_vars = [var for var in nc_fid.variables]
for vars in nc_vars:
    keys.append(getattr(nc_fid.variables[vars], 'long_name'))


# First let's print out the file
def MFncdump(nc_fid):
    """
    MFncdump prints dimensions, variables and their attribute info
    
    Args:
        nc_fid: a netCDF file    
    """
Example #45
0
import os, time
from netCDF4 import MFDataset, Dataset  #,num2date,date2num
import numpy as np
import matplotlib.pyplot as plt
import warnings

test = False

if test == True:
    files = [
        "X:/ARCTIC2030/a20_avg_11705_arctic2030.nc",
        "X:/ARCTIC2030/a20_avg_11733_arctic2030.nc",
        "X:/ARCTIC2030/a20_avg_11761_arctic2030.nc"
    ]
    f = MFDataset(files)
else:
    f = MFDataset("X:/ARCTIC2030/*.nc")

latitude = np.array(f.variables['lat_rho'])
longitude = np.array(f.variables['lon_rho'])

# coordinates of needed station
st_lon = 126.82
st_lat = 76.47  # real lat 76.77

# function 'def find_xi_eta' is based on
# Model2roms  Python toolbox
# https://github.com/trondkr/model2roms

Example #46
0
def ncread(file, vars=None, dims=False, noisy=False, atts=False, datetimes=False):
    """
    Read in the FVCOM results file and spit out numpy arrays for each of the
    variables specified in the vars list.

    Optionally specify a dict with keys whose names match the dimension names
    in the netCDF file and whose values are strings specifying alternative
    ranges or lists of indices. For example, to extract the first hundred time
    steps, supply dims as:

        dims = {'time':'0:100'}

    To extract the first, 400th and 10,000th values of any array with nodes:

        dims = {'node':'[0, 3999, 9999]'}

    Any dimension not given in dims will be extracted in full.

    Specify atts=True to extract the variable attributes. Set datetimes=True
    to convert the FVCOM Modified Julian Day values to python datetime objects.

    Parameters
    ----------
    file : str, list
        If a string, the full path to an FVCOM netCDF output file. If a list,
        a series of files to be loaded. Data will be concatenated into a single
        dict.
    vars : list, optional
        List of variable names to be extracted. If omitted, all variables are
        returned.
    dims : dict, optional
        Dict whose keys are dimensions and whose values are a string of either
        a range (e.g. {'time':'0:100'}) or a list of individual indices (e.g.
        {'time':'[0, 1, 80, 100]'}). Slicing is supported (::5 for every fifth
        value).
    noisy : bool, optional
        Set to True to enable verbose output.
    atts : bool, optional
        Set to True to enable output of the attributes (defaults to False).
    datetimes : bool, optional
        Set to True to convert FVCOM Modified Julian Days to Python datetime
        objects (creates a new `datetime' key in the output dict). Only
        applies if `vars' includes either the `Times' or `time' variables.
        Note: if FVCOM has been run with single precision output, then the
        conversion of the `time' values to a datetime object suffers rounding
        errors. It's best to either run FVCOM in double precision or specify
        only the `Times' data in the `vars' list.

    Returns
    -------
    FVCOM : dict
        Dict of data extracted from the netCDF file. Keys are those given in
        vars and the data are stored as ndarrays. If `datetimes' is True,
        then this also includes a `datetime' key in which is the FVCOM
        Modified Julian Day time series converted to Python datetime objects.
    attributes : dict, optional
        If atts=True, returns the attributes as a dict for each
        variable in vars. The key `dims' contains the array dimensions (each
        variable contains the names of its dimensions) as well as the shape of
        the dimensions defined in the netCDF file. The key `global' contains
        the global attributes.

    See Also
    --------
    read_probes : read in FVCOM ASCII probes output files.

    """

    # Set to True when we've converted from Modified Julian Day so we don't
    # end up doing the conversion twice, once for `Times' and again for
    # `time' if both variables have been requested in `vars'.
    done_datetimes = False
    # Check whether we'll be able to fulfill the datetime request.
    if datetimes and vars and not list(set(vars) & set(('Times', 'time'))):
        raise ValueError("Conversion from Modified Julian Day to python "
                         "datetimes has been requested but no time variable "
                         "(`Times' or `time') has been requested in vars.")

    # If we have a list, assume it's lots of files and load them all.
    if isinstance(file, list):
        try:
            try:
                rootgrp = MFDataset(file, 'r')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))
        except:
            # Try aggregating along a 'time' dimension (for POLCOMS,
            # for example).
            try:
                rootgrp = MFDataset(file, 'r', aggdim='time')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))

    else:
        rootgrp = Dataset(file, 'r')

    # Create a dict of the dimension names and their current sizes
    read_dims = {}
    for key, var in list(rootgrp.dimensions.items()):
        # Make the dimensions ranges so we can use them to extract all the
        # values.
        read_dims[key] = '0:' + str(len(var))

    # Compare the dimensions in the netCDF file with those provided. If we've
    # been given a dict of dimensions which differs from those in the netCDF
    # file, then use those.
    if dims:
        commonKeys = set(read_dims).intersection(list(dims.keys()))
        for k in commonKeys:
            read_dims[k] = dims[k]

    if noisy:
        print("File format: {}".format(rootgrp.file_format))

    if not vars:
        vars = iter(list(rootgrp.variables.keys()))

    FVCOM = {}

    # Save the dimensions in the attributes dict.
    if atts:
        attributes = {}
        attributes['dims'] = read_dims
        attributes['global'] = {}
        for g in rootgrp.ncattrs():
            attributes['global'][g] = getattr(rootgrp, g)

    for key, var in list(rootgrp.variables.items()):
        if noisy:
            print('Found ' + key, end=' ')
            sys.stdout.flush()

        if key in vars:
            vDims = rootgrp.variables[key].dimensions

            toExtract = [read_dims[d] for d in vDims]

            # If we have no dimensions, we must have only a single value, in
            # which case set the dimensions to empty and append the function to
            # extract the value.
            if not toExtract:
                toExtract = '.getValue()'

            # Thought I'd finally figured out how to replace the eval approach,
            # but I still can't get past the indexing needed to be able to
            # subset the data.
            # FVCOM[key] = rootgrp.variables.get(key)[0:-1]
            # I know, I know, eval() is evil.
            getData = 'rootgrp.variables[\'{}\']{}'.format(key, str(toExtract).replace('\'', ''))
            FVCOM[key] = eval(getData)

            # Add the units and dimensions for this variable to the list of
            # attributes.
            if atts:
                attributes[key] = {}
                try:
                    attributes[key]['units'] = rootgrp.variables[key].units
                except:
                    pass

                try:
                    attributes[key]['dims'] = rootgrp.variables[key].dimensions
                except:
                    pass

            if datetimes and key in ('Times', 'time') and not done_datetimes:
                # Convert the time data to datetime objects. How we do this
                # depends on which we hit first - `Times' or `time'. For the
                # former, we need to parse the strings, for the latter we can
                # leverage num2date from the netCDF4 module and use the time
                # units attribute.
                if key == 'Times':
                    try:
                        FVCOM['datetime'] = [datetime.strptime(''.join(i), '%Y-%m-%dT%H:%M:%S.%f') for i in FVCOM[key]]
                    except ValueError:
                        # Try a different format before bailing out.
                        FVCOM['datetime'] = [datetime.strptime(''.join(i), '%Y/%m/%d %H:%M:%S.%f') for i in FVCOM[key]]

                    done_datetimes = True
                elif key == 'time':
                    FVCOM['datetime'] = num2date(FVCOM[key],
                                                 rootgrp.variables[key].units)
                    done_datetimes = True

            if noisy:
                if len(str(toExtract)) < 60:
                    print('(extracted {})'.format(str(toExtract).replace('\'', '')))
                else:
                    print('(extracted given indices)')

        elif noisy:
                print()

    # Close the open file.
    rootgrp.close()

    if atts:
        return FVCOM, attributes
    else:
        return FVCOM
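A short usage sketch of the ncread function above (the file path and variable names are hypothetical; dims and datetimes behave as described in the docstring):

# hypothetical FVCOM output file
fvcom_file = 'casename_0001.nc'

# first 100 time steps of temperature plus the time variables and their attributes
FVCOM, attrs = ncread(fvcom_file,
                      vars=['time', 'Times', 'temp'],
                      dims={'time': '0:100'},
                      datetimes=True,
                      atts=True)

print(FVCOM['temp'].shape)     # sliced along 'time'
print(FVCOM['datetime'][0])    # Modified Julian Day converted to a datetime object
print(attrs['temp']['dims'])   # per-variable dimension names collected when atts=True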
Example #47
0
def melt(ncfiles,
         vars=None,
         global_atts=None,
         var_atts=None,
         coord_vars=None,
         missing=None):
    """ Build a (molten) Pandas DataFrame from a series of netcdf files. This is a flexible, but very 
    memory-inneficient data structure, so be careful calling this with large netcdf files.
    
    Arguments:
      ncfiles     -- the input filenames
      vars        -- the variables to read, if None all variables in files read
      var_atts    -- variable attributes to include in each line of output, default all
      global_atts -- global attributes to include in each row of output
      coord_vars  -- variables to treat as coordinates, if None will use variables with 
                     the same name as dimensions"""

    logger = loghelper.get_logger(LOGGER)
    frames = []

    if len(ncfiles) == 1:
        dataset = Dataset(ncfiles[0])
    else:
        dataset = MFDataset(ncfiles)

    coord_vars = get_coordinate_vars(dataset, coord_vars)
    variables = dataset.variables

    # get global attributes in dataset
    # shouldn't really use this, but it works
    dataset_atts = dataset.__dict__

    use_global_atts = _lookup(global_atts, dataset_atts, missing)

    # if no vars specified, use all in ncfiles
    if (vars == None or vars == ["all"]): vars = list(variables.keys())

    # variables are a function of var(reftime,leadtime,height,location)
    # or var(reftime,leadtime,location)
    usevars = [v for v in vars if v not in coord_vars]

    logger.debug("usevars: %s" % usevars)

    # There must be a clean way of doing this in a general
    # way, but I don't have the time to code this properly,
    # so I'm looping over fixed and hard-coded dimension names

    location = coord_vars['location']
    reftime = coord_vars['reftime']
    leadtime = coord_vars['leadtime']
    height = coord_vars['height']
    #lat      = coord_vars['lat']
    #lon      = coord_vars['lon']

    nloc = len(location)
    nreftime = len(reftime)
    nleadtime = len(leadtime)

    # dimension order is reftime, leadtime, location, height
    # or reftime, leadtime, location
    vars2D = [v for v in usevars if len(variables[v].shape) == 3]
    vars3D = [v for v in usevars if len(variables[v].shape) == 4]

    series = []

    for v in vars2D:
        vname = v
        variable = variables[v]

        use_var_atts = _lookup(var_atts, variable.__dict__, missing)

        factors = [reftime, leadtime, [HGT2DNUM], location, [vname]] + map(
            _listify, use_global_atts.values()) + map(_listify,
                                                      use_var_atts.values())
        names = ['reftime', 'leadtime', 'height', 'location', 'variable'
                 ] + use_global_atts.keys() + use_var_atts.keys()

        index = pd.MultiIndex.from_product(factors, names=names)
        #index = pd.MultiIndex.from_tuples([(ref,lead,loc,HGT2DNUM,vname) for ref in reftime for lead in leadtime for loc in location], names=['reftime', 'leadtime', 'location', 'height','variable'])

        if type(variable[:]) == np.ma.core.MaskedArray:
            data = variable[:].flatten().filled(np.nan).astype(np.float)
        else:
            data = variable[:].flatten().astype(np.float)

        series.append(pd.Series(data=data, index=index, name='value'))

    for v in vars3D:
        variable = variables[v]
        vname = v
        use_var_atts = _lookup(var_atts, variable.__dict__, missing)
        for h, hgt in enumerate(height):
            subvar = variable[:, :, :, h]
            vname = "%s.%03d" % (v, hgt)
            vname = v
            factors = [reftime, leadtime, [hgt], location, [vname]] + map(
                _listify, use_global_atts.values()) + map(
                    _listify, use_var_atts.values())
            names = ['reftime', 'leadtime', 'height', 'location', 'variable'
                     ] + use_global_atts.keys() + use_var_atts.keys()
            index = pd.MultiIndex.from_product(factors, names=names)
            #index = pd.MultiIndex.from_tuples([(ref,lead,loc,hgt,vname) for ref in reftime for lead in leadtime for loc in location], names=['reftime', 'leadtime', 'location','height', 'variable'])
            if type(subvar) == np.ma.core.MaskedArray:
                data = subvar[:].flatten().filled(np.nan).astype(np.float)
            else:
                data = subvar[:].flatten().astype(np.float)

            series.append(pd.Series(data=data, index=index, name='value'))

    # this is molten data, to use Haldey Wickham's terminology
    # or perhaps 5th normal form?
    result = pd.concat(series, axis=0).reset_index()
    return result
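A tiny sketch of the 'molten' layout the function above produces: one row per (reftime, leadtime, height, location, variable) combination, with the data in a single 'value' column (toy data, assuming only pandas and numpy):

import numpy as np
import pandas as pd

reftime = ['2015-01-01']
leadtime = [0, 6, 12]
location = ['site_A', 'site_B']

index = pd.MultiIndex.from_product([reftime, leadtime, [2.0], location, ['SPEED']],
                                   names=['reftime', 'leadtime', 'height', 'location', 'variable'])
values = np.arange(len(index), dtype=float)  # toy data, one value per index combination

molten = pd.Series(values, index=index, name='value').reset_index()
print(molten)  # long-format DataFrame: one row per combination, the 'value' column holds the data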
Example #48
0
def get_tile_dimension(in_files, var_name, transfer_limit_Mbytes=None, time_range=None):
    '''
    Computes the total size of 3D variable array and returns the optimal tile dimension for spatial chunking.
    
    :param in_files: absolute path(s) to NetCDF dataset(s) (including OPeNDAP URLs)
    :type in_files: list
    
    :param var_name: variable name to process
    :type var_name: str
    
    :param transfer_limit_Mbytes: maximum OPeNDAP/THREDDS transfer limit in Mbytes (default: None) 
    :type transfer_limit_Mbytes: float
    
    :param time_range: time range
    :type time_range: list of 2 datetime objects: [dt1, dt2]
    
    :rtype: int

    .. warning:: only for 3D variables
    
    '''
    
    if transfer_limit_Mbytes==None:
        return 0
    else:
        transfer_limit_bytes = transfer_limit_Mbytes * 1024 * 1024 # Mbytes --> bytes

        in_files.sort()
        mfnc = MFDataset(in_files, 'r', aggdim='time')

        ndim = mfnc.variables[var_name].ndim
        if ndim != 3:
            print("ERROR: The variable to process must be 3D")
            
        v = mfnc.variables[var_name]
        v_shape = v.shape
        v_dtype = v.dtype
        v_nb_bytes = v_dtype.itemsize 
        
        if time_range == None: 
                                   
            total_array_size_bytes = v_shape[0] * v_shape[1] * v_shape[2] * v_nb_bytes
            optimal_tile_dimension = int(   numpy.sqrt( transfer_limit_bytes / (v.shape[0] * v_nb_bytes)  )   )
            
        else:
            
            var_time =  mfnc.variables['time']
            try:
                time_calend = var_time.calendar
            except:
                time_calend = 'gregorian'
            
            time_units = var_time.units
            time_arr = var_time[:]
            dt_arr = numpy.array([util_dt.num2date(dt, calend=time_calend, units=time_units) for dt in time_arr])
            indices_subset = util_dt.get_indices_subset(dt_arr, time_range)
            
            nb_time_steps_after_subset = len(indices_subset)
            total_array_size_bytes = nb_time_steps_after_subset * v_shape[1] * v_shape[2] * v_nb_bytes
            
            optimal_tile_dimension = int(   numpy.sqrt( transfer_limit_bytes / (nb_time_steps_after_subset * v_nb_bytes)  )   )
        
        mfnc.close()
        
        return optimal_tile_dimension
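As a worked example of the formula above (hypothetical numbers): with a 500 Mbyte transfer limit, 3650 time steps and 4-byte floats, the optimal tile side comes out at 189 grid points, so each spatial chunk stays just under the transfer limit:

import numpy as np

transfer_limit_bytes = 500 * 1024 * 1024  # hypothetical 500 Mbyte OPeNDAP limit
n_time_steps = 3650                       # hypothetical 10 years of daily data
itemsize = np.dtype('float32').itemsize   # 4 bytes per value

tile = int(np.sqrt(transfer_limit_bytes / (n_time_steps * itemsize)))
print(tile)                                    # 189
print(tile * tile * n_time_steps * itemsize)   # 521526600 bytes, just below the 524288000-byte limit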
Example #49
0
    def __init__(self, filename=None, name=None, gridfile=None):

        if filename is None:
            raise ValueError('Need filename as argument to constructor')
        filestr = str(filename)
        if name is None:
            self.name = filestr
        else:
            self.name = name

        # Due to misspelled standard_name in
        # some (Akvaplan-NIVA) FVCOM files
        variable_aliases = {
            'eastward_sea_water_velocity': 'x_sea_water_velocity',
            'Northward_sea_water_velocity': 'y_sea_water_velocity',
            'eastward wind': 'x_wind',
            'northward wind': 'y_wind'
        }

        # Mapping FVCOM variable names to CF standard_name
        fvcom_mapping = {
            'um': 'x_sea_water_velocity',
            'vm': 'y_sea_water_velocity'
        }

        self.return_block = True

        try:
            # Open file, check that everything is ok
            logging.info('Opening dataset: ' + filestr)
            if ('*' in filestr) or ('?' in filestr) or ('[' in filestr):
                logging.info('Opening files with MFDataset')
                self.Dataset = MFDataset(filename)
            else:
                logging.info('Opening file with Dataset')
                self.Dataset = Dataset(filename, 'r')
        except Exception as e:
            raise ValueError(e)

        # We are reading and using lon/lat arrays,
        # and not any projected coordinates
        self.proj4 = '+proj=latlong'

        logging.debug('Finding coordinate variables.')
        # Find x, y and z coordinates
        # first check if we have specified a separate grid file
        if gridfile is None:
            self.gridfile = self.Dataset
        else:
            self.gridfile = Dataset(gridfile)
            logging.info('Opening Grid file')
        # now check content of grid- or datafile
        for var_name in self.gridfile.variables:
            var = self.gridfile.variables[var_name]
            if var.ndim > 1:
                continue  # Coordinates must be 1D-array
            attributes = var.ncattrs()
            standard_name = ''
            long_name = ''
            axis = ''
            units = ''
            CoordinateAxisType = ''
            if 'standard_name' in attributes:
                standard_name = var.__dict__['standard_name']
            if 'long_name' in attributes:
                long_name = var.__dict__['long_name']
            if 'axis' in attributes:
                axis = var.__dict__['axis']
            if 'grid' in attributes:
                grid = var.__dict__['grid']
            if 'units' in attributes:
                units = var.__dict__['units']
            if '_CoordinateAxisType' in attributes:
                CoordinateAxisType = var.__dict__['_CoordinateAxisType']
            # read FVCOM Elements/Center grid ( for u and v):
            if standard_name == 'longitude' and grid == 'Elems' or \
                    var_name == 'lonc':
                self.xname = var_name
                self.numx = var.shape[0]
                x = var[:]
            if standard_name == 'latitude' and grid == 'Elems' or \
                    var_name == 'latc':
                self.yname = var_name
                self.numy = var.shape[0]
                y = var[:]
            if var_name == 'siglayz_center' and grid == 'Elems':
                if 'positive' not in var.ncattrs() or \
                        var.__dict__['positive'] == 'up':
                    self.z = var[:]
                else:
                    self.z = -var[:]

            # todo: read FVCOM Vertices grid ( for tracers)
            #

        self.lon = x
        self.lat = y

        # Find all variables having standard_name
        self.variable_mapping = {}
        for var_name in self.Dataset.variables:
            if var_name in [self.xname, self.yname, 'depth']:
                continue  # Skip coordinate variables
            var = self.Dataset.variables[var_name]
            attributes = var.ncattrs()
            standard_name = ''
            long_name = ''
            axis = ''
            units = ''
            CoordinateAxisType = ''
            if 'standard_name' in attributes:
                standard_name = var.__dict__['standard_name']
            if 'long_name' in attributes:
                long_name = var.__dict__['long_name']
            if 'axis' in attributes:
                axis = var.__dict__['axis']
            if 'grid' in attributes:
                grid = var.__dict__['grid']
            if 'units' in attributes:
                units = var.__dict__['units']

            if standard_name == 'time' or axis == 'T' or var_name == 'time':
                # Read and store time coverage (of this particular file)
                time = var[:]
                time_units = units
                self.times = num2date(time, time_units)
                self.start_time = self.times[0]
                self.end_time = self.times[-1]
                if len(self.times) > 1:
                    self.time_step = self.times[1] - self.times[0]
                else:
                    self.time_step = None

            if 'standard_name' in attributes:
                standard_name = str(var.__dict__['standard_name'])
                if standard_name in variable_aliases:  # Mapping if needed
                    standard_name = variable_aliases[standard_name]
                self.variable_mapping[standard_name] = str(var_name)
            elif var_name in fvcom_mapping:
                self.variable_mapping[fvcom_mapping[var_name]] = \
                    str(var_name)

        self.variables = self.variable_mapping.keys()

        self.xmin = self.lon.min()
        self.xmax = self.lon.max()
        self.ymin = self.lat.min()
        self.ymax = self.lat.max()

        # Run constructor of parent Reader class
        super(Reader, self).__init__()
Example #50
0
cp_ocean = 3992.10322329649

# Read 'descriptor' and 'years' from external file
f = open("files.txt")
for line in f.readlines():
  exec(line.lstrip())
f.close()
model_label = "%s (%s)" % (descriptor,years)

# TMPDIR where input files are located
tmpdir = "./"

# Open input files
#fstatic = Dataset(tmpdir+'19000101.ocean_geometry.nc', 'r')
fstatic = Dataset(tmpdir+'ocean_annual.static.nc', 'r')
ftemp = MFDataset(tmpdir+'ocean_annual.*.temp.nc')
fsalt = MFDataset(tmpdir+'ocean_annual.*.salt.nc')

# Time info
time = ftemp.variables["time"]
ntimes = len(time[:])
date = num2date(time,time.units,time.calendar.lower())
year = [d.year for d in date]
time_days = date2num(date,'days since 01-01-0001',time.calendar.lower())

# Grid info
#area = fstatic.variables["Ah"][:]
area = fstatic.variables["area_t"][:]

z = ftemp.variables["zl"][:]
nz = len(z) 
Example #51
0
def get_data(exp, area, out):
    if out == '':
        s = Dataset(exp + '/ice_month.nc')
    else:
        s = MFDataset(exp + '/ice_month*.nc')

    print 'Reading file', exp
    tm = len(s.variables['time'][:])
    SW = np.zeros(tm)
    LW = np.zeros(tm)
    SH = np.zeros(tm)
    LH = np.zeros(tm)
    HS = np.zeros(tm)  # snow thick
    SV = np.zeros(tm)  # snow vol
    SF = np.zeros(tm)  # snow fall
    HI = np.zeros(tm)  # ice thick
    IV = np.zeros(tm)  # ice vol
    FRA = np.zeros(tm)
    BHEAT = np.zeros(tm)
    BMELT = np.zeros(tm)
    SST = np.zeros(tm)
    SSS = np.zeros(tm)
    ALB = np.zeros(tm)
    SALTF = np.zeros(tm)
    time = np.zeros(tm)
    for t in range(tm):
        time[t] = s.variables['time'][t] / 365.
        print 'Time (years):', time[t]
        hi_tmp = s.variables['HI'][t, :]
        sst_tmp = s.variables['SST'][t, :]
        sss_tmp = s.variables['SSS'][t, :]
        sw_tmp = s.variables['SW'][t, :]
        lw_tmp = s.variables['LW'][t, :]
        sh_tmp = s.variables['SH'][t, :]
        lh_tmp = s.variables['LH'][t, :]
        hs_tmp = s.variables['HS'][t, :]
        fra_tmp = s.variables['FRAZIL'][t, :]
        bh_tmp = s.variables['BHEAT'][t, :]
        bm_tmp = s.variables['BMELT'][t, :]
        saltf_tmp = s.variables['SALTF'][t, :]
        sf_tmp = s.variables['SNOWFL'][t, :]
        alb_tmp = s.variables['ALB'][t, :]

        HI[t] = (area * hi_tmp).sum() / area.sum()
        SSS[t] = (area * sss_tmp).sum() / area.sum()
        SST[t] = (area * sst_tmp).sum() / area.sum()
        BHEAT[t] = (area * bh_tmp).sum()
        BMELT[t] = (area * bm_tmp).sum()
        HS[t] = (area * hs_tmp).sum() / area.sum()
        SW[t] = (area * sw_tmp).sum()
        LW[t] = (area * lw_tmp).sum()
        LH[t] = (area * lh_tmp).sum()
        SH[t] = (area * sh_tmp).sum()
        FRA[t] = (area * fra_tmp).sum()
        ALB[t] = (area * alb_tmp).sum() / area.sum()
        IV[t] = (area * hi_tmp).sum()
        SV[t] = (area * hs_tmp).sum()
        SALTF[t] = (area * saltf_tmp).sum()

        print 'SSS, SST, HI, ALB, FRAZIL: ', SSS[t], SST[t], HI[t], ALB[t], FRA[t]
    s.close()
    return SSS, SST, HI, BHEAT, BMELT, IV, HS, SV, SH, LH, SW, LW, FRA, ALB, SALTF, time
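The diagnostics above mix area-weighted means (divided by area.sum(), e.g. SST, HI, ALB) with area integrals (no division, e.g. SW, LW, IV). A small sketch of the two reductions with toy fields (assuming only numpy):

import numpy as np

area = np.array([[1.0, 2.0], [3.0, 4.0]])       # toy cell areas
field = np.array([[10.0, 10.0], [20.0, 20.0]])  # toy ice thickness

weighted_mean = (area * field).sum() / area.sum()  # mean thickness, weighted by cell area
integral = (area * field).sum()                    # area-integrated total (a volume here)

print(weighted_mean)  # 17.0
print(integral)       # 170.0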
Example #52
0
def readFVCOM(file, varList=None, clipDims=False, noisy=False, atts=False):
    """
    Read in the FVCOM results file and spit out numpy arrays for each of the
    variables specified in the varList list.

    Optionally specify a dict with keys whose names match the dimension names
    in the NetCDF file and whose values are strings specifying alternative
    ranges or lists of indices. For example, to extract the first hundred time
    steps, supply clipDims as:

        clipDims = {'time':'0:100'}

    To extract the first, 400th and 10,000th values of any array with nodes:

        clipDims = {'node':'[0, 3999, 9999]'}

    Any dimension not given in clipDims will be extracted in full.

    Specify atts=True to extract the variable attributes.

    Parameters
    ----------
    file : str, list
        If a string, the full path to an FVCOM NetCDF output file. If a list,
        a series of files to be loaded. Data will be concatenated into a single
        dict.
    varList : list, optional
        List of variable names to be extracted. If omitted, all variables are
        returned.
    clipDims : dict, optional
        Dict whose keys are dimensions and whose values are a string of either
        a range (e.g. {'time':'0:100'}) or a list of individual indices (e.g.
        {'time':'[0, 1, 80, 100]'}). Slicing is supported (::5 for every fifth
        value) but it is not possible to extract data from the end of the array
        with a negative index (e.g. 0:-4).
    noisy : bool, optional
        Set to True to enable verbose output.
    atts : bool, optional
        Set to True to enable output of the attributes (defaults to False).

    Returns
    -------
    FVCOM : dict
        Dict of data extracted from the NetCDF file. Keys are those given in
        varList and the data are stored as ndarrays.
    attributes : dict, optional
        If atts=True, returns the attributes as a dict for each
        variable in varList. The key 'dims' contains the array dimensions (each
        variable contains the names of its dimensions) as well as the shape of
        the dimensions defined in the NetCDF file. The key 'global' contains
        the global attributes.

    See Also
    --------
    readProbes : read in FVCOM ASCII probes output files.

    """

    # If we have a list, assume it's lots of files and load them all.
    if isinstance(file, list):
        try:
            try:
                rootgrp = MFDataset(file, 'r')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))
        except:
            # Try aggregating along a 'time' dimension (for POLCOMS, for example)
            try:
                rootgrp = MFDataset(file, 'r', aggdim='time')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))

    else:
        rootgrp = Dataset(file, 'r')

    # Create a dict of the dimension names and their current sizes
    dims = {}
    for key, var in list(rootgrp.dimensions.items()):
        # Make the dimensions ranges so we can use them to extract all the
        # values.
        dims[key] = '0:' + str(len(var))

    # Compare the dimensions in the NetCDF file with those provided. If we've
    # been given a dict of dimensions which differs from those in the NetCDF
    # file, then use those.
    if clipDims:
        commonKeys = set(dims).intersection(list(clipDims.keys()))
        for k in commonKeys:
            dims[k] = clipDims[k]

    if noisy:
        print("File format: {}".format(rootgrp.file_format))

    if not varList:
        varList = iter(list(rootgrp.variables.keys()))

    FVCOM = {}

    # Save the dimensions in the attributes dict.
    if atts:
        attributes = {}
        attributes['dims'] = dims
        attributes['global'] = {}
        for g in rootgrp.ncattrs():
            attributes['global'][g] = getattr(rootgrp, g)

    for key, var in list(rootgrp.variables.items()):
        if noisy:
            print('Found ' + key, end=' ')
            sys.stdout.flush()

        if key in varList:
            vDims = rootgrp.variables[key].dimensions

            toExtract = [dims[d] for d in vDims]

            # If we have no dimensions, we must have only a single value, in
            # which case set the dimensions to empty and append the function to
            # extract the value.
            if not toExtract:
                toExtract = '.getValue()'

            # Thought I'd finally figured out how to replace the eval approach,
            # but I still can't get past the indexing needed to be able to
            # subset the data.
            # FVCOM[key] = rootgrp.variables.get(key)[0:-1]
            # I know, I know, eval() is evil.
            getData = 'rootgrp.variables[\'{}\']{}'.format(key, str(toExtract).replace('\'', ''))
            FVCOM[key] = eval(getData)

            # Add the units and dimensions for this variable to the list of
            # attributes.
            if atts:
                attributes[key] = {}
                try:
                    attributes[key]['units'] = rootgrp.variables[key].units
                except:
                    pass

                try:
                    attributes[key]['dims'] = rootgrp.variables[key].dimensions
                except:
                    pass

            if noisy:
                if len(str(toExtract)) < 60:
                    print('(extracted {})'.format(str(toExtract).replace('\'', '')))
                else:
                    print('(extracted given indices)')

        elif noisy:
            print()

    # Close the open file.
    rootgrp.close()

    if atts:
        return FVCOM, attributes
    else:
        return FVCOM
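# --- Hypothetical usage sketch for readFVCOM() above (not from the original
# snippet). The file name and the variable names 'zeta' and 'temp' are
# assumptions; clipDims follows the string-slice convention documented above.
dims = {'time': '0:100', 'node': '::5'}           # first 100 steps, every 5th node
FVCOM, attrs = readFVCOM('casename_0001.nc',
                         varList=['zeta', 'temp'],
                         clipDims=dims,
                         noisy=True,
                         atts=True)
print(FVCOM['zeta'].shape, attrs['zeta'].get('units'))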
Example #53
0
    def __init__(self, filename=None, name=None, proj4=None):

        if filename is None:
            raise ValueError('Need filename as argument to constructor')

        filestr = str(filename)
        if name is None:
            self.name = filestr
        else:
            self.name = name

        try:
            # Open file, check that everything is ok
            logging.info('Opening dataset: ' + filestr)
            if ('*' in filestr) or ('?' in filestr) or ('[' in filestr):
                logging.info('Opening files with MFDataset')
                if has_xarray:
                    self.Dataset = xr.open_mfdataset(filename)
                else:
                    self.Dataset = MFDataset(filename)
            else:
                logging.info('Opening file with Dataset')
                if has_xarray:
                    self.Dataset = xr.open_dataset(filename)
                else:
                    self.Dataset = Dataset(filename, 'r')
        except Exception as e:
            raise ValueError(e)

        logging.debug('Finding coordinate variables.')
        if proj4 is not None:  # If user has provided a projection apriori
            self.proj4 = proj4
        # Find x, y and z coordinates
        for var_name in self.Dataset.variables:
            logging.debug('Parsing variable: ' + var_name)
            var = self.Dataset.variables[var_name]
            #if var.ndim > 1:
            #    continue  # Coordinates must be 1D-array
            if has_xarray:
                attributes = var.attrs
                att_dict = var.attrs
            else:
                attributes = var.ncattrs()
                att_dict = var.__dict__
            standard_name = ''
            long_name = ''
            axis = ''
            units = ''
            CoordinateAxisType = ''
            if not hasattr(self, 'proj4'):
                for att in attributes:
                    if 'proj4' in att:
                        if has_xarray:
                            self.proj4 = str(att_dict[att])
                        else:
                            self.proj4 = str(var.__getattr__(att))
                    else:
                        if 'grid_mapping_name' in att:
                            mapping_dict = att_dict
                            logging.debug(
                                ('Parsing CF grid mapping dictionary:'
                                 ' ' + str(mapping_dict)))
                            try:
                                self.proj4, proj =\
                                    proj_from_CF_dict(mapping_dict)
                            except:
                                logging.info('Could not parse CF grid_mapping')

            if 'standard_name' in attributes:
                standard_name = att_dict['standard_name']
            if 'long_name' in attributes:
                long_name = att_dict['long_name']
            if 'axis' in attributes:
                axis = att_dict['axis']
            if 'units' in attributes:
                units = att_dict['units']
            if '_CoordinateAxisType' in attributes:
                CoordinateAxisType = att_dict['_CoordinateAxisType']
            # has_xarray checks in each case below to avoid loading
            # data if it isn't a coord
            # is there a better way??
            if standard_name == 'longitude' or \
                    CoordinateAxisType == 'Lon' or \
                    long_name.lower() == 'longitude':
                if has_xarray:
                    var_data = var.values
                else:
                    var_data = var[:]
                self.lon = var_data
                lon_var_name = var_name
            if standard_name == 'latitude' or \
                    CoordinateAxisType == 'Lat' or \
                    long_name.lower() == 'latitude':
                if has_xarray:
                    var_data = var.values
                else:
                    var_data = var[:]
                self.lat = var_data
                lat_var_name = var_name
            if axis == 'X' or \
                    standard_name == 'projection_x_coordinate':
                self.xname = var_name
                # Fix for units; should ideally use udunits package
                if units == 'km':
                    unitfactor = 1000
                elif units == '100  km':
                    unitfactor = 100000
                else:
                    unitfactor = 1
                if has_xarray:
                    var_data = var.values
                else:
                    var_data = var[:]
                x = var_data * unitfactor
                self.numx = var_data.shape[0]
            if axis == 'Y' or \
                    standard_name == 'projection_y_coordinate':
                self.yname = var_name
                # Fix for units; should ideally use udunits package
                if units == 'km':
                    unitfactor = 1000
                elif units == '100  km':
                    unitfactor = 100000
                else:
                    unitfactor = 1
                self.unitfactor = unitfactor
                if has_xarray:
                    var_data = var.values
                else:
                    var_data = var[:]
                y = var_data * unitfactor
                self.numy = var_data.shape[0]
            if standard_name == 'depth' or axis == 'Z':
                if has_xarray:
                    var_data = var.values
                else:
                    var_data = var[:]
                if var_data.ndim == 1:
                    if 'positive' not in attributes or \
                            att_dict['positive'] == 'up':
                        self.z = var_data
                    else:
                        self.z = -var_data
            if standard_name == 'time' or axis == 'T' or var_name in [
                    'time', 'vtime'
            ]:
                # Read and store time coverage (of this particular file)
                if has_xarray:
                    var_data = var.values
                else:
                    var_data = var[:]
                time = var_data
                time_units = units
                if has_xarray:
                    self.times = [
                        datetime.utcfromtimestamp(
                            (OT - np.datetime64('1970-01-01T00:00:00Z')) /
                            np.timedelta64(1, 's')) for OT in time
                    ]
                else:
                    self.times = num2date(time, time_units)
                self.start_time = self.times[0]
                self.end_time = self.times[-1]
                if len(self.times) > 1:
                    self.time_step = self.times[1] - self.times[0]
                else:
                    self.time_step = None
            if standard_name == 'realization':
                if has_xarray:
                    var_data = var.values
                else:
                    var_data = var[:]
                self.realizations = var_data
                logging.debug('%i ensemble members available' %
                              len(self.realizations))

        if 'x' not in locals():
            if self.lon.ndim == 1:
                x = self.lon[:]
                self.xname = lon_var_name
                self.numx = len(x)
            else:
                raise ValueError('Did not find x-coordinate variable')
        if 'y' not in locals():
            if self.lat.ndim == 1:
                y = self.lat[:]
                self.yname = lat_var_name
                self.numy = len(y)
            else:
                raise ValueError('Did not find y-coordinate variable')

        if not hasattr(self, 'unitfactor'):
            self.unitfactor = 1
        if 'x' in locals() and 'y' in locals():
            self.xmin, self.xmax = x.min(), x.max()
            self.ymin, self.ymax = y.min(), y.max()
            self.delta_x = np.abs(x[1] - x[0])
            self.delta_y = np.abs(y[1] - y[0])
            rel_delta_x = (x[1::] - x[0:-1])
            rel_delta_x = np.abs(
                (rel_delta_x.max() - rel_delta_x.min()) / self.delta_x)
            rel_delta_y = (y[1::] - y[0:-1])
            rel_delta_y = np.abs(
                (rel_delta_y.max() - rel_delta_y.min()) / self.delta_y)
            if rel_delta_x > 0.05:  # Allow 5 % deviation
                print(rel_delta_x)
                print(x[1::] - x[0:-1])
                raise ValueError('delta_x is not constant!')
            if rel_delta_y > 0.05:
                print(rel_delta_y)
                print(y[1::] - y[0:-1])
                raise ValueError('delta_y is not constant!')
            self.x = x  # Store coordinate vectors
            self.y = y
        else:
            if hasattr(self, 'lon') and hasattr(self, 'lat'):
                logging.info('No projection found, using lon/lat arrays')
                self.xname = lon_var_name
                self.yname = lat_var_name
            else:
                raise ValueError(
                    'Neither x/y-coordinates or lon/lat arrays found')

        if not hasattr(self, 'proj4'):
            if self.lon.ndim == 1:
                logging.debug(
                    'Lon and lat are 1D arrays, assuming latlong projection')
                self.proj4 = '+proj=latlong'
            elif self.lon.ndim == 2:
                logging.debug(
                    'Reading lon lat 2D arrays, since projection is not given')
                self.lon = self.lon[:]
                self.lat = self.lat[:]
                self.projected = False

        if hasattr(self, 'proj4') and 'latlong' in self.proj4 and hasattr(
                self, 'xmax') and self.xmax > 360:
            logging.info('Longitudes > 360 degrees, subtracting 360')
            self.xmin -= 360
            self.xmax -= 360
            self.x -= 360

        # Find all variables having standard_name
        self.variable_mapping = {}
        for var_name in self.Dataset.variables:
            if var_name in [self.xname, self.yname, 'depth']:
                continue  # Skip coordinate variables
            var = self.Dataset.variables[var_name]
            if has_xarray:
                attributes = var.attrs
                att_dict = var.attrs
            else:
                attributes = var.ncattrs()
                att_dict = var.__dict__
            if 'standard_name' in attributes:
                standard_name = str(att_dict['standard_name'])
                if standard_name in self.variable_aliases:  # Mapping if needed
                    standard_name = self.variable_aliases[standard_name]
                self.variable_mapping[standard_name] = str(var_name)

        self.variables = list(self.variable_mapping.keys())

        # Run constructor of parent Reader class
        super(Reader, self).__init__()
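# --- Toy check of the grid-regularity test used in the constructor above
# (illustration only, not part of the original snippet). Grids whose spacing
# varies by more than 5 % of delta_x / delta_y are rejected.
import numpy as np

x = np.array([0., 1000., 2000., 3010.])           # ~1 % jitter in the last step
delta_x = np.abs(x[1] - x[0])
steps = x[1:] - x[:-1]
rel_delta_x = np.abs((steps.max() - steps.min()) / delta_x)
print(rel_delta_x <= 0.05)                        # True: 1 % deviation is accepted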
Example #54
0
    def __init__(self, filename=None, name=None):
        if filename is None:
            raise ValueError('Filename is missing')
        filestr = str(filename)
        if name is None:
            self.name = filestr
        else:
            self.name = name

        # xarray currently does not handle this type of grid:
        # https://github.com/pydata/xarray/issues/2233

        self.timer_start("open dataset")
        logger.info('Opening dataset: ' + filestr)
        if ('*' in filestr) or ('?' in filestr) or ('[' in filestr):
            logger.info('Opening files with MFDataset')
            self.dataset = MFDataset(filename)
        else:
            logger.info('Opening file with Dataset')
            self.dataset = Dataset(filename, 'r')

        self.proj4 = '+proj=lonlat'

        logger.info('Reading grid and coordinate variables..')

        self.x, self.y = self.dataset['longitude'][:], self.dataset[
            'latitude'][:]

        ref_time = datetime.fromisoformat(self.dataset['time'].units[14:33])

        self.times = np.array([
            ref_time + timedelta(seconds=d.item())
            for d in self.dataset['time'][:]
        ])
        self.start_time = self.times[0]
        self.end_time = self.times[-1]
        # time steps are not constant

        self.xmin = np.min(self.x)
        self.xmax = np.max(self.x)
        self.ymin = np.min(self.y)
        self.ymax = np.max(self.y)

        # 'level' gives the depth of the bottom of each layer; re-assign to the
        # middle of each layer for nearest interpolation.
        self.z = -self.dataset['level'][:]
        self.z = np.insert(self.z, 0, [0.])
        self.z = self.z[:-1] + (np.diff(self.z) / 2)
        assert len(self.z) == len(self.dataset['level'][:])
        self.zmin, self.zmax = np.min(self.z), 0.
        assert (self.z <= 0).all()

        self.variable_mapping = {}
        for var_name in self.dataset.variables:
            # skipping coordinate variables
            if var_name in ['time', 'longitude', 'latitude', 'level']:
                continue

            var = self.dataset[var_name]
            if 'standard_name' in var.ncattrs():
                std_name = var.getncattr('standard_name')
                std_name = self.variable_aliases.get(std_name, std_name)
                self.variable_mapping[std_name] = str(var_name)

        self.variables = list(self.variable_mapping.keys())

        # Run constructor of parent Reader class
        super().__init__()

        self.boundary = self._build_boundary_polygon_(self.x.compressed(),
                                                      self.y.compressed())

        self.timer_start("build index")
        logger.debug("building index of nodes..")
        self.nodes_idx = self._build_ckdtree_(self.x, self.y)
        self.timer_end("build index")

        self.timer_end("open dataset")
Example #55
0
def getMFNcVar(nc_files, keys):
    ''' Extract variables from a dataset spread across multiple netCDF files.

    This function reads the variables contained in a set of netCDF files
    and returns them as nested Python dictionaries. The outer dictionary
    is keyed by the variable long name; each inner dictionary contains the
    values (with scale factor and offset applied), the CF standard name,
    the units and the missing-data flag.

    Args:
        nc_files (str): A path or glob pattern matching the netCDF files
            to open with MFDataset
        keys (list): A list of variable long names to fetch

    Returns:
        dict_out (dict): A dictionary with the long names as keys and the
            associated data dictionaries as values.
    '''
    # Import the package
    from netCDF4 import MFDataset
    # Open the netCDF files
    nc_fid = MFDataset(nc_files)
    # Get the variable names
    nc_vars = [var for var in nc_fid.variables]

    #Make empty lists to collect the info
    #longname (should be using the CF conventions)
    nc_vars_longname = []
    #Units
    nc_vars_units = []
    # Get the standard name
    nc_vars_standardname = []
    #Corrections
    nc_vars_scale_factor = []
    nc_vars_add_offset = []
    #Missing values
    nc_vars_missing_value = []

    for vars in nc_vars:
        if 'long_name' in nc_fid.variables[vars].ncattrs():
            nc_vars_longname.append(
                getattr(nc_fid.variables[vars], 'long_name'))
        else:
            nc_vars_longname.append(vars)
        if 'units' in nc_fid.variables[vars].ncattrs():
            nc_vars_units.append(getattr(nc_fid.variables[vars], 'units'))
        else:
            nc_vars_units.append('NA')
        if 'standard_name' in nc_fid.variables[vars].ncattrs():
            nc_vars_standardname.append(
                getattr(nc_fid.variables[vars], 'standard_name'))
        else:
            nc_vars_standardname.append("NA")
        if 'scale_factor' in nc_fid.variables[vars].ncattrs():
            nc_vars_scale_factor.append(
                getattr(nc_fid.variables[vars], 'scale_factor'))
        else:
            nc_vars_scale_factor.append(1)
        if 'add_offset' in nc_fid.variables[vars].ncattrs():
            nc_vars_add_offset.append(
                getattr(nc_fid.variables[vars], 'add_offset'))
        else:
            nc_vars_add_offset.append(0)
        if 'missing_value' in nc_fid.variables[vars].ncattrs():
            nc_vars_missing_value.append(
                getattr(nc_fid.variables[vars], 'missing_value'))
        else:
            nc_vars_missing_value.append('NA')
    # Check for the list against the desired variables and output.
    dict_out = {}
    for name in nc_vars_longname:
        if name in keys:
            f = {
                'values': [],
                'units': [],
                'missing_value': [],
                'standard_name': {}
            }
            idx = nc_vars_longname.index(name)
            f['values']=(nc_fid.variables[nc_vars[idx]][:]*nc_vars_scale_factor[idx])\
                +nc_vars_add_offset[idx]
            f['units'] = nc_vars_units[idx]
            f['missing_value'] = nc_vars_missing_value[idx]
            f['standard_name'] = nc_vars_standardname[idx]
            dict_out[name] = f

    return dict_out
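# --- Hypothetical usage sketch for getMFNcVar() above (not from the original
# snippet). The file pattern and the long names are assumptions; the keys must
# match the 'long_name' attributes stored in the files.
data = getMFNcVar('air.sig995.*.nc', ['Air temperature', 'Relative humidity'])
for name, entry in data.items():
    print(name, entry['units'], entry['standard_name'], entry['values'].shape)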