def get_time_nc(nc_file, tv='time'):
    """
    Returns all timestamps of the given netCDF file as a datetime list.

    :param nc_file: NetCDF file(s)
    :param tv: name of the temporal dimension
    :return format: netcdftime._datetime.datetime
    """
    from netCDF4 import MFDataset, num2date

    ds = MFDataset(nc_file)
    try:
        time = ds.variables[tv]
    except KeyError:
        # fall back to the dimension name used by e.g. NEMO output
        tv = 'time_counter'
    ds.close()

    try:
        ds = MFDataset(nc_file)
        time = ds.variables[tv]
        if hasattr(time, 'units') and hasattr(time, 'calendar'):
            timestamps = num2date(time[:], time.units, time.calendar)
        elif hasattr(time, 'units'):
            timestamps = num2date(time[:], time.units)
        else:
            timestamps = num2date(time[:])
        ds.close()
    except Exception as e:
        raise Exception('failed to read timestamps: %s' % e)
    return timestamps
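# A minimal usage sketch for get_time_nc; the file pattern below is a
# hypothetical placeholder, not a path from this project.
timestamps = get_time_nc('/data/ocean_monthly_*.nc')
print('%d timesteps, first: %s, last: %s' %
      (len(timestamps), timestamps[0], timestamps[-1]))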
def ice_comp_model_to_osi(pathToModel, modelYear, modelIteration,
                          boundLat, pathToOSI, param='area', threshold=0.15):
    '''
    Plot sea ice area from satellite data and several model iterations.
    '''
    fsat = MFDataset(pathToOSI + '/OSI' + modelYear + '??.nc')
    osi_lat = fsat.variables['lat'][:]
    osi_lon = fsat.variables['lon'][:]
    osi_ice = fsat.variables['ice_conc'][0, :, :]
    area_osi = np.ones(osi_ice.shape) * 100

    osi_area = []
    for mm in range(12):
        if param == 'area':
            osi_area.append(calc_area(fsat.variables['ice_conc'][mm, :, :] / 100,
                                      area_osi, osi_lat, blat=boundLat,
                                      threshold=threshold) / 1e6)
        elif param == 'extent':
            osi_area.append(calc_extent(fsat.variables['ice_conc'][mm, :, :] / 100,
                                        area_osi, osi_lat, blat=boundLat,
                                        threshold=threshold) / 1e6)

    g = Dataset('./grid.cdf')
    dxc = g.variables['dxc'][0, :, :]
    dyc = g.variables['dyc'][0, :, :]
    lat = g.variables['yc'][0, :, :]
    dxcXdyc = dxc * dyc

    area_model = np.zeros((len(modelIteration), 12))
    for (it, iteration) in enumerate(modelIteration):
        fm = MFDataset(pathToModel + '/' + modelYear + '/' + 'it' +
                       str(iteration) + '/fw/*.cdf')
        for mm in range(12):
            if param == 'area':
                area_model[it, mm] = calc_area(fm.variables['area'][mm, :, :],
                                               dxcXdyc, lat, blat=boundLat,
                                               threshold=threshold) / 10e11
            elif param == 'extent':
                area_model[it, mm] = calc_extent(fm.variables['area'][mm, :, :],
                                                 dxcXdyc, lat, blat=boundLat,
                                                 threshold=threshold) / 10e11
        fm.close()

    dates = pd.date_range(modelYear + '-01', str(int(modelYear) + 1) + '-01', freq='M')
    dd = pd.DataFrame(index=dates)
    dd['Satellite'] = osi_area
    for (it, iteration) in enumerate(modelIteration):
        dd['it' + str(iteration)] = area_model[it, :]
    return dd.plot(figsize=(10, 5), lw=3)
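# A usage sketch for ice_comp_model_to_osi; both directories are hypothetical
# and must contain OSI<year><month>.nc satellite files and it*/fw/*.cdf model
# output, as the function assumes.
ax = ice_comp_model_to_osi('/data/model', '2007', [1, 2, 3],
                           boundLat=60.0, pathToOSI='/data/satellite',
                           param='extent')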
def _read_zcoord(self):
    """ Read z coordinate data from netcdf file(s) """
    if self.surface_field:
        self.z = None
    else:
        nc = MFDataset(self.f)
        self.z = np.abs(nc.variables[self.zcoord][:])
        nc.close()
def ice_comp_model_to_osi_table(pathToModel, modelYears, modelIteration,
                                boundLat, pathToOSI, param='area', threshold=0.15):
    diff_array = numpy.zeros((len(modelYears), 12))
    for (nnum, yyear) in enumerate(modelYears):
        fsat = MFDataset(pathToOSI + '/OSI' + yyear + '??.nc')
        osi_lat = fsat.variables['lat'][:]
        osi_lon = fsat.variables['lon'][:]
        osi_ice = fsat.variables['ice_conc'][0, :, :]
        area_osi = np.ones(osi_ice.shape) * 100

        osi_area = []
        for mm in range(12):
            if param == 'area':
                osi_area.append(calc_area(fsat.variables['ice_conc'][mm, :, :] / 100,
                                          area_osi, osi_lat, blat=boundLat,
                                          threshold=threshold) / 1e6)
            elif param == 'extent':
                osi_area.append(calc_extent(fsat.variables['ice_conc'][mm, :, :] / 100,
                                            area_osi, osi_lat, blat=boundLat,
                                            threshold=threshold) / 1e6)

        g = Dataset('./grid.cdf')
        dxc = g.variables['dxc'][0, :, :]
        dyc = g.variables['dyc'][0, :, :]
        lat = g.variables['yc'][0, :, :]
        dxcXdyc = dxc * dyc

        area_model = np.zeros((len(modelIteration), 12))

        if modelIteration[0] == 'last':
            gg = glob.glob(pathToModel + '/' + yyear + '/' + 'it*')
            gg.sort()
            lastit = [int(gg[-1].split('/')[-1].split('t')[-1])]
        else:
            lastit = modelIteration

        for (it, iteration) in enumerate(lastit):
            fm = MFDataset(pathToModel + '/' + yyear + '/' + 'it' +
                           str(iteration) + '/fw/*.cdf')
            for mm in range(12):
                if param == 'area':
                    area_model[it, mm] = calc_area(fm.variables['area'][mm, :, :],
                                                   dxcXdyc, lat, blat=boundLat) / 10e11
                elif param == 'extent':
                    area_model[it, mm] = calc_extent(fm.variables['area'][mm, :, :],
                                                     dxcXdyc, lat, blat=boundLat) / 10e11
            fm.close()

        diff_array[nnum, :] = area_model[0, :] - osi_area[:]
    return diff_array
def get_climatologic_field(self, varname="mrro", gcm="", rcm="",
                           start_year=None, end_year=None, months=None):
    """
    for time t: start_year <= t <= end_year
    """
    mfds = MFDataset("{0}/{1}-{2}/current/{3}_*.nc".format(
        self.folder_with_nc_data, gcm, rcm, varname))

    # in the file the axes are inverted, hence the transposes
    self.lon2d = mfds.variables[self.lon_name][:].transpose()
    self.lat2d = mfds.variables[self.lat_name][:].transpose()
    self._init_kd_tree()

    cache_file = self._get_clim_cache_file_path(varname=varname, gcm=gcm, rcm=rcm,
                                                start_year=start_year,
                                                end_year=end_year, months=months)
    cache_file = os.path.join(self.cache_files_folder, cache_file)
    if os.path.isfile(cache_file):
        f = open(cache_file, "rb")
        mfds.close()
        return pickle.load(f)

    t = mfds.variables["time"]
    t_units = t.units
    t_calendar = t.calendar

    t_start = date2num(datetime(start_year, 1, 1), t_units, calendar=t_calendar)
    t_end = date2num(datetime(end_year + 1, 1, 1), t_units, calendar=t_calendar)

    t = t[:]
    t_sel = t[(t_start <= t) & (t < t_end)]
    dates_sel = num2date(t_sel, t_units, calendar=t_calendar)

    bool_vect = np.array([x.month in months for x in dates_sel], dtype=bool)
    data_sel = mfds.variables[varname][np.where((t_start <= t) & (t < t_end))[0], :, :]

    # save the result to a cache file for reuse
    result = data_sel[bool_vect, :, :].mean(axis=0).transpose()
    pickle.dump(result, open(cache_file, "wb"))
    mfds.close()
    return result
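# Hypothetical call of get_climatologic_field on an instance (here named
# `manager`) of the data-manager class this method belongs to; the gcm/rcm
# identifiers and year range are placeholders.
jja_runoff = manager.get_climatologic_field(varname="mrro", gcm="CGCM3", rcm="CRCM",
                                            start_year=1971, end_year=2000,
                                            months=[6, 7, 8])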
def _read_ycoord(self):
    """ Read latitude data in decimal degrees from netcdf file(s) """
    nc = MFDataset(self.f)
    ncvar = nc.variables[self.ycoord]
    if ncvar.ndim == 2:
        self.y = ncvar[self.j1:self.j2+1, self.i1:self.i2+1]
    else:
        # Dummy j-index for averaging
        self.y = ncvar[self.i1:self.i2+1][np.newaxis]
    nc.close()
def _read_mask(self):
    """ Read mask data from netcdf file(s) """
    nc = MFDataset(self.maskf)
    ncvar = nc.variables[self.maskvar]
    if ncvar.ndim == 4:
        # Time dimension present in the mask field
        mask = ncvar[0, :, self.j1:self.j2+1, self.i1:self.i2+1] == self.maskmdi
    else:
        mask = ncvar[:, self.j1:self.j2+1, self.i1:self.i2+1] == self.maskmdi
    nc.close()
    return mask
def ice_comp_model_to_sat_table_rm(pathToModel, modelYears, modelIteration,
                                   boundLat, pathToOSI, param):
    diff_array = numpy.zeros((len(modelYears), 12))
    for (nnum, yyear) in enumerate(modelYears):
        g = Dataset('./grid.cdf')
        dxc = g.variables['dxc'][0, :, :]
        dyc = g.variables['dyc'][0, :, :]
        lat = g.variables['yc'][0, :, :]
        topo = g.variables['topo'][0, :, :]
        dxcXdyc = dxc * dyc

        if modelIteration[0] == 'last':
            gg = glob.glob(pathToModel + '/' + yyear + '/' + 'it*')
            gg.sort()
            lastit = [int(gg[-1].split('/')[-1].split('t')[-1])]
        else:
            lastit = modelIteration

        for (it, iteration) in enumerate(lastit):
            fm = MFDataset(pathToModel + '/' + yyear + '/' + 'it' +
                           str(iteration) + '/fw/*.cdf')
            fsat = MFDataset(pathToOSI + yyear + '??.nc')
            for mm in range(12):
                if param == 'area':
                    aa_model = np.ma.filled(fm.variables['area'][mm, :, :], 0) * dxcXdyc
                    bb_satel = (fsat.variables['ice'][mm, :, :]) * dxcXdyc
                    cc_diff = aa_model - bb_satel
                    # elementwise sqrt of the square is the absolute difference,
                    # so this sums |model - satellite| over the grid
                    diff_array[nnum, mm] = np.sqrt(cc_diff**2).sum()
                if param == 'extent':
                    dmodel = np.ma.filled(fm.variables['area'][mm, :, :], 0)
                    dmodel[dmodel < 0.15] = 0
                    dmodel[dmodel >= 0.15] = 1
                    aa_model = dmodel * dxcXdyc

                    dsat = fsat.variables['ice'][mm, :, :]
                    dsat[dsat < 0.15] = 0
                    dsat[dsat >= 0.15] = 1
                    bb_satel = dsat * dxcXdyc

                    cc_diff = aa_model - bb_satel
                    diff_array[nnum, mm] = np.sqrt(cc_diff**2).sum()
            fm.close()
            fsat.close()
    return diff_array
def _read_data(self):
    """ Read data from netcdf file(s) """
    nc = MFDataset(self.f)
    ncvar = nc.variables[self.var]
    if self.surface_field:
        self.data = ncvar[:, self.j1:self.j2+1, self.i1:self.i2+1]
        if self.data.ndim != 3:
            raise ShapeError('Surface fields must have dimensions [t,y,x]')
    else:
        self.data = ncvar[:, :, self.j1:self.j2+1, self.i1:self.i2+1]
        if self.data.ndim != 4:
            raise ShapeError('Section fields must have dimensions [t,z,y,x]')
    nc.close()
def get_data(path):
    if os.path.isfile(path + '/ocean.stats.nc'):
        if os.path.isfile(path + '/out1/ocean.stats.nc'):
            s = MFDataset(path + '/out?/ocean.stats.nc')
        else:
            s = Dataset(path + '/ocean.stats.nc')
    else:
        s = MFDataset(path + '/out?/ocean.stats.nc')

    print('Reading exp', path)
    tmp = 365 * 2  # last 2 years of the record
    dS_dt = (s.variables['Salt'][-1] - s.variables['Salt'][-tmp]) * 0.5  # kg/year
    dT_dt = (s.variables['Heat'][-1] - s.variables['Heat'][-tmp]) * 0.5  # J/year
    s.close()
    return dS_dt, dT_dt
def getCruData(month, dimLon, dimLat, cruPath):
    from netCDF4 import MFDataset
    import numpy as np

    # example extents: dimLon = np.array((-30.25, 50.25)), dimLat = np.array((30.25, 70.25))

    # account for python 0-indexing
    month = int(month) - 1

    # open netCDF file
    nc = MFDataset(cruPath)

    # map lat/lon extents to the CRU netCDF indexing scheme
    lonStart = np.where(nc.variables['lon'][:] == snapToGrid(dimLon[0]))[0][0] + 1
    lonEnd = np.where(nc.variables['lon'][:] == snapToGrid(dimLon[1]))[0][0] - 1
    lonDim = lonEnd - lonStart
    latStart = np.where(nc.variables['lat'][:] == snapToGrid(dimLat[0]))[0][0] + 1
    latEnd = np.where(nc.variables['lat'][:] == snapToGrid(dimLat[1]))[0][0] - 1
    latDim = latEnd - latStart

    # number of available years for every month (integer division)
    yearsAvailable = len(nc.dimensions['time']) // 12

    # predefine month array, e.g. (100, 81, 161)
    monthData = np.empty((yearsAvailable, latDim, lonDim))
    monthData[:] = np.nan

    for i in range(0, yearsAvailable):
        # read data from file for the given month
        monthData[i, :, :] = nc.variables['tmp'][month, latStart:latEnd, lonStart:lonEnd]
        month = month + 12

    # transpose to stay consistent with the old array schema
    monthData = monthData.T

    # get vectors of lat/lon data
    longitude = nc.variables['lon'][lonStart:lonEnd]
    latitude = nc.variables['lat'][latStart:latEnd]
    nc.close()

    # set NA values
    monthData[monthData > 100] = np.nan
    return monthData, longitude, latitude
class EcoFOCI_mfnetCDF(object):

    def __init__(self, file_name=None, aggdim=None):
        """Initialize opening of multiple netcdf files along the same
        dimension (aggdim) in the same path.

        Parameters
        ----------
        file_name : str
            full path to file on disk (with wildcards)
        aggdim : str
            dimension name to aggregate along. The slowest varying
            dimension or the unlimited dimension will be chosen if no
            option is passed.
        """
        self.nchandle = MFDataset(file_name, 'a', aggdim=aggdim)
        self.file_name = file_name

    def get_global_atts(self):
        g_atts = {}
        att_names = self.nchandle.ncattrs()
        for name in att_names:
            g_atts[name] = self.nchandle.getncattr(name)
        return g_atts

    def get_vars(self):
        self.variables = self.nchandle.variables
        return self.nchandle.variables

    def ncreadfile_dic(self):
        data = {}
        for j, v in enumerate(self.nchandle.variables):
            if v in self.nchandle.variables.keys():  # check for nc variable
                data[v] = self.nchandle.variables[v][:]
            else:  # if the parameter doesn't exist, leave the entry empty
                data[v] = None
        return data

    def close(self):
        self.nchandle.close()
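# A minimal usage sketch for EcoFOCI_mfnetCDF; the wildcard path is a
# placeholder for a set of mooring files sharing a 'time' dimension.
df = EcoFOCI_mfnetCDF('/data/moorings/*.nc', aggdim='time')
global_atts = df.get_global_atts()
data = df.ncreadfile_dic()
df.close()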
def get_data(path):
    if os.path.isfile(path + '/prog.nc'):
        if os.path.isfile(path + '/out1/prog.nc'):
            s = MFDataset(path + '/out?/prog.nc')
            tmp = 730
        else:
            s = Dataset(path + '/prog.nc')
            tmp = len(s.variables['time'][:])
    else:
        s = MFDataset(path + '/out?/prog.nc')
        tmp = 730

    print('Reading exp', path)
    MLD = (s.variables['ePBL_h_ML'][-tmp::, :]).mean()  # m
    s.close()
    return MLD
def get_timerange(resource):
    """
    returns from/to timestamp of given netcdf file(s).

    :param resource: list of path(s) to netCDF file(s)
    :returns netcdf.datetime.datetime: start, end
    """
    start = end = None
    if type(resource) != list:
        resource = [resource]
    LOGGER.debug('length of resources: %s files' % len(resource))

    try:
        if len(resource) > 1:
            ds = MFDataset(resource)
            LOGGER.debug('MFDataset loaded for %s files in resource' % len(resource))
        else:
            ds = Dataset(resource[0])
            LOGGER.debug('Dataset loaded for %s file in resource' % len(resource))

        time = ds.variables['time']

        if hasattr(time, 'units') and hasattr(time, 'calendar'):
            s = num2date(time[0], time.units, time.calendar)
            e = num2date(time[-1], time.units, time.calendar)
        elif hasattr(time, 'units'):
            s = num2date(time[0], time.units)
            e = num2date(time[-1], time.units)
        else:
            s = num2date(time[0])
            e = num2date(time[-1])

        # TODO: include frequency
        start = '%s%s%s' % (s.year, str(s.month).zfill(2), str(s.day).zfill(2))
        end = '%s%s%s' % (e.year, str(e.month).zfill(2), str(e.day).zfill(2))
        ds.close()
    except Exception:
        msg = 'failed to get time range'
        LOGGER.exception(msg)
        ds.close()
        raise Exception(msg)
    return start, end
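# Example use of get_timerange with a hypothetical pair of files; the returned
# strings are formatted YYYYMMDD.
start, end = get_timerange(['/data/tas_2001.nc', '/data/tas_2002.nc'])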
def _read_xcoord(self):
    """ Read longitude data in decimal degrees from netcdf file(s). """
    nc = MFDataset(self.f)
    ncvar = nc.variables[self.xcoord]
    if ncvar.ndim == 2:
        self.x = ncvar[self.j1:self.j2+1, self.i1:self.i2+1]
    else:
        # Dummy j-index for averaging
        self.x = ncvar[self.i1:self.i2+1][np.newaxis]

    # Set range -180 to 180
    if self.x.max() > 180:
        self.x[self.x > 180] = self.x[self.x > 180] - 360
    nc.close()
def latlon_plot(args, ncfile, grd, variable):
    nc = MFDataset(ncfile)
    time = nc.variables['time'][:] / 365.
    tm = len(time)
    for var in nc.variables:
        if var == variable:
            print("### About to plot variable {} ### \n".format(var))
            if var == 'SSH':
                clim = [-2, 2]
            elif var == 'SSS':
                clim = [30.75, 38.0]
            elif var == 'SST':
                clim = [-1.5, 31]
            elif var == 'KPP_OBLdepth':
                clim = [0, 500]
            elif var == 'MEKE':
                clim = [0, 0.3]
            elif var == 'MLD_003':
                clim = [0, 2000]

            for t in range(tm):
                filename = str('PNG/%s_%05d.png' % (var, t))
                if os.path.isfile(filename):
                    print("File {} already exists! Moving to the next one...\n".format(filename))
                else:
                    print("time index {} of {}".format(t, tm))
                    data = nc.variables[var][t, :]
                    units = nc.variables[var].units
                    # TODO: convert days to date
                    xyplot(data, grd.geolon, grd.geolat, area=grd.Ah,
                           suptitle=case_name,
                           title=r'%s, [%s] - Year: %5.1f' % (var, units, time[t]),
                           extend='both', clim=clim, save=filename)
    nc.close()
    return
def get_timerange(resource):
    """
    returns from/to timestamp of given netcdf file(s).

    :param resource: list of path(s) to netCDF file(s)
    :returns netcdf.datetime.datetime: start, end
    """
    start = end = None
    if type(resource) != list:
        resource = [resource]
    print(resource)

    try:
        if len(resource) > 1:
            ds = MFDataset(resource)
        else:
            ds = Dataset(resource[0])
        time = ds.variables['time']

        if hasattr(time, 'units') and hasattr(time, 'calendar'):
            s = num2date(time[0], time.units, time.calendar)
            e = num2date(time[-1], time.units, time.calendar)
        elif hasattr(time, 'units'):
            s = num2date(time[0], time.units)
            e = num2date(time[-1], time.units)
        else:
            s = num2date(time[0])
            e = num2date(time[-1])

        # TODO: include frequency
        start = '%s%s%s' % (s.year, str(s.month).zfill(2), str(s.day).zfill(2))
        end = '%s%s%s' % (e.year, str(e.month).zfill(2), str(e.day).zfill(2))
        ds.close()
    except Exception as e:
        msg = 'failed to get time range: %s ' % e
        logger.exception(msg)
        raise Exception(msg)
    return start, end
def get_time(resource, format=None):
    """
    returns all timestamps of given netcdf file as datetime list.

    :param resource: NetCDF file(s)
    :param format: if a format is provided (e.g. format='%Y%d%m'),
                   values will be converted to string
    :return: list of timesteps
    """
    if type(resource) != list:
        resource = [resource]

    try:
        if len(resource) > 1:
            ds = MFDataset(resource)
        else:
            ds = Dataset(resource[0])
        time = ds.variables['time']
    except Exception:
        msg = 'failed to get time'
        LOGGER.exception(msg)
        raise Exception(msg)

    try:
        if hasattr(time, 'units') and hasattr(time, 'calendar'):
            timestamps = num2date(time[:], time.units, time.calendar)
        elif hasattr(time, 'units'):
            timestamps = num2date(time[:], time.units)
        else:
            timestamps = num2date(time[:])
        ds.close()
        try:
            if format is not None:
                timestamps = [t.strftime(format=format) for t in timestamps]
        except Exception:
            msg = 'failed to convert times to string'
            print(msg)
            LOGGER.debug(msg)
    except Exception:
        msg = 'failed to convert time'
        LOGGER.exception(msg)
        raise Exception(msg)
    return timestamps
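# Example use of get_time, converting the timesteps to strings via the
# optional format argument (paths are placeholders).
steps = get_time(['/data/pr_2001.nc', '/data/pr_2002.nc'], format='%Y%m%d')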
def runTest(self):
    """testing multi-file dataset access"""
    f = MFDataset(self.files, check=True)
    assert f.history == 'created today'
    assert_array_equal(np.arange(0, nx), f.variables['x'][:])
    varin = f.variables['data']
    datin = varin[:]
    assert_array_equal(datin.mask, data.mask)
    varin.set_auto_maskandscale(False)
    data2 = data.filled()
    assert varin.long_name == 'phony data'
    assert len(varin) == nx
    assert varin.shape == (nx, ydim, zdim)
    assert varin.dimensions == ('x', 'y', 'z')
    assert_array_equal(varin[4:-4:4, 3:5, 2:8], data2[4:-4:4, 3:5, 2:8])
    assert varin[0, 0, 0] == data2[0, 0, 0]
    assert_array_equal(varin[:], data2)
    assert getattr(varin, 'nonexistantatt', None) is None
    f.close()
def getCruData(NCFILE, month, dimLon, dimLat):
    from netCDF4 import MFDataset
    import numpy as np

    # account for python 0-indexing
    month = month - 1

    # open netCDF file
    nc = MFDataset(NCFILE)

    # map lat/lon extents to the CRU netCDF indexing scheme
    lonStart = np.where(nc.variables['lon'][:] == dimLon[0])[0][0]
    lonEnd = np.where(nc.variables['lon'][:] == dimLon[1])[0][0]
    lonDim = lonEnd - lonStart
    latStart = np.where(nc.variables['lat'][:] == dimLat[0])[0][0]
    latEnd = np.where(nc.variables['lat'][:] == dimLat[1])[0][0]
    latDim = latEnd - latStart

    # number of available years for every month (integer division)
    yearsAvailable = len(nc.dimensions['time']) // 12

    # predefine month array, e.g. (100, 81, 161)
    monthData = np.empty((yearsAvailable, latDim, lonDim))
    monthData[:] = np.nan

    for i in range(0, yearsAvailable):
        # read data from file for the given month
        monthData[i, :, :] = nc.variables['tmp'][month, latStart:latEnd, lonStart:lonEnd]
        month = month + 12

    # set NA values
    monthData[monthData > 100] = np.nan

    # get vectors of lat/lon data
    longitude = nc.variables['lon'][lonStart:lonEnd]
    latitude = nc.variables['lat'][latStart:latEnd]
    nc.close()
    return monthData, longitude, latitude
def get_all_data(files, vname, options):
    """get_all_data loads all temperature data from a netcdf file.

    Returns
        temp - data in (time, x, y) coordinates.
        lats - latitudes
    """
    if any([(wildcard in files) for wildcard in ['*', '?', '[']]):
        tempnc = MFDataset(files, 'r')
    else:
        tempnc = Dataset(files, 'r')

    temp = tempnc.variables[vname][:]
    if len(temp.shape) == 4:
        temp = temp.squeeze()

    # Test for increasing latitude and flip if decreasing
    latnames = ('lat', 'lats', 'latitude', 'latitudes')
    latkey = [vrbl in latnames for vrbl in tempnc.variables.keys()].index(True)
    latvname = list(tempnc.variables.keys())[latkey]
    lats = tempnc.variables[latvname][:]
    if (lats[-1] - lats[0]) < 0:
        lats = np.flipud(lats)

    # Convert Kelvin to degrees Celsius if needed
    if tempnc.variables[vname].units == 'K':
        temp -= 273.15
    tempnc.close()
    return temp, lats
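# A usage sketch for get_all_data; the wildcard in the (placeholder) path
# triggers the MFDataset branch, and the variable name is illustrative.
temp, lats = get_all_data('/data/tas_*.nc', 'tas', options=None)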
def runTest(self):
    """testing multi-file dataset access"""
    f = MFDataset(self.files, check=True)
    f.set_auto_maskandscale(True)  # issue570
    f.set_always_mask(False)
    assert f.history == 'created today'
    assert_array_equal(np.arange(0, nx), f.variables['x'][:])
    varin = f.variables['data']
    datin = varin[:]
    assert_array_equal(datin.mask, data.mask)
    varin.set_auto_maskandscale(False)
    data2 = data.filled()
    assert varin.long_name == 'phony data'
    assert len(varin) == nx
    assert varin.shape == (nx, ydim, zdim)
    assert varin.dimensions == ('x', 'y', 'z')
    assert_array_equal(varin[4:-4:4, 3:5, 2:8], data2[4:-4:4, 3:5, 2:8])
    assert varin[0, 0, 0] == data2[0, 0, 0]
    assert_array_equal(varin[:], data2)
    assert getattr(varin, 'nonexistantatt', None) is None
    f.close()
    # test master_file kwarg (issue #835).
    f = MFDataset(self.files, master_file=self.files[-1], check=True)
    assert_array_equal(np.arange(0, nx), f.variables['x'][:])
    varin = f.variables['data']
    assert_array_equal(varin[4:-4:4, 3:5, 2:8], data2[4:-4:4, 3:5, 2:8])
    f.close()
    # testing multi-file get_variables_by_attributes
    f = MFDataset(self.files, check=True)
    assert f.get_variables_by_attributes(axis='T') == []
    assert f.get_variables_by_attributes(units='zlotys')[0] == f['x']
    f.close()
def get_coordinates(resource, variable=None, unrotate=False):
    """
    reads out the coordinates of a variable

    :param resource: netCDF resource file
    :param variable: variable name
    :param unrotate: If True the coordinates will be returned for the unrotated pole
    :returns list, list: latitudes, longitudes
    """
    if type(resource) != list:
        resource = [resource]

    if variable is None:
        variable = get_variable(resource)

    if unrotate is False:
        try:
            if len(resource) > 1:
                ds = MFDataset(resource)
            else:
                ds = Dataset(resource[0])

            var = ds.variables[variable]
            dims = list(var.dimensions)
            if 'time' in dims:
                dims.remove('time')
            # TODO: find the positions of lat and lon in the list instead of
            # assuming dims[0] is latitude and dims[1] is longitude
            lats = ds.variables[dims[0]][:]
            lons = ds.variables[dims[1]][:]
            ds.close()
            LOGGER.info('got coordinates without pole rotation')
        except Exception:
            msg = 'failed to extract coordinates'
            LOGGER.exception(msg)
    else:
        lats, lons = unrotate_pole(resource)
        LOGGER.info('got coordinates with pole rotation')
    return lats, lons
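# Example use of get_coordinates on a (hypothetical) rotated-pole file; with
# unrotate=True the coordinates are computed via unrotate_pole().
lats, lons = get_coordinates('/data/tas_EUR-11_rcp45.nc', unrotate=True)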
def read_clim_data(var):
    """Read one of the climate forcing variables ('hurs', 'tas', 'pr', 'ps'
    or 'rsds') from the files in raw_data; returns the data array and its
    missing value. The per-variable branches all did the same thing, so they
    are collapsed into one generic read."""
    if var in ('hurs', 'tas', 'pr', 'ps', 'rsds'):
        ds = MFDataset(os.path.join(raw_data, var + "_*.nc4"))
        dt = ds.variables[var][:]
        no_data = ds.variables[var].missing_value
        ds.close()
        return dt, no_data
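# Example use of read_clim_data: read relative humidity and mask the fill
# value (np is assumed to be imported as in the surrounding module).
hurs, no_data = read_clim_data('hurs')
hurs = np.ma.masked_equal(hurs, no_data)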
month2 = 6
month3 = 7

# load ozone deposition velocity
slp_files = MFDataset('/data5/oeclifton/c48L48_am3p12_rcp85_' + year + '_am3dd' +
                      extension + '/pp/' + section + '/ts/monthly/1yr/' +
                      section + '.201*.O3_tot_con.nc')
# get the i-th tracer from the netcdf file
slp1 = np.array(slp_files.variables['O3_tot_con'][:])[:, :, :]
slp1 = np.squeeze(slp1[:, :, :])
# now shape is (nyears*12, 90, 144); reshape to (nyears, 12, 90, 144)
slp2 = np.squeeze(slp1.reshape(-1, nyears, 12, 90, 144))
slp = np.squeeze(slp2.mean(axis=0))  # average across years
lon = slp_files.variables['lon'][:]  # grab model lat and lons
lat = slp_files.variables['lat'][:]  # need the last part to actually get the data
slp_files.close()

# load effective stomatal conductance
slp_files = MFDataset('/data5/oeclifton/c48L48_am3p12_rcp85_' + year + '_am3dd' +
                      extension + '/pp/' + section + '/ts/monthly/1yr/' +
                      section + '.201*.O3_econ_stom.nc')
# get the i-th tracer from the netcdf file
slp1 = np.array(slp_files.variables['O3_econ_stom'][:])[:, :, :]
slp1 = np.squeeze(slp1[:, :, :])
# now shape is (nyears*12, 90, 144); reshape to (nyears, 12, 90, 144)
slp2 = np.squeeze(slp1.reshape(-1, nyears, 12, 90, 144))
slp2 = np.squeeze(slp2.mean(axis=0))  # average across years
slp_files.close()

# calculate stomatal fraction
slp = np.divide(slp2, slp)
xactive = MFDataset(
    '/data5/oeclifton/c48L48_am3p12_rcp85_2010_am3dd_newest_final/pp/tracer_level/ts/monthly/1yr/tracer_level.201*12.O3.nc')
static = MFDataset(
    '/data5/oeclifton/c48L48_am3p12_rcp85_2010_am3dd' + extension +
    '/pp/tracer_level/ts/monthly/1yr/tracer_level.*12.O3.nc')

# load all years for xactive
mmvaluesx = np.array(xactive.variables['O3'][:])[:, 47, :, :]  # get the i-th tracer from the netcdf file
# now shape is (nyears*12, 90, 144); reshape to (nyears, 12, 90, 144)
mmvaluesx = np.squeeze(mmvaluesx.reshape(-1, 10, 12, 90, 144))
mmvaluesx = np.squeeze(mmvaluesx.mean(axis=0))  # average across years
mmvaluesx = np.divide((mmvaluesx[month1, :, :] + mmvaluesx[month2, :, :] +
                       mmvaluesx[month3, :, :]), 3.0)
mmvaluesx = np.squeeze(mmvaluesx) * 1e9  # get rid of singleton dimension
xactive.close()

# load all years for static
mmvaluess = np.array(static.variables['O3'][:])[:, 47, :, :]  # get the i-th tracer from the netcdf file
# now shape is (nyears*12, 90, 144); reshape to (nyears, 12, 90, 144)
mmvaluess = np.squeeze(mmvaluess.reshape(-1, nyears, 12, 90, 144))
mmvaluess = np.squeeze(mmvaluess.mean(axis=0))  # average across years
mmvaluess = np.divide((mmvaluess[month1, :, :] + mmvaluess[month2, :, :] +
                       mmvaluess[month3, :, :]), 3.0)
mmvaluess = np.squeeze(mmvaluess) * 1e9  # get rid of singleton dimension
lon = static.variables['lon'][:]  # grab model lat and lons
lat = static.variables['lat'][:]  # need the last part to actually get the data
static.close()

# sample model at observational sites
def get_time(resource):
    """
    returns all timestamps of given netcdf file as datetime list.

    :param resource: NetCDF file(s)
    :return: list of timesteps
    """
    # :param format: if a format is provided (e.g. format='%Y%d%m'),
    # values will be converted to string
    if type(resource) != list:
        resource = [resource]

    try:
        if len(resource) > 1:
            ds = MFDataset(resource)
        else:
            ds = Dataset(resource[0])
        time = ds.variables['time']
    except Exception:
        msg = 'failed to get time'
        LOGGER.exception(msg)
        raise Exception(msg)

    try:
        if hasattr(time, 'units') and hasattr(time, 'calendar'):
            timestamps = num2date(time[:], time.units, time.calendar)
        elif hasattr(time, 'units'):
            timestamps = num2date(time[:], time.units)
        else:
            timestamps = num2date(time[:])
        ds.close()
        try:
            ts = [dt.strptime(str(i), '%Y-%m-%d %H:%M:%S') for i in timestamps]
            # TODO: derive the date format from the file frequency
            # (get_frequency in flyingpigeon.metadata): parsing with a fixed
            # format fails for e.g. daily or monthly files
            # (ValueError: unconverted data remains: 12:00:00).
        except Exception as e:
            msg = 'failed to convert times to string: {}'.format(e)
            LOGGER.exception(msg)
    except Exception as e:
        msg = 'failed to convert time: {}'.format(e)
        LOGGER.exception(msg)
    return ts
def test_get_by_mfdataset(self):
    """testing multi-file get_variables_by_attributes."""
    f = MFDataset(self.files, check=True)
    assert f.get_variables_by_attributes(axis='T') == []
    assert f.get_variables_by_attributes(units='zlotys')[0] == f['x']
    f.close()
for itime in range(1):
    # Compute vertical profiles of temperature and salinity
    tmp = temp[itime, :, :, :]
    contmp = salt[itime, :, :, :]
    for iz in range(nz):
        ztemp[itime, iz] = ma.average(tmp[iz, :, :], weights=area)
        zsalt[itime, iz] = ma.average(contmp[iz, :, :], weights=area)

# Transpose for compatibility with contour plots
ztemp = ztemp.transpose()
zsalt = zsalt.transpose()

# Close files
fstatic.close()
ftemp.close()
fsalt.close()

# -----------------------------------------------------------------------------
# Create plot

# Specify plot positions in points: [left bottom right top]
page = [0.0, 0.0, 612.0, 792.0]  # corresponding to papertype='letter'
plot1a = [89.0, 497.0, 480.0, 670.0]
plot1b = [89.0, 324.0, 480.0, 497.0]
cbar = [506.0, 324.0, 531.0, 670.0]
plot2 = [89.0, 99.0, 480.0, 272.0]
plot = [89.0, 99.0, 480.0, 670.0]

#plt.rcParams['legend.fontsize'] = 10
plt.rcParams['figure.dpi'] = 72.0
def ncread(file, vars=None, dims=False, noisy=False, atts=False, datetimes=False):
    """
    Read in the FVCOM results file and spit out numpy arrays for each of the
    variables specified in the vars list.

    Optionally specify a dict with keys whose names match the dimension names
    in the netCDF file and whose values are strings specifying alternative
    ranges or lists of indices. For example, to extract the first hundred time
    steps, supply dims as:

        dims = {'time': '0:100'}

    To extract the first, 400th and 10,000th values of any array with nodes:

        dims = {'node': '[0, 3999, 9999]'}

    Any dimension not given in dims will be extracted in full.

    Specify atts=True to extract the variable attributes. Set datetimes=True
    to convert the FVCOM Modified Julian Day values to python datetime
    objects.

    Parameters
    ----------
    file : str, list
        If a string, the full path to an FVCOM netCDF output file. If a list,
        a series of files to be loaded. Data will be concatenated into a
        single dict.
    vars : list, optional
        List of variable names to be extracted. If omitted, all variables are
        returned.
    dims : dict, optional
        Dict whose keys are dimensions and whose values are a string of either
        a range (e.g. {'time': '0:100'}) or a list of individual indices
        (e.g. {'time': '[0, 1, 80, 100]'}). Slicing is supported (::5 for
        every fifth value).
    noisy : bool, optional
        Set to True to enable verbose output.
    atts : bool, optional
        Set to True to enable output of the attributes (defaults to False).
    datetimes : bool, optional
        Set to True to convert FVCOM Modified Julian Days to Python datetime
        objects (creates a new `datetime' key in the output dict). Only
        applies if `vars' includes either the `Times' or `time' variables.
        Note: if FVCOM has been run with single precision output, then the
        conversion of the `time' values to a datetime object suffers rounding
        errors. It's best to either run FVCOM in double precision or specify
        only the `Times' data in the `vars' list.

    Returns
    -------
    FVCOM : dict
        Dict of data extracted from the netCDF file. Keys are those given in
        vars and the data are stored as ndarrays. If `datetimes' is True, then
        this also includes a `datetime' key in which is the FVCOM Modified
        Julian Day time series converted to Python datetime objects.
    attributes : dict, optional
        If atts=True, returns the attributes as a dict for each variable in
        vars. The key `dims' contains the array dimensions (each variable
        contains the names of its dimensions) as well as the shape of the
        dimensions defined in the netCDF file. The key `global' contains the
        global attributes.

    See Also
    --------
    read_probes : read in FVCOM ASCII probes output files.

    """

    # Set to True when we've converted from Modified Julian Day so we don't
    # end up doing the conversion twice, once for `Times' and again for
    # `time' if both variables have been requested in `vars'.
    done_datetimes = False
    # Check whether we'll be able to fulfill the datetime request.
    if datetimes and vars and not list(set(vars) & set(('Times', 'time'))):
        raise ValueError("Conversion from Modified Julian Day to python "
                         "datetimes has been requested but no time variable "
                         "(`Times' or `time') has been requested in vars.")

    # If we have a list, assume it's lots of files and load them all.
    if isinstance(file, list):
        try:
            try:
                rootgrp = MFDataset(file, 'r')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))
        except Exception:
            # Try aggregating along a 'time' dimension (for POLCOMS, for
            # example).
            try:
                rootgrp = MFDataset(file, 'r', aggdim='time')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))
    else:
        rootgrp = Dataset(file, 'r')

    # Create a dict of the dimension names and their current sizes.
    read_dims = {}
    for key, var in list(rootgrp.dimensions.items()):
        # Make the dimensions ranges so we can use them to extract all the
        # values.
        read_dims[key] = '0:' + str(len(var))

    # Compare the dimensions in the netCDF file with those provided. If we've
    # been given a dict of dimensions which differs from those in the netCDF
    # file, then use those.
    if dims:
        commonKeys = set(read_dims).intersection(list(dims.keys()))
        for k in commonKeys:
            read_dims[k] = dims[k]

    if noisy:
        print("File format: {}".format(rootgrp.file_format))

    if not vars:
        vars = iter(list(rootgrp.variables.keys()))

    FVCOM = {}

    # Save the dimensions in the attributes dict.
    if atts:
        attributes = {}
        attributes['dims'] = read_dims
        attributes['global'] = {}
        for g in rootgrp.ncattrs():
            attributes['global'][g] = getattr(rootgrp, g)

    for key, var in list(rootgrp.variables.items()):
        if noisy:
            print('Found ' + key, end=' ')
            sys.stdout.flush()

        if key in vars:
            vDims = rootgrp.variables[key].dimensions
            toExtract = [read_dims[d] for d in vDims]

            # If we have no dimensions, we must have only a single value, in
            # which case set the dimensions to empty and append the function
            # to extract the value.
            if not toExtract:
                toExtract = '.getValue()'

            # Thought I'd finally figured out how to replace the eval
            # approach, but I still can't get past the indexing needed to be
            # able to subset the data.
            # FVCOM[key] = rootgrp.variables.get(key)[0:-1]
            # I know, I know, eval() is evil.
            getData = 'rootgrp.variables[\'{}\']{}'.format(key, str(toExtract).replace('\'', ''))
            FVCOM[key] = eval(getData)

            # Add the units and dimensions for this variable to the list of
            # attributes.
            if atts:
                attributes[key] = {}
                try:
                    attributes[key]['units'] = rootgrp.variables[key].units
                except AttributeError:
                    pass
                try:
                    attributes[key]['dims'] = rootgrp.variables[key].dimensions
                except AttributeError:
                    pass

            if datetimes and key in ('Times', 'time') and not done_datetimes:
                # Convert the time data to datetime objects. How we do this
                # depends on which we hit first - `Times' or `time'. For the
                # former, we need to parse the strings, for the latter we can
                # leverage num2date from the netCDF4 module and use the time
                # units attribute.
                if key == 'Times':
                    try:
                        FVCOM['datetime'] = [datetime.strptime(''.join(i), '%Y-%m-%dT%H:%M:%S.%f')
                                             for i in FVCOM[key]]
                    except ValueError:
                        # Try a different format before bailing out.
                        FVCOM['datetime'] = [datetime.strptime(''.join(i), '%Y/%m/%d %H:%M:%S.%f')
                                             for i in FVCOM[key]]
                    done_datetimes = True
                elif key == 'time':
                    FVCOM['datetime'] = num2date(FVCOM[key], rootgrp.variables[key].units)
                    done_datetimes = True

            if noisy:
                if len(str(toExtract)) < 60:
                    print('(extracted {})'.format(str(toExtract).replace('\'', '')))
                else:
                    print('(extracted given indices)')
        elif noisy:
            print()

    # Close the open file.
    rootgrp.close()

    if atts:
        return FVCOM, attributes
    else:
        return FVCOM
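# A usage sketch matching the docstring of ncread above: extract the first
# hundred time steps of `zeta' with attributes and datetime conversion (the
# file name is a placeholder).
FVCOM, attributes = ncread('casename_0001.nc', vars=['zeta', 'time', 'Times'],
                           dims={'time': '0:100'}, atts=True, datetimes=True)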
def readFVCOM(file, varList=None, clipDims=False, noisy=False, atts=False):
    """
    Read in the FVCOM results file and spit out numpy arrays for each of the
    variables specified in the varList list.

    Optionally specify a dict with keys whose names match the dimension names
    in the NetCDF file and whose values are strings specifying alternative
    ranges or lists of indices. For example, to extract the first hundred time
    steps, supply clipDims as:

        clipDims = {'time': '0:100'}

    To extract the first, 400th and 10,000th values of any array with nodes:

        clipDims = {'node': '[0, 3999, 9999]'}

    Any dimension not given in clipDims will be extracted in full.

    Specify atts=True to extract the variable attributes.

    Parameters
    ----------
    file : str, list
        If a string, the full path to an FVCOM NetCDF output file. If a list,
        a series of files to be loaded. Data will be concatenated into a
        single dict.
    varList : list, optional
        List of variable names to be extracted. If omitted, all variables are
        returned.
    clipDims : dict, optional
        Dict whose keys are dimensions and whose values are a string of either
        a range (e.g. {'time': '0:100'}) or a list of individual indices
        (e.g. {'time': '[0, 1, 80, 100]'}). Slicing is supported (::5 for
        every fifth value) but it is not possible to extract data from the end
        of the array with a negative index (e.g. 0:-4).
    noisy : bool, optional
        Set to True to enable verbose output.
    atts : bool, optional
        Set to True to enable output of the attributes (defaults to False).

    Returns
    -------
    FVCOM : dict
        Dict of data extracted from the NetCDF file. Keys are those given in
        varList and the data are stored as ndarrays.
    attributes : dict, optional
        If atts=True, returns the attributes as a dict for each variable in
        varList. The key 'dims' contains the array dimensions (each variable
        contains the names of its dimensions) as well as the shape of the
        dimensions defined in the NetCDF file. The key 'global' contains the
        global attributes.

    See Also
    --------
    readProbes : read in FVCOM ASCII probes output files.

    """

    # If we have a list, assume it's lots of files and load them all.
    if isinstance(file, list):
        try:
            try:
                rootgrp = MFDataset(file, 'r')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))
        except Exception:
            # Try aggregating along a 'time' dimension (for POLCOMS, for
            # example).
            try:
                rootgrp = MFDataset(file, 'r', aggdim='time')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))
    else:
        rootgrp = Dataset(file, 'r')

    # Create a dict of the dimension names and their current sizes.
    dims = {}
    for key, var in list(rootgrp.dimensions.items()):
        # Make the dimensions ranges so we can use them to extract all the
        # values.
        dims[key] = '0:' + str(len(var))

    # Compare the dimensions in the NetCDF file with those provided. If we've
    # been given a dict of dimensions which differs from those in the NetCDF
    # file, then use those.
    if clipDims:
        commonKeys = set(dims).intersection(list(clipDims.keys()))
        for k in commonKeys:
            dims[k] = clipDims[k]

    if noisy:
        print("File format: {}".format(rootgrp.file_format))

    if not varList:
        varList = iter(list(rootgrp.variables.keys()))

    FVCOM = {}

    # Save the dimensions in the attributes dict.
    if atts:
        attributes = {}
        attributes['dims'] = dims
        attributes['global'] = {}
        for g in rootgrp.ncattrs():
            attributes['global'][g] = getattr(rootgrp, g)

    for key, var in list(rootgrp.variables.items()):
        if noisy:
            print('Found ' + key, end=' ')
            sys.stdout.flush()

        if key in varList:
            vDims = rootgrp.variables[key].dimensions
            toExtract = [dims[d] for d in vDims]

            # If we have no dimensions, we must have only a single value, in
            # which case set the dimensions to empty and append the function
            # to extract the value.
            if not toExtract:
                toExtract = '.getValue()'

            # Thought I'd finally figured out how to replace the eval
            # approach, but I still can't get past the indexing needed to be
            # able to subset the data.
            # FVCOM[key] = rootgrp.variables.get(key)[0:-1]
            # I know, I know, eval() is evil.
            getData = 'rootgrp.variables[\'{}\']{}'.format(key, str(toExtract).replace('\'', ''))
            FVCOM[key] = eval(getData)

            # Add the units and dimensions for this variable to the list of
            # attributes.
            if atts:
                attributes[key] = {}
                try:
                    attributes[key]['units'] = rootgrp.variables[key].units
                except AttributeError:
                    pass
                try:
                    attributes[key]['dims'] = rootgrp.variables[key].dimensions
                except AttributeError:
                    pass

            if noisy:
                if len(str(toExtract)) < 60:
                    print('(extracted {})'.format(str(toExtract).replace('\'', '')))
                else:
                    print('(extracted given indices)')
        elif noisy:
            print()

    # Close the open file.
    rootgrp.close()

    if atts:
        return FVCOM, attributes
    else:
        return FVCOM
def ice_comp_model_to_sat_table(pathToModel, modelYears, modelIteration,
                                boundLat, pathToOSI, param='area',
                                threshold=0.15, coast_exp=False):
    diff_array = numpy.zeros((len(modelYears), 12))
    for (nnum, yyear) in enumerate(modelYears):
        g = Dataset('./grid.cdf')
        dxc = g.variables['dxc'][0, :, :]
        dyc = g.variables['dyc'][0, :, :]
        lat = g.variables['yc'][0, :, :]
        topo = g.variables['topo'][0, :, :]
        dxcXdyc = dxc * dyc

        if coast_exp:
            topo2 = expand_coast(topo)

        area_model = np.zeros((len(modelIteration), 12))

        if modelIteration[0] == 'last':
            gg = glob.glob(pathToModel + '/' + yyear + '/' + 'it*')
            gg.sort()
            lastit = [int(gg[-1].split('/')[-1].split('t')[-1])]
        else:
            lastit = modelIteration

        for (it, iteration) in enumerate(lastit):
            fm = MFDataset(pathToModel + '/' + yyear + '/' + 'it' +
                           str(iteration) + '/fw/*.cdf')
            for mm in range(12):
                if param == 'area':
                    if coast_exp:
                        temp_area = fm.variables['area'][mm, :, :]
                        temp_area = np.ma.masked_array(temp_area, mask=topo2.mask)
                        area_model[it, mm] = calc_area(temp_area, dxcXdyc, lat,
                                                       blat=boundLat,
                                                       threshold=threshold) / 10e11
                    else:
                        area_model[it, mm] = calc_area(fm.variables['area'][mm, :, :],
                                                       dxcXdyc, lat, blat=boundLat,
                                                       threshold=threshold) / 10e11
                elif param == 'extent':
                    if coast_exp:
                        temp_area = fm.variables['area'][mm, :, :]
                        temp_area = np.ma.masked_array(temp_area, mask=topo2.mask)
                        area_model[it, mm] = calc_extent(temp_area, dxcXdyc, lat,
                                                         blat=boundLat,
                                                         threshold=threshold) / 10e11
                    else:
                        area_model[it, mm] = calc_extent(fm.variables['area'][mm, :, :],
                                                         dxcXdyc, lat, blat=boundLat,
                                                         threshold=threshold) / 10e11
            fm.close()

        fsat = MFDataset(pathToOSI + yyear + '??.nc')
        osi_area = []
        for mm in range(12):
            area_temp = fsat.variables['ice'][mm, :, :]
            if coast_exp:
                area_temp = np.ma.masked_array(area_temp, mask=topo2.mask)
            else:
                area_temp = np.ma.masked_array(area_temp, mask=topo.mask)
            area_temp = np.ma.masked_less_equal(area_temp, threshold)
            if param == 'area':
                osi_area.append(calc_area(np.ma.filled(area_temp, 0), dxcXdyc, lat,
                                          blat=boundLat, threshold=threshold) / 10e11)
            elif param == 'extent':
                osi_area.append(calc_extent(np.ma.filled(area_temp, 0), dxcXdyc, lat,
                                            blat=boundLat, threshold=threshold) / 10e11)

        diff_array[nnum, :] = area_model[0, :] - osi_area[:]
    return diff_array
def ice_comp_model_to_sat(pathToModel, modelYear, modelIteration,
                          boundLat, pathToOSI, param='area',
                          threshold=0.15, coast_exp=False):
    '''
    Plot sea ice area from satellite data and several model iterations.
    '''
    g = Dataset('./grid.cdf')
    dxc = g.variables['dxc'][0, :, :]
    dyc = g.variables['dyc'][0, :, :]
    lat = g.variables['yc'][0, :, :]
    topo = g.variables['topo'][0, :, :]
    dxcXdyc = dxc * dyc

    if coast_exp:
        topo2 = expand_coast(topo)

    area_model = np.zeros((len(modelIteration), 12))
    for (it, iteration) in enumerate(modelIteration):
        fm = MFDataset(pathToModel + '/' + modelYear + '/' + 'it' +
                       str(iteration) + '/fw/*.cdf')
        for mm in range(12):
            if param == 'area':
                if coast_exp:
                    temp_area = fm.variables['area'][mm, :, :]
                    temp_area = np.ma.masked_array(temp_area, mask=topo2.mask)
                    area_model[it, mm] = calc_area(temp_area, dxcXdyc, lat,
                                                   blat=boundLat,
                                                   threshold=threshold) / 10e11
                else:
                    area_model[it, mm] = calc_area(fm.variables['area'][mm, :, :],
                                                   dxcXdyc, lat, blat=boundLat,
                                                   threshold=threshold) / 10e11
            elif param == 'extent':
                if coast_exp:
                    temp_area = fm.variables['area'][mm, :, :]
                    temp_area = np.ma.masked_array(temp_area, mask=topo2.mask)
                    area_model[it, mm] = calc_extent(temp_area, dxcXdyc, lat,
                                                     blat=boundLat,
                                                     threshold=threshold) / 10e11
                else:
                    area_model[it, mm] = calc_extent(fm.variables['area'][mm, :, :],
                                                     dxcXdyc, lat, blat=boundLat,
                                                     threshold=threshold) / 10e11
        fm.close()

    fsat = MFDataset(pathToOSI + modelYear + '??.nc')
    osi_area = []
    for mm in range(12):
        area_temp = fsat.variables['ice'][mm, :, :]
        if coast_exp:
            area_temp = np.ma.masked_array(area_temp, mask=topo2.mask)
        else:
            area_temp = np.ma.masked_array(area_temp, mask=topo.mask)
        area_temp = np.ma.masked_less_equal(area_temp, threshold)
        if param == 'area':
            osi_area.append(calc_area(np.ma.filled(area_temp, 0), dxcXdyc, lat,
                                      blat=boundLat, threshold=threshold) / 10e11)
        elif param == 'extent':
            osi_area.append(calc_extent(np.ma.filled(area_temp, 0), dxcXdyc, lat,
                                        blat=boundLat, threshold=threshold) / 10e11)

    dates = pd.date_range(modelYear + '-01', str(int(modelYear) + 1) + '-01', freq='M')
    dd = pd.DataFrame(index=dates)
    dd['Satellite'] = osi_area
    for (it, iteration) in enumerate(modelIteration):
        dd['it' + str(iteration)] = area_model[it, :]
    return dd.plot(figsize=(10, 5))
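# Example call of ice_comp_model_to_sat with coastal expansion enabled; both
# paths are hypothetical, and pathToOSI is the file-name prefix of the
# satellite files (the function appends modelYear + '??.nc').
ax = ice_comp_model_to_sat('/data/model', '2007', [1, 2], 60.0,
                           '/data/satellite/osi_', param='area', coast_exp=True)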
class CRUDataManager:
    def __init__(self,
                 path="/RECH/skynet1_rech3/huziy/cru_data/CRUTS3.1/cru_ts_3_10.1901.2009.tmp.dat.nc",
                 var_name="tmp", lazy=False):
        self.times = None
        self.var_data = None
        self.times_var = None
        self.kdtree = None
        self.times_num = None
        self.lons2d, self.lats2d = None, None

        self.lazy = lazy
        self.var_name = var_name

        try:
            with Dataset(path) as ds:
                self._init_fields(ds)
            # Cannot stay inside the with-block, since the dataset needs to
            # remain open.
            self.nc_dataset = Dataset(path)
        except OSError as oserr:
            with MFDataset(path) as ds:
                self._init_fields(ds)
            # Cannot stay inside the with-block, since the dataset needs to
            # remain open.
            self.nc_dataset = MFDataset(path)

        self.nc_vars = self.nc_dataset.variables

    def close(self):
        self.nc_vars = None
        self.nc_dataset.close()
        del self

    def _init_fields(self, nc_dataset):
        nc_vars = nc_dataset.variables
        lons = nc_vars["lon"][:]
        lats = nc_vars["lat"][:]

        if lons.ndim == 1:
            lats2d, lons2d = np.meshgrid(lats, lons)
        elif lons.ndim == 2:
            lats2d, lons2d = lats, lons
        else:
            raise NotImplementedError("Cannot handle {}-dimensional coordinates".format(lons.ndim))

        self.lons2d, self.lats2d = lons2d, lats2d

        self.times_var = nc_vars["time"]
        self.times_num = nc_vars["time"][:]

        if hasattr(self.times_var, "calendar"):
            self.times = num2date(self.times_num, self.times_var.units, self.times_var.calendar)
        else:
            self.times = num2date(self.times_num, self.times_var.units)

        if not self.lazy:
            self.var_data = nc_vars[self.var_name][:]
            if nc_vars[self.var_name].shape[1:] != self.lons2d.shape:
                print("nc_vars[self.var_name].shape = {}".format(nc_vars[self.var_name].shape))
                self.var_data = np.transpose(self.var_data, axes=[0, 2, 1])

        x_in, y_in, z_in = lat_lon.lon_lat_to_cartesian(self.lons2d.flatten(), self.lats2d.flatten())
        self.kdtree = cKDTree(list(zip(x_in, y_in, z_in)))

    def get_seasonal_means_with_ttest_stats_interp_to(self, lons2d=None, lats2d=None,
                                                      season_to_monthperiod=None,
                                                      start_year=None, end_year=None):
        # TODO: implement
        pass

    def get_seasonal_means_with_ttest_stats(self, season_to_monthperiod=None,
                                            start_year=None, end_year=None):
        """
        Note: the periods of different seasons should not overlap.
        Precip is converted to mm/day before the mean and std calculations.
        :param season_to_monthperiod:
        :param start_year:
        :param end_year:
        :return: dict(season: [mean, std, nobs])
        """
        nt, nx, ny = self.var_data.shape
        panel = pandas.DataFrame(data=self.var_data.reshape(nt, -1), index=self.times)
        panel = panel[(panel.index.year >= start_year) & (panel.index.year <= end_year)]

        # Calculate monthly means; convert precip totals to mm/day
        if self.var_name.lower() in ["pre"]:
            monthly_panel = panel.groupby([panel.index.year, panel.index.month]).sum()
            monthly_panel = monthly_panel / monthly_panel.index.map(lambda ym: calendar.monthrange(*ym)[1])[:, np.newaxis]
        else:
            monthly_panel = panel.groupby([panel.index.year, panel.index.month]).mean()

        print("monthly panel:")
        print(monthly_panel.describe())

        season_to_res = OrderedDict()
        for season, month_period in season_to_monthperiod.items():
            assert isinstance(month_period, MonthPeriod)
            print("{} ------- (months: {})".format(season, month_period.months))

            ym_to_period = month_period.get_year_month_to_period_map(start_year=start_year, end_year=end_year)

            # select data for the months of the season of interest
            monthly_panel_tmp = monthly_panel.select(lambda ym: (ym[1] in month_period.months) and (ym in ym_to_period))

            # weight the monthly means by the number of days in each month
            days_per_month = monthly_panel_tmp.index.map(lambda ym: calendar.monthrange(*ym)[1])
            monthly_panel_tmp = monthly_panel_tmp * days_per_month[:, np.newaxis]

            seasonal_groups = monthly_panel_tmp.groupby(lambda ym: (ym_to_period[ym].start, ym_to_period[ym].end))

            nobs = len(seasonal_groups)

            seasonal_means = []
            days_per_season = []
            for kv, gv in seasonal_groups:
                # calculate the seasonal mean for each year; the end of each period is
                # 1 microsecond before midnight, hence the add(microseconds=1)
                ndays = (Pendulum.instance(kv[1]).add(microseconds=1) - Pendulum.instance(kv[0])).total_days()
                seas_mean = gv.sum(axis=0) / ndays
                seasonal_means.append(seas_mean.values)
                days_per_season.append(ndays)

            seasonal_means = np.array(seasonal_means)
            days_per_season = np.array(days_per_season)

            # climatological mean, weighted by the length of each season
            clim_mean = (seasonal_means * days_per_season[:, np.newaxis]).sum(axis=0) / days_per_season.sum()

            # interannual standard deviation
            clim_std = (((seasonal_means - clim_mean) ** 2 * days_per_season[:, np.newaxis]).sum(axis=0) / days_per_season.sum()) ** 0.5

            # reshape back to the 2d field
            clim_mean = clim_mean.reshape(nx, ny)
            clim_std = clim_std.reshape(nx, ny)

            spatial_mask = (self.var_data[0] > 1e10) | np.isnan(self.var_data[0])
            if hasattr(self.var_data, "mask"):
                spatial_mask = spatial_mask | self.var_data[0].mask

            clim_mean = np.ma.masked_where(spatial_mask, clim_mean)
            clim_std = np.ma.masked_where(spatial_mask, clim_std)

            print(season)
            print("clim_mean.shape={}".format(clim_mean.shape))
            print("clim_std.shape={}".format(clim_std.shape))

            season_to_res[season] = [clim_mean, clim_std, nobs]

        return season_to_res

    def get_seasonal_means(self, season_name_to_months=None, start_year=None, end_year=None):
        if season_name_to_months is None:
            season_name_to_months = OrderedDict([
                ("Winter", (1, 2, 12)),
                ("Spring", list(range(3, 6))),
                ("Summer", list(range(6, 9))),
                ("Fall", list(range(9, 12)))])

        season_name_to_coef = {}
        for sname, months in season_name_to_months.items():
            season_name_to_coef[sname] = 1

            if self.var_name.lower() in ["pre", "precip"]:
                days = sum([calendar.monthrange(y, m)[1] for m in months for y in range(start_year, end_year + 1)])
                season_name_to_coef[sname] = 1.0 / float(days)

        month_to_season = collections.defaultdict(lambda: "no_season")
        for sname, mlist in season_name_to_months.items():
            for m in mlist:
                month_to_season[m] = sname

        if self.var_data is None:
            self.var_data = self.nc_dataset.variables[self.var_name][:]

        if self.var_name.lower() not in ["swe"]:
            # compare only the spatial part of the shape (the original compared the
            # full 3D shape to the 2D lon/lat shape, which is never equal)
            if self.var_data.shape[1:] != self.lons2d.shape:
                self.var_data = np.transpose(self.var_data, axes=[0, 2, 1])

        nt, nx, ny = self.var_data.shape

        # NOTE: pandas.Panel, .select, .ix and pandas.TimeSeries used below are legacy
        # pandas APIs; this module targets the old pandas interface.
        panel = pandas.Panel(data=self.var_data, items=self.times,
                             major_axis=list(range(nx)), minor_axis=list(range(ny)))
        panel = panel.select(lambda d: start_year <= d.year <= end_year)

        if self.var_name in ["pre", "precip"]:
            panel_seasonal = panel.groupby(lambda d: month_to_season[d.month], axis="items").sum()
        else:
            panel_seasonal = panel.groupby(lambda d: month_to_season[d.month], axis="items").mean()

        season_to_mean = OrderedDict()
        for sname, _ in season_name_to_months.items():
            season_to_mean[sname] = panel_seasonal[sname].values * season_name_to_coef[sname]
            if hasattr(self.var_data[0], "mask"):
                season_to_mean[sname] = np.ma.masked_where(self.var_data[0].mask, season_to_mean[sname])

        return season_to_mean

    def get_mean(self, start_year, end_year, months=None):
        """
        Returns the mean for the period [start_year, end_year], over the given months.

        :type months: list
        months = list of month numbers over which the averaging is done
        """
        if months is None:
            months = list(range(1, 13))

        start_date = datetime(start_year, 1, 1)
        end_date = datetime(end_year + 1, 1, 1)

        start_date_num = date2num(start_date, self.times_var.units)
        end_date_num = date2num(end_date, self.times_var.units)

        sel_query = (self.times_num >= start_date_num) & (self.times_num < end_date_num)
        sel_dates = self.times_num[sel_query]
        sel_data = np.transpose(self.nc_vars[self.var_name][sel_query, :, :], axes=[0, 2, 1])

        sel_dates = num2date(sel_dates, self.times_var.units)

        ind_vector = np.where([(x.month in months) for x in sel_dates])[0]
        return np.mean(sel_data[ind_vector, :, :], axis=0)

    def get_daily_climatology_dataframe(self, start_year, end_year, stamp_year=2001):
        """
        Returns a pandas Panel (365, nx, ny) with daily climatological means.
        """
        nt, nx, ny = self.var_data.shape

        data_panel = pandas.Panel(data=self.var_data, items=self.times,
                                  major_axis=list(range(nx)), minor_axis=list(range(ny)))
        data_panel = data_panel.select(
            lambda d: (start_year <= d.year <= end_year) and not (d.day == 29 and d.month == 2))

        data_panel = data_panel.groupby(lambda d: datetime(stamp_year, d.month, d.day), axis="items").mean()
        assert isinstance(data_panel, pandas.Panel)
        data_panel = data_panel.sort_index()
        print(data_panel.values.shape)
        return data_panel

    def get_daily_climatology(self, start_year, end_year, stamp_year=2001):
        """
        Returns a numpy array of shape (365, nx, ny) with daily climatological means.
        """
        # NOTE: the original forwarded **locals() here, which passes `self` twice to
        # the bound method and raises a TypeError.
        return self.get_daily_climatology_dataframe(start_year=start_year, end_year=end_year,
                                                    stamp_year=stamp_year).values

    def interpolate_daily_climatology_to(self, clim_data, lons2d_target=None, lats2d_target=None):
        # expects clim_data to have the shape (365, nx, ny);
        # lons2d_target and lats2d_target: (nx, ny)
        x, y, z = lat_lon.lon_lat_to_cartesian(lons2d_target.flatten(), lats2d_target.flatten())

        nt = clim_data.shape[0]
        data_help = np.reshape(clim_data, (nt, -1))

        dists, inds = self.kdtree.query(list(zip(x, y, z)))

        return data_help[:, inds].reshape((nt,) + lons2d_target.shape)

    def get_thawing_index_from_climatology(self, daily_temps_clim, t0=0.0):
        nt, nx, ny = daily_temps_clim.shape
        result = np.zeros((nx, ny))

        for t in range(nt):
            tfield = daily_temps_clim[t, :, :]
            result += tfield * np.array(tfield >= t0).astype(int)

        return result
    def create_monthly_means_file(self, start_year, end_year):
        fname = "{0}_monthly_means.nc".format(self.var_name)
        year_range = list(range(start_year, end_year + 1))

        dsm = Dataset(fname, "w", format="NETCDF3_CLASSIC")
        dsm.createDimension('year', len(year_range))
        dsm.createDimension("month", 12)
        dsm.createDimension('lon', self.lons2d.shape[0])
        dsm.createDimension('lat', self.lons2d.shape[1])

        lonVariable = dsm.createVariable('longitude', 'f4', ('lon', 'lat'))
        latVariable = dsm.createVariable('latitude', 'f4', ('lon', 'lat'))
        yearVariable = dsm.createVariable("year", "i4", ("year",))

        variable = dsm.createVariable(self.var_name, "f4", ('year', "month", 'lon', 'lat'))
        for i, the_year in enumerate(year_range):
            print(the_year)
            for j, the_month in enumerate(range(1, 13)):
                variable[i, j, :, :] = self.get_mean(the_year, the_year, months=[the_month])

        lonVariable[:] = self.lons2d
        latVariable[:] = self.lats2d
        yearVariable[:] = np.array(year_range)

        dsm.close()

    def _interp_and_sum(self, data1d, mults_1d, x, y, z, nneighbors=1):
        data_interp = self.interpolate_data_to_cartesian(data1d, x, y, z, nneighbours=nneighbors)
        return np.sum(mults_1d * data_interp)

    def get_monthly_timeseries_using_mask(self, mask, lons2d_target, lats2d_target, multipliers_2d,
                                          start_date=None, end_date=None):
        """
        multipliers_2d is used to multiply the values when aggregating into a single
        timeseries: sum(mi * vi) - in space
        """
        bool_vect = np.array([start_date <= t <= end_date for t in self.times])

        new_times = list(filter(lambda t: start_date <= t <= end_date, self.times))
        new_vals = self.var_data[bool_vect, :, :]
        x_out, y_out, z_out = lat_lon.lon_lat_to_cartesian(lons2d_target.flatten(), lats2d_target.flatten())

        print(len(new_times))

        flat_mask = mask.flatten()
        x_out = x_out[flat_mask == 1]
        y_out = y_out[flat_mask == 1]
        z_out = z_out[flat_mask == 1]
        mults = multipliers_2d.flatten()[flat_mask == 1]

        data_interp = [self._interp_and_sum(new_vals[t, :, :].flatten(), mults, x_out, y_out, z_out)
                       for t in range(len(new_times))]

        print("Interpolated data", data_interp)
        print("Interpolated all")
        return TimeSeries(time=new_times, data=data_interp).get_ts_of_monthly_means()

    def get_mean_upstream_timeseries_monthly(self, model_point, data_manager):
        """
        Get mean swe upstream of the model_point; the year range for selection is
        given by model_point.continuous_data_years.
        """
""" assert isinstance(model_point, ModelPoint) assert isinstance(data_manager, Crcm5ModelDataManager) # create the mask of points over which the averaging is going to be done lons_targ = data_manager.lons2D[model_point.flow_in_mask == 1] lats_targ = data_manager.lats2D[model_point.flow_in_mask == 1] xt, yt, zt = lat_lon.lon_lat_to_cartesian(lons_targ, lats_targ) nxs, nys = self.lons2d.shape i_source, j_source = list(range(nxs)), list(range(nys)) j_source, i_source = np.meshgrid(j_source, i_source) i_source = i_source.flatten() j_source = j_source.flatten() dists, inds = self.kdtree.query(list(zip(xt, yt, zt)), k=1) ixsel = i_source[inds] jysel = j_source[inds] print("Calculating spatial mean") #calculate spatial mean #calculate spatial mean if self.lazy: theVar = self.nc_vars[self.var_name] data_series = [] for i, j in zip(ixsel, jysel): data_series.append(theVar[:, j, i]) data_series = np.mean(data_series, axis=0) else: data_series = np.mean(self.var_data[:, ixsel, jysel], axis=1) print("Finished calculating spatial mean") #calculate daily climatology df = pandas.DataFrame(data=data_series, index=self.times, columns=["values"]) df["year"] = df.index.map(lambda d: d.year) df = df[df["year"].isin(model_point.continuous_data_years)] monthly_clim = df.groupby(by=lambda d: d.month).mean() month_dates = [datetime(1985, m, 15) for m in range(1, 13)] vals = [monthly_clim.ix[d.month, "values"] for d in month_dates] return pandas.TimeSeries(data=vals, index=month_dates) def get_mean_upstream_timeseries_daily(self, model_point, dm, stamp_dates=None): """ get mean swe upstream of the model_point """ assert isinstance(model_point, ModelPoint) assert isinstance(dm, Crcm5ModelDataManager) # create the mask of points over which the averaging is going to be done lons_targ = dm.lons2D[model_point.flow_in_mask == 1] lats_targ = dm.lats2D[model_point.flow_in_mask == 1] xt, yt, zt = lat_lon.lon_lat_to_cartesian(lons_targ, lats_targ) nxs, nys = self.lons2d.shape i_source, j_source = list(range(nxs)), list(range(nys)) j_source, i_source = np.meshgrid(j_source, i_source) i_source = i_source.flatten() j_source = j_source.flatten() dists, inds = self.kdtree.query(list(zip(xt, yt, zt)), k=1) ixsel = i_source[inds] jysel = j_source[inds] df_empty = pandas.DataFrame(index=self.times) df_empty["year"] = df_empty.index.map(lambda d: d.year) # calculate spatial mean sel_date_indices = np.where(df_empty["year"].isin(model_point.continuous_data_years))[0] if self.lazy: the_var = self.nc_vars[self.var_name] data_series = np.mean([the_var[sel_date_indices, j, i] for i, j in zip(ixsel, jysel)], axis=0) else: data_series = np.mean(self.var_data[:, ixsel, jysel], axis=1) # calculate daily climatology df = pandas.DataFrame(data=data_series, index=self.times, columns=["values"]) df["year"] = df.index.map(lambda d: d.year) df = df[df["year"].isin(model_point.continuous_data_years)] daily_clim = df.groupby(by=lambda d: (d.month, d.day)).mean() vals = [daily_clim.ix[(d.month, d.day), "values"] for d in stamp_dates] return pandas.TimeSeries(data=vals, index=stamp_dates) def get_daily_timeseries_using_mask(self, mask, lons2d_target, lats2d_target, multipliers_2d, start_date=None, end_date=None): """ multipliers_2d used to multiply the values when aggregating into a single timeseries sum(mi * vi) - in space """ bool_vect = np.array([start_date <= t <= end_date for t in self.times]) new_times = list(filter(lambda t: start_date <= t <= end_date, self.times)) new_vals = self.var_data[bool_vect, :, :] x_out, y_out, z_out = 
        x_out, y_out, z_out = lat_lon.lon_lat_to_cartesian(lons2d_target.flatten(), lats2d_target.flatten())

        print(len(new_times))

        flat_mask = mask.flatten()
        x_out = x_out[flat_mask == 1]
        y_out = y_out[flat_mask == 1]
        z_out = z_out[flat_mask == 1]
        mults = multipliers_2d.flatten()[flat_mask == 1]

        # NOTE: the original passed flat_mask as the multipliers here; mults is the
        # masked multiplier vector actually meant for _interp_and_sum (cf. the
        # monthly variant above)
        data_interp = [self._interp_and_sum(new_vals[t, :, :].flatten(), mults, x_out, y_out, z_out)
                       for t in range(len(new_times))]

        print("Interpolated all")
        return TimeSeries(time=new_times, data=data_interp).get_ts_of_daily_means()

    def interpolate_data_to_cartesian(self, data_in_flat, x, y, z, nneighbours=4):
        """
        data_in_flat, x, y and z are all 1D; len(x) == len(y) == len(z) == len(data_out_flat)
        """
        print("start query")
        dst, ind = self.kdtree.query(list(zip(x, y, z)), k=nneighbours)
        print("end query")

        inverse_square = 1.0 / dst ** 2
        if len(dst.shape) > 1:
            norm = np.sum(inverse_square, axis=1)
            norm = np.array([norm] * dst.shape[1]).transpose()
            coefs = inverse_square / norm
            data_out_flat = np.sum(coefs * data_in_flat[ind], axis=1)
        elif len(dst.shape) == 1:
            data_out_flat = data_in_flat[ind]
        else:
            raise Exception("Could not find neighbor points")
        return data_out_flat

    def interpolate_data_to(self, data_in, lons2d, lats2d, nneighbours=4):
        """
        Interpolates data_in to the grid defined by (lons2d, lats2d), assuming that
        the data_in field is on the initial CRU grid.

        Interpolates using the nneighbours nearest neighbours and inverse-squared-distance
        weights.
        """
        x_out, y_out, z_out = lat_lon.lon_lat_to_cartesian(lons2d.flatten(), lats2d.flatten())
        dst, ind = self.kdtree.query(list(zip(x_out, y_out, z_out)), k=nneighbours)

        data_in_flat = data_in.flatten()

        inverse_square = 1.0 / dst ** 2
        if len(dst.shape) > 1:
            norm = np.sum(inverse_square, axis=1)
            norm = np.array([norm] * dst.shape[1]).transpose()
            coefs = inverse_square / norm
            data_out_flat = np.sum(coefs * data_in_flat[ind], axis=1)
        elif len(dst.shape) == 1:
            data_out_flat = data_in_flat[ind]
        else:
            raise Exception("Could not find neighbor points")

        return np.reshape(data_out_flat, lons2d.shape)
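# A minimal usage sketch for CRUDataManager (the CRU path is the class default and
# the target grid below is a made-up 2D lat/lon mesh, purely for illustration):
#
# manager = CRUDataManager(var_name="tmp")
# clim = manager.get_daily_climatology(start_year=1981, end_year=2009)
# lons_t, lats_t = np.meshgrid(np.linspace(-80, -50, 60), np.linspace(40, 60, 40))
# clim_on_target = manager.interpolate_daily_climatology_to(clim,
#                                                           lons2d_target=lons_t,
#                                                           lats2d_target=lats_t)
# manager.close()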
for itime in range(1):
    # Compute vertical profiles of temperature and salinity
    tmp = temp[itime, :, :, :]
    contmp = salt[itime, :, :, :]
    for iz in range(nz):
        ztemp[itime, iz] = ma.average(tmp[iz, :, :], weights=area)
        zsalt[itime, iz] = ma.average(contmp[iz, :, :], weights=area)

# Transpose for compatibility with contour plots
ztemp = ztemp.transpose()
zsalt = zsalt.transpose()

# Close files
fstatic.close()
ftemp.close()

# -----------------------------------------------------------------------------
# Create plot

# Specify plot positions in points: [left, bottom, right, top]
page   = [  0.0,   0.0, 612.0, 792.0]  # corresponding to papertype='letter'
plot1a = [ 89.0, 497.0, 480.0, 670.0]
plot1b = [ 89.0, 324.0, 480.0, 497.0]
cbar   = [506.0, 324.0, 531.0, 670.0]
plot2  = [ 89.0,  99.0, 480.0, 272.0]
plot   = [ 89.0,  99.0, 480.0, 670.0]

#plt.rcParams['legend.fontsize'] = 10
plt.rcParams['figure.dpi'] = 72.0
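# The rectangles above are given in points on a US-letter page, while matplotlib's
# fig.add_axes expects a normalized [left, bottom, width, height] rectangle. A small
# helper for that conversion (a sketch, assuming the rectangles are meant for add_axes):
def rect_points_to_axes(rect, page_size=(612.0, 792.0)):
    """Convert [left, bottom, right, top] in points to a normalized
    [left, bottom, width, height] rectangle usable with fig.add_axes."""
    left, bottom, right, top = rect
    pw, ph = page_size
    return [left / pw, bottom / ph, (right - left) / pw, (top - bottom) / ph]

# e.g. ax1a = fig.add_axes(rect_points_to_axes(plot1a))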
def checkNc(fn, dict1, overwrite=0, allowOverwrite=1, vb=0):
    '''
    return ok1:
        0  -- good
        1  -- dimension is not a var itself
        2  -- dimension has no units
        3  -- dimension units not recognized
        4  -- ref time is 0000
        5  -- cannot open file
        6  -- make up dim as 'i'
        11 -- 2d dim
        12 -- dimension is not a var itself. same as 1?
    '''
    ok1 = 0
    dict9 = copy.deepcopy(dict1)
    varList = []
    varDict = {}
    check1 = ''
    warning = ''

    if fn.find('*') > -1:
        fn2 = glob.glob(fn)
    else:
        fn2 = [fn, ]

    if 0:  # disabled in the original
        temp2 = os.path.split(fn2[0])
        dict1['filename'] = temp2[1]
        dict1['filepath'] = fn2[0]
        dict1['nFile'] = len(fn2)

    # facets from fn
    # fn3 = fn2[0]
    fn3a = fn.lower()

    pp = '_'
    if fn3a.find('/mnt/') > -1:
        pp = 'staged'
    if fn3a.find('/home/svc/upload') > -1:
        pp = 'uploaded'
    if fn3a.find('http') > -1:
        pp = 'online'
    dict1['source'] = pp

    pp = '_'
    for prov in providers:
        if fn3a.find('cmip5/%s' % prov) > -1:
            pp = providers[prov]
    dict1['provider'] = pp

    pp = '_'
    for mod1 in models2:
        if fn3a.find(models2[mod1]) > -1:
            pp = mod1
    dict1['model'] = pp

    pp = '_'
    for exp1 in experiments2:
        if fn3a.find(experiments2[exp1]) > -1:
            pp = exp1
    dict1['experiment'] = pp

    pp = '_'
    for rr in runs2:
        if fn3a.find(runs2[rr]) > -1:
            pp = rr
    dict1['run'] = pp

    try:
        if len(fn2) > 1:
            nc = MFDataset(fn2)
            nc1 = Dataset(fn2[0])
            nc2 = Dataset(fn2[-1])
        else:
            if overwrite:
                nc = Dataset(fn2[0], 'r+')
            else:
                nc = Dataset(fn2[0])
    except Exception:
        dict1['message'] += "File on server is not found: %s " % (fn)
        dict1['success'] = False
        ok1 = 5
        print('ok1 = %d' % ok1)
        print("cannot open file: %s " % (fn2[0]))
        if len(fn2) > 1:
            print("cannot open file: %s " % (fn2[-1]))
        print(traceback.format_exc())
        return ok1

    # loop_vars
    varListAll = nc.variables.keys()
    varListAll = [str(i) for i in varListAll]

    # gather global attributes
    title2 = ''
    try:
        title2 = nc.title
    except:
        pass

    summary2 = ''
    try:
        summary2 += nc.obs_project
    except:
        pass
    try:
        summary2 += nc.source
    except:
        pass
    try:
        summary2 += nc.history
    except:
        pass

    freq2 = ''
    try:
        if nc.frequency == 'mon':
            freq2 += 'monthly'
    except:
        pass

    # find dims
    str1 = ''
    dimList = []
    for var in varListAll:
        # find_units
        units1 = ''
        d1 = nc.variables[var]
        try:
            units1 = d1.units
        except:
            temp1 = var.find('_bnds')
            if temp1 == -1:
                check1 += var + ': need the units attribute.\n'

        # find_longname
        longName = '_'
        try:
            longName = d1.long_name
        except:
            pass
        try:
            longName = d1.longname
        except:
            pass

        # collect_dims; str() to remove the u'' (unicode) prefix
        dim1 = list(d1.dimensions)
        for i in range(len(dim1)):
            dim1[i] = str(dim1[i])
        if var.find('_bnds') == -1:
            str1 += '%s: %s\n' % (var, str(dim1))
        dimList += list(dim1)

        varDict[var] = {'dim': dim1, 'units': units1, 'longName': longName, }

    str1 += '\nDimension Variables\n'
    dimList = list(set(dimList))

    # keep a dim only if it is a variable itself
    dimList2 = []
    for d in dimList:
        if d in varListAll:
            dimList2.append(d)
    dimList = dimList2

    dimList0 = dimList
    if vb == 1: print('dimList0')
    if vb == 1: print(dimList0)
    dimList = []

    # collect_vars: only those that are not dim vars
    dimList0a = [i.lower() for i in dimList0]
    varList = []
    varListLong = []
    for k in varListAll:
        k1 = k.lower()
        if k not in dimList0:
            if not k1.endswith('_bnds') \
                    and not k1.endswith('err') \
                    and not k1.endswith('nobs') \
                    and not k1.endswith('stddev') \
                    and k1 not in ('month', 'year', 'height', 'plev', 'not_used',
                                   'model_lat', 'model_lon'):
                varList.append(k)
                varListLong.append(varDict[k]['longName'])
    if vb == 1: print('varList:')
    if vb == 1: print(varList)

    # from the varList, collect the real dims
    dimList = []
    for var in varList:
        d1 = nc.variables[var]
        dim1 = list(d1.dimensions)
        for i in range(len(dim1)):
            dim1[i] = str(dim1[i])
        if vb == 1: print('dim1')
        if vb == 1: print(dim1)
        str1 += '%s: %s\n' % (var, str(dim1))
        dimList += list(dim1)

    # this is the list of dims of the real vars
    dimList = list(set(dimList))
    if vb == 1: print('dimList:')
    if vb == 1: print(dimList)

    # check_dimList
    for dimVar in dimList:
        dimWhat = ''
        dimAsI = 0
        try:
            d2 = nc.variables[dimVar]
            if len(fn2) > 1:
                d2a1 = nc1.variables[dimVar]
                d2a2 = nc2.variables[dimVar]
        except:
            dimAsI = 1
            hasUnits = 0
            # ok1 = 1
            print('this dim is not a var: %s' % dimVar)
            print(traceback.format_exc())

        # test if 2d dim
        if not dimAsI:
            # check_var_dim
            for var1 in varList:
                for dimV in varDict[var1]['dim']:
                    if dimV in varListAll:
                        shape0 = nc.variables[dimV].shape
                        if len(shape0) > 1:
                            # ok1 = 11
                            return ok1

            try:
                units1 = str(d2.units)
                if len(fn2) > 1:
                    # xxxx should not do this. should do the dim of the real var.
                    units1a1 = str(d2a1.units)
                    units1a2 = str(d2a2.units)
                hasUnits = 1
                print('units1: %s' % units1)
            except:
                hasUnits = 0
                # ok1 = 2
                print('this var has no units: %s' % dimVar)
                print(traceback.format_exc())
                return ok1

        # if overwrite==0 and allowOverwrite==1:
        #     return checkNc_w(nc, fn, dict9)

        # if_hasUnits
        if hasUnits:
            goodUnits = 1
            try:
                cfUnits = cf1.Units(units1)
            except:
                print(traceback.format_exc())
                goodUnits = 0

            # 'month since' and 0000 are ok with the 360_day calendar
            if not goodUnits:
                goodUnits = 1
                try:
                    cfUnits = cf1.Units(units1, calendar='360_day')
                except:
                    print(traceback.format_exc())
                    goodUnits = 0

            if 0:  # disabled in the original
                if not goodUnits:
                    goodUnits = 1
                    try:
                        cfUnits = cf1.Units(units1, calendar='365_day')
                    except:
                        print(traceback.format_exc())
                        goodUnits = 0

            if not goodUnits:
                # ok1 = 3
                print('ok1=3')
                print('units not recognized: %s' % units1)
                print(traceback.format_exc())
                goodUnits = 0
                return ok1

            if goodUnits:
                if vb == 1: print('cfUnits:')
                if vb == 1: print(cfUnits)
                if cfUnits.islongitude:
                    dimWhat = 'lon'
                elif cfUnits.islatitude:
                    dimWhat = 'lat'
                elif cfUnits.isreftime:
                    dimWhat = 'time'
                elif cfUnits.ispressure or units1 == 'hPa':
                    dimWhat = 'z'

            # check_time_limits
            if dimWhat == 'time':
                if len(fn2) > 1:
                    units9 = units1a1
                else:
                    units9 = units1
                if vb == 1: print('units9')
                if vb == 1: print(units9)
                date1 = cmac.num2date(netCDF4, d2[0], units9)
                date2 = cmac.num2date(netCDF4, d2[-1], units9)
                if vb == 1: print(date1)
                if vb == 1: print(date2)
                if (date1 is None) or (date2 is None):
                    # ok1 = 11
                    return ok1
                time1 = date1.timetuple()
                time2 = date2.timetuple()
                a1 = '%04d%02d%02d %02d:%02d:%02d' % (time1[0], time1[1], time1[2],
                                                      time1[3], time1[4], time1[5])
                a2 = '%04d%02d%02d %02d:%02d:%02d' % (time2[0], time2[1], time2[2],
                                                      time2[3], time2[4], time2[5])
                a1 = a1[:8]
                a2 = a2[:8]
            else:
                # not time
                try:
                    d2a = d2[:]
                    a1 = str(d2a.min())
                    a2 = str(d2a.max())
                except:
                    a1 = '0'
                    a2 = '0'

            str1 += '%s: %s to %s (%s)\n' % (dimVar, a1, a2, units1)
            varDict[dimVar]['min'] = a1
            varDict[dimVar]['max'] = a2
            varDict[dimVar]['units'] = units1
            varDict[dimVar]['what'] = dimWhat
        # end if hasUnits

    if ok1 > 0:
        return ok1

    # construct_dim2
    for var1 in varList:
        # for var1 in varDict.keys():
        dim2 = []
        for i in varDict[var1]['dim']:
            try:
                dim2.append(varDict[i]['what'])
            except:
                dim2.append('unknown')
        varDict[var1]['dim2'] = dim2

    # construct_global_dim2
    dim22 = []
    for d in dimList:
        try:
            dim22.append(varDict[d]['what'])
        except:
            dim22.append('unknown')

    nc.close()

    check1 += '\nThe netCDF file has %d variables:\n%s' % (len(varList), str1)

    dict1['varDict'] = varDict
    dict1['varList'] = varList
    dict1['varListLong'] = varListLong
    dict1['dimList'] = dimList
    dict1['dim2'] = dim22
    dict1['check'] = check1
    dict1['warning'] = warning
    dict1['title'] = title2
    dict1['summary'] = summary2
    dict1['frequency'] = freq2
    dict1['ok'] = 0

    return ok1
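# A minimal usage sketch for checkNc (the file pattern is hypothetical; dict1 is
# filled in place with metadata, and the integer return code is documented in the
# docstring above):
#
# info = {'message': ''}
# rc = checkNc('/data/cmip5/tas_Amon_*.nc', info, vb=1)
# if rc == 0:
#     print(info['varList'], info['dimList'])
# else:
#     print('check failed with code %d: %s' % (rc, info.get('message', '')))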
for i, ano in enumerate(range(1982, 2012)):
    # print(ano)
    nc1 = 'nc/pcp-daily-echam46-amip-{0}01.nc'.format(ano)
    nc2 = 'nc/pcp-daily-echam46-amip-{0}02.nc'.format(ano)
    nc3 = 'nc/pcp-daily-echam46-amip-{0}03.nc'.format(ano)
    netcdfs = [nc1, nc2, nc3]
    data = MFDataset(netcdfs)
    pcp = data.variables['pcp'][:]
    lons_360 = data.variables['longitude'][:]
    lats = data.variables['latitude'][:]
    data.close()
    pcpaccaux[i, :, :] = np.nansum(pcp, axis=0)

print(pcpaccaux.shape)

pcpacc, lons = shiftgrid(180., pcpaccaux, lons_360, start=False)

print('\n +++ INTERPOLATION +++')

newlats = np.linspace(-90, 90, 181)
newlons = np.linspace(-180, 179, 360)
x, y = np.meshgrid(newlons, newlats)

pcpacc1dg = np.zeros((int(pcpacc.shape[0]), int(len(newlats)), int(len(newlons))))

for i in range(0, int(pcpacc.shape[0])):
def readEnsemble(wrfinit, timerange=None, fields=None, debug=False):
    '''
    Reads in desired fields and returns 2-D arrays of data for each field (barb/contour/fill).
    '''
    if debug:
        print(fields)
    datadict = {}
    file_list, missing_list = makeEnsembleList(wrfinit, timerange)  # construct list of files

    # loop through fill field, contour field, barb field and retrieve required data
    for f in ['fill', 'contour', 'barb']:
        if not fields[f].keys():
            continue
        if debug:
            print('Reading field:', fields[f]['name'], 'from', fields[f]['filename'])

        # save some variables for use in this function
        filename = fields[f]['filename']
        arrays = fields[f]['arrayname']
        fieldtype = fields[f]['ensprod']
        if fieldtype in ['prob', 'neprob']:
            thresh = fields[f]['thresh']
        if fieldtype[0:3] == 'mem':
            member = int(fieldtype[3:])

        # open multi-file netcdf dataset
        if debug:
            print(file_list[filename])
        fh = MFDataset(file_list[filename])

        # loop through each field; wind fields will have two fields that need to be read
        datalist = []
        for array in arrays:
            # read in 3D array (times*members, ny, nx) from file object
            if 'arraylevel' in fields[f]:
                if fields[f]['arraylevel'] != 'max':
                    data = fh.variables[array][:, fields[f]['arraylevel'], :, :]
                else:
                    data = np.amax(fh.variables[array][:, :, :, :], axis=1)  # GSR
            # else: data = fh.variables[array][:,0,:,:]
            # elif 'sfclevel' in fields[f]: data = fh.variables[array][:,:,:]
            # else: data = fh.variables[array][:,0,:,:]
            else:
                data = fh.variables[array][:, :, :]

            # change units for certain fields
            if array in ['U_GRID_PRS', 'V_GRID_PRS', 'UBSHR6', 'VBSHR6', 'U10', 'V10',
                         'U_COMP_STM', 'V_COMP_STM', 'S_PL']:
                data = data * 1.93  # m/s > kt
            if array in ['V10', 'U10']:
                # NOTE: as written, U10/V10 are scaled twice (x1.93 above and x19.3 here)
                data = data * 1.93 * 10.0  # m/s > .1 kt
            if array in ['V', 'U']:
                data = data * 1.93  # m/s > kt
            if array in ['MSL_PRES']:
                data = data / 100.  # Pa > mb
            if array in ['P_WAT']:
                data = data * 0.0393701  # mm > in
            elif array in ['DEWPOINT_2M', 'T2', 'AFWA_WCHILL', 'AFWA_HEATIDX']:
                data = (data - 273.15) * 1.8 + 32.0  # K > F
            elif array in ['PREC_ACC_NC', 'PREC_ACC_C', 'AFWA_PWAT', 'PWAT',
                           'AFWA_SNOWFALL', 'AFWA_SNOW', 'AFWA_ICE', 'AFWA_FZRA']:
                data = data * 0.0393701  # mm > in
            elif array in ['RAINNC', 'GRPL_MAX', 'SNOW_ACC_NC']:
                data = data * 0.0393701  # mm > in
            elif array in ['TEMP_PRS', 'DEWPOINT_PRS', 'SFC_LI']:
                data = data - 273.15  # K > C
            elif array in ['ABS_VORT_PRS']:
                data = data * 100000.0
            elif array in ['AFWA_MSLP', 'MSLP']:
                data = data * 0.01  # Pa > hPa
            elif array in ['ECHOTOP']:
                data = data * 0.001  # m > km
            elif array in ['SBCINH', 'MLCINH', 'W_DN_MAX']:
                data = data * -1.0  # make cin positive
            elif array in ['PVORT_320K']:
                data = data * 1000000  # multiply by 1e6
            elif array in ['SBT123_GDS3_NTAT']:
                data = data - 273.15  # K > C
            elif array in ['SBT124_GDS3_NTAT']:
                data = data - 273.15  # K > C
            elif array in ['HAIL_MAXK1', 'HAIL_MAX2D']:
                data = data * 39.3701  # m > inches
            elif array in ['T2']:
                # NOTE: unreachable as written; T2 is already handled by the K > F branch above
                data = data * 1.8  # C > F
            elif array in ['Q2', 'QVAPOR']:
                data = data * 1000.  # kg > g
            elif array in ['PSFC']:
                data = data / 100.  # Pa > mb
            # perform mean/max/variance/etc. to reduce the 3D array to 2D
            if fieldtype == 'mean':
                data = np.mean(data, axis=0)
            elif fieldtype == 'pmm':
                data = compute_pmm(data)
            elif fieldtype == 'max':
                data = np.amax(data, axis=0)
            elif fieldtype == 'var':
                data = np.std(data, axis=0)
            elif fieldtype == 'summean':
                for i in missing_list[filename]:
                    data = np.insert(data, i, np.nan, axis=0)  # insert nan for missing files
                # reshape to (times, members, ny, nx); assumes a 10-member ensemble
                # (// keeps the division integral under Python 3)
                data = np.reshape(data, (data.shape[0] // 10, 10, data.shape[1], data.shape[2]))
                data = np.nansum(data, axis=0)
                data = np.nanmean(data, axis=0)
            elif fieldtype[0:3] == 'mem':
                for i in missing_list[filename]:
                    data = np.insert(data, i, np.nan, axis=0)  # insert nan for missing files
                data = np.reshape(data, (data.shape[0] // 10, 10, data.shape[1], data.shape[2]))
                data = np.nanmax(data, axis=0)
                data = data[member - 1, :]
            elif fieldtype in ['prob', 'neprob']:
                data = (data >= thresh).astype('float')
                for i in missing_list[filename]:
                    data = np.insert(data, i, np.nan, axis=0)  # insert nan for missing files
                data = np.reshape(data, (data.shape[0] // 10, 10, data.shape[1], data.shape[2]))
                data = np.nanmax(data, axis=0)
                if fieldtype == 'neprob':
                    data = compute_neprob(data, roi=14, sigma=40)  # nw = neighborhood width
                else:
                    data = np.nanmean(data, axis=0)
                data = data + 0.001  # hack to ensure that plot displays discrete prob values

            if debug:
                print('Returning', array, 'with shape', data.shape,
                      'max', data.max(), 'min', data.min())
            datalist.append(data)

        # attach data arrays for each type of field (e.g. {'fill': [data], 'barb': [data, data]})
        datadict[f] = datalist
        fh.close()

    # these are derived fields that are not in any of the input files but can be computed
    if 'name' in fields['fill']:
        if fields['fill']['name'] in ['shr06mag', 'shr01mag', 'bunkmag']:
            datadict['fill'] = [np.sqrt(datadict['fill'][0] ** 2 + datadict['fill'][1] ** 2)]
        if fields['fill']['name'] in ['iso300', 'iso500', 'iso700', 'iso850']:
            datadict['fill'] = [np.sqrt(datadict['fill'][0] ** 2 + datadict['fill'][1] ** 2)]
        elif fields['fill']['name'] == 'stp':
            datadict['fill'] = computestp(datadict['fill'])

    return (datadict, missing_list['amem'])
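# A minimal usage sketch for readEnsemble (the init time and the field specification
# below are hypothetical; makeEnsembleList, compute_pmm, compute_neprob and computestp
# are assumed to be defined elsewhere in this module):
#
# fields = {'fill': {'name': 'T2', 'filename': 'diag', 'arrayname': ['T2'],
#                    'ensprod': 'mean', 'arraylevel': 0},
#           'contour': {}, 'barb': {}}
# datadict, missing = readEnsemble('2019050100', timerange=(0, 12),
#                                  fields=fields, debug=True)
# t2_mean = datadict['fill'][0]  # 2-D ensemble-mean field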
def get_tile_dimension(in_files, var_name, transfer_limit_Mbytes=None, time_range=None):
    '''
    Computes the total size of a 3D variable array and returns the optimal tile
    dimension for spatial chunking.

    :param in_files: absolute path(s) to NetCDF dataset(s) (including OPeNDAP URLs)
    :type in_files: list
    :param var_name: variable name to process
    :type var_name: str
    :param transfer_limit_Mbytes: maximum OPeNDAP/THREDDS transfer limit in Mbytes (default: None)
    :type transfer_limit_Mbytes: float
    :param time_range: time range
    :type time_range: list of 2 datetime objects: [dt1, dt2]

    :rtype: int

    .. warning:: only for 3D variables
    '''
    if transfer_limit_Mbytes is None:
        return 0

    transfer_limit_bytes = transfer_limit_Mbytes * 1024 * 1024  # Mbytes --> bytes

    in_files.sort()
    mfnc = MFDataset(in_files, 'r', aggdim='time')

    ndim = mfnc.variables[var_name].ndim
    if ndim != 3:
        # the original only printed this error and fell through
        raise ValueError("The variable to process must be 3D, got {}D".format(ndim))

    v = mfnc.variables[var_name]
    v_shape = v.shape
    v_dtype = v.dtype
    v_nb_bytes = v_dtype.itemsize

    if time_range is None:
        total_array_size_bytes = v_shape[0] * v_shape[1] * v_shape[2] * v_nb_bytes
        optimal_tile_dimension = int(numpy.sqrt(transfer_limit_bytes / (v_shape[0] * v_nb_bytes)))
    else:
        var_time = mfnc.variables['time']
        try:
            time_calend = var_time.calendar
        except AttributeError:
            time_calend = 'gregorian'
        time_units = var_time.units
        time_arr = var_time[:]
        dt_arr = numpy.array([util_dt.num2date(dt, calend=time_calend, units=time_units) for dt in time_arr])

        indices_subset = util_dt.get_indices_subset(dt_arr, time_range)
        nb_time_steps_after_subset = len(indices_subset)
        total_array_size_bytes = nb_time_steps_after_subset * v_shape[1] * v_shape[2] * v_nb_bytes
        optimal_tile_dimension = int(numpy.sqrt(transfer_limit_bytes / (nb_time_steps_after_subset * v_nb_bytes)))

    mfnc.close()
    return optimal_tile_dimension
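# A minimal usage sketch for get_tile_dimension (the OPeNDAP URL and the transfer
# limit are hypothetical; with transfer_limit_Mbytes=None the function simply
# returns 0, i.e. no spatial chunking is needed):
#
# from datetime import datetime
# tile = get_tile_dimension(['http://server/thredds/dodsC/tas_day_1950-2000.nc'],
#                           'tas', transfer_limit_Mbytes=500.0,
#                           time_range=[datetime(1980, 1, 1), datetime(1990, 1, 1)])
# print('optimal tile dimension: %d' % tile)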