    data_year[year_i] = np.sum(data[year_i * 12 + month_start - 1:year_i * 12 + month_end] * month_days, axis=0)

    return time_year, data_year

#-----------------------------------------------------------------------------------------
#--------------------------------MAIN SCRIPT STARTS HERE----------------------------------
#-----------------------------------------------------------------------------------------

trend_type = 0
section = 30  # Number of years for GEV fit

#-----------------------------------------------------------------------------------------

HEAT_data = netcdf.Dataset(directory + 'Ocean/SSH_NBC_monthly_maximum.nc', 'r')

# Writing data to correct variable
time_all = HEAT_data.variables['time'][:]
ssh = HEAT_data.variables['SSH'][:]

HEAT_data.close()

# Get the globally-averaged sea surface height
fh = netcdf.Dataset(directory + 'Ocean/SSH_global.nc', 'r')

ssh_global = fh.variables['SSH_global'][:]  # Global sea surface height (cm)

fh.close()

ssh = ssh - ssh_global
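# A hedged sketch (not in the original script) of the GEV fit that the `section`
# setting above prepares for, assuming scipy is available; `year_maxima` is
# placeholder data standing in for the annual-maximum SSH series.
import numpy as np
from scipy.stats import genextreme

year_maxima = np.random.default_rng(0).gumbel(60.0, 5.0, size=section)  # placeholder data
shape, loc, scale = genextreme.fit(year_maxima)
return_level_100yr = genextreme.ppf(1.0 - 1.0 / 100.0, shape, loc=loc, scale=scale)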
def write_NetCDF_input(case_name, float_type, init_profiles,
                       tdep_surface=None, tdep_ls=None, radiation=None, soil=None):
    """ Function for writing the MicroHH2 NetCDF input """

    def add_variable(nc_group, name, dims, data, float_type):
        var = nc_group.createVariable(name, float_type, dims)
        var[:] = data[:]

    # Define new NetCDF file
    nc_file = nc4.Dataset('{}_input.nc'.format(case_name), mode='w', format='NETCDF4')

    # Create height dimension, and set height coordinate
    nc_file.createDimension('z', init_profiles['z'].size)
    add_variable(nc_file, 'z', ('z',), init_profiles['z'], float_type)

    # Create a group called "init" for the initial profiles,
    # and set the initial profiles
    nc_group_init = nc_file.createGroup('init')
    for name, data in init_profiles.items():
        add_variable(nc_group_init, name, ('z',), data, float_type)

    # Create a group called "timedep" for the time-dependent input
    if tdep_surface is not None or tdep_ls is not None:
        nc_group_timedep = nc_file.createGroup('timedep')

    # Write the time-dependent surface values
    if tdep_surface is not None:
        nc_group_timedep.createDimension('time_surface', tdep_surface['time_surface'].size)
        for name, data in tdep_surface.items():
            add_variable(nc_group_timedep, name, ('time_surface',), data, float_type)

    # Write the time-dependent atmospheric values
    if tdep_ls is not None:
        nc_group_timedep.createDimension('time_ls', tdep_ls['time_ls'].size)
        for name, data in tdep_ls.items():
            dims = ('time_ls',) if name == 'time_ls' else ('time_ls', 'z')
            add_variable(nc_group_timedep, name, dims, data, float_type)

    if radiation is not None:
        nc_group_rad = nc_file.createGroup('radiation')
        nc_group_rad.createDimension('lay', radiation['p_lay'].size)
        nc_group_rad.createDimension('lev', radiation['p_lev'].size)
        for name, data in radiation.items():
            dims = ('lay',) if data.size == radiation['p_lay'].size else ('lev',)
            add_variable(nc_group_rad, name, dims, data, float_type)

    if soil is not None:
        nc_group_soil = nc_file.createGroup('soil')
        nc_group_soil.createDimension('z', soil['z'].size)
        for name, data in soil.items():
            add_variable(nc_group_soil, name, ('z',), data, float_type)

    nc_file.close()
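# A hedged usage sketch for write_NetCDF_input() above (assuming `nc4` is the
# usual `import netCDF4 as nc4` alias); the profile names 'thl' and 'qt' are
# placeholders, any {name: 1-D array} mapping on the same z grid works.
import numpy as np

z = np.linspace(10.0, 1000.0, 100)
init_profiles = {'z': z, 'thl': 290.0 + 0.006 * z, 'qt': np.full(z.size, 5e-3)}
write_NetCDF_input('example_case', 'f8', init_profiles)
# -> writes example_case_input.nc with a root 'z' coordinate and an 'init' group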
    def __init__(self, netcdf_filename=None):
        import netCDF4

        self.ncfile = None
        if netcdf_filename is not None:
            self.ncfile = netCDF4.Dataset(netcdf_filename, mode='w')
def prepare_nc(trgFile, timeList, x, y, metadata, logger, EPSG="EPSG:4326", units=None, calendar='gregorian', Format="NETCDF4", complevel=9, zlib=True, least_significant_digit=None): """ This function prepares a NetCDF file with given metadata, for a certain year, daily basis data The function assumes a gregorian calendar and a time unit 'Days since 1900-01-01 00:00:00' """ import datetime as dt logger.info('Setting up netcdf output: ' + trgFile) if units == None: # Use start of the run epoch = timeList[0] units = 'seconds since %04d-%02d-%02d %02d:%02d:%02d.0 00:00' % ( epoch.year, epoch.month, epoch.day, epoch.hour, epoch.minute, epoch.second) startDayNr = netCDF4.date2num(timeList[0].replace(tzinfo=None), units=units, calendar=calendar) endDayNr = netCDF4.date2num(timeList[-1].replace(tzinfo=None), units=units, calendar=calendar) timeAR = linspace(startDayNr, endDayNr, num=len(timeList)) nc_trg = netCDF4.Dataset(trgFile, 'w', format=Format, zlib=zlib, complevel=complevel) logger.info('Setting up dimensions and attributes. Steps: ' + str(len(timeList)) + ' lat: ' + str(len(y)) + " lon: " + str(len(x))) if len(timeAR) == 1: nc_trg.createDimension('time', 1) else: nc_trg.createDimension('time', 0) # NrOfDays*8 DateHour = nc_trg.createVariable('time', 'f8', ('time', ), fill_value=-9999., zlib=zlib, complevel=complevel) DateHour.units = units DateHour.calendar = calendar DateHour.standard_name = 'time' DateHour.long_name = 'time' DateHour.axis = 'T' DateHour[:] = timeAR # make a proj4 string srs = osgeo.osr.SpatialReference() res = srs.ImportFromEPSG(int(EPSG[5:])) if res != 0: logger.error("EPGS not converted correctly: " + EPSG + ". Is the GDAL_DATA environment variable set correctly?") exit(1) projStr = srs.ExportToProj4() proj_src = '+proj=longlat +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +no_defs' if srs.IsProjected() == 0: # ONly lat lon needed nc_trg.createDimension('lat', len(y)) nc_trg.createDimension('lon', len(x)) y_var = nc_trg.createVariable('lat', 'f4', ('lat', ), fill_value=-9999., zlib=zlib, complevel=complevel) y_var.standard_name = 'latitude' y_var.long_name = 'latitude' y_var.units = 'degrees_north' y_var.axis = 'Y' x_var = nc_trg.createVariable('lon', 'f4', ('lon', ), fill_value=-9999., zlib=zlib, complevel=complevel) x_var.standard_name = 'longitude' x_var.long_name = 'longitude' x_var.units = 'degrees_east' x_var.axis = 'X' y_var[:] = y x_var[:] = x crs = nc_trg.createVariable('crs', 'c') crs.long_name = 'wgs84' crs.proj4_params = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs' crs.grid_mapping_name = 'latitude_longitude' else: # Assume regular grid in m nc_trg.createDimension('y', len(y)) nc_trg.createDimension('x', len(x)) y_var = nc_trg.createVariable('y', 'f4', ('y', ), fill_value=-9999., zlib=zlib, complevel=complevel) y_var.standard_name = 'projection_y_coordinate' y_var.long_name = 'y-coordinate in Cartesian system' y_var.units = 'm' y_var.axis = 'Y' x_var = nc_trg.createVariable('x', 'f4', ('x', ), fill_value=-9999., zlib=zlib, complevel=complevel) x_var.standard_name = 'projection_x_coordinate' x_var.long_name = 'x-coordinate in Cartesian system' x_var.units = 'm' x_var.axis = 'X' y_var[:] = y x_var[:] = x crs = nc_trg.createVariable('crs', 'c') crs.long_name = EPSG crs.grid_mapping_name = 'universal_transverse_mercator' crs.utm_zone_number = srs.GetUTMZone() crs.semi_major_axis = srs.GetSemiMajor() crs.inverse_flattening = srs.GetInvFlattening() crs._CoordinateTransformType = "Projection" crs._CoordinateAxisTypes = "y x" crs.proj4_params = projStr # Also write 
lat lon fields XI, YI = meshgrid(x, y) lon_vals, lat_vals = convertCoord(projStr, proj_src, XI, YI) # Need to create lat-lon fields lat = nc_trg.createVariable('lat', 'f4', ( 'y', 'x', )) lat.standard_name = 'latitude' lat.long_name = 'latitude coordinate' lat.units = 'degrees_north' lat.coordinates = 'lat lon' lat.grid_mapping = 'wgs84' #lat._CoordinateAxisType = "Lat" lat[:, :] = lat_vals lon = nc_trg.createVariable('lon', 'f4', ( 'y', 'x', )) lon.standard_name = 'longitude' lon.long_name = 'longitude coordinate' lon.units = 'degrees_east' lon.coordinates = 'lat lon' lon.grid_mapping = 'wgs84' #lon._CoordinateAxisType = "Lon" lon[:, :] = lon_vals crs.EPSG_code = EPSG # now add all attributes from user-defined metadata for attr in metadata: nc_trg.setncattr(attr, metadata[attr]) nc_trg.sync() nc_trg.close()
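# A hedged usage sketch for prepare_nc() above, with a standard logging.Logger
# and placeholder metadata; a daily time list on a 1-degree global grid.
import datetime as dt
import logging
import numpy as np

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('prepare_nc_demo')
times = [dt.datetime(2000, 1, 1) + dt.timedelta(days=i) for i in range(10)]
lons = np.arange(-179.5, 180.0, 1.0)
lats = np.arange(-89.5, 90.0, 1.0)
prepare_nc('example.nc', times, lons, lats, {'title': 'demo grid'}, logger)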
    return fn_ar

fn_ar_ptrc, fn_ar_grid = make_nclen(start, end)
tstr = 'ptrc_phyto_1d_'
avgdnc_ar_ptrc = make_fname_ar(start, end, tstr)

for i in range(0, noday):
    tptrc = fn_ar_ptrc[i]
    fn = avgdnc_ar_ptrc[i]
    print(fn)
    t = time.time()
    ptrc = nc.Dataset(tptrc)
    # print(ptrc)
    diat = ptrc['diatoms'][:]
    diat_d = np.nanmean(diat, axis=0)
    flag = ptrc['flagellates'][:]
    flag_d = np.nanmean(flag, axis=0)
    cili = ptrc['ciliates'][:]
    cili_d = np.nanmean(cili, axis=0)
    ptrc.close()
    t2 = time.time()
    print(t2 - t)

    tdir = '/data/tjarniko/results/hindcast.201905_dayavg_phyto/'
    ncname = tdir + fn
    f = nc.Dataset(ncname, 'w', format='NETCDF4')  # 'w' stands for write
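    # A hedged sketch of how the loop might continue: write the day-averaged
    # fields into the new file f. The dimension names and the (depth, y, x)
    # shape are assumptions based on typical NEMO ptrc output.
    f.createDimension('deptht', diat_d.shape[0])
    f.createDimension('y', diat_d.shape[1])
    f.createDimension('x', diat_d.shape[2])
    for vname, arr in (('diatoms', diat_d), ('flagellates', flag_d), ('ciliates', cili_d)):
        v = f.createVariable(vname, 'f4', ('deptht', 'y', 'x'), zlib=True)
        v[:] = arr
    f.close()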
def __init__(self, netcdffile, logging, vars=[]): """ First try to setup a class read netcdf files (converted with pcr2netcdf.py) netcdffile: file to read the forcing data from logging: python logging object vars: list of variables to get from file """ self.fname = netcdffile if os.path.exists(netcdffile): self.dataset = netCDF4.Dataset(netcdffile, mode="r") else: msg = os.path.abspath(netcdffile) + " not found!" logging.error(msg) raise ValueError(msg) logging.info("Reading state input from netCDF file: " + netcdffile) self.alldat = {} a = pcr.pcr2numpy(pcr.cover(0.0), 0.0).flatten() # Determine steps to load in mem based on estimated memory usage floatspermb = 1048576 / 4 maxmb = 40 self.maxsteps = maxmb * len(a) / floatspermb + 1 self.fstep = 0 self.lstep = self.fstep + self.maxsteps self.datetime = self.dataset.variables["time"][:] if hasattr(self.dataset.variables["time"], "units"): self.timeunits = self.dataset.variables["time"].units else: self.timeunits = "Seconds since 1970-01-01 00:00:00" if hasattr(self.dataset.variables["time"], "calendar"): self.calendar = self.dataset.variables["time"].calendar else: self.calendar = "gregorian" self.datetimelist = cftime.num2date(self.datetime, self.timeunits, calendar=self.calendar) try: self.x = self.dataset.variables["x"][:] except: self.x = self.dataset.variables["lon"][:] # Now check Y values to see if we must flip the data try: self.y = self.dataset.variables["y"][:] except: self.y = self.dataset.variables["lat"][:] # test if 1D or 2D array if len(self.y.shape) == 1: if self.y[0] > self.y[-1]: self.flip = False else: self.flip = True else: # not sure if this works self.y = self.y[:][0] if self.y[0] > self.y[-1]: self.flip = False else: self.flip = True x = pcr.pcr2numpy(pcr.xcoordinate(pcr.boolean(pcr.cover(1.0))), np.nan)[0, :] y = pcr.pcr2numpy(pcr.ycoordinate(pcr.boolean(pcr.cover(1.0))), np.nan)[:, 0] # Get average cell size acc = ( np.diff(x).mean() * 0.25 ) # non-exact match needed becuase of possible rounding problems if self.flip: (self.latidx, ) = np.logical_and( self.y[::-1] + acc >= y.min(), self.y[::-1] <= y.max() + acc).nonzero() (self.lonidx, ) = np.logical_and( self.x + acc >= x.min(), self.x <= x.max() + acc).nonzero() else: (self.latidx, ) = np.logical_and( self.y + acc >= y.min(), self.y <= y.max() + acc).nonzero() (self.lonidx, ) = np.logical_and( self.x + acc >= x.min(), self.x <= x.max() + acc).nonzero() if len(self.lonidx) != len(x): logging.error("error in determining X coordinates in netcdf...") logging.error("model expects: " + str(x.min()) + " to " + str(x.max())) logging.error("got coordinates netcdf: " + str(self.x.min()) + " to " + str(self.x.max())) logging.error("got len from netcdf x: " + str(len(x)) + " expected " + str(len(self.lonidx))) raise ValueError("X coordinates in netcdf do not match model") if len(self.latidx) != len(y): logging.error("error in determining Y coordinates in netcdf...") logging.error("model expects: " + str(y.min()) + " to " + str(y.max())) logging.error("got from netcdf: " + str(self.y.min()) + " to " + str(self.y.max())) logging.error("got len from netcdf y: " + str(len(y)) + " expected " + str(len(self.latidx))) raise ValueError("Y coordinates in netcdf do not match model") for var in vars: try: self.alldat[var] = self.dataset.variables[var][self.fstep:self. maxsteps] except: self.alldat.pop(var, None) logging.warning("Variable " + var + " not found in netcdf file: " + netcdffile)
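# A small standalone illustration (not part of the class) of the y-axis check
# above: if the latitudes in the file are ascending, each 2-D slice must be
# flipped to match the model's north-up orientation.
import numpy as np

lat_nc = np.array([40.0, 41.0, 42.0])      # ascending -> flip needed
slice_nc = np.arange(9).reshape(3, 3)
flip = not (lat_nc[0] > lat_nc[-1])
data_northup = slice_nc[::-1, :] if flip else slice_nc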
fin = open("grid.{:07d}".format(0), "rb") raw = fin.read(nx * 8) x = numpy.array(struct.unpack('>{}d'.format(nx), raw)) raw = fin.read(nx * 8) xh = numpy.array(struct.unpack('>{}d'.format(nx), raw)) raw = fin.read(ny * 8) y = numpy.array(struct.unpack('>{}d'.format(ny), raw)) raw = fin.read(ny * 8) yh = numpy.array(struct.unpack('>{}d'.format(ny), raw)) raw = fin.read(nz * 8) z = numpy.array(struct.unpack('>{}d'.format(nz), raw)) raw = fin.read(nz * 8) zh = numpy.array(struct.unpack('>{}d'.format(nz), raw)) fin.close() file = netCDF4.Dataset("s.nc", "w") dim_x = file.createDimension('x', nxsave) dim_y = file.createDimension('y', nysave) dim_z = file.createDimension('z', nzsave) dim_t = file.createDimension('time', nt) var_x = file.createVariable('x', 'f8', ('x', )) var_y = file.createVariable('y', 'f8', ('y', )) var_z = file.createVariable('z', 'f8', ('z', )) var_t = file.createVariable('time', 'f8', ('time', )) var_s = file.createVariable('s', 'f4', ( 'time', 'z', 'y', 'x',
    def connect(self, uri):
        # try:
        ret = nc.Dataset(uri, 'r')
        # except TypeError:
        #     ret = nc.MFDataset(uri)
        return ret
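    # A hedged sketch of the multi-file fallback the commented lines above hint
    # at, using netCDF4.MFDataset (via the same `nc` alias) when `uri` is a
    # list of files or a glob pattern; not part of the original class.
    def connect_multi(self, uri):
        try:
            return nc.Dataset(uri, 'r')   # single filename
        except (TypeError, OSError):
            return nc.MFDataset(uri)      # e.g. uri = 'forcing_*.nc'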
def execute_netcdf_task(task): global log task.next_state() filepath = getattr(task, cmor_task.output_path_key, None) if not filepath: log.error( "Could not find file containing data for variable %s in table %s" % (task.target.variable, task.target.table)) return store_var = getattr(task, "store_with", None) surf_pressure_task = getattr(task, "sp_task", None) surf_pressure_path = getattr(surf_pressure_task, "path", None) if surf_pressure_task else None if store_var and not surf_pressure_path: log.error( "Could not find file containing surface pressure for model level variable...skipping variable %s in table " "%s" % (task.target.variable, task.target.table)) return axes = [] t_bnds = [] if hasattr(task, "grid_id"): task_grid_id = getattr(task, "grid_id") if isinstance(task_grid_id, tuple): axes.extend([a for a in task_grid_id if a is not None]) else: axes.append(task_grid_id) if hasattr(task, "z_axis_id"): axes.append(getattr(task, "z_axis_id")) if hasattr(task, "t_axis_id"): axes.append(getattr(task, "t_axis_id")) t_bnds = time_axis_bnds.get(getattr(task, "t_axis_id"), []) try: dataset = netCDF4.Dataset(filepath, 'r') except Exception as e: log.error( "Could not read netcdf file %s while cmorizing variable %s in table %s. Cause: %s" % (filepath, task.target.variable, task.target.table, e.message)) return try: ncvars = dataset.variables dataset.set_auto_mask(False) codestr = str(task.source.get_grib_code().var_id) varlist = [ v for v in ncvars if str(getattr(ncvars[v], "code", None)) == codestr ] if len(varlist) == 0: varlist = [v for v in ncvars if str(v) == "var" + codestr] if task.target.variable == "areacella": varlist = ["cell_area"] if len(varlist) == 0: log.error( "No suitable variable found in cdo-produced file %s fro cmorizing variable %s in table %s... 
" "dismissing task" % (filepath, task.target.variable, task.target.table)) task.set_failed() return if len(varlist) > 1: log.warning( "CDO variable retrieval resulted in multiple (%d) netcdf variables; will take first" % len(varlist)) ncvar = ncvars[varlist[0]] unit = getattr(ncvar, "units", None) if (not unit) or hasattr(task, cmor_task.conversion_key): unit = getattr(task.target, "units") if len(getattr(task.target, "positive", "")) > 0: var_id = cmor.variable(table_entry=str(task.target.variable), units=str(unit), axis_ids=axes, positive="down") else: var_id = cmor.variable(table_entry=str(task.target.variable), units=str(unit), axis_ids=axes) flip_sign = (getattr(task.target, "positive", None) == "up") factor, term = get_conversion_constants( getattr(task, cmor_task.conversion_key, None), getattr(task, cmor_task.output_frequency_key)) time_dim, index = -1, 0 for d in ncvar.dimensions: if d.startswith("time"): time_dim = index break index += 1 time_selection = None time_stamps = cmor_utils.read_time_stamps(filepath) if any(time_stamps) and len(t_bnds) > 0: time_slice_map = [] for bnd in t_bnds: candidates = [t for t in time_stamps if bnd[0] <= t <= bnd[1]] if any(candidates): time_slice_map.append(time_stamps.index(candidates[0])) else: log.warning( "For variable %s in table %s, no valid time point could be found at %s...inserting " "missing values" % (task.target.variable, task.target.table, str(bnd[0]))) time_slice_map.append(-1) time_selection = numpy.array(time_slice_map) mask = getattr(task.target, cmor_target.mask_key, None) mask_array = masks[mask].get("array", None) if mask in masks else None missval = getattr(task.target, cmor_target.missval_key, 1.e+20) if flip_sign: missval = -missval cmor_utils.netcdf2cmor(var_id, ncvar, time_dim, factor, term, store_var, get_sp_var(surf_pressure_path), swaplatlon=False, fliplat=True, mask=mask_array, missval=missval, time_selection=time_selection, force_fx=(cmor_target.get_freq( task.target) == 0)) cmor.close(var_id) task.next_state() if store_var: cmor.close(store_var) finally: dataset.close()
f.set_construct(c, axes=('domainaxis2',), key='dimensioncoordinate2', copy=False)

# cell_method
c = cf.CellMethod()
c.method = 'mean'
c.axes = ('area',)
f.set_construct(c)

q, t = cf.read('file.nc')
print(q.creation_commands())

import netCDF4
nc = netCDF4.Dataset('file.nc', 'r')
v = nc.variables['ta']
netcdf_array = cf.NetCDFArray(filename='file.nc', ncvar='ta',
                              dtype=v.dtype, ndim=v.ndim,
                              shape=v.shape, size=v.size)
data_disk = cf.Data(netcdf_array)

numpy_array = v[...]
data_memory = cf.Data(numpy_array)

data_disk.equals(data_memory)

key = tas.construct_key('surface_altitude')
orog = tas.convert(key)
print(orog)
orog1 = tas.convert(key, full_domain=False)
import numpy as np
import netCDF4

td_loc = '/media/wk2/atmenu10246/VVM/DATA/hi_reso12/archive/'
td0 = netCDF4.Dataset(td_loc + 'hi_reso12.L.Thermodynamic-000000.nc')

thbar = td0['th'][0, :, 5, 5]
x = td0['xc']
z = td0['zc']
th = td0['th'][0, :, :, 63] - np.tile(thbar, (len(x), 1)).transpose()

rmax = 0
for i in range(len(z)):
    if np.max(th[i]) == 0:
        continue
    else:
        radius = np.max(x[th[i] != 0]) - np.min(x[th[i] != 0])
        print(radius)
        print(np.argmax(x[th[i] != 0]), np.argmin(x[th[i] != 0]))
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 6 10:55:53 2018

@author: andrew
"""
from IPython import get_ipython
get_ipython().magic('reset -sf')

# Get data
import numpy as np
import netCDF4
import sys
from glob import glob
from matplotlib import pyplot as plt

dset1 = netCDF4.Dataset(file1, mode='r')  # file1: path to the input NetCDF file
#dset2 = netCDF4.Dataset(file4,mode='r')
except:
    uselim = False
else:
    uselim = False

if args.title is not None:
    titlestr = unicode(args.title)
else:
    titlestr = args.varname

if args.units is not None:
    unitsstr = replace_superscripts(args.units)
else:
    unitsstr = ''

nc = netCDF4.Dataset(args.ncfile)
ncv = nc.variables
varname = args.varname

lon = ncv['SCHISM_hgrid_node_x'][:]
lat = ncv['SCHISM_hgrid_node_y'][:]
#sigma = ncv['sigma'][:]
#nsigma = len(sigma)
#bidx = ncv['node_bottom_index'][:]
nv = ncv['SCHISM_hgrid_face_nodes'][:, :3] - 1

time = ncv['time'][:]  # s
ut = utime(ncv['time'].units)
dates = ut.num2date(time)

lonb = [-18., 32.]
latb = [46., 67.]
def main(netcdf_ws=os.getcwd(), ancillary_ws=os.getcwd(), output_ws=os.getcwd(), start_date=None, end_date=None, extent_path=None, output_extent=None, stats_flag=True, overwrite_flag=False): """Extract DAYMET temperature Parameters ---------- netcdf_ws : str Folder of DAYMET netcdf files. ancillary_ws : str Folder of ancillary rasters. output_ws : str Folder of output rasters. start_date : str, optional ISO format date (YYYY-MM-DD). end_date : str, optional ISO format date (YYYY-MM-DD). extent_path : str, optional File path defining the output extent. output_extent : list, optional Decimal degrees values defining output extent. stats_flag : bool, optional If True, compute raster statistics (the default is True). overwrite_flag : bool, optional If True, overwrite existing files (the default is False). Returns ------- None """ logging.info('\nExtracting DAYMET vapor pressure') # If a date is not set, process 2015 try: start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d') logging.debug(' Start date: {}'.format(start_dt)) except: start_dt = dt.datetime(2015, 1, 1) logging.info(' Start date: {}'.format(start_dt)) try: end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d') logging.debug(' End date: {}'.format(end_dt)) except: end_dt = dt.datetime(2015, 12, 31) logging.info(' End date: {}'.format(end_dt)) # Get DAYMET spatial reference from an ancillary raster mask_raster = os.path.join(ancillary_ws, 'daymet_mask.img') elev_raster = os.path.join(ancillary_ws, 'daymet_elev.img') daymet_re = re.compile('daymet_v3_(?P<VAR>\w+)_(?P<YEAR>\d{4})_na.nc4$') # DAYMET band name dictionary # daymet_band_dict = dict() # daymet_band_dict['prcp'] = 'precipitation_amount' # daymet_band_dict['srad'] = 'surface_downwelling_shortwave_flux_in_air' # daymet_band_dict['sph'] = 'specific_humidity' # daymet_band_dict['tmin'] = 'air_temperature' # daymet_band_dict['tmax'] = 'air_temperature' # Get extent/geo from mask raster daymet_ds = gdal.Open(mask_raster) daymet_osr = drigo.raster_ds_osr(daymet_ds) daymet_proj = drigo.osr_proj(daymet_osr) daymet_cs = drigo.raster_ds_cellsize(daymet_ds, x_only=True) daymet_extent = drigo.raster_ds_extent(daymet_ds) daymet_geo = daymet_extent.geo(daymet_cs) daymet_x, daymet_y = daymet_extent.origin() daymet_ds = None logging.debug(' Projection: {}'.format(daymet_proj)) logging.debug(' Cellsize: {}'.format(daymet_cs)) logging.debug(' Geo: {}'.format(daymet_geo)) logging.debug(' Extent: {}'.format(daymet_extent)) logging.debug(' Origin: {} {}'.format(daymet_x, daymet_y)) # Subset data to a smaller extent if output_extent is not None: logging.info('\nComputing subset extent & geo') logging.debug(' Extent: {}'.format(output_extent)) # Assume input extent is in decimal degrees output_extent = drigo.project_extent(drigo.Extent(output_extent), drigo.epsg_osr(4326), daymet_osr, 0.001) output_extent = drigo.intersect_extents([daymet_extent, output_extent]) output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs) output_geo = output_extent.geo(daymet_cs) logging.debug(' Geo: {}'.format(output_geo)) logging.debug(' Extent: {}'.format(output_extent)) elif extent_path is not None: logging.info('\nComputing subset extent & geo') if extent_path.lower().endswith('.shp'): output_extent = drigo.feature_path_extent(extent_path) extent_osr = drigo.feature_path_osr(extent_path) extent_cs = None else: output_extent = drigo.raster_path_extent(extent_path) extent_osr = drigo.raster_path_osr(extent_path) extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True) output_extent = 
drigo.project_extent(output_extent, extent_osr, daymet_osr, extent_cs) output_extent = drigo.intersect_extents([daymet_extent, output_extent]) output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs) output_geo = output_extent.geo(daymet_cs) logging.debug(' Geo: {}'.format(output_geo)) logging.debug(' Extent: {}'.format(output_extent)) else: output_extent = daymet_extent.copy() output_geo = daymet_geo[:] # output_shape = output_extent.shape(cs=daymet_cs) xi, yi = drigo.array_geo_offsets(daymet_geo, output_geo, daymet_cs) output_rows, output_cols = output_extent.shape(daymet_cs) logging.debug(' Shape: {} {}'.format(output_rows, output_cols)) logging.debug(' Offsets: {} {} (x y)'.format(xi, yi)) # Read the elevation array elev_array = drigo.raster_to_array(elev_raster, mask_extent=output_extent, return_nodata=False) pair_array = refet.calcs._air_pressure_func(elev_array) del elev_array # Process each variable input_var = 'vp' output_var = 'ea' logging.info("\nVariable: {}".format(input_var)) # Build output folder var_ws = os.path.join(output_ws, output_var) if not os.path.isdir(var_ws): os.makedirs(var_ws) # Process each file in the input workspace for input_name in sorted(os.listdir(netcdf_ws)): logging.debug("{}".format(input_name)) input_match = daymet_re.match(input_name) if not input_match: logging.debug(' Regular expression didn\'t match, skipping') continue elif input_match.group('VAR') != input_var: logging.debug(' Variable didn\'t match, skipping') continue year_str = input_match.group('YEAR') logging.info(" Year: {}".format(year_str)) year_int = int(year_str) year_days = int(dt.datetime(year_int, 12, 31).strftime('%j')) if start_dt is not None and year_int < start_dt.year: logging.debug(' Before start date, skipping') continue elif end_dt is not None and year_int > end_dt.year: logging.debug(' After end date, skipping') continue # Build input file path input_raster = os.path.join(netcdf_ws, input_name) # if not os.path.isfile(input_raster): # logging.debug( # ' Input raster doesn\'t exist, skipping {}'.format( # input_raster)) # continue # Build output folder output_year_ws = os.path.join(var_ws, year_str) if not os.path.isdir(output_year_ws): os.makedirs(output_year_ws) # Read in the DAYMET NetCDF file input_nc_f = netCDF4.Dataset(input_raster, 'r') # logging.debug(input_nc_f.variables) # Check all valid dates in the year year_dates = _utils.date_range(dt.datetime(year_int, 1, 1), dt.datetime(year_int + 1, 1, 1)) for date_dt in year_dates: if start_dt is not None and date_dt < start_dt: logging.debug(' {} - before start date, skipping'.format( date_dt.date())) continue elif end_dt is not None and date_dt > end_dt: logging.debug(' {} - after end date, skipping'.format( date_dt.date())) continue else: logging.info(' {}'.format(date_dt.date())) output_path = os.path.join( output_year_ws, '{}_{}_daymet.img'.format(output_var, date_dt.strftime('%Y%m%d'))) if os.path.isfile(output_path): logging.debug(' {}'.format(output_path)) if not overwrite_flag: logging.debug(' File already exists, skipping') continue else: logging.debug(' File already exists, removing existing') os.remove(output_path) doy = int(date_dt.strftime('%j')) doy_i = range(1, year_days + 1).index(doy) # Arrays are being read as masked array with a fill value of -9999 # Convert to basic numpy array arrays with nan values try: input_ma = input_nc_f.variables[input_var][doy_i, yi:yi + output_rows, xi:xi + output_cols] except IndexError: logging.info(' date not in netcdf, skipping') continue input_nodata = 
float(input_ma.fill_value) sph_array = input_ma.data.astype(np.float32) sph_array[sph_array == input_nodata] = np.nan # Compute ea [kPa] from specific humidity [kg/kg] ea_array = (sph_array * pair_array) / (0.622 + 0.378 * sph_array) # Save the array as 32-bit floats drigo.array_to_raster(ea_array.astype(np.float32), output_path, output_geo=output_geo, output_proj=daymet_proj, stats_flag=stats_flag) del input_ma, ea_array, sph_array input_nc_f.close() del input_nc_f logging.debug('\nScript Complete')
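# The specific-humidity to vapour-pressure step above, restated as a hedged
# standalone helper; the 0.622/0.378 constants are the same dry/moist
# gas-constant ratio the script itself uses.
def ea_from_sph(sph, pair):
    """Actual vapour pressure [kPa] from specific humidity [kg/kg] and air pressure [kPa]."""
    return (sph * pair) / (0.622 + 0.378 * sph)

# e.g. ea_from_sph(0.008, 95.0) -> ~1.22 kPa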
def savetimestep(self, timestep, pcrdata, unit="mm", var="P", name="Precipitation"): """ save a single timestep for a variable input: - timestep - current timestep - pcrdata - pcraster map to save - unit - unit string - var - variable string - name - name of the variable """ # Open target netCDF file var = os.path.basename(var) self.nc_trg = netCDF4.Dataset(self.ncfile, "a", format=self.Format, zlib=self.zlib, complevel=9) self.nc_trg.set_fill_off() # read time axis and convert to time objects # TODO: use this to append time # time = self.nc_trg.variables['time'] # timeObj = cftime.num2date(time[:], units=time.units, calendar=time.calendar) idx = timestep - 1 buffreset = (idx + 1) % self.maxbuf bufpos = (idx) % self.maxbuf try: nc_var = self.nc_trg.variables[var] except: self.logger.debug("Creating variable " + var + " in netcdf file. Format: " + self.Format) if self.EPSG.lower() == "epsg:4326": nc_var = self.nc_trg.createVariable( var, "f4", ("time", "lat", "lon"), fill_value=-9999.0, zlib=self.zlib, complevel=9, least_significant_digit=self.least_significant_digit, ) nc_var.coordinates = "lat lon" else: nc_var = self.nc_trg.createVariable( var, "f4", ("time", "y", "x"), fill_value=-9999.0, zlib=self.zlib, complevel=9, least_significant_digit=self.least_significant_digit, ) nc_var.coordinates = "lat lon" nc_var.grid_mapping = "crs" nc_var.units = unit nc_var.standard_name = name self.nc_trg.sync() miss = float(nc_var._FillValue) data = pcr.pcr2numpy(pcr.scalar(pcrdata), miss) if var in self.bufflst: self.bufflst[var][bufpos, :, :] = data self.buffdirty = True else: self.bufflst[var] = self.timestepbuffer.copy() self.bufflst[var][bufpos, :, :] = data self.buffdirty = True # Write out timestep buffer..... if buffreset == 0 or idx == self.maxbuf - 1 or self.timesteps <= timestep: spos = idx - bufpos self.logger.debug("Writing buffer for " + var + " to file at: " + str(spos) + " " + str(int(bufpos) + 1) + " timesteps") nc_var[spos:idx + 1, :, :] = self.bufflst[var][0:bufpos + 1, :, :] self.nc_trg.sync() self.buffdirty = False
def __init__(self, swotL2_file, bounding_box=None, class_list=[1], lat_kwd='no_layover_latitude', lon_kwd='no_layover_longitude', class_kwd='no_layover_classification', min_points=100, project_data=True, verbose=False, proj='laea', x_0=0, y_0=0, lat_0=None, lon_0=None, ellps='WGS84', subsample_factor=1, **proj_kwds): self.verbose = verbose self.lat_kwd, self.lon_kwd = lat_kwd, lon_kwd self.subsample_factor = subsample_factor self.nc = netCDF4.Dataset(swotL2_file) if self.verbose: print('Dataset opened') for att_name, att_value in self.L2_META_KEY_DEFAULTS.items(): setattr(self, att_name, getattr(self.nc, att_name, att_value)) self.set_index_and_bounding_box(bounding_box, lat_kwd, lon_kwd, class_list, class_kwd=class_kwd) if self.verbose: print('Good data selected & bounding box calculated.') # Get reference locations for these data self.lat = self.get(lat_kwd) self.lon = self.get(lon_kwd) # Put in the radar/image coordinates too try: self.img_x = self.get('range_index') self.img_y = self.get('azimuth_index') except KeyError: try: print( 'Cant Find range_index, or azimuth_index variables,' ' assuming 2D-image image coordinates (like from a gdem)') Ny, Nx = np.shape(self.get(lat_kwd, use_index=False)) ix, iy = np.meshgrid(np.arange(Nx), np.arange(Ny)) self.img_x = ix[self.index] self.img_y = iy[self.index] except: print( 'WARNING: Input file does not contain range/azimuth index. ' 'Functions relying on radar coordinates WILL break!') self.img_x = None self.img_y = None if self.verbose: print('lat/lon read') # If not enough good points are found, raise Exception if len(self.lat) < min_points: raise Exception( 'number of good points: %d smaller than required: %d' % (len(self.lat), min_points)) # Project to a coordinate system if project_data: self.x, self.y = self.project(proj=proj, x_0=x_0, y_0=y_0, lat_0=lat_0, lon_0=lon_0, ellps=ellps, **proj_kwds) if self.verbose: print('projection set and x,y calculated')
def prepare_nc( trgFile, timeList, x, y, metadata, logger, EPSG="EPSG:4326", units=None, calendar="gregorian", Format="NETCDF4", complevel=9, zlib=True, least_significant_digit=None, FillValue=1e31, ): """ This function prepares a NetCDF file with given metadata, for a certain year, daily basis data The function assumes a gregorian calendar and a time unit 'Days since 1900-01-01 00:00:00' """ logger.info("Setting up netcdf output: " + trgFile) if units == None: # Use start of the run epoch = timeList[0] units = "seconds since %04d-%02d-%02d %02d:%02d:%02d.0 00:00" % ( epoch.year, epoch.month, epoch.day, epoch.hour, epoch.minute, epoch.second, ) startDayNr = cftime.date2num(timeList[0].replace(tzinfo=None), units=units, calendar=calendar) endDayNr = cftime.date2num(timeList[-1].replace(tzinfo=None), units=units, calendar=calendar) timeAR = np.linspace(startDayNr, endDayNr, num=len(timeList)) if os.path.exists(trgFile): os.remove(trgFile) nc_trg = netCDF4.Dataset(trgFile, "w", format=Format, zlib=zlib, complevel=complevel) logger.info("Setting up dimensions and attributes. Steps: " + str(len(timeList)) + " lat: " + str(len(y)) + " lon: " + str(len(x))) if len(timeAR) == 1: nc_trg.createDimension("time", 1) else: nc_trg.createDimension("time", 0) # NrOfDays*8 DateHour = nc_trg.createVariable("time", "f8", ("time", ), fill_value=FillValue, zlib=zlib, complevel=complevel) DateHour.units = units DateHour.calendar = calendar DateHour.standard_name = "time" DateHour.long_name = "time" DateHour.axis = "T" DateHour[:] = timeAR # make a proj4 string srs = osgeo.osr.SpatialReference() res = srs.ImportFromEPSG(int(EPSG[5:])) if res != 0: logger.error("EPGS not converted correctly: " + EPSG + ". Is the GDAL_DATA environment variable set correctly?") sys.exit(1) projStr = srs.ExportToProj4() proj_src = "+proj=longlat +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +no_defs" if srs.IsProjected() == 0: # ONly lat lon needed nc_trg.createDimension("lat", len(y)) nc_trg.createDimension("lon", len(x)) y_var = nc_trg.createVariable("lat", "f4", ("lat", ), fill_value=FillValue, zlib=zlib, complevel=complevel) y_var.standard_name = "latitude" y_var.long_name = "latitude" y_var.units = "degrees_north" y_var.axis = "Y" x_var = nc_trg.createVariable("lon", "f4", ("lon", ), fill_value=FillValue, zlib=zlib, complevel=complevel) x_var.standard_name = "longitude" x_var.long_name = "longitude" x_var.units = "degrees_east" x_var.axis = "X" y_var[:] = y x_var[:] = x crs = nc_trg.createVariable("crs", "c") crs.long_name = "wgs84" crs.proj4_params = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" crs.grid_mapping_name = "latitude_longitude" else: # Assume regular grid in m nc_trg.createDimension("y", len(y)) nc_trg.createDimension("x", len(x)) y_var = nc_trg.createVariable("y", "f4", ("y", ), fill_value=FillValue, zlib=zlib, complevel=complevel) y_var.standard_name = "projection_y_coordinate" y_var.long_name = "y-coordinate in Cartesian system" y_var.units = "m" y_var.axis = "Y" x_var = nc_trg.createVariable("x", "f4", ("x", ), fill_value=FillValue, zlib=zlib, complevel=complevel) x_var.standard_name = "projection_x_coordinate" x_var.long_name = "x-coordinate in Cartesian system" x_var.units = "m" x_var.axis = "X" y_var[:] = y x_var[:] = x crs = nc_trg.createVariable("crs", "c") crs.long_name = EPSG crs.grid_mapping_name = "universal_transverse_mercator" crs.utm_zone_number = srs.GetUTMZone() crs.semi_major_axis = srs.GetSemiMajor() crs.inverse_flattening = srs.GetInvFlattening() crs._CoordinateTransformType = "Projection" 
crs._CoordinateAxisTypes = "y x" crs.proj4_params = projStr # Also write lat lon fields XI, YI = np.meshgrid(x, y) lon_vals, lat_vals = convertCoord(projStr, proj_src, XI, YI) # Need to create lat-lon fields lat = nc_trg.createVariable("lat", "f4", ("y", "x")) lat.standard_name = "latitude" lat.long_name = "latitude coordinate" lat.units = "degrees_north" lat.coordinates = "lat lon" lat.grid_mapping = "wgs84" # lat._CoordinateAxisType = "Lat" lat[:, :] = lat_vals lon = nc_trg.createVariable("lon", "f4", ("y", "x")) lon.standard_name = "longitude" lon.long_name = "longitude coordinate" lon.units = "degrees_east" lon.coordinates = "lat lon" lon.grid_mapping = "wgs84" # lon._CoordinateAxisType = "Lon" lon[:, :] = lon_vals crs.EPSG_code = EPSG # now add all attributes from user-defined metadata for attr in metadata: nc_trg.setncattr(attr, metadata[attr]) nc_trg.sync() nc_trg.close()
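# convertCoord() is an external helper here; a hedged sketch of what it is
# assumed to do, implemented with pyproj (argument order matches the call
# above: source projection string, target projection string, coordinate grids):
from pyproj import Transformer

def convert_coord_sketch(proj_from, proj_to, XI, YI):
    transformer = Transformer.from_crs(proj_from, proj_to, always_xy=True)
    lon_vals, lat_vals = transformer.transform(XI, YI)
    return lon_vals, lat_vals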
def DoTheCombining(fpred,ppred,tcl,vco2,output,fvari=None,pvari=None,plot=False,month=None, extrapolatedyear=None,tclyear=None,method="DIVA",tcltype=None): """ Main function that does all the work - call this if importing as a library. fpred - interpolated fCO2 prediction filename ppred - interpolated pCO2 prediction filename tcl - atsr climatology filename vco2 - vCO2 (from Takahashi) filename output - the combined netcdf output filename fvari - fCO2 variance filename (output from interpolation) pvari - pCO2 variance filename (output from interpolation) plot - boolean, whether to create plots of the data (True or False) month - the month of which the data refers to - only required for title of plots extrapolatedyear - the year data has been extrapolated to tclyear - the year of the ATSR data (should be the same as extrapolatedyear?) method - the interpolation method that has been used (DIVA, GSTAT) """ #First check if all input files exist CheckFilesExist(fpred,ppred,tcl,vco2,fvari,pvari) if method == "DIVA": DataLoader=LoadDIVAAscii elif method == "GSTAT": DataLoader=LoadKrigedAscii else: raise Exception("Unrecognised interpolation method in combine function: %s"%method) f_pred=DataLoader(fpred) p_pred=DataLoader(ppred) if fvari is None: f_std = np.array([[netcdf_helper.MISSINGDATAVALUE] * 360] * 180) else: f_vari = DataLoader(fvari) if method == "GSTAT": f_std = np.where(f_vari>=0,f_vari**0.5,netcdf_helper.MISSINGDATAVALUE) #f_std[np.where(f_vari==-9999.)] = netcdf_helper.MISSINGDATAVALUE elif method == "DIVA": f_std=f_vari if pvari is None: p_std = np.array([[netcdf_helper.MISSINGDATAVALUE] * 360] * 180) else: p_vari = DataLoader(pvari) if method == "GSTAT": p_std = np.where(p_vari>=0,p_vari**0.5,netcdf_helper.MISSINGDATAVALUE) #p_std[np.where(p_vari==-9999.)] = netcdf_helper.MISSINGDATAVALUE elif method == "DIVA": p_std=p_vari # Get Tcl in 'tclyear' for each cell if tcl is not None: if tcltype == "aatsr": sstkeyname="sst_skin_mean" sstdataname="ARC_ATSR" elif tcltype == "reynolds": sstkeyname="sst_mean" sstdataname="Reynolds" else: raise "Unknown SST data type: %s" with netCDF4.Dataset(tcl) as sst_file: Tcl = sst_file.variables[sstkeyname] Tcl = Tcl[0,:,:] if sst_file.variables[sstkeyname].units in ['degrees C']: #we want them in Kelvin (to be consistent with how the scripts were originally written) Tcl=Tcl+273.15 elif sst_file.variables[sstkeyname].units not in ['Kelvin','kelvin','K']: print("Unsure of data units for temperature - aassuming kelvin.") else: Tcl = None # Get vCO2 for each cell with netCDF4.Dataset(vco2) as vCO2_file: vCO2_data = vCO2_file.variables['vCO2_2010_grid_up'] vCO2_data = vCO2_data[0,:,:] #Lon/lat values for dimension arrays in netCDF xi = np.linspace(-179.5,179.5,360) yi = np.linspace(-89.5, 89.5, 180) #Write out the data to the netCDF file #Test directory exists if not os.path.exists(os.path.dirname(output)) and os.path.dirname(output) != "": raise Exception("Directory to write file to does not exist: %s"%(os.path.dirname(output))) if extrapolatedyear is not None: extrapyear=str(extrapolatedyear)+'_' nameext=" extrapolated to %d"%extrapolatedyear else: extrapyear="" nameext="" with netCDF4.Dataset(output, 'w', format = 'NETCDF4') as ncfile: #Add standard dimensions and arrays netcdf_helper.standard_setup_SOCAT(ncfile,timedata=1e9,londata=xi,latdata=yi) #Now add other grid arrays fCO2_interp_pred = ncfile.createVariable('fCO2_'+extrapyear+'interpolated_pred', 'f4',('time','latitude','longitude'), fill_value=netcdf_helper.MISSINGDATAVALUE,zlib=True) 
fCO2_interp_pred[:] = f_pred fCO2_interp_pred.units = 'uatm' fCO2_interp_pred.missing_value = netcdf_helper.MISSINGDATAVALUE fCO2_interp_pred.valid_min = 0. fCO2_interp_pred.valid_max = 1e6 fCO2_interp_pred.scale_factor = 1. fCO2_interp_pred.add_offset = 0. fCO2_interp_pred.standard_name = "fCO2_"+extrapyear+"interpolated_pred" fCO2_interp_pred.long_name = "Fugacity of CO2 using SOCAT methodology"+nameext+" and interpolated" fCO2_interp_error = ncfile.createVariable('fCO2_'+extrapyear+'interpolated_error', 'f4',('time','latitude','longitude'), fill_value=netcdf_helper.MISSINGDATAVALUE,zlib=True) fCO2_interp_error[:] = f_std fCO2_interp_error.units = 'uatm' fCO2_interp_error.missing_value = netcdf_helper.MISSINGDATAVALUE fCO2_interp_error.valid_min = 0. fCO2_interp_error.valid_max = 1e6 fCO2_interp_error.scale_factor = 1. fCO2_interp_error.add_offset = 0. fCO2_interp_error.standard_name = "fCO2_"+extrapyear+"interpolated_std" fCO2_interp_error.long_name = "Fugacity of CO2 using SOCAT methodology"+nameext+" error of interpolation" pCO2_interp_pred = ncfile.createVariable('pCO2_'+extrapyear+'interpolated_pred', 'f4',('time','latitude','longitude'), fill_value=netcdf_helper.MISSINGDATAVALUE,zlib=True) pCO2_interp_pred[:] = p_pred pCO2_interp_pred.units = 'uatm' pCO2_interp_pred.missing_value = netcdf_helper.MISSINGDATAVALUE pCO2_interp_pred.valid_min = 0. pCO2_interp_pred.valid_max = 1e6 pCO2_interp_pred.scale_factor = 1. pCO2_interp_pred.add_offset = 0. pCO2_interp_pred.standard_name = "pCO2_"+extrapyear+"interpolated_pred" pCO2_interp_pred.long_name = "Partial pressure of CO2 using SOCAT methodology"+nameext+" and interpolated" pCO2_interp_error = ncfile.createVariable('pCO2_'+extrapyear+'interpolated_error', 'f4',('time','latitude','longitude'), fill_value=netcdf_helper.MISSINGDATAVALUE,zlib=True) pCO2_interp_error[:] = p_std pCO2_interp_error.units = 'uatm' pCO2_interp_error.missing_value = netcdf_helper.MISSINGDATAVALUE pCO2_interp_error.valid_min = 0. pCO2_interp_error.valid_max = 1e6 pCO2_interp_error.scale_factor = 1. pCO2_interp_error.add_offset = 0. pCO2_interp_error.standard_name = "fCO2_"+extrapyear+"interpolated_error" pCO2_interp_error.long_name = "Partial pressure of CO2 using SOCAT methodology"+nameext+" error of interpolation" if tcl is not None: Tcl_year = ncfile.createVariable('Tcl_'+tclyear,'f4',('time','latitude','longitude'), fill_value=netcdf_helper.MISSINGDATAVALUE,zlib=True) Tcl_year[:] = Tcl Tcl_year.units = 'Kelvin' Tcl_year.missing_value = netcdf_helper.MISSINGDATAVALUE Tcl_year.valid_min = 0. Tcl_year.valid_max = 1e6 Tcl_year.scale_factor = 1. Tcl_year.add_offset = 0. Tcl_year.standard_name = "Tcl_"+tclyear Tcl_year.long_name = "Climatologial temperature from "+sstdataname+" in "+tclyear vCO2 = ncfile.createVariable('vCO2','f4',('time','latitude','longitude'), fill_value=netcdf_helper.MISSINGDATAVALUE,zlib=True) vCO2[:] = vCO2_data vCO2.units = 'ppm' vCO2.missing_value = netcdf_helper.MISSINGDATAVALUE vCO2.valid_min = 0. vCO2.valid_max = 1e6 vCO2.scale_factor = 1. vCO2.add_offset = 0. vCO2.standard_name = "vCO2" vCO2.long_name = "Concentration of CO2 in dry air in 2000 from NOAA ESRL" vCO2.data_citation="Dlugokencky, E.J., K.A. Masarie, P.M. Lang, and P.P. Tans (2014), NOAA Greenhouse Gas Reference from Atmospheric Carbon Dioxide Dry Air Mole Fractions from the NOAA ESRL Carbon Cycle Cooperative Global Air Sampling Network. Data Path: ftp://aftp.cmdl.noaa.gov/data/trace_gases/co2/flask/surface/." 
vCO2.data_contacts="Ed.Dlugokencky_AT_noaa.gov, Pieter.Tans_AT_noaa.gov" if plot is True: #if month has not been passed then use the output netcdf filename in the graph title if month is None: month=output #create a plot of the predicted fCO2 fpredfilename=output+'_pred_fCO2_'+extrapyear+'.png' fpredtitle = 'pred fCO2 (uatm), %s, year %s'%(month,extrapyear) goodindices=np.where(~np.isnan(f_pred)&(f_pred!=netcdf_helper.MISSINGDATAVALUE)) scale=(f_pred[goodindices].min(),f_pred[goodindices].max()) #scale=(200,600) MakeGraph(filename=fpredfilename,x=xi,y=yi,z=f_pred,title=fpredtitle,scale=scale) #create a plot of the standard deviation of fCO2 prediction if it has been included in the combination netcdf file if fvari is not None: fvarfilename=output+'_std_fCO2_'+extrapyear+'.png' fvartitle = 'std fCO2 (uatm), %s, year %s'%(month,extrapyear) MakeGraph(filename=fvarfilename,x=xi,y=yi,z=f_std,title=fvartitle,scale=(5,50))
fam_member = ['r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', 'r9', 'r10'] yi1 = '1950' yf1 = '2020' yi2 = '2021' yf2 = '2100' # loop on simulations family = [] member = [] sie = [] for f in sim_family: for m in fam_member: # reading sic data from CanESM2-LE files nc1 = netcdf.Dataset( '/dmf2/scenario/external_data/cccma/CanESM2_large_ensemble/historical-' + f + '/day/seaIce/sic/' + m + 'i1p1/sic_day_CanESM2_historical-' + f + '_' + m + 'i1p1_' + yi1 + '0101-' + yf1 + '1231.nc', 'r') nc2 = netcdf.Dataset( '/dmf2/scenario/external_data/cccma/CanESM2_large_ensemble/historical-' + f + '/day/seaIce/sic/' + m + 'i1p1/sic_day_CanESM2_historical-' + f + '_' + m + 'i1p1_' + yi2 + '0101-' + yf2 + '1231.nc', 'r') # concatenating on time axis and selecting september data time1 = nc1.variables['time'] time2 = nc2.variables['time'] sic1 = nc1.variables['sic'][:, 51:, :] # Northern Hemisphere sic2 = nc2.variables['sic'][:, 51:, :] years1 = np.arange(int(yi1), int(yf1) + 1) years2 = np.arange(int(yi2), int(yf2) + 1) sic_fm = np.zeros((1, np.size(sic1, 1) * np.size(sic1, 2)))
def get_or_create(self, uri, *args, **kwargs): ''' Create dataset and corresponding metadata Parameters: ---------- uri : str URI to file or stream openable by netCDF4.Dataset Returns: ------- dataset and flag ''' # check if dataset already exists uris = DatasetURI.objects.filter(uri=uri) if len(uris) > 0: return uris[0].dataset, False # set source pp = Platform.objects.get(short_name='BUOYS') ii = Instrument.objects.get(short_name='DRIFTING BUOYS') source = Source.objects.get_or_create(platform=pp, instrument=ii)[0] # dc = DataCenter.objects.get(short_name='DOC/NOAA/OAR/AOML') iso_category = ISOTopicCategory.objects.get(name='Oceans') # dc = DataCenter.objects.get(short_name='NM/ME/KO/KM/JA/IN/CS/HZ/IF/BO/AO') nc = netCDF4.Dataset(uri) # Time variable read time = nc.variables['JULD'] #Reading depth variable depth = nc.variables['PRES'] print 'checking data', depth[0] entrytitle = nc.comment print entrytitle # checking whether there is more than one profile data on the same day, Found that data from AOML has this problem # if there is, then delete the wrong data, In here the shallowest profile is removed # if we check manually we can see that the wrong profile doesnot go beyond 500 meter. checkdepth = 0 findepth = np.zeros(time.shape[0]) for i in range(0, depth.shape[0]): print i maxdepth = np.amax(depth[i]) findepth[i] = maxdepth if (maxdepth > checkdepth): dd = i checkdepth = maxdepth maxdepth = findepth[dd] # Reading info about the data center dca = nc.variables['DATA_CENTRE'][dd] dcstr2 = ''.join(map(str, dca)) datacenter = dcstr2.replace("--", "") print datacenter if (datacenter == 'AO'): dc = DataCenter.objects.get(short_name='DOC/NOAA/OAR/AOML') # dcnew = 'AOML' # print dcnew # Reading Platform number platnum = nc.variables['PLATFORM_NUMBER'] str2 = ''.join(map(str, platnum)) platnumnew = str2.replace("--", "") # final Date newdate = datetime.datetime(1950, 1, 1, 0, 0) + datetime.timedelta( time[dd]) yearargo = newdate.strftime('%Y') monargo = newdate.strftime('%m') dayargo = newdate.strftime('%d') # Reading Lat Lon Variables latitude = nc.variables['LATITUDE'][dd] longitude = nc.variables['LONGITUDE'][dd] location = GEOSGeometry('POINT(%s %s)' % (longitude, latitude)) geolocation = GeographicLocation.objects.get_or_create( geometry=location)[0] datamodef = nc.variables['DATA_MODE'][dd] if (datamodef == 'R'): datamode = 'Real time mode' print datamode if (datamodef == 'A'): datamode = 'Delayed time mode' print datamode ds = Dataset(entry_id=platnumnew, ISO_topic_category=iso_category, source=source, data_center=dc, geographic_location=geolocation, time_coverage_start=newdate) ds.save() ds_uri = DatasetURI.objects.get_or_create(uri=uri, dataset=ds)[0] return ds, True
input_file = "MMIJ-ALLSTA.csv.gz"
# The output file is a netCDF4-file
output_file = "MMIJ-compound-sem.nc"

pdf = pd.read_csv(input_file, encoding='iso-8859-15', sep=';',
                  header=[1, 2], na_values=['NaN'],
                  parse_dates=True, index_col=0)

# %%
f = nc.Dataset(output_file, 'w')
with open('metadata-global.yaml') as g:
    for attribute, value in yaml_load(g).items():
        f.setncattr_string(attribute, value)
f.comment = yaml_load(""">-
    The statistics datasets have as values a compound data structure with the
    minimum ('min'), maximum ('max'), average ('avg'), and standard deviation
    ('std') of the samples within the given time interval.\n
    Custom dataset attributes:\n
    \t* 'quality_indicators' is a global attribute that describes the values of
    the (custom) 'quality' attribute.\n
    \t* 'standard_error' describes the standard error of the avg and std values
    as a fraction of the std values.\n
    \t* 'uncertainty_abs' describes the absolute uncertainty of the sampled
    values.\n""")
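# A hedged sketch of the compound "statistics" type that the output filename
# and comment above describe, using netCDF4's createCompoundType API; the
# type and variable names below are placeholders.
import numpy as np
import netCDF4 as nc

stat_dtype = np.dtype([('min', 'f4'), ('max', 'f4'), ('avg', 'f4'), ('std', 'f4')])
demo = nc.Dataset('compound-demo.nc', 'w')
stat_t = demo.createCompoundType(stat_dtype, 'statistic')
demo.createDimension('time', None)
wind = demo.createVariable('wind_speed', stat_t, ('time',))
demo.close()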
def remap(src_array, remap_file, src_grad1=None, src_grad2=None, \ src_grad3=None, spval=1e37, verbose=False): ''' remap based on addresses and weights computed in a setup phase ''' # get info from remap_file data = netCDF.Dataset(remap_file, 'r') title = data.title map_method = data.map_method normalization = data.normalization src_grid_name = data.source_grid dst_grid_name = data.dest_grid src_grid_size = len(data.dimensions['src_grid_size']) dst_grid_size = len(data.dimensions['dst_grid_size']) num_links = len(data.dimensions['num_links']) src_grid_dims = data.variables['src_grid_dims'][:] dst_grid_dims = data.variables['dst_grid_dims'][:] # get weights and addresses from remap_file map_wts = data.variables['remap_matrix'][:] dst_add = data.variables['dst_address'][:] src_add = data.variables['src_address'][:] # get destination mask dst_mask = data.variables['dst_grid_imask'][:] # remap from src grid to dst grid if src_grad1 is not None: iorder = 2 else: iorder = 1 if verbose is True: print('Reading remapping: ', title) print('From file: ', remap_file) print(' ') print('Remapping between:') print(src_grid_name) print('and') print(dst_grid_name) print('Remapping method: ', map_method) ndim = len(src_array.squeeze().shape) if (ndim == 2): tmp_dst_array = np.zeros((dst_grid_size)) tmp_src_array = src_array.flatten() if iorder == 1: # first order remapping # insure that map_wts is a (num_links,4) array tmp_map_wts = np.zeros((num_links, 4)) tmp_map_wts[:, 0] = map_wts[:, 0].copy() map_wts = tmp_map_wts pyroms.remapping.scrip.remap(tmp_dst_array, map_wts, \ dst_add, src_add, tmp_src_array) if iorder == 2: # second order remapping if map_method == 'conservative': # insure that map_wts is a (num_links,4) array tmp_map_wts = np.zeros((num_links, 4)) tmp_map_wts[:, 0:2] = map_wts[:, 0:2].copy() map_wts = tmp_map_wts tmp_src_grad1 = src_grad1.flatten() tmp_src_grad2 = src_grad2.flatten() pyroms.remapping.scrip.remap(tmp_dst_array, map_wts, \ dst_add, src_add, tmp_src_array, \ tmp_src_grad1, tmp_src_grad2) elif map_method == 'bicubic': tmp_src_grad1 = src_grad1.flatten() tmp_src_grad2 = src_grad2.flatten() tmp_src_grad3 = src_grad3.flatten() pyroms.remapping.scrip.remap(tmp_dst_array, map_wts, \ dst_add, src_add, tmp_src_array, \ tmp_src_grad1, tmp_src_grad2, \ tmp_src_grad3) else: raise ValueError('Unknown method') # mask dst_array idx = np.where(dst_mask == 0) tmp_dst_array[idx] = spval tmp_dst_array = np.ma.masked_values(tmp_dst_array, spval) # reshape dst_array = np.reshape(tmp_dst_array, (dst_grid_dims[1], \ dst_grid_dims[0])) elif (ndim == 3): nlev = src_array.shape[0] dst_array = np.zeros((nlev, dst_grid_dims[1], dst_grid_dims[0])) # loop over vertical level for k in range(nlev): tmp_src_array = src_array[k, :, :].flatten() tmp_dst_array = np.zeros((dst_grid_size)) if iorder == 1: # first order remapping # insure that map_wts is a (num_links,4) array tmp_map_wts = np.zeros((num_links, 4)) tmp_map_wts[:, 0] = map_wts[:, 0].copy() map_wts = tmp_map_wts pyroms.remapping.scrip.remap(tmp_dst_array, map_wts, \ dst_add, src_add, tmp_src_array) if iorder == 2: # second order remapping if map_method == 'conservative': tmp_src_grad1 = src_grad1.flatten() tmp_src_grad2 = src_grad2.flatten() pyroms.remapping.scrip.remap(tmp_dst_array, map_wts, \ dst_add, src_add, tmp_src_array, \ tmp_src_grad1, tmp_src_grad2) elif map_method == 'bicubic': tmp_src_grad1 = src_grad1.flatten() tmp_src_grad2 = src_grad2.flatten() tmp_src_grad3 = src_grad3.flatten() pyroms.remapping.scrip.remap(tmp_dst_array, map_wts, \ 
dst_add, src_add, tmp_src_array, \ tmp_src_grad1, tmp_src_grad2, \ tmp_src_grad3) else: raise ValueError('Unknown method') # mask dst_array idx = np.where(dst_mask == 0) tmp_dst_array[idx] = spval tmp_dst_array = np.ma.masked_values(tmp_dst_array, spval) # reshape dst_array[k,:,:] = np.reshape(tmp_dst_array, (dst_grid_dims[1], \ dst_grid_dims[0])) else: raise ValueError('src_array must have two or three dimensions') # close data file data.close() return dst_array
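# A hedged usage sketch for remap() above; the weights filename and the
# source-grid shape are placeholders, and the file must have been produced
# by a SCRIP setup phase.
import numpy as np

src_field = np.zeros((120, 160))                  # placeholder source-grid field
dst_field = remap(src_field, 'remap_weights.nc')  # masked array on the dest grid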
def savetimestep(self, timestep, data, unit="mm", var='P', name="Precipitation", metadata={}): """ save a single timestep for a variable input: - timestep - current timestep - data - pcraster map to save - unit - unit string - var - variable string - name - name of the variable """ # Open target netCDF file var = os.path.basename(var) self.nc_trg = netCDF4.Dataset( self.ncfile, 'a', format=self.Format, zlib=self.zlib, complevel=9, ) self.nc_trg.set_fill_off() # read time axis and convert to time objects # TODO: use this to append time # time = self.nc_trg.variables['time'] # timeObj = netCDF4.num2date(time[:], units=time.units, calendar=time.calendar) idx = timestep - 1 buffreset = (idx + 1) % self.maxbuf bufpos = (idx) % self.maxbuf try: nc_var = self.nc_trg.variables[var] except: self.logger.debug("Creating variable " + var + " in netcdf file. Format: " + self.Format) if self.EPSG.lower() == "epsg:4326": nc_var = self.nc_trg.createVariable( var, 'f4', ( 'time', 'lat', 'lon', ), fill_value=self.FillVal, zlib=self.zlib, complevel=9, least_significant_digit=self.least_significant_digit) nc_var.coordinates = "lat lon" else: nc_var = self.nc_trg.createVariable( var, 'f4', ( 'time', 'y', 'x', ), fill_value=self.FillVal, zlib=self.zlib, complevel=9, least_significant_digit=self.least_significant_digit) nc_var.coordinates = "lat lon" nc_var.grid_mapping = "crs" nc_var.units = unit nc_var.standard_name = name for attr in metadata: # print metadata[attr] nc_var.setncattr(attr, metadata[attr]) self.nc_trg.sync() if self.bufflst.has_key(var): self.bufflst[var][bufpos, :, :] = data else: self.bufflst[var] = self.timestepbuffer.copy() self.bufflst[var][bufpos, :, :] = data # Write out timestep buffer..... if buffreset == 0 or idx == self.maxbuf - 1 or self.timesteps <= timestep: spos = idx - bufpos self.logger.info("Writing buffer for " + var + " to file at: " + str(spos) + " " + str(int(bufpos) + 1) + " timesteps") nc_var[spos:idx + 1, :, :] = self.bufflst[var][0:bufpos + 1, :, :] self.nc_trg.sync()
import numpy
import struct
import netCDF4
from pylab import *

stats = netCDF4.Dataset("prandtlslope_default_0000000.nc", "r")
t = stats.variables["time"][:]
end = t.size
start = t.size - 5
dt = t[1] - t[0]

z = stats.variables["z"][:]
zh = stats.variables["zh"][:]
dz = zh[1::] - zh[0:-1]

B0 = 0.005
N2 = 3.
L0 = (B0 / N2**1.5)**.5

henct = (2. * B0 / N2 * t)**.5
benct = henct * N2
wenct = (B0 * henct)**(1. / 3.)
tenct = henct / (B0 / N2**1.5)**.5

henc = numpy.mean(henct[start:end])
benc = numpy.mean(benct[start:end])
wenc = numpy.mean(wenct[start:end])

u2_turb = average(stats.variables["u2_turb"][start:end, :], 0)
u2_visc = average(stats.variables["u2_visc"][start:end, :], 0)
import numpy as np
import netCDF4
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from mpl_toolkits.basemap import Basemap

# Function prints each variable within the dataset
def show_vars(dataset):
    for v in dataset.variables:
        print(v)

# Read the dataset from the GHRC OPeNDAP server
dataset_url = (
    'https://ghrc.nsstc.nasa.gov:443/opendap/ssmis/f17/3day/data/2017/f17_ssmis_20170904v7_d3d.nc'
)
dataset = netCDF4.Dataset(dataset_url)
show_vars(dataset)

# Extract data parameters from the file
data_vari = dataset["atmosphere_water_vapor_content"][:, :]  # Name of the parameter of interest
lats = dataset["latitude"][:]   # Extract latitude
lons = dataset["longitude"][:]  # Extract longitude
conv_lats, conv_lons = np.meshgrid(lons, lats)
dataset.close()  # Close the dataset once the desired parameters are extracted to conserve memory

####### Account for data scales and flags #######
# Scale factor and quality flags. Note that these current values are for the variable used in this example.
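# A hedged sketch of the masking/scaling step the comment above introduces; the
# threshold and scale factor below are placeholders, NOT the real values for
# this product (check the dataset attributes for the actual ones).
FLAG_THRESHOLD = 250   # placeholder: values above this are quality flags
SCALE_FACTOR = 0.3     # placeholder scale factor
vapor = np.where(data_vari <= FLAG_THRESHOLD, data_vari * SCALE_FACTOR, np.nan)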
def dfgood(emolt_QCed_path, depth_ok, min_miles_from_dock, temp_ok, fraction_depth_error, mindist_allowed, emolt_no_telemetry): '''get a dataframe include only good data of emolt_QCed.csv and emolt_no_telemetry.csv together''' emolt_QCed = pd.read_csv(emolt_QCed_path, index_col=0) emolt_QCed_df = emolt_QCed[emolt_QCed['flag'] == 0] emolt_QCed_df.index = range(len(emolt_QCed_df)) emolt_no_telemetry.index = range(len(emolt_no_telemetry)) flag = [] url = 'https://ngdc.noaa.gov/thredds/dodsC/crm/crm_vol1.nc' try: nc = netCDF4.Dataset(url).variables lon = nc['x'][:] lat = nc['y'][:] emolt_no_telemetry['datet'] = pd.to_datetime( emolt_no_telemetry['datet']) for k in range(len(emolt_no_telemetry)): depth_ngdc = get_depth(nc, lon, lat, emolt_no_telemetry['lon'][k], emolt_no_telemetry['lat'][k], mindist_allowed) if gps_compare_JiM(emolt_no_telemetry['lat'][k], emolt_no_telemetry['lon'][k], min_miles_from_dock ) == 'yes': # this means it is near a dock flag.append(1) elif (float(emolt_no_telemetry['mean_temp'][k]) < temp_ok[0]) or ( float(emolt_no_telemetry['mean_temp'][k]) > temp_ok[1]): #elif (emolt_no_telemetry['mean_temp'][k]<temp_ok[0]) or (emolt_no_telemetry['mean_temp'][k]>temp_ok[1]): flag.append(2) elif (emolt_no_telemetry['depth'][k] < depth_ok[0]) or ( emolt_no_telemetry['depth'][k] > depth_ok[1]): flag.append(3) elif abs(emolt_no_telemetry['depth'][k] - depth_ngdc) / depth_ngdc > fraction_depth_error: flag.append(4) else: flag.append(0) # good data except: #can not connect 'https://www.ngdc.noaa.gov/thredds/dodsC/crm/crm_vol1.nc' emolt_no_telemetry['datet'] = pd.to_datetime( emolt_no_telemetry['datet']) for k in range(len(emolt_no_telemetry)): if gps_compare_JiM(emolt_no_telemetry['lat'][k], emolt_no_telemetry['lon'][k], min_miles_from_dock ) == 'yes': # this means it is near a dock flag.append(1) elif (float(emolt_no_telemetry['mean_temp'][k]) < temp_ok[0]) or ( float(emolt_no_telemetry['mean_temp'][k]) > temp_ok[1]): #elif (emolt_no_telemetry['mean_temp'][k]<temp_ok[0]) or (emolt_no_telemetry['mean_temp'][k]>temp_ok[1]): flag.append(2) elif (emolt_no_telemetry['depth'][k] < depth_ok[0]) or ( emolt_no_telemetry['depth'][k] > depth_ok[1]): flag.append(3) else: flag.append(0) # good data emolt_no_telemetry['flag'] = flag df = emolt_QCed_df.append(emolt_no_telemetry) dfnew = df[[ 'vessel', 'datet', 'lat', 'lon', 'depth', 'depth_range', 'hours', 'mean_temp', 'std_temp', 'flag' ]] ##dfnew.to_csv('/net/pubweb_html/drifter/emolt_QCed.csv') dfgood = dfnew[dfnew['flag'] == 0] # restrict to good data only dfgood['datet'] = pd.to_datetime(dfgood['datet']) dfgood = dfgood.sort_values(by=['datet']) dfgood.index = range(len(dfgood)) return dfgood
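# A hedged usage sketch for dfgood(); every argument value below is a
# placeholder, not a recommendation:
# good = dfgood('emolt_QCed.csv', depth_ok=(2, 300), min_miles_from_dock=0.5,
#               temp_ok=(0, 30), fraction_depth_error=0.2,
#               mindist_allowed=0.4, emolt_no_telemetry=df_no_telemetry)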
# -*- coding: utf-8 -*-
"""
Author = Chaidar
"""
import netCDF4
import pandas as pd

data = netCDF4.Dataset("path to your netCDF data")

# see all the data variables
print(data.variables.keys())

# call the data
lat = data.variables['latitude'][:]
lon = data.variables['longitude'][:]
thetao = data.variables['thetao'][:]
bottomT = data.variables['bottomT'][:]
so = data.variables['so'][:]
zos = data.variables['zos'][:]
depth = data.variables['depth'][:]
time = data.variables['time']
dtime = netCDF4.num2date(time[:], time.units)
nlon = len(lon)
nlat = len(lat)

# make empty lists for the data variables
bottomTlist = []
zoslist = []
latlist = []
lonlist = []
timelist = []

print('convert bottomT and zos')
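# The script above ends just before the conversion loop it announces. This is
# a minimal sketch of what that loop might look like, assuming bottomT and zos
# are dimensioned (time, lat, lon) and the goal is a long-format table; it is
# an illustration, not the author's original loop.
for t in range(len(dtime)):
    for i in range(nlat):
        for j in range(nlon):
            timelist.append(dtime[t])
            latlist.append(lat[i])
            lonlist.append(lon[j])
            bottomTlist.append(bottomT[t, i, j])
            zoslist.append(zos[t, i, j])

df = pd.DataFrame({'time': timelist, 'lat': latlist, 'lon': lonlist,
                   'bottomT': bottomTlist, 'zos': zoslist})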
import os
import numpy as np
import netCDF4 as nc4


def postprocess(exp='a1'):
    """Convert PISM output to SHMIP conventions."""

    # boot filename
    if exp[0] == 'e':
        bfilename = 'input/boot_%s.nc' % exp[:2]
    elif exp[0] == 'f':
        bfilename = 'input/boot_e1.nc'
    else:
        bfilename = 'input/boot_sqrt.nc'

    # extra and output filenames
    efilename = 'output/%s_extra.nc' % exp
    ofilename = 'processed/%s_%s.nc' % (exp[:2].upper() + exp[2:], auth)

    # check for file presence
    if not os.path.isfile(bfilename):
        print("Warning: could not find input for exp %s." % exp.upper())
        return
    elif not os.path.isfile(efilename):
        print("Warning: could not find output for exp %s." % exp.upper())
        return
    else:
        print("Postprocessing experiment %s..." % exp.upper())

    # open datasets
    bds = nc4.Dataset(bfilename, 'r')
    eds = nc4.Dataset(efilename, 'r')
    ods = nc4.Dataset(ofilename, 'w')

    # copy global attributes from extra file
    copy_attributes(eds, ods)

    # read coordinate variables from extra file
    x = eds.variables['x']
    y = eds.variables['y']
    t = eds.variables['time']
    dx = x[1] - x[0]
    dy = y[1] - y[0]

    # prepare slicing on x coordinate
    if exp[0] in ('a', 'b', 'c', 'd'):
        xcond = (0e3 <= x[:]) * (x[:] <= 100e3)
    else:
        xcond = (0e3 <= x[:]) * (x[:] <= 6e3)

    # prepare slicing on time coordinate
    if exp[0] == 'c':
        tcond = (t[:] >= t[-1] - day)
    elif exp[0] in ('d', 'f'):
        tcond = (t[:] >= t[-1] - year)
    else:
        tcond = (t[:] >= 0.0)

    # prepare sliced coordinates
    ys = y[:]
    xs = x[xcond]
    ts = t[tcond]

    # set additional global attributes
    ods.title = 'PISM experiment %s.' % exp.upper()
    ods.meshtype = 'structured'
    ods.dimension = '2D'
    ods.channels_on_edges = 'no'
    ods.institution = '%s, %s' % (name, inst)
    ods.references = ('http://pism-docs.org, '
                      'https://shmip.bitbucket.io, '
                      'https://github.com/jsegu/pism-shmip')

    # create dimensions
    ods.createDimension('dim', 2)  # number of spatial dimensions
    ods.createDimension('index1', len(xs) * len(ys))  # regular grid
    ods.createDimension('index2', len(xs) * len(ys))  # staggered grid
    ods.createDimension('time', None)

    # create SHMIP node (PISM cell center) coordinate variables
    ovar = ods.createVariable('coords1', x.dtype, ('dim', 'index1'))
    ovar[:] = np.meshgrid(ys, xs)[::-1]
    ovar.long_name = 'node coordinates'
    ovar.pism_name = 'cell center coordinate'
    ovar.units = x.units

    # create SHMIP cell (PISM staggered) coordinate variables
    # (the sign of the x and y shifts was deduced from model output)
    ovar = ods.createVariable('coords2', x.dtype, ('dim', 'index2'))
    ovar[:] = np.meshgrid(ys + dy / 2, xs + dx / 2)[::-1]
    ovar.long_name = 'cell midpoint coordinates'
    ovar.pism_name = 'staggered grid coordinate'
    ovar.units = x.units

    # copy time coordinate
    ovar = ods.createVariable('time', t.dtype, ('time',))
    copy_attributes(t, ovar)
    ovar[:] = ts[:]
    ovar.long_name = 'time'
    ovar.units = 's'

    # copy boot bedrock topography
    bvar = bds.variables['topg']
    ovar = ods.createVariable('B', bvar.dtype, ('index1',))
    ovar[:] = bvar[:, :, xcond].T.flatten()
    copy_attributes(bvar, ovar)
    ovar.long_name = 'bed elevation'

    # copy boot ice thickness
    bvar = bds.variables['thk']
    ovar = ods.createVariable('H', bvar.dtype, ('index1',))
    ovar[:] = bvar[:, :, xcond].T.flatten()
    copy_attributes(bvar, ovar)
    ovar.long_name = 'ice thickness'

    # copy effective pressure
    evar = eds.variables['effbwp']
    ovar = ods.createVariable('N', evar.dtype, ('time', 'index1'))
    ovar[:] = evar[tcond, xcond]
    copy_attributes(evar, ovar)
    ovar.long_name = 'effective pressure'

    # copy water sheet thickness
    evar = eds.variables['bwat']
    ovar = ods.createVariable('h', evar.dtype, ('time', 'index1'))
    ovar[:] = evar[tcond, xcond]
    copy_attributes(evar, ovar)
    ovar.long_name = 'water sheet thickness'

    # compute water sheet discharge (velocity converted from m/year to m/s)
    h = eds.variables['bwat'][tcond, xcond]
    u = eds.variables['bwatvel[0]'][tcond, xcond]
    v = eds.variables['bwatvel[1]'][tcond, xcond]
    ovar = ods.createVariable('q', evar.dtype, ('time', 'index2'))
    ovar[:] = h * (u**2 + v**2)**0.5 / (365.0 * 24 * 60 * 60)
    ovar.long_name = 'water sheet discharge'
    ovar.units = 'm^2/s'

    # close datasets
    bds.close()
    eds.close()
    ods.close()
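# Hypothetical scaffolding for the function above: copy_attributes and the
# module-level constants (auth, name, inst, day, year) are assumed to be
# defined elsewhere in the original script; minimal stand-ins are sketched
# here for completeness.
def copy_attributes(src, dst):
    # copy all netCDF attributes from src to dst (assumed helper behavior)
    for attr in src.ncattrs():
        if not attr.startswith('_'):  # skip reserved attributes like _FillValue
            dst.setncattr(attr, src.getncattr(attr))

auth = 'auth'                            # author tag in output filenames (assumed)
name, inst = 'Author Name', 'Institute'  # assumed global attribute metadata
day = 24.0 * 60 * 60                     # one day in seconds
year = 365.0 * day                       # one year in seconds

for exp in ('a1', 'b1', 'c1'):  # illustrative subset of experiments
    postprocess(exp)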
    default='_Takahashi', help='string to append to file name')
parser.add_argument(
    '-c', '--csvfile', action='store_true',
    help="create csv file with extension '.csv' instead of netCDF")
args = parser.parse_args()
infiles = args.infiles
outfilename = args.outfile
extension = args.extension
csvfile = args.csvfile

for infilename in infiles:  # iterate over the parsed arguments rather than raw sys.argv
    print("INPUT FILE:", infilename)
    infile = nc.Dataset(infilename, 'r')
    if outfilename is None:
        filename, extension2 = os.path.splitext(infilename)
        if csvfile:
            extension2 = '.csv'
        outfilename = filename + extension + extension2
    print("OUTPUT FILE:", outfilename)
    if csvfile:
        outfile = open(outfilename, 'w', newline='')  # text mode for csv.writer under Python 3
        writer = csv.writer(outfile)
        headings = []
        outdata = np.ma.empty([0, 3240])
    else:
        outfile = nc.Dataset(outfilename, 'w', format='NETCDF3_CLASSIC',
import os
import numpy as np
import netCDF4
import csv
import pandas as pd

# navigate to the folder location; you may have to change this
base_dir = os.path.abspath(os.path.dirname(
    '/Users/Kai/src/python/noaa_ais/ncdf_translator/oisst_translator/raw_data/'))

for file in os.listdir(base_dir):
    print(file)
    if file.endswith(".nc"):
        dataset = netCDF4.Dataset(base_dir + "/" + file)
        lat = dataset.variables['lat'][:]
        lon = dataset.variables['lon'][:]
        # time_var = dataset.variables['time']
        # zlev = dataset.variables['zlev']
        sst = dataset.variables['sst']
        # anom = dataset.variables['anom']
        # err = dataset.variables['err']
        # ice = dataset.variables['ice']
        df_lat = pd.DataFrame(np.repeat(lat[:], 1440, axis=0),
                              columns=['lat'])  # each lat repeated once per lon
        df_lon = pd.DataFrame(np.tile(lon[:], 720),
                              columns=['lon'])  # tile (not repeat) so the lon sequence cycles once per lat row
        # df_time = pd.DataFrame(np.repeat([time_var[0]], 1036800, axis=0), columns=['time'])
        # df_zlev = pd.DataFrame(np.repeat([zlev[0]], 1036800, axis=0), columns=['zlev'])
        # base_df = pd.concat([df_time, df_zlev, df_lat, df_lon], axis=1)
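        # The commented-out lines above hint at the intended next step. This
        # is a hedged sketch of joining the flattened grid with the SST field
        # and writing one csv per file; the dimension order (time, zlev, lat,
        # lon) and the output naming are assumptions, not the author's code.
        df_sst = pd.DataFrame(sst[0, 0, :, :].flatten(), columns=['sst'])
        base_df = pd.concat([df_lat, df_lon, df_sst], axis=1)
        base_df.to_csv(os.path.join(base_dir, file.replace('.nc', '.csv')),
                       index=False)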