def read_multifile(root: str = data_dir):
    """Open the aggregated GFS 0.25-degree files under *root* and print a
    quick summary: data model, dimensions, global attributes, and the first
    field of the 3-hour-average downward shortwave flux variable.

    Parameters
    ----------
    root : str
        Directory containing the ``gfs.0p25*.nc`` files (defaults to the
        module-level ``data_dir``).
    """
    path = f"{root}/gfs.0p25*.nc"
    print(path)
    f = MFDataset(path)
    try:
        print(f.data_model)
        print(f.dimensions)
        print(f.ncattrs())
        ghi_raw: Variable = f.variables["DSWRF_P8_L1_GLL0_avg3h"]
        print(ghi_raw.dimensions)
        # NOTE(review): return value discarded — presumably just probing that
        # the aggregated time attribute exists; confirm intent.
        ghi_raw.getncattr("initial_time0_hours")
        print(ghi_raw[0][0][:])
    finally:
        # MFDataset keeps every member file open; release them even if one
        # of the prints above raises.
        f.close()
def preprocess(filelist, begin_slice, end_slice, variable, output_filename):
    """Extract a time slice of *variable* from an aggregated set of netCDF
    files and write it to a new NETCDF4_CLASSIC file.

    Parameters
    ----------
    filelist : list or str
        Files (or glob pattern) accepted by ``MFDataset``.
    begin_slice, end_slice : int
        Index range applied along the time axis.
    variable : str
        Name of the 3-d (time, lat, lon) variable to copy.
    output_filename : str
        Path of the netCDF file to create.

    Returns
    -------
    str
        ``output_filename``, for convenient chaining.
    """
    # Read in the existing aggregated dataset; close it even on error.
    ds = MFDataset(filelist)
    try:
        # Build a new output dataset.
        with Dataset(output_filename, 'w', format='NETCDF4_CLASSIC') as sub_ds:
            sub_ds.createDimension('lat', ds['lat'][:].shape[0])
            sub_ds.createDimension('lon', ds['lon'][:].shape[0])
            sub_ds.createDimension('time', None)  # unlimited

            # Create coordinate variables.
            times = sub_ds.createVariable('time', np.float64, ('time',))
            latitudes = sub_ds.createVariable('lat', np.float32, ('lat',))
            longitudes = sub_ds.createVariable('lon', np.float32, ('lon',))
            # Create the actual 3-d variable (downcast to float32).
            data = sub_ds.createVariable(variable, np.float32,
                                         ('time', 'lat', 'lon'))

            # Set the data, subsetting the time axis.
            times[:] = ds['time'][begin_slice:end_slice]
            latitudes[:] = ds['lat'][:]
            longitudes[:] = ds['lon'][:]
            data[:] = ds[variable][begin_slice:end_slice, ...].astype(np.float32)

            # Global CF-convention-style meta attrs.
            sub_ds.setncatts({k: getattr(ds, k) for k in ds.ncattrs()})
            # Local variable meta attrs (skip underscore-prefixed keys such
            # as _FillValue, which cannot be set after variable creation).
            sub_ds[variable].setncatts({
                k: getattr(ds[variable], k)
                for k in ds[variable].ncattrs() if not k.startswith('_')
            })
            # Coordinate attrs (time, lon, lat).
            for coord in ('time', 'lon', 'lat'):
                sub_ds[coord].setncatts(
                    {k: getattr(ds[coord], k) for k in ds[coord].ncattrs()})
    finally:
        ds.close()
    return output_filename
class EcoFOCI_mfnetCDF(object):
    """Aggregate multiple netCDF files in one path along a shared dimension."""

    def __init__(self, file_name=None, aggdim=None):
        """Initialize opening of multiple netcdf files along same dimension
        (aggdim) in same path.

        Parameters
        ----------
        file_name : str
            full path to file on disk (with wildcards)
        aggdim : str
            dimension name to aggregate along. Slowest varying dimension or
            unlimited dimension will be chosen if no option is passed.
        """
        # BUG FIX: MFDataset is read-only and its second positional
        # parameter is `check` (a bool enabling format checking), not a mode
        # string — the previous 'a' argument silently turned checking on
        # rather than opening for append.
        self.nchandle = MFDataset(file_name, aggdim=aggdim)
        self.file_name = file_name

    def get_global_atts(self):
        """Return the aggregated dataset's global attributes as a dict."""
        return {name: self.nchandle.getncattr(name)
                for name in self.nchandle.ncattrs()}

    def get_vars(self):
        """Cache and return the mapping of variable name -> Variable."""
        self.variables = self.nchandle.variables
        return self.nchandle.variables

    def ncreadfile_dic(self):
        """Read every variable fully into memory.

        Returns
        -------
        dict
            {variable name: array of values}.
        """
        # Iterating `variables` yields its keys, so every name is present by
        # construction; the old missing-variable fallback was dead code.
        return {v: self.nchandle.variables[v][:]
                for v in self.nchandle.variables}

    def close(self):
        """Close the underlying aggregated dataset."""
        self.nchandle.close()
class NC4MFDataset(NC4Dataset):
    """Multi-file variant of NC4Dataset backed by a netCDF4 MFDataset."""

    def __init__(self, file_list, time_axis='time', **kwargs):
        # NOTE: does not invoke super().__init__; the handle here is the
        # aggregated MFDataset rather than a single-file Dataset.
        self.time_axis = time_axis
        self.ds = MFDataset(file_list, **kwargs)

    def ncattrs_dict(self, varname=None):
        """Return the attributes of variable *varname* — or the dataset's
        global attributes when *varname* is falsy — as a plain dict."""
        target = self.ds.variables[varname] if varname else self.ds
        return {name: getattr(target, name) for name in target.ncattrs()}

    def getncattr(self, attr):
        """Look up a single global attribute on the aggregated dataset."""
        return getattr(self.ds, attr)
def ncread(file, vars=None, dims=False, noisy=False, atts=False, datetimes=False):
    """
    Read in the FVCOM results file and spit out numpy arrays for each of the
    variables specified in the vars list.

    Optionally specify a dict with keys whose names match the dimension
    names in the netCDF file and whose values are strings specifying
    alternative ranges or lists of indices. For example, to extract the
    first hundred time steps, supply dims as:

        dims = {'time':'0:100'}

    To extract the first, 400th and 10,000th values of any array with nodes:

        dims = {'node':'[0, 3999, 9999]'}

    Any dimension not given in dims will be extracted in full.

    Specify atts=True to extract the variable attributes. Set datetimes=True
    to convert the FVCOM Modified Julian Day values to python datetime
    objects.

    Parameters
    ----------
    file : str, list
        If a string, the full path to an FVCOM netCDF output file. If a
        list, a series of files to be loaded. Data will be concatenated into
        a single dict.
    vars : list, optional
        List of variable names to be extracted. If omitted, all variables
        are returned.
    dims : dict, optional
        Dict whose keys are dimensions and whose values are a string of
        either a range (e.g. {'time':'0:100'}) or a list of individual
        indices (e.g. {'time':'[0, 1, 80, 100]'}). Slicing is supported
        (::5 for every fifth value).
    noisy : bool, optional
        Set to True to enable verbose output.
    atts : bool, optional
        Set to True to enable output of the attributes (defaults to False).
    datetimes : bool, optional
        Set to True to convert FVCOM Modified Julian Days to Python datetime
        objects (creates a new `datetime' key in the output dict. Only
        applies if `vars' includes either the `Times' or `time' variables.
        Note: if FVCOM has been run with single precision output, then the
        conversion of the `time' values to a datetime object suffers
        rounding errors. It's best to either run FVCOM in double precision
        or specify only the `Times' data in the `vars' list.

    Returns
    -------
    FVCOM : dict
        Dict of data extracted from the netCDF file. Keys are those given in
        vars and the data are stored as ndarrays. If `datetimes' is True,
        then this also includes a `datetime' key in which is the FVCOM
        Modified Julian Day time series converted to Python datetime
        objects.
    attributes : dict, optional
        If atts=True, returns the attributes as a dict for each variable in
        vars. The key `dims' contains the array dimensions (each variable
        contains the names of its dimensions) as well as the shape of the
        dimensions defined in the netCDF file. The key `global' contains the
        global attributes.

    See Also
    --------
    read_probes : read in FVCOM ASCII probes output files.

    """

    # Set to True when we've converted from Modified Julian Day so we don't
    # end up doing the conversion twice, once for `Times' and again for
    # `time' if both variables have been requested in `vars'.
    done_datetimes = False
    # Check whether we'll be able to fulfill the datetime request.
    if datetimes and vars and not list(set(vars) & set(('Times', 'time'))):
        raise ValueError("Conversion from Modified Julian Day to python "
                         "datetimes has been requested but no time variable "
                         "(`Times' or `time') has been requested in vars.")

    # If we have a list, assume it's lots of files and load them all.
    if isinstance(file, list):
        try:
            try:
                rootgrp = MFDataset(file, 'r')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))
        # Deliberate fallback (was a bare `except:`): retry aggregating along
        # a 'time' dimension (for POLCOMS, for example).
        except Exception:
            try:
                rootgrp = MFDataset(file, 'r', aggdim='time')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))
    else:
        rootgrp = Dataset(file, 'r')

    # Create a dict of the dimension names and their current sizes. Make the
    # dimensions ranges so we can use them to extract all the values.
    read_dims = {}
    for key, var in list(rootgrp.dimensions.items()):
        read_dims[key] = '0:' + str(len(var))

    # Compare the dimensions in the netCDF file with those provided. If
    # we've been given a dict of dimensions which differs from those in the
    # netCDF file, then use those.
    if dims:
        commonKeys = set(read_dims).intersection(list(dims.keys()))
        for k in commonKeys:
            read_dims[k] = dims[k]

    if noisy:
        print("File format: {}".format(rootgrp.file_format))

    if not vars:
        # Use a list, not an iterator: the `key in vars` membership test
        # below would silently consume an iterator and only worked because
        # both iterations happened to share the same ordering.
        vars = list(rootgrp.variables.keys())

    FVCOM = {}

    # Save the dimensions in the attributes dict.
    if atts:
        attributes = {}
        attributes['dims'] = read_dims
        attributes['global'] = {}
        for g in rootgrp.ncattrs():
            attributes['global'][g] = getattr(rootgrp, g)

    for key, var in list(rootgrp.variables.items()):
        if noisy:
            print('Found ' + key, end=' ')
            sys.stdout.flush()

        if key in vars:
            vDims = rootgrp.variables[key].dimensions

            toExtract = [read_dims[d] for d in vDims]

            # If we have no dimensions, we must have only a single value, in
            # which case set the dimensions to empty and append the function
            # to extract the value.
            if not toExtract:
                toExtract = '.getValue()'

            # Thought I'd finally figured out how to replace the eval
            # approach, but I still can't get past the indexing needed to be
            # able to subset the data.
            # FVCOM[key] = rootgrp.variables.get(key)[0:-1]
            # I know, I know, eval() is evil.
            getData = 'rootgrp.variables[\'{}\']{}'.format(key, str(toExtract).replace('\'', ''))
            FVCOM[key] = eval(getData)

            # Add the units and dimensions for this variable to the list of
            # attributes. Only an absent attribute is tolerated; anything
            # else propagates (was a bare `except:`).
            if atts:
                attributes[key] = {}
                try:
                    attributes[key]['units'] = rootgrp.variables[key].units
                except AttributeError:
                    pass

                try:
                    attributes[key]['dims'] = rootgrp.variables[key].dimensions
                except AttributeError:
                    pass

            if datetimes and key in ('Times', 'time') and not done_datetimes:
                # Convert the time data to datetime objects. How we do this
                # depends on which we hit first - `Times' or `time'. For the
                # former, we need to parse the strings, for the latter we
                # can leverage num2date from the netCDF4 module and use the
                # time units attribute.
                if key == 'Times':
                    try:
                        FVCOM['datetime'] = [datetime.strptime(''.join(i), '%Y-%m-%dT%H:%M:%S.%f') for i in FVCOM[key]]
                    except ValueError:
                        # Try a different format before bailing out.
                        FVCOM['datetime'] = [datetime.strptime(''.join(i), '%Y/%m/%d %H:%M:%S.%f') for i in FVCOM[key]]
                    done_datetimes = True
                elif key == 'time':
                    FVCOM['datetime'] = num2date(FVCOM[key],
                                                 rootgrp.variables[key].units)
                    done_datetimes = True

            if noisy:
                if len(str(toExtract)) < 60:
                    print('(extracted {})'.format(str(toExtract).replace('\'', '')))
                else:
                    print('(extracted given indices)')

        elif noisy:
            print()

    # Close the open file.
    rootgrp.close()

    if atts:
        return FVCOM, attributes
    else:
        return FVCOM
def readFVCOM(file, varList=None, clipDims=False, noisy=False, atts=False):
    """
    Read in the FVCOM results file and spit out numpy arrays for each of the
    variables specified in the varList list.

    Optionally specify a dict with keys whose names match the dimension
    names in the NetCDF file and whose values are strings specifying
    alternative ranges or lists of indices. For example, to extract the
    first hundred time steps, supply clipDims as:

        clipDims = {'time':'0:100'}

    To extract the first, 400th and 10,000th values of any array with nodes:

        clipDims = {'node':'[0, 3999, 9999]'}

    Any dimension not given in clipDims will be extracted in full.

    Specify atts=True to extract the variable attributes.

    Parameters
    ----------
    file : str, list
        If a string, the full path to an FVCOM NetCDF output file. If a
        list, a series of files to be loaded. Data will be concatenated into
        a single dict.
    varList : list, optional
        List of variable names to be extracted. If omitted, all variables
        are returned.
    clipDims : dict, optional
        Dict whose keys are dimensions and whose values are a string of
        either a range (e.g. {'time':'0:100'}) or a list of individual
        indices (e.g. {'time':'[0, 1, 80, 100]'}). Slicing is supported
        (::5 for every fifth value) but it is not possible to extract data
        from the end of the array with a negative index (e.g. 0:-4).
    noisy : bool, optional
        Set to True to enable verbose output.
    atts : bool, optional
        Set to True to enable output of the attributes (defaults to False).

    Returns
    -------
    FVCOM : dict
        Dict of data extracted from the NetCDF file. Keys are those given in
        varList and the data are stored as ndarrays.
    attributes : dict, optional
        If atts=True, returns the attributes as a dict for each variable in
        varList. The key 'dims' contains the array dimensions (each variable
        contains the names of its dimensions) as well as the shape of the
        dimensions defined in the NetCDF file. The key 'global' contains the
        global attributes.

    See Also
    --------
    readProbes : read in FVCOM ASCII probes output files.

    """

    # If we have a list, assume it's lots of files and load them all.
    if isinstance(file, list):
        try:
            try:
                rootgrp = MFDataset(file, 'r')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))
        # Deliberate fallback (was a bare `except:`): retry aggregating
        # along a 'time' dimension (for POLCOMS, for example).
        except Exception:
            try:
                rootgrp = MFDataset(file, 'r', aggdim='time')
            except IOError as msg:
                raise IOError('Unable to open file {} ({}). Aborting.'.format(file, msg))
    else:
        rootgrp = Dataset(file, 'r')

    # Create a dict of the dimension names and their current sizes. Make the
    # dimensions ranges so we can use them to extract all the values.
    dims = {}
    for key, var in list(rootgrp.dimensions.items()):
        dims[key] = '0:' + str(len(var))

    # Compare the dimensions in the NetCDF file with those provided. If
    # we've been given a dict of dimensions which differs from those in the
    # NetCDF file, then use those.
    if clipDims:
        commonKeys = set(dims).intersection(list(clipDims.keys()))
        for k in commonKeys:
            dims[k] = clipDims[k]

    if noisy:
        print("File format: {}".format(rootgrp.file_format))

    if not varList:
        # Use a list, not an iterator: the `key in varList` membership test
        # below would silently consume an iterator and only worked because
        # both iterations happened to share the same ordering.
        varList = list(rootgrp.variables.keys())

    FVCOM = {}

    # Save the dimensions in the attributes dict.
    if atts:
        attributes = {}
        attributes['dims'] = dims
        attributes['global'] = {}
        for g in rootgrp.ncattrs():
            attributes['global'][g] = getattr(rootgrp, g)

    for key, var in list(rootgrp.variables.items()):
        if noisy:
            print('Found ' + key, end=' ')
            sys.stdout.flush()

        if key in varList:
            vDims = rootgrp.variables[key].dimensions

            toExtract = [dims[d] for d in vDims]

            # If we have no dimensions, we must have only a single value, in
            # which case set the dimensions to empty and append the function
            # to extract the value.
            if not toExtract:
                toExtract = '.getValue()'

            # Thought I'd finally figured out how to replace the eval
            # approach, but I still can't get past the indexing needed to be
            # able to subset the data.
            # FVCOM[key] = rootgrp.variables.get(key)[0:-1]
            # I know, I know, eval() is evil.
            getData = 'rootgrp.variables[\'{}\']{}'.format(key, str(toExtract).replace('\'', ''))
            FVCOM[key] = eval(getData)

            # Add the units and dimensions for this variable to the list of
            # attributes. Only an absent attribute is tolerated; anything
            # else propagates (was a bare `except:`).
            if atts:
                attributes[key] = {}
                try:
                    attributes[key]['units'] = rootgrp.variables[key].units
                except AttributeError:
                    pass

                try:
                    attributes[key]['dims'] = rootgrp.variables[key].dimensions
                except AttributeError:
                    pass

            if noisy:
                if len(str(toExtract)) < 60:
                    print('(extracted {})'.format(str(toExtract).replace('\'', '')))
                else:
                    print('(extracted given indices)')

        elif noisy:
            print()

    # Close the open file.
    rootgrp.close()

    if atts:
        return FVCOM, attributes
    else:
        return FVCOM