def scan(path):
    """Scan a Cdunif file, return a tree of NCML objects, in the form of a NetcdfNode."""
    from cdms2 import Cdunif
    handle = Cdunif.CdunifFile(path)
    root = NetcdfNode(uri=path)

    # Dimensions: an unlimited dimension reports its length as None, so read
    # the actual record count from the file instead.
    for dim_name, dim_len in handle.dimensions.items():
        unlimited = dim_len is None
        if unlimited:
            dim_len = len(handle.readDimension(dim_name))
        root.setDimension(
            DimensionNode(dim_name, dim_len,
                          isUnlimited="true" if unlimited else "false"))

    # Variables: copy type and dimension info; values are written only for
    # coordinate variables (a variable named after its own first dimension).
    for var_name, var in handle.variables.items():
        node = VariableNode(var_name, NumericToNCType[var.typecode()],
                            var.dimensions)
        if var.dimensions and var_name == var.dimensions[0]:
            node.setValues(ValueNode(var.getValue()))
        root.setVariable(node)
        _copyObjectAttributes(var, node)

    _copyObjectAttributes(handle, root)
    handle.close()
    return root
def scan(path):
    """Scan a Cdunif file, return a tree of NCML objects, in the form of a NetcdfNode.

    Coordinate variables (a 1-D variable named after its own dimension) are
    emitted as CoordinateAxisNode with their values inlined; all other
    variables become plain VariableNode entries.
    """
    from cdms2 import Cdunif
    # BUGFIX: the body references ncml.DimensionNode / ncml.ValueNode, so the
    # module itself must be imported, not just names from it.
    import ncml
    from ncml import _copyObjectAttributes, NumericToNCType
    f = Cdunif.CdunifFile(path)
    nc = NetcdfNode(uri=path)

    # Dimensions: unlimited dimensions report length None; read the real
    # record count from the file.
    for name, length in f.dimensions.items():
        if length is None:
            isUnlimited = "true"
            length = len(f.readDimension(name))
        else:
            isUnlimited = "false"
        nc.setDimension(
            ncml.DimensionNode(name, length, isUnlimited=isUnlimited))

    for name, var in f.variables.items():
        datatype = NumericToNCType[var.typecode()]
        if len(var.dimensions) == 1 and name == var.dimensions[0]:
            # Coordinate variable: pick up optional CF metadata if present.
            axisType = positive = boundaryRef = None
            if hasattr(var, 'axis'):
                try:
                    # BUGFIX: was string.lower(var.axis) — the legacy `string`
                    # module is not imported (and string.lower is gone in
                    # Python 3); str.lower() is equivalent.
                    axisType = cdToCsAxisType[var.axis.lower()]
                except Exception:
                    # Best effort: an unrecognized axis attribute simply
                    # leaves axisType unset.
                    pass
            if hasattr(var, 'positive'):
                positive = var.positive
            if hasattr(var, 'bounds'):
                boundaryRef = var.bounds
            units = None
            if hasattr(var, 'units'):
                units = var.units
            v = CoordinateAxisNode(name, datatype, units, var.dimensions,
                                   axisType=axisType, positive=positive,
                                   boundaryRef=boundaryRef)
            v.setValues(ncml.ValueNode(var.getValue()))
            nc.setCoordinateAxis(v)
        else:
            v = VariableNode(name, datatype, var.dimensions)
            nc.setVariable(v)
        _copyObjectAttributes(var, v)

    _copyObjectAttributes(f, nc)
    f.close()
    return nc
def getContext(self, **context):
    """Build and return the publication context dictionary.

    Keyword arguments pre-seed the context; any of 'experiment', 'submodel',
    'run_name' and 'product' not supplied are filled from the data file's
    global attributes (via file2keys) when the data is online. 'project'
    defaults to this handler's name. The result is also stored on
    ``self.context``.
    """
    if not self.offline:
        # f = NetCDF.NetCDFFile(self.path)
        f = Cdunif.CdunifFile(self.path)
        fileContext = self.file2keys(f, self.path)
        f.close()
        # Fill only the keys the caller did not supply.
        # NOTE: modernized from dict.has_key() (Python-2-only) to `in`.
        for key in ['experiment', 'submodel', 'run_name']:
            if key not in context:
                context[key] = fileContext[key]
        if 'product' not in context:
            # 'product' is derived from the file's frequency attribute.
            context['product'] = fileContext['frequency']
    if 'project' not in context:
        context['project'] = self.name
    if self.validate:
        self.validateContext(**context)
    self.context = context
    return context
def proc_path(validator, path):
    """Run the CMIP6 control-vocabulary check on a single netCDF file.

    Reads the required table_id and variable_id global attributes from the
    file, locates the matching CMIP6 JSON table under sys.argv[1], then
    drives the validator's ControlVocab check through a synthesized
    command line.

    Raises ESGPublishError if either required global attribute is missing.
    """
    cf = Cdunif.CdunifFile(path, 'r')
    try:
        table = getattr(cf, 'table_id')
    except AttributeError:
        # BUGFIX: message previously interpolated undefined name `f`,
        # which raised NameError instead of the intended error.
        raise ESGPublishError(
            "File %s missing required table_id global attribute" % path)
    try:
        variable_id = getattr(cf, 'variable_id')
    except AttributeError:
        # BUGFIX: same undefined-name fix as above.
        raise ESGPublishError(
            "File %s missing required variable_id global attribute" % path)

    table_file = sys.argv[1] + '/CMIP6_' + table + '.json'
    print(path, variable_id, table_file)

    # Synthesize the argv the validator's parser expects.
    fakeargs = ['--variable', variable_id, table_file, path]
    parser = argparse.ArgumentParser(prog='pptest_harness')
    parser.add_argument('--variable')
    parser.add_argument('cmip6_table', action=validator.JSONAction)
    parser.add_argument('infile', action=validator.CDMSAction)
    parser.add_argument('outfile', nargs='?',
                        help='Output file (default stdout)',
                        type=argparse.FileType('w'), default=sys.stdout)
    args = parser.parse_args(fakeargs)

    process = validator.checkCMIP6(args)
    # Reuse the already-open file handle for the check.
    args.infile = cf
    try:
        process.ControlVocab(args)
    except KeyboardInterrupt as e:
        # ControlVocab signals validation failure via KeyboardInterrupt.
        print("Error with ", path, str(e))
    args.infile.close()
def __init__(self, projectName, path, Session, validate=True, offline=False):
    """Initialize the project handler.

    When the data is online, opens the file at `path` with Cdunif to confirm
    it belongs to this project before accepting it. Defines the project's
    field-name schema and optionally loads the valid-value lists.
    """
    ProjectHandler.__init__(self, projectName, path, Session, validate=validate, offline=offline)
    if not offline:
        try:
            # f = NetCDF.NetCDFFile(path)
            f = Cdunif.CdunifFile(path)
        except:
            # Any open failure is reported as "data offline" — deliberate
            # best-effort wrapping of the underlying I/O error.
            raise ESGPublishError(
                'Error opening %s. Is the data offline?' % path)
        if not self.validateProject(f):
            raise ESGInvalidMetadataFormat("Not a %s datafile" % projectName)
        f.close()
    # Field schema: name -> (type, is_mandatory, display_order).
    self.fieldNames = {
        'project': (ENUM, True, 0),
        'experiment': (ENUM, True, 1),
        'model': (ENUM, True, 2),
        'product': (ENUM, True, 3),
        'submodel': (ENUM, False, 4),
        'run_name': (STRING, True, 5),
    }
    self.context = {}
    self.validValues = {}
    if validate:
        self.initValidValues(Session)
def open(path, mode='r'):
    """Open a file and return a format handler for it.

    path
      String path name.
    mode
      String mode. Since only mode='r' (read-only) is currently used,
      it is optional.

    Files ending in ".nc" are opened through Cdunif; anything else is
    wrapped with no underlying dataset.
    """
    if path.endswith(".nc"):
        return MultipleFormatHandler(Cdunif.CdunifFile(path), path)
    return MultipleFormatHandler(None, path)
def ControlVocab(self, ncfile, variable=None, print_all=True):
    """
    Check CMIP6 global attributes against Control Vocabulary file.

    1. Validate required attribute if presents and some values.
    2. Validate registered institution and institution_id
    3. Validate registered source and source_id
    4. Validate experiment, experiment_id and all attributes associated
       with this experiment. Make sure that all attributes associate with
       the experiment_id found in CMIP6_CV.json are set to the appropriate
       values.
    5. Validate grid_label and grid_resolution
    6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
    7. Validate furtherinfourl from CV internal template
    8. Validate variable attributes with CMOR JSON table.
    9. Validate parent_* attribute
    10. Validate sub_experiment_* attributes.
    11. Validate that all *_index are integers.

    Validation failures increment self.errors; fatal inconsistencies raise
    KeyboardInterrupt (used by callers as the "CV FAIL" signal).
    """
    filename = os.path.basename(ncfile)
    # -------------------------------------------------------------------
    # Initialize arrays
    # -------------------------------------------------------------------
    # If table_path is the table directory
    # Deduce corresponding JSON from filename
    if os.path.isdir(self.cmip6_table_path):
        cmip6_table = '{}/CMIP6_{}.json'.format(
            self.cmip6_table_path, self._get_table_from_filename(filename))
    else:
        cmip6_table = self.cmip6_table_path
    # Table id is the part after "CMIP6_" in the table file name.
    table_id = os.path.basename(
        os.path.splitext(cmip6_table)[0]).split('_')[1]
    # Check and get JSON table
    cmor_table = self._check_json_table(cmip6_table)
    # -------------------------------------------------------------------
    # Load CMIP6 table into memory
    # -------------------------------------------------------------------
    table = cmip6_cv.load_table(cmip6_table)
    # -------------------------------------------------------------------
    # Deduce variable
    # -------------------------------------------------------------------
    # If variable can be deduced from the filename (Default)
    # If not variable submitted on command line with --variable is considered
    variable_id = self._get_variable_from_filename(filename)
    if not variable:
        variable = variable_id
    # -------------------------------------------------------------------
    # Distinguish similar CMOR entries with the same out_name if exist
    # -------------------------------------------------------------------
    # Apply test on variable only if a particular treatment if required
    prepare_path = os.path.dirname(os.path.realpath(__file__))
    out_names_tests = json.loads(
        open(os.path.join(prepare_path, 'out_names_tests.json')).read())
    # -------------------------------------------------------------------
    # Open file in processing
    # The file needs to be open before the calling the test.
    # -------------------------------------------------------------------
    infile = Cdunif.CdunifFile(ncfile, "r")
    key = '{}_{}'.format(table_id, variable_id)
    variable_cmor_entry = None
    if key in list(out_names_tests.keys()):
        # Each entry names a test method on self; the last test evaluated
        # decides which CMOR entry is used.
        for test, cmor_entry in list(out_names_tests[key].items()):
            if getattr(self, test)(**{
                    'infile': infile,
                    'variable': variable,
                    'filename': filename
            }):
                # If test successfull, the CMOR entry to consider is given by the test
                variable_cmor_entry = cmor_entry
            else:
                # If not, CMOR entry to consider is the variable from filename or from input command-line
                variable_cmor_entry = variable
    else:
        # By default, CMOR entry to consider is the variable from filename or from input command-line
        variable_cmor_entry = variable
    # -------------------------------------------------------------------
    # Get variable out name in netCDF record
    # -------------------------------------------------------------------
    # Variable record name should follow CMOR table out names
    if variable_cmor_entry not in list(
            cmor_table['variable_entry'].keys()):
        print(BCOLORS.FAIL)
        print(
            "====================================================================================="
        )
        print("The entry " + variable_cmor_entry +
              " could not be found in CMOR table")
        print(
            "====================================================================================="
        )
        print(BCOLORS.ENDC)
        raise KeyboardInterrupt
    variable_record_name = cmor_table['variable_entry'][
        variable_cmor_entry]['out_name']
    # Variable id attribute should be the same as variable record name
    # in any case to be CF- and CMIP6-compliant
    variable_id = variable_record_name
    # -------------------------------------------------------------------
    # Create a dictionary of all global attributes
    # -------------------------------------------------------------------
    self.dictGbl = infile.__dict__
    for key, value in list(self.dictGbl.items()):
        cmip6_cv.set_cur_dataset_attribute(key, value)
    # Set member_id attribute depending on sub_experiment_id and variant_label
    member_id = ""
    if "sub_experiment_id" in list(self.dictGbl.keys()):
        if self.dictGbl["sub_experiment_id"] not in ['none']:
            member_id = '{}-{}'.format(self.dictGbl['sub_experiment_id'],
                                       self.dictGbl['variant_label'])
        else:
            member_id = self.dictGbl['variant_label']
    cmip6_cv.set_cur_dataset_attribute(cmip6_cv.GLOBAL_ATT_MEMBER_ID,
                                       member_id)
    # -------------------------------------------------------------------
    # Create a dictionary of attributes for the variable
    # -------------------------------------------------------------------
    try:
        self.dictVar = infile.variables[variable_record_name].__dict__
    except BaseException:
        print(BCOLORS.FAIL)
        print(
            "====================================================================================="
        )
        print("The variable " + variable_record_name +
              " could not be found in file")
        print(
            "====================================================================================="
        )
        print(BCOLORS.ENDC)
        raise KeyboardInterrupt
    # -------------------------------------------------------------------
    # Check global attributes
    # -------------------------------------------------------------------
    self.errors += cmip6_cv.check_requiredattributes(table)
    self.errors += cmip6_cv.check_institution(table)
    self.errors += cmip6_cv.check_sourceID(table)
    self.errors += cmip6_cv.check_experiment(table)
    self.errors += cmip6_cv.check_grids(table)
    self.errors += cmip6_cv.check_ISOTime()
    self.errors += cmip6_cv.check_furtherinfourl(table)
    self.errors += cmip6_cv.check_subExpID(table)
    # Branch times must be (coercible to) doubles.
    for attr in ['branch_time_in_child', 'branch_time_in_parent']:
        if attr in list(self.dictGbl.keys()):
            self.set_double_value(attr)
            if not isinstance(self.dictGbl[attr], numpy.float64):
                print(BCOLORS.FAIL)
                print(
                    "====================================================================================="
                )
                print("{} is not a double: ".format(attr),
                      type(self.dictGbl[attr]))
                print(
                    "====================================================================================="
                )
                print(BCOLORS.ENDC)
                self.errors += 1
    # All *_index attributes must be integers (stored as numpy arrays).
    for attr in [
            'realization_index', 'initialization_index', 'physics_index',
            'forcing_index'
    ]:
        if not isinstance(self.dictGbl[attr], numpy.ndarray):
            print(BCOLORS.FAIL)
            print(
                "====================================================================================="
            )
            print("{} is not an integer: ".format(attr),
                  type(self.dictGbl[attr]))
            print(
                "====================================================================================="
            )
            print(BCOLORS.ENDC)
            self.errors += 1
    self.errors += cmip6_cv.check_parentExpID(table)
    # table_id / variable_id global attributes must match the values
    # deduced above; locals()[attr] looks up the local variables of the
    # same names.
    for attr in ['table_id', 'variable_id']:
        try:
            if locals()[attr] != self.dictGbl[attr]:
                print(BCOLORS.FAIL)
                print(
                    "====================================================================================="
                )
                print("{} attribute is not consistent: ".format(attr),
                      self.dictGbl[attr])
                print(
                    "====================================================================================="
                )
                print(BCOLORS.ENDC)
                self.errors += 1
        except KeyError:
            print(BCOLORS.FAIL)
            print(
                "====================================================================================="
            )
            print("{} attribute is missing in global attributes".format(
                attr))
            print(
                "====================================================================================="
            )
            print(BCOLORS.ENDC)
            self.errors += 1
    # -------------------------------------------------------------------
    # Get time axis properties
    # -------------------------------------------------------------------
    # Get calendar and time units
    try:
        calendar = infile.variables['time'].calendar
        timeunits = infile.variables['time'].units
    except BaseException:
        # Defaults when there is no usable time axis.
        calendar = "gregorian"
        timeunits = "days since ?"
    # Get first and last time bounds
    climatology = self.is_climatology(filename)
    if climatology:
        if cmip6_table.find('Amon') != -1:
            variable = '{}Clim'.format(variable)
    clim_idx = variable.find('Clim')
    if climatology and clim_idx != -1:
        # NOTE(review): `var` is assigned here but not used below —
        # looks vestigial; confirm before removing.
        var = [variable[:clim_idx]]
    try:
        # Prefer explicit bounds/climatology attributes, fall back to the
        # conventional 'time_bnds' name.
        if 'bounds' in list(infile.variables['time'].__dict__.keys()):
            bndsvar = infile.variables['time'].__dict__['bounds']
        elif 'climatology' in list(
                infile.variables['time'].__dict__.keys()):
            bndsvar = infile.variables['time'].__dict__['climatology']
        else:
            bndsvar = 'time_bnds'
        startimebnds = infile.variables[bndsvar][0][0]
        endtimebnds = infile.variables[bndsvar][-1][1]
    except BaseException:
        startimebnds = 0
        endtimebnds = 0
    try:
        startime = infile.variables['time'][0]
        endtime = infile.variables['time'][-1]
    except BaseException:
        startime = 0
        endtime = 0
    # -------------------------------------------------------------------
    # Setup variable
    # -------------------------------------------------------------------
    varid = cmip6_cv.setup_variable(variable_cmor_entry,
                                    self.dictVar['units'],
                                    self.dictVar['_FillValue'][0],
                                    startime, endtime, startimebnds,
                                    endtimebnds)
    if varid == -1:
        print(BCOLORS.FAIL)
        print(
            "====================================================================================="
        )
        print("Could not find variable {} in table {} ".format(
            variable_cmor_entry, cmip6_table))
        print(
            "====================================================================================="
        )
        print(BCOLORS.ENDC)
        raise KeyboardInterrupt
    # -------------------------------------------------------------------
    # Check filename
    # -------------------------------------------------------------------
    self.errors += cmip6_cv.check_filename(table, varid, calendar,
                                           timeunits, filename)
    # -------------------------------------------------------------------
    # Check variable attributes
    # -------------------------------------------------------------------
    cv_attrs = cmip6_cv.list_variable_attributes(varid)
    for key in cv_attrs:
        # long_name and comment are informational only.
        if key == "long_name":
            continue
        if key == "comment":
            continue
        if key == "cell_measures":
            # OPT/MODEL markers mean the attribute is model-dependent.
            if cv_attrs[key].find("OPT") != -1 or cv_attrs[key].find(
                    "MODEL") != -1:
                continue
        # Is this attribute in file?
        if key in list(self.dictVar.keys()):
            # Verify that attribute value is equal to file attribute
            table_value = cv_attrs[key]
            file_value = self.dictVar[key]
            # PrePARE accept units of 1 or 1.0 so adjust the table_value
            if key == "units":
                if (table_value == "1") and (file_value == "1.0"):
                    table_value = "1.0"
                if (table_value == "1.0") and (file_value == "1"):
                    table_value = "1"
            # A whitespace-separated integer list in the table may match a
            # numpy array in the file.
            if isinstance(table_value, str) and isinstance(
                    file_value, numpy.ndarray):
                if numpy.array(
                        [int(value) for value in table_value.split()
                         ] == file_value).all():
                    file_value = True
                    table_value = True
            if isinstance(table_value, numpy.ndarray):
                table_value = table_value[0]
            if isinstance(file_value, numpy.ndarray):
                file_value = file_value[0]
            if isinstance(table_value, float):
                # Tolerant float comparison (relative tolerance 1e-5).
                if abs(table_value - file_value) <= 0.00001 * abs(table_value):
                    table_value = file_value
            if key == "cell_methods":
                # Ignore any parenthesized qualifier in the file value.
                idx = file_value.find(" (")
                if idx != -1:
                    file_value = file_value[:idx]
                    table_value = table_value[:idx]
            if key == "cell_measures":
                # Expand "param: a OR param: b" alternatives from the table
                # into the set of acceptable combinations.
                pattern = re.compile(
                    '(?P<param>[\w.-]+): (?P<val1>[\w.-]+) OR (?P<val2>[\w.-]+)'
                )
                values = re.findall(pattern, table_value)
                table_values = [
                    ""
                ]  # Empty string is allowed in case of useless attribute
                if values:
                    tmp = dict()
                    for param, val1, val2 in values:
                        tmp[param] = [
                            str('{}: {}'.format(param, val1)),
                            str('{}: {}'.format(param, val2))
                        ]
                    table_values.extend([
                        ' '.join(i) for i in list(
                            itertools.product(*list(tmp.values())))
                    ])
                if str(file_value) not in list(map(str, table_values)):
                    print(BCOLORS.FAIL)
                    print(
                        "====================================================================================="
                    )
                    print("Your file contains \"" + key + "\":\"" +
                          str(file_value) + "\" and")
                    print("CMIP6 tables requires \"" + key + "\":\"" +
                          str(table_value) + "\".")
                    print(
                        "====================================================================================="
                    )
                    print(BCOLORS.ENDC)
                    self.errors += 1
                continue
            if str(table_value) != str(file_value):
                print(BCOLORS.FAIL)
                print(
                    "====================================================================================="
                )
                print("Your file contains \"" + key + "\":\"" +
                      str(file_value) + "\" and")
                print("CMIP6 tables requires \"" + key + "\":\"" +
                      str(table_value) + "\".")
                print(
                    "====================================================================================="
                )
                print(BCOLORS.ENDC)
                self.errors += 1
        else:
            # That attribute is not in the file
            table_value = cv_attrs[key]
            if isinstance(table_value, numpy.ndarray):
                table_value = table_value[0]
            if isinstance(table_value, float):
                table_value = "{0:.2g}".format(table_value)
            print(BCOLORS.FAIL)
            print(
                "====================================================================================="
            )
            print("CMIP6 variable " + variable + " requires \"" + key +
                  "\":\"" + str(table_value) + "\".")
            print(
                "====================================================================================="
            )
            print(BCOLORS.ENDC)
            self.errors += 1
    # Print final message
    if self.errors != 0:
        print(BCOLORS.FAIL +
              "└──> :: CV FAIL :: {}".format(ncfile) +
              BCOLORS.ENDC)
        raise KeyboardInterrupt
    elif print_all:
        print(BCOLORS.OKGREEN +
              " :: CV SUCCESS :: {}".format(ncfile) +
              BCOLORS.ENDC)
def open(path, mode='r'):
    """Open `path` through Cdunif and return a CdunifFormatHandler for it.

    `mode` is accepted for interface compatibility; the file is always
    opened with Cdunif's default (read) mode.
    """
    dataset = Cdunif.CdunifFile(path)
    return CdunifFormatHandler(dataset, path)
def _oisst_daily(daily_sst_dir, daily_sst_filename, targ_grid_res):
    #---------------------------------------------------------------------
    """ See file header.

    Regrid daily OISST SST and ice-concentration fields to the target
    resolution and write them, with a noleap-calendar time axis, to a new
    netCDF file (via Cdunif) in the current directory.
    """
    print('targ_grid_res: ', targ_grid_res)
    # Filename convention: the third dot-separated token is the time span
    # tag (e.g. "YYYY-YYYY"); its first part is the start year.
    time_span_tag = daily_sst_filename.split('.')[2]
    time_units = 'days since ' + time_span_tag.split('-')[0] + '-1-1 00:00:00'
    year_start = int(time_span_tag.split('-')[0])
    # Companion ice-concentration file differs only by variable tag.
    daily_ice_filename = daily_sst_filename.replace('sst', 'icec')
    # Create target grid.
    # For a ONEXONE target grid resolution with arguments:
    #     (-90., 181, 1., 0., 360, 1.)
    # A grid will be created with:
    #     latitude starting at -90 & going north 181 points,
    #     with an increment of 1 degree;
    #     longitude starting at 0E & going east 360 points,
    #     with an increment of 1 degree.
    # The out_filename will reflect the designated resolution.
    #---------------------------------------------------------
    args = TARG_GRID_RES_DICT[targ_grid_res]['args']
    targ_grid = cdms2.createUniformGrid(args[0], args[1], args[2],
                                        args[3], args[4], args[5])
    label = TARG_GRID_RES_DICT[targ_grid_res]['label']
    out_filename = 'sst_daily_cdcunits' + label + time_span_tag + '.nc'
    fdaily_sst = cdms2.open(daily_sst_dir + '/' + daily_sst_filename)
    fdaily_ice = cdms2.open(daily_sst_dir + '/' + daily_ice_filename)
    input_grid = fdaily_sst.variables['sst'].getGrid()
    rg_in2targ = Regridder(input_grid, targ_grid)
    # Create file and variables for output.
    #--------------------------------------
    #fout = NetCDF.NetCDFFile(out_filename, 'w')
    fout = Cdunif.CdunifFile(out_filename, 'w')
    lons = targ_grid.getLongitude()[:]
    lats = targ_grid.getLatitude()[:]
    fout.createDimension('lon', len(lons))
    fout.createDimension('lat', len(lats))
    fout.createDimension('time', None)  # unlimited record dimension
    sst_cpl = fout.createVariable('sst', 'f', ('time', 'lat', 'lon'))
    sst_cpl.long_name = 'sea surface temperature'
    sst_cpl.units = 'degrees_C'
    ifrac = fout.createVariable('ifrac', 'f', ('time', 'lat', 'lon'))
    ifrac.long_name = 'ice fraction'
    ifrac.units = 'fraction'
    lat = fout.createVariable('lat', 'd', ('lat', ))
    lat.long_name = 'latitude of grid cell center'
    lat.units = 'degrees_north'
    lon = fout.createVariable('lon', 'd', ('lon', ))
    lon.long_name = 'longitude of grid cell center'
    lon.units = 'degrees_east'
    time = fout.createVariable('time', 'd', ('time', ))
    time.long_name = 'time'
    time.units = time_units
    time.calendar = 'noleap'
    date = fout.createVariable('date', 'i', ('time', ))
    date.long_name = 'calendar date (YYYYMMDD)'
    datesec = fout.createVariable('datesec', 'i', ('time', ))
    datesec.long_name = 'seconds elapsed on calendar date'
    datesec.units = 'seconds'
    # Coordinate data.
    #-----------------
    # NOTE: long_name/units are re-assigned here, overwriting the values
    # set just above.
    lat[:] = lats
    lat.long_name = 'latitude'
    lat.units = 'degrees_north'
    lon[:] = lons
    lon.long_name = 'longitude'
    lon.units = 'degrees_east'
    sst_w = fdaily_sst.variables['sst']
    ntimes = sst_w.shape[0]
    intime = sst_w.getTime()
    intime_units = intime.units
    intimes = intime[:]
    # Time loop.
    #-----------
    # NOTE(review): time_idx_out starts at -1, so the first record written
    # (before the first increment) goes to output index -1 — confirm this
    # offset is intentional for the "data centered on time" handling.
    time_idx_out = -1
    for time_idx in range(ntimes - 1):
        # Data is centered on time in file.
        #----------------------------------
        mid_intime = intimes[time_idx]
        rtime = cdtime.reltime(mid_intime, intime_units)
        ctime = rtime.tocomp()
        # Re-express the timestamp in the output units on a noleap calendar.
        new_reltime = ctime.torel(time_units, cdtime.NoLeapCalendar)
        new_ctime = new_reltime.tocomp()
        year = ctime.year
        if year < year_start:
            #=======
            continue
            #=======
        month = ctime.month
        day = ctime.day
        hour = ctime.hour
        minute = ctime.minute
        second = ctime.second
        # Change time units.
        #-------------------
        print ('time_idx_out, ctime, new_ctime: ',
               time_idx_out, ctime, new_ctime)
        time[time_idx_out] = new_reltime.value
        print('time[time_idx_out]: ', time[time_idx_out])
        date[time_idx_out] = (year * 10000) + (month * 100) + day
        datesec[time_idx_out] = (hour * 60 * 60) + (minute * 60) + second
        # SST: fill missing values, convert to float32, regrid.
        data = fdaily_sst('sst', time=slice(time_idx, (time_idx + 1)),
                          raw=1, squeeze=1)
        data_f = fill_msg(data, nscan=200)
        data_f = n.array(data_f, n.float32)
        print ('data_f min,max,mean: ',
               data_f.min(), data_f.max(), data_f.mean() )
        data_f = rg_in2targ(data_f).filled()
        out_sst = data_f
        print ('out_sst min,max,mean: ',
               out_sst.min(), out_sst.max(), out_sst.mean() )
        data = fdaily_ice('icec', time=slice(time_idx, (time_idx + 1)),
                          raw=1, squeeze=1)
        data_f = data * 1.0
        print ('data_f min,max,mean: ',
               data_f.min(), data_f.max(), data_f.mean() )
        # Set ice to zero where missing - over land.
        #-------------------------------------------
        data_f = rg_in2targ(data_f).filled(0.0)
        out_ice = data_f
        print ('out_ice min,max,mean: ',
               out_ice.min(), out_ice.max(), out_ice.mean() )
        sst_cpl[time_idx_out, :, :] = out_sst
        ifrac[time_idx_out, :, :] = out_ice
        time_idx_out = time_idx_out + 1
        fout.sync()
    fout.close()
    return