def ControlVocab(self):
    '''
    Check CMIP6 global attributes against Control Vocabulary file.

    1. Validate required attribute if presents and some values.
    2. Validate registered institution and institution_id
    3. Validate registered source and source_id
    4. Validate experiment, experiment_id and attribute associated
       with the experiment.
    5. Validate grid_label and grid_resolution
    6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
    7. Validate furtherinfourl from CV internal template
    8. Validate variable attributes with CMOR JSON table.

    Reads ``self.table_id``, ``self.var`` and ``self.dictVars``.

    Raises KeyboardInterrupt when the CV library reports an error or when
    a variable attribute in the file differs from the table value.
    '''
    cmip6_cv.check_requiredattributes(self.table_id)
    cmip6_cv.check_institution(self.table_id)
    cmip6_cv.check_sourceID(self.table_id)
    cmip6_cv.check_experiment(self.table_id)
    cmip6_cv.check_grids(self.table_id)
    cmip6_cv.check_ISOTime()
    cmip6_cv.check_furtherinfourl(self.table_id)

    varid = cmip6_cv.setup_variable(self.var[0], 'm', 1e20)

    # Tracks attribute mismatches found below so that a file with a wrong
    # attribute is never reported as compliant.
    mismatch = False
    for key in self.dictVars:
        # Only attributes that the table also defines are compared.
        if not cmip6_cv.has_variable_attribute(varid, key):
            continue

        table_value = cmip6_cv.get_variable_attribute(varid, key)
        file_value = self.dictVars[key]
        if isinstance(table_value, numpy.ndarray):
            table_value = table_value[0]
        if isinstance(file_value, numpy.ndarray):
            file_value = file_value[0]

        if isinstance(table_value, float):
            # Symmetric relative tolerance: absorbs float32/float64
            # round-off without accepting arbitrarily different values
            # and without dividing by a zero file value.
            try:
                if abs(table_value - file_value) <= 0.00001 * abs(table_value):
                    table_value = file_value
            except TypeError:
                # Non-numeric file value: fall through to the string
                # comparison, which reports the mismatch.
                pass

        file_value = str(file_value)
        table_value = str(table_value)
        if table_value != file_value:
            print("You variable attribute differ from table attribute!")
            print("You file contains " + key + ":" + file_value + " and")
            print("CMIP6 tables requires " + key + ":" + table_value + ".")
            mismatch = True

    if mismatch or cmip6_cv.get_CV_Error():
        raise KeyboardInterrupt

    print(bcolors.OKGREEN)
    print("*************************************************************************************")
    print("* This file is compliant with the CMIP6 specification and can be published in ESGF. *")
    print("*************************************************************************************")
    print(bcolors.ENDC)
def ControlVocab(self):
    '''
    Check CMIP6 global attributes against Control Vocabulary file.

    1. Validate required attribute if presents and some values.
    2. Validate registered institution and institution_id
    3. Validate registered source and source_id
    4. Validate experiment, experiment_id and attribute associated
       with the experiment.
    5. Validate grid_label and grid_resolution
    6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
    7. Validate furtherinfourl from CV internal template

    After the global checks, every variable attribute found in
    ``self.dictVars`` that the table also defines is compared with the
    table value.

    Raises KeyboardInterrupt when the CV library reports an error or when
    a variable attribute in the file differs from the table value.
    '''
    table_id = self.table_id
    cmip6_cv.check_requiredattributes(table_id)
    cmip6_cv.check_institution(table_id)
    cmip6_cv.check_sourceID(table_id)
    cmip6_cv.check_experiment(table_id)
    cmip6_cv.check_grids(table_id)
    cmip6_cv.check_ISOTime()
    cmip6_cv.check_furtherinfourl(table_id)

    varid = cmip6_cv.setup_variable(self.var[0], 'm', 1e20)

    # Remember whether any attribute disagreed with the table; without
    # this the success banner was printed even after a reported mismatch.
    attribute_differs = False
    for key in self.dictVars:
        # Is this attribute in the input table?
        if not cmip6_cv.has_variable_attribute(varid, key):
            continue

        # Verify that attribute value is equal to file attribute
        table_value = cmip6_cv.get_variable_attribute(varid, key)
        file_value = self.dictVars[key]
        if isinstance(table_value, numpy.ndarray):
            table_value = table_value[0]
        if isinstance(file_value, numpy.ndarray):
            file_value = file_value[0]

        if isinstance(table_value, float):
            # Relative tolerance on the absolute difference. The former
            # "table / file < 1.1" test accepted every smaller table value
            # and raised ZeroDivisionError for a zero file value.
            try:
                close = abs(table_value - file_value) <= 0.00001 * abs(table_value)
            except TypeError:
                # File value is not numeric; let the string comparison
                # below report it.
                close = False
            if close:
                table_value = file_value

        file_value = str(file_value)
        table_value = str(table_value)
        if table_value != file_value:
            print("You variable attribute differ from table attribute!")
            print("You file contains " + key + ":" + file_value + " and")
            print("CMIP6 tables requires " + key + ":" + table_value + ".")
            attribute_differs = True

    if attribute_differs or cmip6_cv.get_CV_Error():
        raise KeyboardInterrupt

    print(bcolors.OKGREEN)
    print("*************************************************************************************")
    print("* This file is compliant with the CMIP6 specification and can be published in ESGF. *")
    print("*************************************************************************************")
    print(bcolors.ENDC)
def ControlVocab(self, ncfile, variable=None, print_all=True):
    """
    Check CMIP6 global attributes against Control Vocabulary file.

    1. Validate required attribute if presents and some values.
    2. Validate registered institution and institution_id
    3. Validate registered source and source_id
    4. Validate experiment, experiment_id and all attributes associated
       with this experiment. Make sure that all attributes associate with
       the experiment_id found in CMIP6_CV.json are set to the appropriate
       values.
    5. Validate grid_label and grid_resolution
    6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
    7. Validate furtherinfourl from CV internal template
    8. Validate variable attributes with CMOR JSON table.
    9. Validate parent_* attribute
    10. Validate sub_experiment_* attributes.
    11. Validate that all *_index are integers.

    :param ncfile: path of the netCDF file to check
    :param variable: CMOR variable to check; when falsy, the variable
        deduced from the file name is used
    :param print_all: when true, print a success line if no error is found

    Every detected problem is added to ``self.errors``.

    Raises KeyboardInterrupt when the CMOR entry or the variable cannot be
    found, when the variable cannot be set up, or when ``self.errors`` is
    non-zero at the end of the checks.
    """
    rule = "====================================================================================="

    def _report(*lines):
        # Print the given message lines inside the coloured failure banner.
        print(BCOLORS.FAIL)
        print(rule)
        for line in lines:
            print(line)
        print(rule)
        print(BCOLORS.ENDC)

    filename = os.path.basename(ncfile)

    # -------------------------------------------------------------------
    # Initialize arrays
    # -------------------------------------------------------------------
    # If table_path is the table directory,
    # deduce the corresponding JSON from the filename
    if os.path.isdir(self.cmip6_table_path):
        cmip6_table = '{}/CMIP6_{}.json'.format(
            self.cmip6_table_path, self._get_table_from_filename(filename))
    else:
        cmip6_table = self.cmip6_table_path
    table_id = os.path.basename(
        os.path.splitext(cmip6_table)[0]).split('_')[1]

    # Check and get JSON table
    cmor_table = self._check_json_table(cmip6_table)

    # -------------------------------------------------------------------
    # Load CMIP6 table into memory
    # -------------------------------------------------------------------
    table = cmip6_cv.load_table(cmip6_table)

    # -------------------------------------------------------------------
    # Deduce variable
    # -------------------------------------------------------------------
    # The variable is deduced from the filename by default; a variable
    # passed by the caller takes precedence.
    variable_id = self._get_variable_from_filename(filename)
    if not variable:
        variable = variable_id

    # -------------------------------------------------------------------
    # Distinguish similar CMOR entries with the same out_name if exist
    # -------------------------------------------------------------------
    # Tests are applied only to the table/variable pairs listed in
    # out_names_tests.json, which sits next to this module.
    prepare_path = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(prepare_path, 'out_names_tests.json')) as handle:
        out_names_tests = json.load(handle)

    # -------------------------------------------------------------------
    # Open file in processing
    # The file needs to be open before calling the tests.
    # -------------------------------------------------------------------
    infile = Cdunif.CdunifFile(ncfile, "r")

    # By default, the CMOR entry to consider is the variable from the
    # filename or from the caller. Starting from this default (instead of
    # None) also covers an empty test mapping.
    variable_cmor_entry = variable
    test_key = '{}_{}'.format(table_id, variable_id)
    if test_key in out_names_tests:
        for test, cmor_entry in out_names_tests[test_key].items():
            passed = getattr(self, test)(
                infile=infile, variable=variable, filename=filename)
            # A successful test selects its CMOR entry, a failed one falls
            # back to the variable; the last test evaluated wins.
            variable_cmor_entry = cmor_entry if passed else variable

    # -------------------------------------------------------------------
    # Get variable out name in netCDF record
    # -------------------------------------------------------------------
    # Variable record name should follow CMOR table out names
    if variable_cmor_entry not in cmor_table['variable_entry']:
        _report("The entry " + variable_cmor_entry +
                " could not be found in CMOR table")
        raise KeyboardInterrupt
    variable_record_name = cmor_table['variable_entry'][
        variable_cmor_entry]['out_name']
    # Variable id attribute should be the same as variable record name
    # in any case to be CF- and CMIP6-compliant
    variable_id = variable_record_name

    # -------------------------------------------------------------------
    # Create a dictionary of all global attributes
    # -------------------------------------------------------------------
    self.dictGbl = infile.__dict__
    for name, value in self.dictGbl.items():
        cmip6_cv.set_cur_dataset_attribute(name, value)

    # Set member_id attribute depending on sub_experiment_id and
    # variant_label
    member_id = ""
    if "sub_experiment_id" in self.dictGbl:
        if self.dictGbl["sub_experiment_id"] not in ['none']:
            member_id = '{}-{}'.format(self.dictGbl['sub_experiment_id'],
                                       self.dictGbl['variant_label'])
        else:
            member_id = self.dictGbl['variant_label']
    cmip6_cv.set_cur_dataset_attribute(cmip6_cv.GLOBAL_ATT_MEMBER_ID,
                                       member_id)

    # -------------------------------------------------------------------
    # Create a dictionary of attributes for the variable
    # -------------------------------------------------------------------
    try:
        self.dictVar = infile.variables[variable_record_name].__dict__
    except Exception:
        _report("The variable " + variable_record_name +
                " could not be found in file")
        raise KeyboardInterrupt

    # -------------------------------------------------------------------
    # Check global attributes
    # -------------------------------------------------------------------
    self.errors += cmip6_cv.check_requiredattributes(table)
    self.errors += cmip6_cv.check_institution(table)
    self.errors += cmip6_cv.check_sourceID(table)
    self.errors += cmip6_cv.check_experiment(table)
    self.errors += cmip6_cv.check_grids(table)
    self.errors += cmip6_cv.check_ISOTime()
    self.errors += cmip6_cv.check_furtherinfourl(table)
    self.errors += cmip6_cv.check_subExpID(table)

    for attr in ['branch_time_in_child', 'branch_time_in_parent']:
        if attr in self.dictGbl:
            self.set_double_value(attr)
            if not isinstance(self.dictGbl[attr], numpy.float64):
                _report("{} is not a double: ".format(attr) + " " +
                        str(type(self.dictGbl[attr])))
                self.errors += 1

    for attr in ['realization_index', 'initialization_index',
                 'physics_index', 'forcing_index']:
        # .get() so that a missing index is reported as an error instead
        # of aborting the whole check with a KeyError.
        index_value = self.dictGbl.get(attr)
        if not isinstance(index_value, numpy.ndarray):
            _report("{} is not an integer: ".format(attr) + " " +
                    str(type(index_value)))
            self.errors += 1

    self.errors += cmip6_cv.check_parentExpID(table)

    # The global table_id / variable_id attributes must match the values
    # deduced from the table file name and the CMOR out_name.
    for attr, expected in (('table_id', table_id),
                           ('variable_id', variable_id)):
        if attr not in self.dictGbl:
            _report("{} attribute is missing in global attributes".format(
                attr))
            self.errors += 1
        elif expected != self.dictGbl[attr]:
            _report("{} attribute is not consistent: ".format(attr) + " " +
                    str(self.dictGbl[attr]))
            self.errors += 1

    # -------------------------------------------------------------------
    # Get time axis properties
    # -------------------------------------------------------------------
    # Get calendar and time units; fall back to defaults when the file
    # has no usable time variable.
    try:
        calendar = infile.variables['time'].calendar
        timeunits = infile.variables['time'].units
    except Exception:
        calendar = "gregorian"
        timeunits = "days since ?"

    # Climatologies from an Amon table are reported under "<variable>Clim"
    if self.is_climatology(filename) and cmip6_table.find('Amon') != -1:
        variable = '{}Clim'.format(variable)

    # Get first and last time bounds
    try:
        time_attrs = infile.variables['time'].__dict__
        if 'bounds' in time_attrs:
            bndsvar = time_attrs['bounds']
        elif 'climatology' in time_attrs:
            bndsvar = time_attrs['climatology']
        else:
            bndsvar = 'time_bnds'
        startimebnds = infile.variables[bndsvar][0][0]
        endtimebnds = infile.variables[bndsvar][-1][1]
    except Exception:
        startimebnds = 0
        endtimebnds = 0

    try:
        startime = infile.variables['time'][0]
        endtime = infile.variables['time'][-1]
    except Exception:
        startime = 0
        endtime = 0

    # -------------------------------------------------------------------
    # Setup variable
    # -------------------------------------------------------------------
    varid = cmip6_cv.setup_variable(variable_cmor_entry,
                                    self.dictVar['units'],
                                    self.dictVar['_FillValue'][0],
                                    startime, endtime,
                                    startimebnds, endtimebnds)
    if varid == -1:
        _report("Could not find variable {} in table {} ".format(
            variable_cmor_entry, cmip6_table))
        raise KeyboardInterrupt

    # -------------------------------------------------------------------
    # Check filename
    # -------------------------------------------------------------------
    self.errors += cmip6_cv.check_filename(table, varid, calendar,
                                           timeunits, filename)

    # -------------------------------------------------------------------
    # Check variable attributes
    # -------------------------------------------------------------------
    # Raw string: the pattern contains regex escapes such as \w.
    or_pattern = re.compile(
        r'(?P<param>[\w.-]+): (?P<val1>[\w.-]+) OR (?P<val2>[\w.-]+)')

    cv_attrs = cmip6_cv.list_variable_attributes(varid)
    for key in cv_attrs:
        if key in ("long_name", "comment"):
            continue
        if key == "cell_measures":
            if "OPT" in cv_attrs[key] or "MODEL" in cv_attrs[key]:
                continue

        if key not in self.dictVar:
            # That attribute is not in the file
            table_value = cv_attrs[key]
            if isinstance(table_value, numpy.ndarray):
                table_value = table_value[0]
            if isinstance(table_value, float):
                table_value = "{0:.2g}".format(table_value)
            _report("CMIP6 variable " + variable + " requires \"" + key +
                    "\":\"" + str(table_value) + "\".")
            self.errors += 1
            continue

        # Verify that attribute value is equal to file attribute
        table_value = cv_attrs[key]
        file_value = self.dictVar[key]

        # PrePARE accept units of 1 or 1.0 so adjust the table_value
        if key == "units":
            if table_value == "1" and file_value == "1.0":
                table_value = "1.0"
            if table_value == "1.0" and file_value == "1":
                table_value = "1"

        # A table string of integers compared against an array attribute
        if isinstance(table_value, str) and isinstance(
                file_value, numpy.ndarray):
            table_ints = [int(item) for item in table_value.split()]
            if numpy.array(table_ints == file_value).all():
                file_value = True
                table_value = True

        if isinstance(table_value, numpy.ndarray):
            table_value = table_value[0]
        if isinstance(file_value, numpy.ndarray):
            file_value = file_value[0]

        if isinstance(table_value, float):
            if abs(table_value - file_value) <= 0.00001 * abs(table_value):
                table_value = file_value

        if key == "cell_methods":
            # Ignore anything from the first " (" on, in both values
            idx = file_value.find(" (")
            if idx != -1:
                file_value = file_value[:idx]
                table_value = table_value[:idx]

        if key == "cell_measures":
            # NOTE(review): the nesting of this check was reconstructed
            # from whitespace-collapsed source; confirm against upstream
            # that the alternatives test and the `continue` only apply
            # when the table value contains "OR" alternatives.
            alternatives = or_pattern.findall(table_value)
            if alternatives:
                # Empty string is allowed in case of useless attribute
                table_values = [""]
                choices = dict()
                for param, val1, val2 in alternatives:
                    choices[param] = [
                        str('{}: {}'.format(param, val1)),
                        str('{}: {}'.format(param, val2))
                    ]
                table_values.extend(
                    ' '.join(combo)
                    for combo in itertools.product(*choices.values()))
                if str(file_value) not in [str(v) for v in table_values]:
                    _report("Your file contains \"" + key + "\":\"" +
                            str(file_value) + "\" and",
                            "CMIP6 tables requires \"" + key + "\":\"" +
                            str(table_value) + "\".")
                    self.errors += 1
                continue

        if str(table_value) != str(file_value):
            _report("Your file contains \"" + key + "\":\"" +
                    str(file_value) + "\" and",
                    "CMIP6 tables requires \"" + key + "\":\"" +
                    str(table_value) + "\".")
            self.errors += 1

    # Print final message
    if self.errors != 0:
        print(BCOLORS.FAIL + "└──> :: CV FAIL :: {}".format(ncfile) +
              BCOLORS.ENDC)
        raise KeyboardInterrupt
    elif print_all:
        print(BCOLORS.OKGREEN + " :: CV SUCCESS :: {}".format(ncfile) +
              BCOLORS.ENDC)
def ControlVocab(self, args):
    '''
    Check CMIP6 global attributes against Control Vocabulary file.

    1. Validate required attribute if presents and some values.
    2. Validate registered institution and institution_id
    3. Validate registered source and source_id
    4. Validate experiment, experiment_id and all attributes associated
       with this experiment. Make sure that all attributes associate with
       the experiment_id found in CMIP6_CV.json are set to the appropriate
       values.
    5. Validate grid_label and grid_resolution
    6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
    7. Validate furtherinfourl from CV internal template
    8. Validate variable attributes with CMOR JSON table.
    9. Validate parent_* attribute
    10. Validate sub_experiment_* attributes.
    11. Validate that all *_index are integers.

    ``args`` must provide ``variable`` (may be None) and ``infile`` (the
    opened input file).

    Returns None early, after flagging a CV error, when the variable cannot
    be set up from the table. Raises KeyboardInterrupt when a CV error has
    been flagged by the end of the checks.
    '''
    alert_bar = "!!!!!!!!!!!!!!!!!!!!!!!!!"
    rule = "====================================================================================="

    def _banner(bar, *lines):
        # Print message lines between two bars, in the failure colour.
        print(bcolors.FAIL)
        print(bar)
        for line in lines:
            print(line)
        print(bar)
        print(bcolors.ENDC)

    self.variable = args.variable
    self.infile = args.infile

    # -------------------------------------
    # Create a list of all Global Attributes
    # -------------------------------------
    self.dictGbl = dict(self.infile.__dict__)
    self.attributes = self.infile.__dict__.keys()
    self.variables = self.infile.variables.keys()
    for key, value in self.dictGbl.items():
        cmip6_cv.set_cur_dataset_attribute(key, value)

    member_id = ""
    if "sub_experiment_id" in self.dictGbl:
        if self.dictGbl["sub_experiment_id"] not in ["none"]:
            member_id = self.dictGbl["sub_experiment_id"] + \
                '-' + self.dictGbl["variant_label"]
        else:
            member_id = self.dictGbl["variant_label"]
    cmip6_cv.set_cur_dataset_attribute(cmip6_cv.GLOBAL_ATT_MEMBER_ID,
                                       member_id)
    self.setDoubleValue('branch_time_in_parent')
    self.setDoubleValue('branch_time_in_child')

    if self.variable is not None:
        self.var = [self.variable]
    else:
        # Fall back to the variable_id global attribute of the file
        self.var = [self.infile.variable_id]

    # NOTE(review): self.var always holds exactly one element at this
    # point, so this guard cannot currently fire; kept for parity.
    if not self.var or len(self.var) > 1:
        _banner(
            alert_bar,
            "! Error: The input file does not have an history attribute and the CMIP6 variable could not be found",
            "! Please use the --variable option to specify your CMIP6 variable",
            "! Check your file or use CMOR 3.x to achieve compliance for ESGF publication.")
        raise KeyboardInterrupt

    try:
        self.keys = self.infile.variables[self.var[0]].__dict__.keys()
    except Exception:
        _banner(alert_bar,
                "! Error: The variable " + self.var[0] +
                " could not be found",
                "! Check your file variables ")
        raise

    # -------------------------------------------------------------------
    # Create a dictionary of attributes for var (None values dropped)
    # -------------------------------------------------------------------
    var_attrs = self.infile.variables[self.var[0]].__dict__
    self.dictVars = dict((key, var_attrs[key]) for key in self.keys
                         if var_attrs[key] is not None)

    try:
        self.calendar = self.infile.variables['time'].calendar
        self.timeunits = self.infile.variables['time'].units
    except Exception:
        self.calendar = "gregorian"
        self.timeunits = "days since ?"

    cmip6_cv.check_requiredattributes(self.table_id)
    cmip6_cv.check_institution(self.table_id)
    cmip6_cv.check_sourceID(self.table_id)
    cmip6_cv.check_experiment(self.table_id)
    cmip6_cv.check_grids(self.table_id)
    cmip6_cv.check_ISOTime()
    cmip6_cv.check_furtherinfourl(self.table_id)
    cmip6_cv.check_parentExpID(self.table_id)
    cmip6_cv.check_subExpID(self.table_id)

    try:
        startimebnds = self.infile.variables['time_bnds'][0][0]
        endtimebnds = self.infile.variables['time_bnds'][-1][1]
    except Exception:
        startimebnds = 0
        endtimebnds = 0
    try:
        startime = self.infile.variables['time'][0]
        endtime = self.infile.variables['time'][-1]
    except Exception:
        startime = 0
        endtime = 0

    varunits = self.infile.variables[self.var[0]].units
    varmissing = self.infile.variables[self.var[0]]._FillValue[0]
    varid = cmip6_cv.setup_variable(self.var[0], varunits, varmissing,
                                    startime, endtime,
                                    startimebnds, endtimebnds)
    if varid == -1:
        _banner(rule,
                " Could not find variable '%s' in table '%s' " % (
                    self.var[0], self.cmip6_table))
        cmip6_cv.set_CV_Error()
        return

    # The file path is taken from the quoted part of str(self.infile)
    fn = os.path.basename(str(self.infile).split('\'')[1])
    cmip6_cv.check_filename(self.table_id, varid, self.calendar,
                            self.timeunits, fn)

    for index_name in ('realization_index', 'initialization_index',
                       'physics_index', 'forcing_index'):
        # .get() so that a missing index is flagged as a CV error instead
        # of aborting with a KeyError.
        index_value = self.dictGbl.get(index_name)
        if not isinstance(index_value, numpy.ndarray):
            _banner(rule,
                    index_name + " is not an integer: " + " " +
                    str(type(index_value)))
            cmip6_cv.set_CV_Error()

    prepLIST = cmip6_cv.list_variable_attributes(varid)
    for key in prepLIST:
        if key in ("long_name", "comment"):
            continue

        if key in self.dictVars:
            # Verify that attribute value is equal to file attribute
            table_value = prepLIST[key]
            file_value = self.dictVars[key]
            if isinstance(table_value, numpy.ndarray):
                table_value = table_value[0]
            if isinstance(file_value, numpy.ndarray):
                file_value = file_value[0]

            if isinstance(table_value, float):
                # Symmetric relative tolerance. The former
                # "1 - table / file < 1e-5" test accepted every table
                # value larger than the file value.
                try:
                    if abs(table_value - file_value) <= \
                            0.00001 * abs(table_value):
                        table_value = file_value
                except TypeError:
                    # Non-numeric file value: reported by the string
                    # comparison below.
                    pass

            if key == "cell_methods":
                # Compare only the part before " (interval:"; when the
                # marker is absent, leave both values untouched
                # (str.find returns -1, which must not be used to slice).
                idx = file_value.find(" (interval:")
                if idx != -1:
                    file_value = file_value[:idx]
                    table_value = table_value[:idx]

            file_value = str(file_value)
            table_value = str(table_value)
            if table_value != file_value:
                _banner(rule,
                        "You file contains \"" + key + "\":\"" +
                        str(file_value) + "\" and",
                        "CMIP6 tables requires \"" + key + "\":\"" +
                        str(table_value) + "\".")
                cmip6_cv.set_CV_Error()
        else:
            # That attribute is not in the file
            table_value = prepLIST[key]
            if key == "cell_measures":
                if (table_value.find("OPT") != -1
                        or table_value.find("MODEL") != -1):
                    continue
            if isinstance(table_value, numpy.ndarray):
                table_value = table_value[0]
            if isinstance(table_value, float):
                table_value = "{0:.2g}".format(table_value)
            _banner(rule,
                    "CMIP6 variable " + self.var[0] + " requires \"" + key +
                    "\":\"" + str(table_value) + "\".")
            cmip6_cv.set_CV_Error()

    if cmip6_cv.get_CV_Error():
        raise KeyboardInterrupt

    print(bcolors.OKGREEN)
    print("*************************************************************************************")
    print("* This file is compliant with the CMIP6 specification and can be published in ESGF. *")
    print("*************************************************************************************")
    print(bcolors.ENDC)
def ControlVocab(self):
    '''
    Check CMIP6 global attributes against Control Vocabulary file.

    1. Validate required attribute if presents and some values.
    2. Validate registered institution and institution_id
    3. Validate registered source and source_id
    4. Validate experiment, experiment_id and attribute associated
       with the experiment.
    5. Validate grid_label and grid_resolution
    6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
    7. Validate furtherinfourl from CV internal template
    8. Validate variable attributes with CMOR JSON table.

    Reads ``self.table_id``, ``self.var`` and ``self.dictVars``.

    Raises KeyboardInterrupt when a CV error has been flagged by the end
    of the checks.
    '''
    rule = "====================================================================================="

    def _report(*lines):
        # Print the message lines inside the failure banner and flag the
        # CV error state.
        print(bcolors.FAIL)
        print(rule)
        for line in lines:
            print(line)
        print(rule)
        print(bcolors.ENDC)
        cmip6_cv.set_CV_Error()

    cmip6_cv.check_requiredattributes(self.table_id)
    cmip6_cv.check_institution(self.table_id)
    cmip6_cv.check_sourceID(self.table_id)
    cmip6_cv.check_experiment(self.table_id)
    cmip6_cv.check_grids(self.table_id)
    cmip6_cv.check_ISOTime()
    cmip6_cv.check_furtherinfourl(self.table_id)

    varid = cmip6_cv.setup_variable(self.var[0], 'm', 1e20)

    prepLIST = cmip6_cv.list_variable_attributes(varid)
    for key in prepLIST:
        if key == "comment":
            continue

        table_value = prepLIST[key]
        if isinstance(table_value, numpy.ndarray):
            table_value = table_value[0]

        if key not in self.dictVars:
            # That attribute is not in the file
            if isinstance(table_value, float):
                table_value = "{0:.2g}".format(table_value)
            _report("CMIP6 variable " + self.var[0] + " requires \"" + key +
                    "\":\"" + str(table_value) + "\".")
            continue

        # Verify that attribute value is equal to file attribute
        file_value = self.dictVars[key]
        if isinstance(file_value, numpy.ndarray):
            file_value = file_value[0]

        if isinstance(table_value, float):
            # Relative tolerance on the absolute difference. The former
            # "table / file < 1.1" test accepted every smaller table value
            # and raised ZeroDivisionError for a zero file value.
            try:
                if abs(table_value - file_value) <= 0.00001 * abs(table_value):
                    table_value = file_value
            except TypeError:
                # Non-numeric file value: reported by the string
                # comparison below.
                pass

        if key == "cell_methods":
            # Compare only the part before " (interval:". str.find returns
            # -1 when the marker is absent; slicing with -1 would silently
            # drop the last character of both values.
            idx = file_value.find(" (interval:")
            if idx != -1:
                file_value = file_value[:idx]
                table_value = table_value[:idx]

        file_value = str(file_value)
        table_value = str(table_value)
        if table_value != file_value:
            _report("You file contains \"" + key + "\":\"" +
                    str(file_value) + "\" and",
                    "CMIP6 tables requires \"" + key + "\":\"" +
                    str(table_value) + "\".")

    if cmip6_cv.get_CV_Error():
        raise KeyboardInterrupt

    print(bcolors.OKGREEN)
    print("*************************************************************************************")
    print("* This file is compliant with the CMIP6 specification and can be published in ESGF. *")
    print("*************************************************************************************")
    print(bcolors.ENDC)
def ControlVocab(self, ncfile, variable=None):
    """
    Check CMIP6 global attributes against Control Vocabulary file.

    1. Validate required attribute if presents and some values.
    2. Validate registered institution and institution_id
    3. Validate registered source and source_id
    4. Validate experiment, experiment_id and all attributes associated
       with this experiment. Make sure that all attributes associate with
       the experiment_id found in CMIP6_CV.json are set to the
       appropriate values.
    5. Validate grid_label and grid_resolution
    6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
    7. Validate furtherinfourl from CV internal template
    8. Validate variable attributes with CMOR JSON table.
    9. Validate parent_* attribute
    10. Validate sub_experiment_* attributes.
    11. Validate that all *_index are integers.

    Args:
        ncfile: path of the netCDF file to check.
        variable: CMOR variable name; when empty it is deduced from the
            file name.

    Side effects:
        Sets ``self.dictGbl`` and ``self.dictVar``, adds to
        ``self.errors`` for every problem found, and sets
        ``self.cv_error`` to True when the filename or attribute-type
        checks fail.

    Raises:
        KeyboardInterrupt: if the variable is missing from the file or the
            table, or if ``self.errors`` is non-zero after all checks.
    """
    rule = "====================================================================================="

    def report(*lines):
        # Print one failure banner around the given message lines.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(BCOLORS.FAIL)
        print(rule)
        for line in lines:
            print(line)
        print(rule)
        print(BCOLORS.ENDC)

    cmip6_cv.reset_CV_Error()
    filename = os.path.basename(ncfile)

    # -------------------------------------------------------------------
    #  Locate, check and load the CMOR table
    # -------------------------------------------------------------------
    # If table_path is the table directory, deduce the JSON from filename
    if os.path.isdir(self.cmip6_table_path):
        cmip6_table = '{}/CMIP6_{}.json'.format(
            self.cmip6_table_path, self._get_table_from_filename(filename))
    else:
        cmip6_table = self.cmip6_table_path
    table_id = os.path.basename(os.path.splitext(cmip6_table)[0]).split('_')[1]
    self._check_json_table(cmip6_table)
    table = cmip6_cv.load_table(cmip6_table)

    # -------------------------------------------------------------------
    #  Deduce variable
    # -------------------------------------------------------------------
    # The variable given by the caller wins; otherwise use the file name.
    variable_id = self._get_variable_from_filename(filename)
    if not variable:
        variable = variable_id

    # -------------------------------------------------------------------
    #  Open file in processing (closed again in the finally clause)
    # -------------------------------------------------------------------
    infile = Cdunif.CdunifFile(ncfile, "r")
    try:
        # ---------------------------------------------------------------
        #  Create a dictionary of all global attributes
        # ---------------------------------------------------------------
        self.dictGbl = infile.__dict__
        for name, value in self.dictGbl.items():
            cmip6_cv.set_cur_dataset_attribute(name, value)

        # Set member_id depending on sub_experiment_id and variant_label
        member_id = ""
        if "sub_experiment_id" in self.dictGbl:
            if self.dictGbl["sub_experiment_id"] not in ['none']:
                member_id = '{}-{}'.format(self.dictGbl['sub_experiment_id'],
                                           self.dictGbl['variant_label'])
            else:
                member_id = self.dictGbl['variant_label']
        cmip6_cv.set_cur_dataset_attribute(cmip6_cv.GLOBAL_ATT_MEMBER_ID, member_id)
        self.set_double_value('branch_time_in_parent')
        self.set_double_value('branch_time_in_child')

        # ---------------------------------------------------------------
        #  Create a dictionary of attributes for the variable
        # ---------------------------------------------------------------
        try:
            self.dictVar = infile.variables[variable].__dict__
        except Exception:
            report("The variable " + variable + " could not be found in file")
            raise KeyboardInterrupt

        # ---------------------------------------------------------------
        #  Check global attributes
        # ---------------------------------------------------------------
        self.errors += cmip6_cv.check_requiredattributes(table)
        self.errors += cmip6_cv.check_institution(table)
        self.errors += cmip6_cv.check_sourceID(table)
        self.errors += cmip6_cv.check_experiment(table)
        self.errors += cmip6_cv.check_grids(table)
        self.errors += cmip6_cv.check_ISOTime()
        self.errors += cmip6_cv.check_furtherinfourl(table)
        self.errors += cmip6_cv.check_parentExpID(table)
        self.errors += cmip6_cv.check_subExpID(table)

        # The type checks below used to be run a second (and, for the
        # branch times, a third) time further down, with unguarded lookups
        # and mislabelled messages. They are now run once here; the
        # cv_error / set_CV_Error side effects of the repeated passes are
        # applied after the filename check, where those passes used to be.
        # The double space in the messages matches the former
        # ``print text, value`` output.
        bad_branch_time = False
        for attr in ['branch_time_in_child', 'branch_time_in_parent']:
            # Branch times are optional; only their type is checked.
            if attr in self.dictGbl and not isinstance(self.dictGbl[attr], numpy.float64):
                report("{} is not a double:  {}".format(attr, type(self.dictGbl[attr])))
                self.errors += 1
                bad_branch_time = True

        bad_index = False
        for attr in ['realization_index', 'initialization_index',
                     'physics_index', 'forcing_index']:
            if attr not in self.dictGbl:
                # Previously an uncaught KeyError.
                report("{} attribute is missing in global attributes".format(attr))
            elif not isinstance(self.dictGbl[attr], numpy.ndarray):
                report("{} is not an integer:  {}".format(attr, type(self.dictGbl[attr])))
            else:
                continue
            self.errors += 1
            bad_index = True

        expected = {'table_id': table_id, 'variable_id': variable_id}
        for attr in ['table_id', 'variable_id']:
            if attr not in self.dictGbl:
                report("{} attribute is missing in global attributes".format(attr))
                self.errors += 1
            elif expected[attr] != self.dictGbl[attr]:
                report("{} attribute is not consistent:  {}".format(attr, self.dictGbl[attr]))
                self.errors += 1

        # ---------------------------------------------------------------
        #  Get time axis properties
        # ---------------------------------------------------------------
        # Calendar and time units (defaults when there is no usable axis)
        try:
            calendar = infile.variables['time'].calendar
            timeunits = infile.variables['time'].units
        except Exception:
            calendar = "gregorian"
            timeunits = "days since ?"

        # First and last time bounds
        try:
            bndsvar = infile.variables['time'].__dict__.get('bounds', 'time_bnds')
            startimebnds = infile.variables[bndsvar][0][0]
            endtimebnds = infile.variables[bndsvar][-1][1]
        except Exception:
            startimebnds = 0
            endtimebnds = 0

        # First and last time steps
        try:
            startime = infile.variables['time'][0]
            endtime = infile.variables['time'][-1]
        except Exception:
            startime = 0
            endtime = 0

        # ---------------------------------------------------------------
        #  Distinguish similar CMOR entries with the same out_name
        # ---------------------------------------------------------------
        # Each listed test is a method of self; when it returns true the
        # associated CMOR entry replaces the variable name.
        prepare_path = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(prepare_path, 'out_names_tests.json')) as handle:
            out_names_tests = json.loads(handle.read())
        entry_key = '{}_{}'.format(table_id, variable_id)
        for test, cmor_entry in out_names_tests.get(entry_key, {}).items():
            if getattr(self, test)(infile=infile, variable=variable, filename=filename):
                variable = cmor_entry

        # ---------------------------------------------------------------
        #  Setup variable
        # ---------------------------------------------------------------
        varid = cmip6_cv.setup_variable(variable,
                                        self.dictVar['units'],
                                        self.dictVar['_FillValue'][0],
                                        startime, endtime,
                                        startimebnds, endtimebnds)
        if varid == -1:
            report("Could not find variable {} in table {} ".format(variable, cmip6_table))
            raise KeyboardInterrupt

        # ---------------------------------------------------------------
        #  Check filename
        # ---------------------------------------------------------------
        # One call feeds both self.errors and self.cv_error; the second
        # call used a name re-derived by parsing str(infile).
        filename_errors = cmip6_cv.check_filename(table, varid, calendar, timeunits, filename)
        self.errors += filename_errors
        if filename_errors != 0 or cmip6_cv.get_CV_Error() == 1:
            self.cv_error = True
        if bad_branch_time or bad_index:
            self.cv_error = True
        if bad_branch_time:
            cmip6_cv.set_CV_Error()

        # ---------------------------------------------------------------
        #  Check variable attributes
        # ---------------------------------------------------------------
        cv_attrs = cmip6_cv.list_variable_attributes(varid)
        for key in cv_attrs:
            if key in ("long_name", "comment"):
                continue
            table_value = cv_attrs[key]
            if key == "cell_measures" and ("OPT" in table_value or "MODEL" in table_value):
                continue

            # That attribute is not in the file
            if key not in self.dictVar:
                if isinstance(table_value, numpy.ndarray):
                    table_value = table_value[0]
                if isinstance(table_value, float):
                    table_value = "{0:.2g}".format(table_value)
                report("CMIP6 variable " + variable + " requires \"" + key +
                       "\":\"" + str(table_value) + "\".")
                self.errors += 1
                continue

            # Verify that attribute value is equal to file attribute
            file_value = self.dictVar[key]

            # PrePARE accept units of 1 or 1.0 so adjust the table_value
            if key == "units":
                if (table_value == "1") and (file_value == "1.0"):
                    table_value = "1.0"
                if (table_value == "1.0") and (file_value == "1"):
                    table_value = "1"

            # A table string of integers matches a file array holding the
            # same integers.
            if isinstance(table_value, str) and isinstance(file_value, numpy.ndarray):
                if numpy.array([int(value) for value in table_value.split()] == file_value).all():
                    file_value = True
                    table_value = True

            if isinstance(table_value, numpy.ndarray):
                table_value = table_value[0]
            if isinstance(file_value, numpy.ndarray):
                file_value = file_value[0]

            if isinstance(table_value, float):
                # Relative tolerance of 1e-5. This form needs no division,
                # so a zero file value is handled like any other and the
                # real value appears in the error message.
                try:
                    close_enough = (abs(table_value - file_value)
                                    <= 0.00001 * abs(table_value))
                except TypeError:
                    # Non-numeric file value: let the comparison report it.
                    close_enough = False
                if close_enough:
                    table_value = file_value

            if key == "cell_methods":
                # Compare only the part before the first parenthesised comment
                idx = file_value.find(" (")
                if idx != -1:
                    file_value = file_value[:idx]
                    table_value = table_value[:idx]

            if key == "cell_measures":
                # Table entries of the form "param: a OR b" allow either
                # value for each parameter.
                pattern = re.compile(r'(?P<param>[\w.-]+): (?P<val1>[\w.-]+) OR (?P<val2>[\w.-]+)')
                values = re.findall(pattern, table_value)
                table_values = [""]  # Empty string is allowed in case of useless attribute
                if values:
                    alternatives = dict()
                    for param, val1, val2 in values:
                        alternatives[param] = [str('{}: {}'.format(param, val1)),
                                               str('{}: {}'.format(param, val2))]
                    table_values.extend(
                        ' '.join(combo) for combo in itertools.product(*alternatives.values()))
                    if str(file_value) not in table_values:
                        report("Your file contains \"" + key + "\":\"" + str(file_value) + "\" and",
                               "CMIP6 tables requires \"" + key + "\":\"" + str(table_value) + "\".")
                        self.errors += 1
                    continue

            if str(table_value) != str(file_value):
                report("Your file contains \"" + key + "\":\"" + str(file_value) + "\" and",
                       "CMIP6 tables requires \"" + key + "\":\"" + str(table_value) + "\".")
                self.errors += 1

        if self.errors != 0:
            raise KeyboardInterrupt

        print(BCOLORS.OKGREEN)
        print("*************************************************************************************")
        print("* This file is compliant with the CMIP6 specification and can be published in ESGF *")
        print("*************************************************************************************")
        print(BCOLORS.ENDC)
    finally:
        infile.close()
def ControlVocab(self, ncfile, variable=None, print_all=True, no_text_color=False):
    """
    Check CMIP6 global attributes against Control Vocabulary file.

    1. Validate required attribute if presents and some values.
    2. Validate registered institution and institution_id
    3. Validate registered source and source_id
    4. Validate experiment, experiment_id and all attributes associated
       with this experiment. Make sure that all attributes associate with
       the experiment_id found in CMIP6_CV.json are set to the
       appropriate values.
    5. Validate grid_label and grid_resolution
    6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
    7. Validate furtherinfourl from CV internal template
    8. Validate variable attributes with CMOR JSON table.
    9. Validate parent_* attribute
    10. Validate sub_experiment_* attributes.
    11. Validate that all *_index are integers.

    Args:
        ncfile: path of the netCDF file to check.
        variable: CMOR variable name; when empty it is deduced from the
            file name.
        print_all: when true, a success line is printed for a compliant
            file.
        no_text_color: forwarded to ``self.prepare_print``.

    Side effects:
        Sets ``self.dictGbl`` and ``self.dictVar`` and adds one to
        ``self.errors`` for every problem found.

    Raises:
        KeyboardInterrupt: if the CMOR entry or the variable cannot be
            found, or if ``self.errors`` is non-zero after all checks.
    """
    def fail(message):
        # Report one problem through the class printer.
        self.prepare_print(message, 'FAIL', no_text_color, lines=True)

    def abort():
        # Print the per-file failure line and stop processing this file.
        self.prepare_print("└──> :: CV FAIL :: {}".format(ncfile), 'FAIL', no_text_color)
        raise KeyboardInterrupt

    filename = os.path.basename(ncfile)

    # -------------------------------------------------------------------
    #  Locate, check and load the CMOR table
    # -------------------------------------------------------------------
    # If table_path is the table directory, deduce the JSON from filename
    if os.path.isdir(self.cmip6_table_path):
        cmip6_table = '{}/CMIP6_{}.json'.format(
            self.cmip6_table_path, self._get_table_from_filename(filename))
    else:
        cmip6_table = self.cmip6_table_path
    table_id = os.path.basename(os.path.splitext(cmip6_table)[0]).split('_')[1]
    cmor_table = self._check_json_table(cmip6_table)
    table = cmip6_cv.load_table(cmip6_table)

    # -------------------------------------------------------------------
    #  Deduce variable
    # -------------------------------------------------------------------
    # The variable given by the caller wins; otherwise use the file name.
    variable_id = self._get_variable_from_filename(filename)
    if not variable:
        variable = variable_id

    # -------------------------------------------------------------------
    #  Tests that distinguish CMOR entries sharing the same out_name
    # -------------------------------------------------------------------
    prepare_path = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(prepare_path, 'out_names_tests.json')) as handle:
        out_names_tests = json.loads(handle.read())

    # -------------------------------------------------------------------
    #  Open file in processing
    #  The file needs to be open before calling the tests; the with block
    #  closes it on every exit path.
    # -------------------------------------------------------------------
    with netCDF4.Dataset(ncfile, "r") as infile:
        # By default the CMOR entry is the variable from the file name or
        # the command line; the first successful test overrides it. (The
        # default also covers an empty test mapping, which used to leave
        # the entry as None.)
        entry_key = '{}_{}'.format(table_id, variable_id)
        variable_cmor_entry = variable
        for test, cmor_entry in out_names_tests.get(entry_key, {}).items():
            if getattr(self, test)(infile=infile, variable=variable, filename=filename):
                variable_cmor_entry = cmor_entry
                break

        # ---------------------------------------------------------------
        #  Get variable out name in netCDF record
        # ---------------------------------------------------------------
        # Variable record name should follow CMOR table out names
        if variable_cmor_entry not in cmor_table['variable_entry']:
            fail("The entry " + variable_cmor_entry + " could not be found in CMOR table")
            abort()
        variable_record_name = cmor_table['variable_entry'][variable_cmor_entry]['out_name']
        # Variable id attribute should be the same as variable record name
        # in any case to be CF- and CMIP6-compliant
        variable_id = variable_record_name

        # ---------------------------------------------------------------
        #  Create a dictionary of all global attributes
        # ---------------------------------------------------------------
        self.dictGbl = infile.__dict__
        for name, value in self.dictGbl.items():
            cmip6_cv.set_cur_dataset_attribute(name, value)

        # Set member_id depending on sub_experiment_id and variant_label
        member_id = ""
        if "sub_experiment_id" in self.dictGbl:
            if self.dictGbl["sub_experiment_id"] not in ['none']:
                member_id = '{}-{}'.format(self.dictGbl['sub_experiment_id'],
                                           self.dictGbl['variant_label'])
            else:
                member_id = self.dictGbl['variant_label']
        cmip6_cv.set_cur_dataset_attribute(cmip6_cv.GLOBAL_ATT_MEMBER_ID, member_id)

        # ---------------------------------------------------------------
        #  Create a dictionary of attributes for the variable
        # ---------------------------------------------------------------
        try:
            self.dictVar = infile.variables[variable_record_name].__dict__
        except Exception:
            fail("The variable " + variable_record_name + " could not be found in file")
            abort()

        # ---------------------------------------------------------------
        #  Check global attributes
        # ---------------------------------------------------------------
        for check in (cmip6_cv.check_requiredattributes,
                      cmip6_cv.check_institution,
                      cmip6_cv.check_sourceID,
                      cmip6_cv.check_experiment,
                      cmip6_cv.check_grids):
            if check(table) != 0:
                self.errors += 1
        if cmip6_cv.check_ISOTime() != 0:
            self.errors += 1
        for check in (cmip6_cv.check_furtherinfourl, cmip6_cv.check_subExpID):
            if check(table) != 0:
                self.errors += 1

        # Branch times are optional; when present they must be doubles.
        for attr in ['branch_time_in_child', 'branch_time_in_parent']:
            if attr in self.dictGbl:
                self.set_double_value(attr)
                if not numpy.issubdtype(type(self.dictGbl[attr]), numpy.float64):
                    fail("{} is not a double: {}".format(attr, type(self.dictGbl[attr])))
                    self.errors += 1

        # The *_index attributes are mandatory integers.
        for attr in ['realization_index', 'initialization_index',
                     'physics_index', 'forcing_index']:
            if attr not in self.dictGbl:
                fail("{} attribute is missing in global attributes".format(attr))
                self.errors += 1
            elif not numpy.issubdtype(type(self.dictGbl[attr]), numpy.integer):
                fail("{} is not an integer: {}".format(attr, type(self.dictGbl[attr])))
                self.errors += 1

        if cmip6_cv.check_parentExpID(table) != 0:
            self.errors += 1

        # table_id and variable_id must agree with the values deduced above.
        expected = {'table_id': table_id, 'variable_id': variable_id}
        for attr in ['table_id', 'variable_id']:
            if attr not in self.dictGbl:
                fail("{} attribute is missing in global attributes".format(attr))
                self.errors += 1
            elif expected[attr] != self.dictGbl[attr]:
                fail("{} attribute is not consistent: {}".format(attr, self.dictGbl[attr]))
                self.errors += 1

        # ---------------------------------------------------------------
        #  Get time axis properties
        # ---------------------------------------------------------------
        # Calendar and time units (defaults when there is no usable axis)
        try:
            calendar = infile.variables['time'].calendar
            timeunits = infile.variables['time'].units
        except Exception:
            calendar = "gregorian"
            timeunits = "days since ?"

        # Amon climatology files are reported under the "<variable>Clim" name
        if self.is_climatology(filename) and cmip6_table.find('Amon') != -1:
            variable = '{}Clim'.format(variable)

        # First and last time bounds
        try:
            time_attrs = infile.variables['time'].__dict__
            if 'bounds' in time_attrs:
                bndsvar = time_attrs['bounds']
            elif 'climatology' in time_attrs:
                bndsvar = time_attrs['climatology']
            else:
                bndsvar = 'time_bnds'
            startimebnds = infile.variables[bndsvar][0][0]
            endtimebnds = infile.variables[bndsvar][-1][1]
        except Exception:
            startimebnds = 0
            endtimebnds = 0

        # First and last time steps
        try:
            startime = infile.variables['time'][0]
            endtime = infile.variables['time'][-1]
        except Exception:
            startime = 0
            endtime = 0

        # ---------------------------------------------------------------
        #  Setup variable
        # ---------------------------------------------------------------
        varid = cmip6_cv.setup_variable(variable_cmor_entry,
                                        cmor_table['variable_entry'][variable_cmor_entry]['units'],
                                        float(cmor_table['Header']['missing_value']),
                                        int(cmor_table['Header']['int_missing_value']),
                                        startime, endtime,
                                        startimebnds, endtimebnds)
        if varid == -1:
            fail("Could not find variable {} in table {} ".format(variable_cmor_entry, cmip6_table))
            abort()

        # ---------------------------------------------------------------
        #  Check filename
        # ---------------------------------------------------------------
        if cmip6_cv.check_filename(table, varid, calendar, timeunits, filename) != 0:
            self.errors += 1

        # ---------------------------------------------------------------
        #  Check variable attributes
        # ---------------------------------------------------------------
        cv_attrs = cmip6_cv.list_variable_attributes(varid)
        for key in cv_attrs:
            if key in ("long_name", "comment"):
                continue
            table_value = cv_attrs[key]
            if key == "cell_measures" and any(
                    marker in table_value for marker in (" OR ", "OPT", "MODEL", "UGRID")):
                continue

            # That attribute is not in the file
            if key not in self.dictVar:
                if isinstance(table_value, numpy.ndarray):
                    table_value = table_value[0]
                if isinstance(table_value, float):
                    table_value = "{0:.2g}".format(table_value)
                fail("CMIP6 variable " + variable + " requires \"" + key +
                     "\":\"" + str(table_value) + "\".")
                self.errors += 1
                continue

            # Verify that attribute value is equal to file attribute
            file_value = self.dictVar[key]

            # PrePARE accept units of 1 or 1.0 so adjust the table_value
            if key == "units":
                if (table_value == "1") and (file_value == "1.0"):
                    table_value = "1.0"
                if (table_value == "1.0") and (file_value == "1"):
                    table_value = "1"

            # A table string of integers matches a file array holding the
            # same integers.
            if isinstance(table_value, str) and isinstance(file_value, numpy.ndarray):
                if numpy.array([int(value) for value in table_value.split()] == file_value).all():
                    file_value = True
                    table_value = True

            if isinstance(table_value, numpy.ndarray):
                table_value = table_value[0]
            if isinstance(file_value, numpy.ndarray):
                file_value = file_value[0]

            if isinstance(table_value, float):
                # Relative tolerance of 1e-5 on float attributes
                try:
                    close_enough = (abs(table_value - file_value)
                                    <= 0.00001 * abs(table_value))
                except TypeError:
                    # Non-numeric file value: let the comparison report it.
                    close_enough = False
                if close_enough:
                    table_value = file_value

            if key == "cell_methods":
                # Remove text that is inside parentheses i.e. comments
                file_value = re.sub(r"\(.*\)", "", file_value)
                table_value = re.sub(r"\(.*\)", "", table_value)
                # Remove extra whitespace
                file_value = " ".join(file_value.split())
                table_value = " ".join(table_value.split())

            if key == "cell_measures":
                # Check if area and volume values from the table's
                # cell_measures are found in the file's external_variables
                values = re.findall(r'(?:area|volume): (\w+)', table_value)
                for v in values:
                    if 'external_variables' in self.dictGbl:
                        if not re.search(r"\b{}\b".format(v), self.dictGbl['external_variables']):
                            msg = ("Your file contains external_variables = \"" +
                                   self.dictGbl['external_variables'] + "\", and")
                            if len(values) == 2:
                                msg += ("\nCMIP6 tables requires \"" + values[0] +
                                        "\" and \"" + values[1] + "\" in external_variables.")
                            else:
                                msg += ("\nCMIP6 tables requires \"" + values[0] +
                                        "\" in external_variables.")
                            fail(msg)
                            self.errors += 1
                    else:
                        if len(values) == 2:
                            msg = ("Your file contains \"" + values[0] + "\" and \"" +
                                   values[1] + "\" in cell_measures and")
                        else:
                            msg = "Your file contains \"" + values[0] + "\" in cell_measures and"
                        msg += "\nCMIP6 tables require attribute \"external_variables\" in global attributes."
                        fail(msg)
                        self.errors += 1
                # NOTE(review): the flattened original had a trailing
                # `continue` in this branch whose nesting could not be
                # recovered. It is treated here as the last statement of
                # the loop above (a no-op), so the generic comparison
                # below still applies to cell_measures - confirm against
                # the upstream file.

            if str(table_value) != str(file_value):
                msg = "Your file contains \"" + key + "\":\"" + str(file_value) + "\" and"
                msg += "\nCMIP6 tables requires \"" + key + "\":\"" + str(table_value) + "\"."
                fail(msg)
                self.errors += 1

        # Check if cell_measures is defined in the file but not in the table
        if "cell_measures" in self.dictVar and "cell_measures" not in cv_attrs:
            msg = "Your file contains \"cell_measures\":\"" + str(self.dictVar["cell_measures"]) + "\" but"
            msg += "\nCMIP6 tables do not define \"cell_measures\"."
            fail(msg)
            self.errors += 1

        # Print final message
        if self.errors != 0:
            abort()
        elif print_all:
            self.prepare_print(" :: CV SUCCESS :: {}".format(ncfile), 'OKGREEN', no_text_color)
def ControlVocab(self, ncfile, variable=None, print_all=True):
    """
    Check CMIP6 global attributes against Control Vocabulary file.

        1. Validate required attributes if present, and some of their values.
        2. Validate registered institution and institution_id
        3. Validate registered source and source_id
        4. Validate experiment, experiment_id and all attributes associated with this experiment.
           Make sure that all attributes associated with the experiment_id found in CMIP6_CV.json
           are set to the appropriate values.
        5. Validate grid_label and grid_resolution
        6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
        7. Validate furtherinfourl from CV internal template
        8. Validate variable attributes with CMOR JSON table.
        9. Validate parent_* attribute
       10. Validate sub_experiment_* attributes.
       11. Validate that all *_index are integers.

    Every failed check increments ``self.errors``. ``self.dictGbl`` and ``self.dictVar`` are
    (re)assigned from the file's global attributes and the checked variable's attributes.

    :param ncfile: Path of the netCDF file to check
    :param variable: CMOR entry to check; when empty, the variable deduced from the filename is used
    :param print_all: When true, print the success message if no error was counted
    :raises KeyboardInterrupt: When the CMOR entry is not in the table, the variable is not in
        the file, ``cmip6_cv.setup_variable`` returns -1, or ``self.errors`` is non-zero at the end
    """
    rule = "====================================================================================="

    def report_failure(*lines):
        # Print the given lines framed by two rules, between the FAIL and ENDC color codes
        print(BCOLORS.FAIL)
        print(rule)
        for line in lines:
            print(line)
        print(rule)
        print(BCOLORS.ENDC)

    filename = os.path.basename(ncfile)
    # -------------------------------------------------------------------
    # Initialize arrays
    # -------------------------------------------------------------------
    # If table_path is the table directory
    # Deduce corresponding JSON from filename
    if os.path.isdir(self.cmip6_table_path):
        cmip6_table = '{}/CMIP6_{}.json'.format(
            self.cmip6_table_path, self._get_table_from_filename(filename))
    else:
        cmip6_table = self.cmip6_table_path
    table_id = os.path.basename(os.path.splitext(cmip6_table)[0]).split('_')[1]
    # Check and get JSON table
    cmor_table = self._check_json_table(cmip6_table)
    # -------------------------------------------------------------------
    # Load CMIP6 table into memory
    # -------------------------------------------------------------------
    table = cmip6_cv.load_table(cmip6_table)
    # -------------------------------------------------------------------
    # Deduce variable
    # -------------------------------------------------------------------
    # The variable is deduced from the filename (default);
    # a variable submitted on command line with --variable takes precedence
    variable_id = self._get_variable_from_filename(filename)
    if not variable:
        variable = variable_id
    # -------------------------------------------------------------------
    # Distinguish similar CMOR entries with the same out_name if exist
    # -------------------------------------------------------------------
    # Apply test on variable only if a particular treatment is required
    prepare_path = os.path.dirname(os.path.realpath(__file__))
    # Context manager so the JSON file handle is closed right after parsing
    with open(os.path.join(prepare_path, 'out_names_tests.json')) as tests_file:
        out_names_tests = json.load(tests_file)
    # -------------------------------------------------------------------
    # Open file in processing
    # The file needs to be open before calling the test.
    # -------------------------------------------------------------------
    # NOTE(review): infile is never closed in this method, and self.dictGbl keeps
    # a reference to its __dict__ - confirm whether closing it here would be safe.
    infile = Cdunif.CdunifFile(ncfile, "r")
    key = '{}_{}'.format(table_id, variable_id)
    variable_cmor_entry = None
    if key in out_names_tests:
        # NOTE(review): every test overwrites the previous result, so only the
        # outcome of the last test listed for this key is retained.
        for test, cmor_entry in out_names_tests[key].items():
            if getattr(self, test)(infile=infile, variable=variable, filename=filename):
                # If test successful, the CMOR entry to consider is given by the test
                variable_cmor_entry = cmor_entry
            else:
                # If not, CMOR entry to consider is the variable from filename or from input command-line
                variable_cmor_entry = variable
    else:
        # By default, CMOR entry to consider is the variable from filename or from input command-line
        variable_cmor_entry = variable
    # -------------------------------------------------------------------
    # Get variable out name in netCDF record
    # -------------------------------------------------------------------
    # Variable record name should follow CMOR table out names
    if variable_cmor_entry not in cmor_table['variable_entry']:
        report_failure("The entry " + variable_cmor_entry + " could not be found in CMOR table")
        raise KeyboardInterrupt
    variable_record_name = cmor_table['variable_entry'][variable_cmor_entry]['out_name']
    # Variable id attribute should be the same as variable record name
    # in any case to be CF- and CMIP6-compliant
    variable_id = variable_record_name
    # -------------------------------------------------------------------
    # Create a dictionary of all global attributes
    # -------------------------------------------------------------------
    self.dictGbl = infile.__dict__
    for gbl_name, gbl_value in self.dictGbl.items():
        cmip6_cv.set_cur_dataset_attribute(gbl_name, gbl_value)
    # Set member_id attribute depending on sub_experiment_id and variant_label
    member_id = ""
    if "sub_experiment_id" in self.dictGbl:
        if self.dictGbl["sub_experiment_id"] not in ['none']:
            member_id = '{}-{}'.format(self.dictGbl['sub_experiment_id'], self.dictGbl['variant_label'])
        else:
            member_id = self.dictGbl['variant_label']
    cmip6_cv.set_cur_dataset_attribute(cmip6_cv.GLOBAL_ATT_MEMBER_ID, member_id)
    # -------------------------------------------------------------------
    # Create a dictionary of attributes for the variable
    # -------------------------------------------------------------------
    try:
        self.dictVar = infile.variables[variable_record_name].__dict__
    except Exception:
        report_failure("The variable " + variable_record_name + " could not be found in file")
        raise KeyboardInterrupt
    # -------------------------------------------------------------------
    # Check global attributes
    # -------------------------------------------------------------------
    self.errors += cmip6_cv.check_requiredattributes(table)
    self.errors += cmip6_cv.check_institution(table)
    self.errors += cmip6_cv.check_sourceID(table)
    self.errors += cmip6_cv.check_experiment(table)
    self.errors += cmip6_cv.check_grids(table)
    self.errors += cmip6_cv.check_ISOTime()
    self.errors += cmip6_cv.check_furtherinfourl(table)
    self.errors += cmip6_cv.check_subExpID(table)
    for attr in ['branch_time_in_child', 'branch_time_in_parent']:
        if attr in self.dictGbl:
            self.set_double_value(attr)
            if not isinstance(self.dictGbl[attr], numpy.float64):
                # The doubled space matches print()'s separator between its two former arguments
                report_failure("{} is not a double: ".format(attr) + " " + str(type(self.dictGbl[attr])))
                self.errors += 1
    for attr in ['realization_index', 'initialization_index', 'physics_index', 'forcing_index']:
        if attr not in self.dictGbl:
            # Report the absent attribute as a failure instead of dying on a KeyError
            report_failure("{} attribute is missing in global attributes".format(attr))
            self.errors += 1
        elif not isinstance(self.dictGbl[attr], numpy.ndarray):
            report_failure("{} is not an integer: ".format(attr) + " " + str(type(self.dictGbl[attr])))
            self.errors += 1
    self.errors += cmip6_cv.check_parentExpID(table)
    # Values deduced from the table/filename must match the file's global attributes
    for attr, expected in (('table_id', table_id), ('variable_id', variable_id)):
        if attr not in self.dictGbl:
            report_failure("{} attribute is missing in global attributes".format(attr))
            self.errors += 1
        elif expected != self.dictGbl[attr]:
            report_failure("{} attribute is not consistent: ".format(attr) + " " + str(self.dictGbl[attr]))
            self.errors += 1
    # -------------------------------------------------------------------
    # Get time axis properties
    # -------------------------------------------------------------------
    # Get calendar and time units, with fallbacks when the time axis is unavailable
    try:
        calendar = infile.variables['time'].calendar
        timeunits = infile.variables['time'].units
    except Exception:
        calendar = "gregorian"
        timeunits = "days since ?"
    # Climatologies checked against an Amon table use the "<variable>Clim" name in messages
    climatology = self.is_climatology(filename)
    if climatology and cmip6_table.find('Amon') != -1:
        variable = '{}Clim'.format(variable)
    # Get first and last time bounds
    try:
        time_attrs = infile.variables['time'].__dict__
        if 'bounds' in time_attrs:
            bndsvar = time_attrs['bounds']
        elif 'climatology' in time_attrs:
            bndsvar = time_attrs['climatology']
        else:
            bndsvar = 'time_bnds'
        startimebnds = infile.variables[bndsvar][0][0]
        endtimebnds = infile.variables[bndsvar][-1][1]
    except Exception:
        startimebnds = 0
        endtimebnds = 0
    try:
        startime = infile.variables['time'][0]
        endtime = infile.variables['time'][-1]
    except Exception:
        startime = 0
        endtime = 0
    # -------------------------------------------------------------------
    # Setup variable
    # -------------------------------------------------------------------
    varid = cmip6_cv.setup_variable(variable_cmor_entry,
                                    self.dictVar['units'],
                                    self.dictVar['_FillValue'][0],
                                    startime,
                                    endtime,
                                    startimebnds,
                                    endtimebnds)
    if varid == -1:
        report_failure("Could not find variable {} in table {} ".format(variable_cmor_entry, cmip6_table))
        raise KeyboardInterrupt
    # -------------------------------------------------------------------
    # Check filename
    # -------------------------------------------------------------------
    self.errors += cmip6_cv.check_filename(table, varid, calendar, timeunits, filename)
    # -------------------------------------------------------------------
    # Check variable attributes
    # -------------------------------------------------------------------
    cv_attrs = cmip6_cv.list_variable_attributes(varid)
    # Compiled once, outside the loop; raw string so that "\w" is not an invalid escape
    or_pattern = re.compile(r'(?P<param>[\w.-]+): (?P<val1>[\w.-]+) OR (?P<val2>[\w.-]+)')
    for key in cv_attrs:
        if key == "long_name":
            continue
        if key == "comment":
            continue
        if key == "cell_measures":
            # Table values containing OPT or MODEL are not checked
            if cv_attrs[key].find("OPT") != -1 or cv_attrs[key].find("MODEL") != -1:
                continue
        # Is this attribute in file?
        if key in self.dictVar:
            # Verify that attribute value is equal to file attribute
            table_value = cv_attrs[key]
            file_value = self.dictVar[key]
            # PrePARE accept units of 1 or 1.0 so adjust the table_value
            if key == "units":
                if (table_value == "1") and (file_value == "1.0"):
                    table_value = "1.0"
                if (table_value == "1.0") and (file_value == "1"):
                    table_value = "1"
            # Table gives whitespace-separated integers while the file holds an array:
            # when all elements match, replace both by True so the final comparison passes
            if isinstance(table_value, str) and isinstance(file_value, numpy.ndarray):
                if numpy.array([int(value) for value in table_value.split()] == file_value).all():
                    file_value = True
                    table_value = True
            if isinstance(table_value, numpy.ndarray):
                table_value = table_value[0]
            if isinstance(file_value, numpy.ndarray):
                file_value = file_value[0]
            # Floats are considered equal within a relative tolerance of 1e-5
            if isinstance(table_value, float):
                if abs(table_value - file_value) <= 0.00001 * abs(table_value):
                    table_value = file_value
            if key == "cell_methods":
                # NOTE(review): both values are cut at the offset of the first " (" found
                # in the file value, so nothing after that offset is compared.
                idx = file_value.find(" (")
                if idx != -1:
                    file_value = file_value[:idx]
                    table_value = table_value[:idx]
            if key == "cell_measures":
                values = re.findall(or_pattern, table_value)
                table_values = [""]  # Empty string is allowed in case of useless attribute
                if values:
                    # Each "param: a OR b" yields two candidates; accepted values are
                    # all space-joined combinations of one candidate per param
                    tmp = dict()
                    for param, val1, val2 in values:
                        tmp[param] = [str('{}: {}'.format(param, val1)), str('{}: {}'.format(param, val2))]
                    table_values.extend([' '.join(i) for i in itertools.product(*tmp.values())])
                    if str(file_value) not in [str(candidate) for candidate in table_values]:
                        report_failure(
                            "Your file contains \"" + key + "\":\"" + str(file_value) + "\" and",
                            "CMIP6 tables requires \"" + key + "\":\"" + str(table_value) + "\".")
                        self.errors += 1
                    continue
            if str(table_value) != str(file_value):
                report_failure(
                    "Your file contains \"" + key + "\":\"" + str(file_value) + "\" and",
                    "CMIP6 tables requires \"" + key + "\":\"" + str(table_value) + "\".")
                self.errors += 1
        else:
            # That attribute is not in the file
            table_value = cv_attrs[key]
            if isinstance(table_value, numpy.ndarray):
                table_value = table_value[0]
            if isinstance(table_value, float):
                table_value = "{0:.2g}".format(table_value)
            report_failure(
                "CMIP6 variable " + variable + " requires \"" + key + "\":\"" + str(table_value) + "\".")
            self.errors += 1
    # Print final message
    if self.errors != 0:
        print(BCOLORS.FAIL + "└──> :: CV FAIL :: {}".format(ncfile) + BCOLORS.ENDC)
        raise KeyboardInterrupt
    elif print_all:
        print(BCOLORS.OKGREEN + " :: CV SUCCESS :: {}".format(ncfile) + BCOLORS.ENDC)