Exemple #1
0
    def ControlVocab(self, ncfile, variable=None, print_all=True):
        """
        Check the CMIP6 attributes of a netCDF file against the Controlled Vocabulary.

        Checks performed:

            1. Validate required attributes (presence and some values).
            2. Validate registered institution and institution_id
            3. Validate registered source and source_id
            4. Validate experiment, experiment_id and all attributes associated with this experiment.
                   Make sure that all attributes associated with the experiment_id found in CMIP6_CV.json
                   are set to the appropriate values.
            5. Validate grid_label and grid_resolution
            6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
            7. Validate furtherinfourl from CV internal template
            8. Validate variable attributes with CMOR JSON table.
            9. Validate parent_* attributes
           10. Validate sub_experiment_* attributes.
           11. Validate that all *_index are integers.

        Every failed check prints a message and increments ``self.errors``.

        :param ncfile: path of the netCDF file to check
        :param variable: CMOR variable to check; when empty, the variable
            deduced from the file name is used
        :param print_all: when true, print a success line if no error was found
        :raises KeyboardInterrupt: when the CMOR entry or the variable cannot be
            found, or when ``self.errors`` is non-zero after all checks

        """
        banner = "====================================================================================="

        def report_failure(*lines):
            # Print the given message lines framed by the colored failure banner.
            print(BCOLORS.FAIL)
            print(banner)
            for line in lines:
                print(line)
            print(banner)
            print(BCOLORS.ENDC)

        filename = os.path.basename(ncfile)
        # -------------------------------------------------------------------
        #  Locate and load the CMOR table
        # -------------------------------------------------------------------
        # If the configured table path is a directory, deduce the JSON table
        # from the file name; otherwise the path is the table itself.
        if os.path.isdir(self.cmip6_table_path):
            cmip6_table = '{}/CMIP6_{}.json'.format(
                self.cmip6_table_path, self._get_table_from_filename(filename))
        else:
            cmip6_table = self.cmip6_table_path
        # "CMIP6_<table>.json" -> "<table>"
        table_id = os.path.basename(
            os.path.splitext(cmip6_table)[0]).split('_')[1]
        # Check and get JSON table
        cmor_table = self._check_json_table(cmip6_table)
        # Load CMIP6 table into memory
        table = cmip6_cv.load_table(cmip6_table)
        # -------------------------------------------------------------------
        #  Deduce variable
        # -------------------------------------------------------------------
        # The variable from the file name is the default; an explicitly
        # submitted variable takes precedence.
        variable_id = self._get_variable_from_filename(filename)
        if not variable:
            variable = variable_id
        # -------------------------------------------------------------------
        #  Distinguish similar CMOR entries with the same out_name if exist
        # -------------------------------------------------------------------
        prepare_path = os.path.dirname(os.path.realpath(__file__))
        # Context manager so the handle is closed even if parsing fails.
        with open(os.path.join(prepare_path,
                               'out_names_tests.json')) as tests_file:
            out_names_tests = json.load(tests_file)
        # -------------------------------------------------------------------
        #  Open file in processing
        #  The file needs to be open before calling the tests.
        # -------------------------------------------------------------------
        # NOTE(review): this handle is never closed explicitly in this method.
        infile = Cdunif.CdunifFile(ncfile, "r")
        tests_key = '{}_{}'.format(table_id, variable_id)
        # By default, the CMOR entry to consider is the variable from the
        # file name or from the caller (also covers an empty test mapping).
        variable_cmor_entry = variable
        if tests_key in out_names_tests:
            for test, cmor_entry in out_names_tests[tests_key].items():
                if getattr(self, test)(infile=infile,
                                       variable=variable,
                                       filename=filename):
                    # If the test succeeds, the CMOR entry is given by the test
                    variable_cmor_entry = cmor_entry
                else:
                    variable_cmor_entry = variable
        # -------------------------------------------------------------------
        #  Get variable out name in netCDF record
        # -------------------------------------------------------------------
        # Variable record name should follow CMOR table out names
        if variable_cmor_entry not in cmor_table['variable_entry']:
            report_failure("The entry " + variable_cmor_entry +
                           " could not be found in CMOR table")
            raise KeyboardInterrupt
        variable_record_name = cmor_table['variable_entry'][
            variable_cmor_entry]['out_name']
        # Variable id attribute should be the same as variable record name
        # in any case to be CF- and CMIP6-compliant
        variable_id = variable_record_name
        # -------------------------------------------------------------------
        # Create a dictionary of all global attributes
        # -------------------------------------------------------------------
        self.dictGbl = infile.__dict__
        for attr_name, attr_value in list(self.dictGbl.items()):
            cmip6_cv.set_cur_dataset_attribute(attr_name, attr_value)
        # Set member_id attribute depending on sub_experiment_id and variant_label
        member_id = ""
        if "sub_experiment_id" in self.dictGbl:
            if self.dictGbl["sub_experiment_id"] not in ['none']:
                member_id = '{}-{}'.format(self.dictGbl['sub_experiment_id'],
                                           self.dictGbl['variant_label'])
            else:
                member_id = self.dictGbl['variant_label']
        cmip6_cv.set_cur_dataset_attribute(cmip6_cv.GLOBAL_ATT_MEMBER_ID,
                                           member_id)
        # -------------------------------------------------------------------
        # Create a dictionary of attributes for the variable
        # -------------------------------------------------------------------
        try:
            self.dictVar = infile.variables[variable_record_name].__dict__
        except Exception:
            report_failure("The variable " + variable_record_name +
                           " could not be found in file")
            raise KeyboardInterrupt

        # -------------------------------------------------------------------
        # Check global attributes
        # -------------------------------------------------------------------
        self.errors += cmip6_cv.check_requiredattributes(table)
        self.errors += cmip6_cv.check_institution(table)
        self.errors += cmip6_cv.check_sourceID(table)
        self.errors += cmip6_cv.check_experiment(table)
        self.errors += cmip6_cv.check_grids(table)
        self.errors += cmip6_cv.check_ISOTime()
        self.errors += cmip6_cv.check_furtherinfourl(table)
        self.errors += cmip6_cv.check_subExpID(table)
        # The doubled space in the messages below reproduces the separator
        # that print() inserts between two arguments.
        for attr in ['branch_time_in_child', 'branch_time_in_parent']:
            if attr in self.dictGbl:
                self.set_double_value(attr)
                if not isinstance(self.dictGbl[attr], numpy.float64):
                    report_failure("{} is not a double:  {!s}".format(
                        attr, type(self.dictGbl[attr])))
                    self.errors += 1
        for attr in [
                'realization_index', 'initialization_index', 'physics_index',
                'forcing_index'
        ]:
            if attr not in self.dictGbl:
                # Report a missing index instead of crashing on a KeyError.
                report_failure(
                    "{} attribute is missing in global attributes".format(
                        attr))
                self.errors += 1
            elif not isinstance(self.dictGbl[attr], numpy.ndarray):
                report_failure("{} is not an integer:  {!s}".format(
                    attr, type(self.dictGbl[attr])))
                self.errors += 1
        self.errors += cmip6_cv.check_parentExpID(table)
        # table_id and variable_id stored in the file must match the values
        # deduced from the CMOR table.
        for attr, expected in (('table_id', table_id),
                               ('variable_id', variable_id)):
            try:
                if expected != self.dictGbl[attr]:
                    report_failure(
                        "{} attribute is not consistent:  {!s}".format(
                            attr, self.dictGbl[attr]))
                    self.errors += 1
            except KeyError:
                report_failure(
                    "{} attribute is missing in global attributes".format(
                        attr))
                self.errors += 1
        # -------------------------------------------------------------------
        # Get time axis properties
        # -------------------------------------------------------------------
        # Get calendar and time units, with fallbacks when there is no usable
        # time axis.
        try:
            calendar = infile.variables['time'].calendar
            timeunits = infile.variables['time'].units
        except Exception:
            calendar = "gregorian"
            timeunits = "days since ?"

        # For climatologies checked against an Amon table, the variable name
        # used in messages gets a "Clim" suffix.
        if self.is_climatology(filename) and cmip6_table.find('Amon') != -1:
            variable = '{}Clim'.format(variable)

        # Get first and last time bounds; the bounds variable is named by the
        # "bounds" or "climatology" attribute of the time axis, else "time_bnds".
        try:
            time_attrs = infile.variables['time'].__dict__
            if 'bounds' in time_attrs:
                bndsvar = time_attrs['bounds']
            elif 'climatology' in time_attrs:
                bndsvar = time_attrs['climatology']
            else:
                bndsvar = 'time_bnds'
            startimebnds = infile.variables[bndsvar][0][0]
            endtimebnds = infile.variables[bndsvar][-1][1]
        except Exception:
            startimebnds = 0
            endtimebnds = 0

        try:
            startime = infile.variables['time'][0]
            endtime = infile.variables['time'][-1]
        except Exception:
            startime = 0
            endtime = 0

        # -------------------------------------------------------------------
        # Setup variable
        # -------------------------------------------------------------------
        varid = cmip6_cv.setup_variable(variable_cmor_entry,
                                        self.dictVar['units'],
                                        self.dictVar['_FillValue'][0],
                                        startime, endtime, startimebnds,
                                        endtimebnds)
        if varid == -1:
            report_failure("Could not find variable {} in table {} ".format(
                variable_cmor_entry, cmip6_table))
            raise KeyboardInterrupt
        # -------------------------------------------------------------------
        # Check filename
        # -------------------------------------------------------------------
        self.errors += cmip6_cv.check_filename(table, varid, calendar,
                                               timeunits, filename)
        # -------------------------------------------------------------------
        # Check variable attributes
        # -------------------------------------------------------------------
        # Matches "<param>: <val1> OR <val2>" alternatives in cell_measures.
        cell_measures_pattern = re.compile(
            r'(?P<param>[\w.-]+): (?P<val1>[\w.-]+) OR (?P<val2>[\w.-]+)')
        cv_attrs = cmip6_cv.list_variable_attributes(varid)
        for key in cv_attrs:
            if key in ("long_name", "comment"):
                continue
            if key == "cell_measures":
                if cv_attrs[key].find("OPT") != -1 or cv_attrs[key].find(
                        "MODEL") != -1:
                    continue
            table_value = cv_attrs[key]
            if key not in self.dictVar:
                # That attribute is not in the file
                if isinstance(table_value, numpy.ndarray):
                    table_value = table_value[0]
                if isinstance(table_value, float):
                    table_value = "{0:.2g}".format(table_value)
                report_failure("CMIP6 variable " + variable + " requires \"" +
                               key + "\":\"" + str(table_value) + "\".")
                self.errors += 1
                continue

            # Verify that attribute value is equal to file attribute
            file_value = self.dictVar[key]
            # PrePARE accept units of 1 or 1.0 so adjust the table_value
            if key == "units":
                if (table_value == "1") and (file_value == "1.0"):
                    table_value = "1.0"
                if (table_value == "1.0") and (file_value == "1"):
                    table_value = "1"
            # A table string of integers matching an array attribute
            # element-wise is treated as equal.
            if isinstance(table_value, str) and isinstance(
                    file_value, numpy.ndarray):
                if numpy.array(
                    [int(value) for value in table_value.split()
                     ] == file_value).all():
                    file_value = True
                    table_value = True
            if isinstance(table_value, numpy.ndarray):
                table_value = table_value[0]
            if isinstance(file_value, numpy.ndarray):
                file_value = file_value[0]
            # Floats are compared with a relative tolerance of 1e-5.
            if isinstance(table_value, float):
                if abs(table_value -
                       file_value) <= 0.00001 * abs(table_value):
                    table_value = file_value
            # Ignore any trailing " (...)" part of cell_methods in the file.
            if key == "cell_methods":
                idx = file_value.find(" (")
                if idx != -1:
                    file_value = file_value[:idx]
                    table_value = table_value[:idx]
            if key == "cell_measures":
                alternatives = cell_measures_pattern.findall(table_value)
                # Empty string is allowed in case of useless attribute
                table_values = [""]
                if alternatives:
                    choices = dict()
                    for param, val1, val2 in alternatives:
                        choices[param] = [
                            str('{}: {}'.format(param, val1)),
                            str('{}: {}'.format(param, val2))
                        ]
                    # Every combination of one alternative per parameter.
                    table_values.extend(
                        ' '.join(combo)
                        for combo in itertools.product(*choices.values()))
                    if str(file_value) not in list(map(str, table_values)):
                        report_failure(
                            "Your file contains \"" + key + "\":\"" +
                            str(file_value) + "\" and",
                            "CMIP6 tables requires \"" + key + "\":\"" +
                            str(table_value) + "\".")
                        self.errors += 1
                    continue

            if str(table_value) != str(file_value):
                report_failure(
                    "Your file contains \"" + key + "\":\"" +
                    str(file_value) + "\" and",
                    "CMIP6 tables requires \"" + key + "\":\"" +
                    str(table_value) + "\".")
                self.errors += 1
        # Print final message
        if self.errors != 0:
            print(BCOLORS.FAIL + "└──> :: CV FAIL    :: {}".format(ncfile) +
                  BCOLORS.ENDC)
            raise KeyboardInterrupt
        elif print_all:
            print(BCOLORS.OKGREEN + "     :: CV SUCCESS :: {}".format(ncfile) +
                  BCOLORS.ENDC)
Exemple #2
0
    def ControlVocab(self, args):
        '''
            Check CMIP6 global attributes against Control Vocabulary file.

                1. Validate required attribute if presents and some values.
                2. Validate registered institution and institution_id
                3. Validate registered source and source_id
                4. Validate experiment, experiment_id and all attributes associated with this experiment.
                       Make sure that all attributes associate with the experiment_id found in CMIP6_CV.json
                       are set to the appropriate values.
                5. Validate grid_label and grid_resolution
                6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
                7. Validate furtherinfourl from CV internal template
                8. Validate variable attributes with CMOR JSON table.
                9. Validate parent_* attribute
               10. Validate sub_experiment_* atributes.
               11. Validate that all *_index are integers.
        '''
        self.variable = args.variable
        self.infile = args.infile
        # -------------------------------------
        # Create alist of all Global Attributes
        # -------------------------------------
        self.dictGbl = {
            key: self.infile.__dict__[key]
            for key in self.infile.__dict__.keys()
        }
        self.attributes = self.infile.__dict__.keys()
        self.variables = self.infile.variables.keys()
        ierr = [
            cmip6_cv.set_cur_dataset_attribute(key, value)
            for key, value in self.dictGbl.iteritems()
        ]
        member_id = ""
        if ("sub_experiment_id" in self.dictGbl.keys()):
            if (self.dictGbl["sub_experiment_id"] not in ["none"]):
                member_id = self.dictGbl["sub_experiment_id"] + \
                    '-' + self.dictGbl["variant_label"]
            else:
                member_id = self.dictGbl["variant_label"]

        cmip6_cv.set_cur_dataset_attribute(cmip6_cv.GLOBAL_ATT_MEMBER_ID,
                                           member_id)

        self.setDoubleValue('branch_time_in_parent')
        self.setDoubleValue('branch_time_in_child')
        if self.variable is not None:
            self.var = [self.variable]
        else:
            # -------------------------------------------------------------------
            # find variable that contains a "history" (should only be one)
            # -------------------------------------------------------------------
            self.var = [self.infile.variable_id]

        if ((self.var == []) or (len(self.var) > 1)):
            print bcolors.FAIL
            print "!!!!!!!!!!!!!!!!!!!!!!!!!"
            print "! Error:  The input file does not have an history attribute and the CMIP6 variable could not be found"
            print "!         Please use the --variable option to specify your CMIP6 variable"
            print "! Check your file or use CMOR 3.x to achieve compliance for ESGF publication."
            print "!!!!!!!!!!!!!!!!!!!!!!!!!"
            print bcolors.ENDC

            raise KeyboardInterrupt

        try:
            self.keys = self.infile.variables[self.var[0]].__dict__.keys()
        except BaseException:
            print bcolors.FAIL
            print "!!!!!!!!!!!!!!!!!!!!!!!!!"
            print "! Error:  The variable " + self.var[
                0] + " could not be found"
            print "! Check your file variables "
            print "!!!!!!!!!!!!!!!!!!!!!!!!!"
            print bcolors.ENDC

            raise

        # -------------------------------------------------------------------
        # Create a dictionnary of attributes for var
        # -------------------------------------------------------------------
        self.dictVars = dict(
            (y, x) for y, x in
            [(key, value) for key in self.keys
             if self.infile.variables[self.var[0]].__dict__[key] is not None
             for value in [self.infile.variables[self.var[0]].__dict__[key]]])
        try:
            self.calendar = self.infile.variables['time'].calendar
            self.timeunits = self.infile.variables['time'].units
        except BaseException:
            self.calendar = "gregorian"
            self.timeunits = "days since ?"
        cmip6_cv.check_requiredattributes(self.table_id)
        cmip6_cv.check_institution(self.table_id)
        cmip6_cv.check_sourceID(self.table_id)
        cmip6_cv.check_experiment(self.table_id)
        cmip6_cv.check_grids(self.table_id)
        cmip6_cv.check_ISOTime()
        cmip6_cv.check_furtherinfourl(self.table_id)
        cmip6_cv.check_parentExpID(self.table_id)
        cmip6_cv.check_subExpID(self.table_id)
        try:
            startimebnds = self.infile.variables['time_bnds'][0][0]
            endtimebnds = self.infile.variables['time_bnds'][-1][1]
        except BaseException:
            startimebnds = 0
            endtimebnds = 0
        try:
            startime = self.infile.variables['time'][0]
            endtime = self.infile.variables['time'][-1]
        except BaseException:
            startime = 0
            endtime = 0
        varunits = self.infile.variables[self.var[0]].units
        varmissing = self.infile.variables[self.var[0]]._FillValue[0]
        varid = cmip6_cv.setup_variable(self.var[0], varunits, varmissing,
                                        startime, endtime, startimebnds,
                                        endtimebnds)
        if (varid == -1):
            print bcolors.FAIL
            print "====================================================================================="
            print " Could not find variable '%s' in table '%s' " % (
                self.var[0], self.cmip6_table)
            print "====================================================================================="
            print bcolors.ENDC
            cmip6_cv.set_CV_Error()
            return

#        fn = os.path.basename(self.infile.id)
        fn = os.path.basename(str(self.infile).split('\'')[1])
        cmip6_cv.check_filename(self.table_id, varid, self.calendar,
                                self.timeunits, fn)

        if not isinstance(self.dictGbl['realization_index'], numpy.ndarray):
            print bcolors.FAIL
            print "====================================================================================="
            print "realization_index is not an integer: ", type(
                self.dictGbl['realization_index'])
            print "====================================================================================="
            print bcolors.ENDC
            cmip6_cv.set_CV_Error()
        if not isinstance(self.dictGbl['initialization_index'], numpy.ndarray):
            print bcolors.FAIL
            print "====================================================================================="
            print "initialization_index is not an integer: ", type(
                self.dictGbl['initialization_index'])
            print "====================================================================================="
            print bcolors.ENDC
            cmip6_cv.set_CV_Error()
        if not isinstance(self.dictGbl['physics_index'], numpy.ndarray):
            print bcolors.FAIL
            print "====================================================================================="
            print "physics_index is not an integer: ", type(
                self.dictGbl['physics_index'])
            print "====================================================================================="
            print bcolors.ENDC
            cmip6_cv.set_CV_Error()
        if not isinstance(self.dictGbl['forcing_index'], numpy.ndarray):
            print bcolors.FAIL
            print "====================================================================================="
            print "forcing_index is not an integer: ", type(
                self.dictGbl['forcing_index'])
            print "====================================================================================="
            print bcolors.ENDC
            cmip6_cv.set_CV_Error()

        prepLIST = cmip6_cv.list_variable_attributes(varid)
        for key in prepLIST:
            if (key == "long_name"):
                continue
            if (key == "comment"):
                continue
            # Is this attritue in file?
            if (key in self.dictVars.keys()):
                # Verify that attribute value is equal to file attribute
                table_value = prepLIST[key]
                file_value = self.dictVars[key]
                if isinstance(table_value, numpy.ndarray):
                    table_value = table_value[0]
                if isinstance(file_value, numpy.ndarray):
                    file_value = file_value[0]

                if isinstance(table_value, float):
                    if (file_value == 0):
                        if (table_value != file_value):
                            file_value = False
                    else:
                        if (1 - (table_value / file_value) < 0.00001):
                            table_value = file_value

                if key == "cell_methods":
                    idx = file_value.find(" (interval:")
                    file_value = file_value[:idx]
                    table_value = table_value[:idx]

                file_value = str(file_value)
                table_value = str(table_value)
                if table_value != file_value:
                    print bcolors.FAIL
                    print "====================================================================================="
                    print "You file contains \"" + key + "\":\"" + str(
                        file_value) + "\" and"
                    print "CMIP6 tables requires \"" + key + "\":\"" + str(
                        table_value) + "\"."
                    print "====================================================================================="
                    print bcolors.ENDC
                    cmip6_cv.set_CV_Error()
            else:
                # That attribute is not in the file
                table_value = prepLIST[key]
                if key == "cell_measures":
                    if ((table_value.find("OPT") != -1)
                            or (table_value.find("MODEL") != -1)):
                        continue
                if isinstance(table_value, numpy.ndarray):
                    table_value = table_value[0]
                if isinstance(table_value, float):
                    table_value = "{0:.2g}".format(table_value)
                print bcolors.FAIL
                print "====================================================================================="
                print "CMIP6 variable " + self.var[
                    0] + " requires \"" + key + "\":\"" + str(
                        table_value) + "\"."
                print "====================================================================================="
                print bcolors.ENDC
                cmip6_cv.set_CV_Error()

        if (cmip6_cv.get_CV_Error()):
            raise KeyboardInterrupt

        pass
        print bcolors.OKGREEN
        print "*************************************************************************************"
        print "* This file is compliant with the CMIP6 specification and can be published in ESGF. *"
        print "*************************************************************************************"
        print bcolors.ENDC
Exemple #3
0
    def ControlVocab(self):
        '''
            Check CMIP6 global attributes against Control Vocabulary file.

                1. Validate required attribute if presents and some values.
                2. Validate registered institution and institution_id
                3. Validate registered source and source_id
                4. Validate experiment, experiment_id and attribute associated with the experiment.
                5. Validate grid_label and grid_resolution
                6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
                7. Validate furtherinfourl from CV internal template
                8. Validate variable attributes with CMOR JSON table.
        '''
        cmip6_cv.check_requiredattributes(self.table_id)
        cmip6_cv.check_institution(self.table_id)
        cmip6_cv.check_sourceID(self.table_id)
        cmip6_cv.check_experiment(self.table_id)
        cmip6_cv.check_grids(self.table_id)
        cmip6_cv.check_ISOTime()
        cmip6_cv.check_furtherinfourl(self.table_id)
        varid = cmip6_cv.setup_variable(self.var[0], 'm', 1e20)

        prepLIST = cmip6_cv.list_variable_attributes(varid)
        for key in prepLIST:
            if (key == "comment"):
                continue
            # Is this attritue in file?
            if (key in self.dictVars.keys()):
                # Verify that attribute value is equal to file attribute
                table_value = prepLIST[key]
                file_value = self.dictVars[key]
                if isinstance(table_value, numpy.ndarray):
                    table_value = table_value[0]
                if isinstance(file_value, numpy.ndarray):
                    file_value = file_value[0]

                if isinstance(table_value, float):
                    if (table_value / file_value < 1.1):
                        table_value = file_value

                if key == "cell_methods":
                    idx = file_value.find(" (interval:")
                    file_value = file_value[:idx]
                    table_value = table_value[:idx]

                file_value = str(file_value)
                table_value = str(table_value)
                if table_value != file_value:
                    print bcolors.FAIL
                    print "====================================================================================="
                    print "You file contains \"" + key + "\":\"" + str(
                        file_value) + "\" and"
                    print "CMIP6 tables requires \"" + key + "\":\"" + str(
                        table_value) + "\"."
                    print "====================================================================================="
                    print bcolors.ENDC
                    cmip6_cv.set_CV_Error()
            else:
                # That attribute is not in the file
                table_value = prepLIST[key]
                if isinstance(table_value, numpy.ndarray):
                    table_value = table_value[0]
                if isinstance(table_value, float):
                    table_value = "{0:.2g}".format(table_value)
                print bcolors.FAIL
                print "====================================================================================="
                print "CMIP6 variable " + self.var[
                    0] + " requires \"" + key + "\":\"" + str(
                        table_value) + "\"."
                print "====================================================================================="
                print bcolors.ENDC
                cmip6_cv.set_CV_Error()

        if (cmip6_cv.get_CV_Error()):
            raise KeyboardInterrupt
        print bcolors.OKGREEN
        print "*************************************************************************************"
        print "* This file is compliant with the CMIP6 specification and can be published in ESGF. *"
        print "*************************************************************************************"
        print bcolors.ENDC
Exemple #4
0
    def ControlVocab(self, ncfile, variable=None):
        """
        Check CMIP6 global attributes against Control Vocabulary file.

            1. Validate required attribute if presents and some values.
            2. Validate registered institution and institution_id
            3. Validate registered source and source_id
            4. Validate experiment, experiment_id and all attributes associated with this experiment.
                   Make sure that all attributes associate with the experiment_id found in CMIP6_CV.json
                   are set to the appropriate values.
            5. Validate grid_label and grid_resolution
            6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
            7. Validate furtherinfourl from CV internal template
            8. Validate variable attributes with CMOR JSON table.
            9. Validate parent_* attribute
           10. Validate sub_experiment_* attributes.
           11. Validate that all *_index are integers.

        """

        err = 0
        cmip6_cv.reset_CV_Error()
        filename = os.path.basename(ncfile)
        # -------------------------------------------------------------------
        #  Initialize arrays
        # -------------------------------------------------------------------
        # If table_path is the table directory
        # Deduce corresponding JSON from filename
        if os.path.isdir(self.cmip6_table_path):
            cmip6_table = '{}/CMIP6_{}.json'.format(
                self.cmip6_table_path, self._get_table_from_filename(filename))
        else:
            cmip6_table = self.cmip6_table_path
        table_id = os.path.basename(os.path.splitext(cmip6_table)[0]).split('_')[1]
        # Check JSON file
        self._check_json_table(cmip6_table)
        # -------------------------------------------------------------------
        # Load CMIP6 table into memory
        # -------------------------------------------------------------------
        table = cmip6_cv.load_table(cmip6_table)
        # -------------------------------------------------------------------
        #  Deduce variable
        # -------------------------------------------------------------------
        # If variable can be deduced from the filename (Default)
        # If not variable submitted on command line with --variable is considered
        variable_id = self._get_variable_from_filename(filename)
        if not variable:
            variable = variable_id
        # -------------------------------------------------------------------
        #  Open file in processing
        # -------------------------------------------------------------------
        infile = Cdunif.CdunifFile(ncfile, "r")
        # -------------------------------------------------------------------
        # Create a dictionary of all global attributes
        # -------------------------------------------------------------------
        self.dictGbl = infile.__dict__
        for key, value in self.dictGbl.iteritems():
            cmip6_cv.set_cur_dataset_attribute(key, value)
        # Set member_id attribute depending on sub_experiment_id and variant_label
        member_id = ""
        if "sub_experiment_id" in self.dictGbl.keys():
            if self.dictGbl["sub_experiment_id"] not in ['none']:
                member_id = '{}-{}'.format(self.dictGbl['sub_experiment_id'],
                                           self.dictGbl['variant_label'])
            else:
                member_id = self.dictGbl['variant_label']
        cmip6_cv.set_cur_dataset_attribute(cmip6_cv.GLOBAL_ATT_MEMBER_ID, member_id)
        self.set_double_value('branch_time_in_parent')
        self.set_double_value('branch_time_in_child')
        # -------------------------------------------------------------------
        # Create a dictionary of attributes for the variable
        # -------------------------------------------------------------------
        try:
            self.dictVar = infile.variables[variable].__dict__
        except BaseException:
            print BCOLORS.FAIL
            print "====================================================================================="
            print "The variable " + variable + " could not be found in file"
            print "====================================================================================="
            print BCOLORS.ENDC
            raise KeyboardInterrupt
        # -------------------------------------------------------------------
        # Check global attributes
        # -------------------------------------------------------------------
        self.errors += cmip6_cv.check_requiredattributes(table)
        self.errors += cmip6_cv.check_institution(table)
        self.errors += cmip6_cv.check_sourceID(table)
        self.errors += cmip6_cv.check_experiment(table)
        self.errors += cmip6_cv.check_grids(table)
        self.errors += cmip6_cv.check_ISOTime()
        self.errors += cmip6_cv.check_furtherinfourl(table)
        self.errors += cmip6_cv.check_parentExpID(table)
        self.errors += cmip6_cv.check_subExpID(table)
        for attr in ['branch_time_in_child', 'branch_time_in_parent']:
            if attr in self.dictGbl.keys():
                if not isinstance(self.dictGbl[attr], numpy.float64):
                    print BCOLORS.FAIL
                    print "====================================================================================="
                    print "{} is not a double: ".format(attr), type(self.dictGbl[attr])
                    print "====================================================================================="
                    print BCOLORS.ENDC
                    self.errors += 1
        for attr in ['realization_index', 'initialization_index', 'physics_index', 'forcing_index']:
            if not isinstance(self.dictGbl[attr], numpy.ndarray):
                print BCOLORS.FAIL
                print "====================================================================================="
                print "{} is not an integer: ".format(attr), type(self.dictGbl[attr])
                print "====================================================================================="
                print BCOLORS.ENDC
                self.errors += 1
        for attr in ['table_id', 'variable_id']:
            try:
                if locals()[attr] != self.dictGbl[attr]:
                    print BCOLORS.FAIL
                    print "====================================================================================="
                    print "{} attribute is not consistent: ".format(attr), self.dictGbl[attr]
                    print "====================================================================================="
                    print BCOLORS.ENDC
                    self.errors += 1
            except KeyError:
                print BCOLORS.FAIL
                print "====================================================================================="
                print "{} attribute is missing in global attributes".format(attr)
                print "====================================================================================="
                print BCOLORS.ENDC
                self.errors += 1
        # -------------------------------------------------------------------
        # Get time axis properties
        # -------------------------------------------------------------------
        # Get calendar and time units
        try:
            calendar = infile.variables['time'].calendar
            timeunits = infile.variables['time'].units
        except BaseException:
            calendar = "gregorian"
            timeunits = "days since ?"
           
        # Get first and last time bounds
        try:
            if 'bounds' in infile.variables['time'].__dict__.keys():
                bndsvar = infile.variables['time'].__dict__['bounds']
                startimebnds = infile.variables[bndsvar][0][0]
                endtimebnds = infile.variables[bndsvar][-1][1]
            else:
                startimebnds = infile.variables['time_bnds'][0][0]
                endtimebnds = infile.variables['time_bnds'][-1][1]
        except BaseException:
            startimebnds = 0
            endtimebnds = 0
        # Get first and last time steps
        try:
            startime = infile.variables['time'][0]
            endtime = infile.variables['time'][-1]
        except BaseException:
            startime = 0
            endtime = 0
        # -------------------------------------------------------------------
        #  Distinguish similar CMOR entries with the same out_name if exist
        # -------------------------------------------------------------------
        # Apply test on variable only if a particular treatment if required
        prepare_path = os.path.dirname(os.path.realpath(__file__))
        out_names_tests = json.loads(open(os.path.join(prepare_path, 'out_names_tests.json')).read())
        key = '{}_{}'.format(table_id, variable_id)
        if key in out_names_tests.keys():
            for test, cmor_entry in out_names_tests[key].iteritems():
                if getattr(self, test)(**{'infile': infile,
                                          'variable': variable,
                                          'filename': filename}):
                    variable = cmor_entry
        # -------------------------------------------------------------------
        # Setup variable
        # -------------------------------------------------------------------
        varid = cmip6_cv.setup_variable(variable,
                                        self.dictVar['units'],
                                        self.dictVar['_FillValue'][0],
                                        startime,
                                        endtime,
                                        startimebnds,
                                        endtimebnds)
        if varid == -1:
            print BCOLORS.FAIL
            print "====================================================================================="
            print "Could not find variable {} in table {} ".format(variable, cmip6_table)
            print "====================================================================================="
            print BCOLORS.ENDC
            raise KeyboardInterrupt
        # -------------------------------------------------------------------
        # Check filename
        # -------------------------------------------------------------------
        self.errors += cmip6_cv.check_filename(table,
                                               varid,
                                               calendar,
                                               timeunits,
                                               filename)
        # -------------------------------------------------------------------
        # Check variable attributes
        # -------------------------------------------------------------------
        fn = os.path.basename(str(infile).split('\'')[1])
        err += cmip6_cv.check_filename(
            table,
            varid,
            calendar,
            timeunits,
            fn)

        if (err != 0) or (cmip6_cv.get_CV_Error() == 1):
            self.cv_error = True

        if 'branch_time_in_child' in self.dictGbl.keys():
            if not isinstance(self.dictGbl['branch_time_in_child'], numpy.float64):
                print BCOLORS.FAIL
                print "====================================================================================="
                print "branch_time_in_child is not a double: ", type(self.dictGbl['branch_time_in_child'])
                print "====================================================================================="
                print BCOLORS.ENDC
                self.cv_error = True

        if 'branch_time_in_parent' in self.dictGbl.keys():
            if not isinstance(self.dictGbl['branch_time_in_parent'], numpy.float64):
                print BCOLORS.FAIL
                print "====================================================================================="
                print "branch_time_in_parent is not an double: ", type(self.dictGbl['branch_time_in_parent'])
                print "====================================================================================="
                print BCOLORS.ENDC
                self.cv_error = True

        if not isinstance(self.dictGbl['branch_time_in_child'], numpy.float64):
            print bcolors.FAIL
            print "====================================================================================="
            print "realization_index is not a double: ", type(self.dictGbl['branch_time_in_child'])
            print "====================================================================================="
            print bcolors.ENDC
            cmip6_cv.set_CV_Error()

        if not isinstance(self.dictGbl['branch_time_in_parent'], numpy.float64):
            print bcolors.FAIL
            print "====================================================================================="
            print "initialization_index is not an double: ", type(self.dictGbl['branch_time_in_parent'])
            print "====================================================================================="
            print bcolors.ENDC
            cmip6_cv.set_CV_Error()

        if not isinstance(self.dictGbl['realization_index'], numpy.ndarray):
            print BCOLORS.FAIL
            print "====================================================================================="
            print "realization_index is not an integer: ", type(self.dictGbl['realization_index'])
            print "====================================================================================="
            print BCOLORS.ENDC
            self.cv_error = True

        if not isinstance(self.dictGbl['initialization_index'], numpy.ndarray):
            print BCOLORS.FAIL
            print "====================================================================================="
            print "initialization_index is not an integer: ", type(self.dictGbl['initialization_index'])
            print "====================================================================================="
            print BCOLORS.ENDC
            self.cv_error = True

        if not isinstance(self.dictGbl['physics_index'], numpy.ndarray):
            print BCOLORS.FAIL
            print "====================================================================================="
            print "physics_index is not an integer: ", type(self.dictGbl['physics_index'])
            print "====================================================================================="
            print BCOLORS.ENDC
            self.cv_error = True

        if not isinstance(self.dictGbl['forcing_index'], numpy.ndarray):
            print BCOLORS.FAIL
            print "====================================================================================="
            print "forcing_index is not an integer: ", type(self.dictGbl['forcing_index'])
            print "====================================================================================="
            print BCOLORS.ENDC
            self.cv_error = True

        # -----------------------------
        # variable attribute comparison
        # -----------------------------
        cv_attrs = cmip6_cv.list_variable_attributes(varid)
        for key in cv_attrs:
            if key == "long_name":
                continue
            if key == "comment":
                continue
            if key == "cell_measures":
                if cv_attrs[key].find("OPT") != -1 or cv_attrs[key].find("MODEL") != -1:
                    continue
            # Is this attribute in file?
            if key in self.dictVar.keys():
                # Verify that attribute value is equal to file attribute
                table_value = cv_attrs[key]
                file_value = self.dictVar[key]
                # PrePARE accept units of 1 or 1.0 so adjust the table_value
                if key == "units":
                    if (table_value == "1") and (file_value == "1.0"):
                        table_value = "1.0"
                    if (table_value == "1.0") and (file_value == "1"):
                        table_value = "1"
                if isinstance(table_value, str) and isinstance(file_value, numpy.ndarray):
                    if numpy.array([int(value) for value in table_value.split()] == file_value).all():
                        file_value = True
                        table_value = True
                if isinstance(table_value, numpy.ndarray):
                    table_value = table_value[0]
                if isinstance(file_value, numpy.ndarray):
                    file_value = file_value[0]
                if isinstance(table_value, float):
                    if file_value == 0:
                        if table_value != file_value:
                            file_value = False
                    else:
                        if abs(1 - (table_value / file_value)) < 0.00001:
                            table_value = file_value
                if key == "cell_methods":
                    idx = file_value.find(" (")
                    if idx != -1:
                        file_value = file_value[:idx]
                        table_value = table_value[:idx]
                if key == "cell_measures":
                    pattern = re.compile('(?P<param>[\w.-]+): (?P<val1>[\w.-]+) OR (?P<val2>[\w.-]+)')
                    values = re.findall(pattern, table_value)
                    table_values = [""]  # Empty string is allowed in case of useless attribute
                    if values:
                        tmp = dict()
                        for param, val1, val2 in values:
                            tmp[param] = [str('{}: {}'.format(param, val1)), str('{}: {}'.format(param, val2))]
                        table_values.extend([' '.join(i) for i in list(itertools.product(*tmp.values()))])
                        if str(file_value) not in map(str, table_values):
                            print BCOLORS.FAIL
                            print "====================================================================================="
                            print "Your file contains \"" + key + "\":\"" + str(file_value) + "\" and"
                            print "CMIP6 tables requires \"" + key + "\":\"" + str(table_value) + "\"."
                            print "====================================================================================="
                            print BCOLORS.ENDC
                            self.errors += 1
                        continue

                if str(table_value) != str(file_value):
                    print BCOLORS.FAIL
                    print "====================================================================================="
                    print "Your file contains \"" + key + "\":\"" + str(file_value) + "\" and"
                    print "CMIP6 tables requires \"" + key + "\":\"" + str(table_value) + "\"."
                    print "====================================================================================="
                    print BCOLORS.ENDC
                    self.errors += 1
            else:
                # That attribute is not in the file
                table_value = cv_attrs[key]
                if isinstance(table_value, numpy.ndarray):
                    table_value = table_value[0]
                if isinstance(table_value, float):
                    table_value = "{0:.2g}".format(table_value)
                print BCOLORS.FAIL
                print "====================================================================================="
                print "CMIP6 variable " + variable + " requires \"" + key + "\":\"" + str(table_value) + "\"."
                print "====================================================================================="
                print BCOLORS.ENDC
                self.errors += 1

        if self.errors != 0:
            raise KeyboardInterrupt
        else:
            print BCOLORS.OKGREEN
            print "*************************************************************************************"
            print "* This file is compliant with the CMIP6 specification and can be published in ESGF  *"
            print "*************************************************************************************"
            print BCOLORS.ENDC
Exemple #5
0
    def ControlVocab(self, ncfile, variable=None, print_all=True, no_text_color=False):
        """
        Check CMIP6 global attributes against Control Vocabulary file.

            1. Validate required attribute if presents and some values.
            2. Validate registered institution and institution_id
            3. Validate registered source and source_id
            4. Validate experiment, experiment_id and all attributes associated with this experiment.
                   Make sure that all attributes associate with the experiment_id found in CMIP6_CV.json
                   are set to the appropriate values.
            5. Validate grid_label and grid_resolution
            6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
            7. Validate furtherinfourl from CV internal template
            8. Validate variable attributes with CMOR JSON table.
            9. Validate parent_* attribute
           10. Validate sub_experiment_* attributes.
           11. Validate that all *_index are integers.

        """
        filename = os.path.basename(ncfile)
        # -------------------------------------------------------------------
        #  Initialize arrays
        # -------------------------------------------------------------------
        # If table_path is the table directory
        # Deduce corresponding JSON from filename
        if os.path.isdir(self.cmip6_table_path):
            cmip6_table = '{}/CMIP6_{}.json'.format(
                self.cmip6_table_path, self._get_table_from_filename(filename))
        else:
            cmip6_table = self.cmip6_table_path
        table_id = os.path.basename(os.path.splitext(cmip6_table)[0]).split('_')[1]
        # Check and get JSON table
        cmor_table = self._check_json_table(cmip6_table)
        # -------------------------------------------------------------------
        # Load CMIP6 table into memory
        # -------------------------------------------------------------------
        table = cmip6_cv.load_table(cmip6_table)
        # -------------------------------------------------------------------
        #  Deduce variable
        # -------------------------------------------------------------------
        # If variable can be deduced from the filename (Default)
        # If not variable submitted on command line with --variable is considered
        variable_id = self._get_variable_from_filename(filename)
        if not variable:
            variable = variable_id
        # -------------------------------------------------------------------
        #  Distinguish similar CMOR entries with the same out_name if exist
        # -------------------------------------------------------------------
        # Apply test on variable only if a particular treatment if required
        prepare_path = os.path.dirname(os.path.realpath(__file__))
        out_names_tests = json.loads(open(os.path.join(prepare_path, 'out_names_tests.json')).read())
        # -------------------------------------------------------------------
        #  Open file in processing
        #  The file needs to be open before the calling the test.
        # -------------------------------------------------------------------
        infile = netCDF4.Dataset(ncfile, "r")
        key = '{}_{}'.format(table_id, variable_id)
        variable_cmor_entry = None
        if key in list(out_names_tests.keys()):
            for test, cmor_entry in list(out_names_tests[key].items()):
                if getattr(self, test)(**{'infile': infile,
                                          'variable': variable,
                                          'filename': filename}):
                    # If test successfull, the CMOR entry to consider is given by the test
                    variable_cmor_entry = cmor_entry
                    break
                else:
                    # If not, CMOR entry to consider is the variable from filename or from input command-line
                    variable_cmor_entry = variable
        else:
            # By default, CMOR entry to consider is the variable from filename or from input command-line
            variable_cmor_entry = variable
        # -------------------------------------------------------------------
        #  Get variable out name in netCDF record
        #  -------------------------------------------------------------------
        # Variable record name should follow CMOR table out names
        if variable_cmor_entry not in list(cmor_table['variable_entry'].keys()):
            msg = "The entry " + variable_cmor_entry + " could not be found in CMOR table"
            self.prepare_print(msg, 'FAIL', no_text_color, lines=True)
            self.prepare_print("└──> :: CV FAIL    :: {}".format(ncfile), 'FAIL', no_text_color)
            raise KeyboardInterrupt
        variable_record_name = cmor_table['variable_entry'][variable_cmor_entry]['out_name']
        # Variable id attribute should be the same as variable record name
        # in any case to be CF- and CMIP6-compliant
        variable_id = variable_record_name
        # -------------------------------------------------------------------
        # Create a dictionary of all global attributes
        # -------------------------------------------------------------------
        self.dictGbl = infile.__dict__
        for key, value in list(self.dictGbl.items()):
            cmip6_cv.set_cur_dataset_attribute(key, value)
        # Set member_id attribute depending on sub_experiment_id and variant_label
        member_id = ""
        if "sub_experiment_id" in list(self.dictGbl.keys()):
            if self.dictGbl["sub_experiment_id"] not in ['none']:
                member_id = '{}-{}'.format(self.dictGbl['sub_experiment_id'],
                                           self.dictGbl['variant_label'])
            else:
                member_id = self.dictGbl['variant_label']
        cmip6_cv.set_cur_dataset_attribute(cmip6_cv.GLOBAL_ATT_MEMBER_ID, member_id)
        # -------------------------------------------------------------------
        # Create a dictionary of attributes for the variable
        # -------------------------------------------------------------------
        try:
            self.dictVar = infile.variables[variable_record_name].__dict__
        except BaseException:
            msg = "The variable " + variable_record_name + " could not be found in file"
            self.prepare_print(msg, 'FAIL', no_text_color, lines=True)
            self.prepare_print("└──> :: CV FAIL    :: {}".format(ncfile), 'FAIL', no_text_color)
            raise KeyboardInterrupt

        # -------------------------------------------------------------------
        # Check global attributes
        # -------------------------------------------------------------------
        if cmip6_cv.check_requiredattributes(table) != 0:
            self.errors += 1
        if cmip6_cv.check_institution(table) != 0:
            self.errors += 1
        if cmip6_cv.check_sourceID(table) != 0:
            self.errors += 1
        if cmip6_cv.check_experiment(table) != 0:
            self.errors += 1
        if cmip6_cv.check_grids(table) != 0:
            self.errors += 1
        if cmip6_cv.check_ISOTime() != 0:
            self.errors += 1
        if cmip6_cv.check_furtherinfourl(table) != 0:
            self.errors += 1
        if cmip6_cv.check_subExpID(table) != 0:
            self.errors += 1
        for attr in ['branch_time_in_child', 'branch_time_in_parent']:
            if attr in list(self.dictGbl.keys()):
                self.set_double_value(attr)
                if not numpy.issubdtype(type(self.dictGbl[attr]), numpy.float64):
                    msg = "{} is not a double: {}".format(attr, type(self.dictGbl[attr]))
                    self.prepare_print(msg, 'FAIL', no_text_color, lines=True)
                    self.errors += 1
        for attr in ['realization_index', 'initialization_index', 'physics_index', 'forcing_index']:
            try:
                if not numpy.issubdtype(type(self.dictGbl[attr]), numpy.integer):
                    msg = "{} is not an integer: {}".format(attr, type(self.dictGbl[attr]))
                    self.prepare_print(msg, 'FAIL', no_text_color, lines=True)
                    self.errors += 1
            except KeyError:
                msg = "{} attribute is missing in global attributes".format(attr)
                self.prepare_print(msg, 'FAIL', no_text_color, lines=True)
                self.errors += 1
        if cmip6_cv.check_parentExpID(table) != 0:
            self.errors += 1
        for attr in ['table_id', 'variable_id']:
            try:
                if locals()[attr] != self.dictGbl[attr]:
                    msg = "{} attribute is not consistent: {}".format(attr, self.dictGbl[attr])
                    self.prepare_print(msg, 'FAIL', no_text_color, lines=True)
                    self.errors += 1
            except KeyError:
                msg = "{} attribute is missing in global attributes".format(attr)
                self.prepare_print(msg, 'FAIL', no_text_color, lines=True)
                self.errors += 1
        # -------------------------------------------------------------------
        # Get time axis properties
        # -------------------------------------------------------------------
        # Get calendar and time units
        try:
            calendar = infile.variables['time'].calendar
            timeunits = infile.variables['time'].units
        except BaseException:
            calendar = "gregorian"
            timeunits = "days since ?"
        # Get first and last time bounds

        climatology = self.is_climatology(filename)
        if climatology:
            if cmip6_table.find('Amon') != -1:
                variable = '{}Clim'.format(variable)

        clim_idx = variable.find('Clim')
        if climatology and clim_idx != -1:
            var = [variable[:clim_idx]]

        try:
            if 'bounds' in list(infile.variables['time'].__dict__.keys()):
                bndsvar = infile.variables['time'].__dict__['bounds']
            elif 'climatology' in list(infile.variables['time'].__dict__.keys()):
                bndsvar = infile.variables['time'].__dict__['climatology']
            else:
                bndsvar = 'time_bnds'
            startimebnds = infile.variables[bndsvar][0][0]
            endtimebnds = infile.variables[bndsvar][-1][1]
        except BaseException:
            startimebnds = 0
            endtimebnds = 0

        try:
            startime = infile.variables['time'][0]
            endtime = infile.variables['time'][-1]
        except BaseException:
            startime = 0
            endtime = 0

        # -------------------------------------------------------------------
        # Setup variable
        # -------------------------------------------------------------------
        varid = cmip6_cv.setup_variable(variable_cmor_entry,
                                        cmor_table['variable_entry'][variable_cmor_entry]['units'],
                                        float(cmor_table['Header']['missing_value']),
                                        int(cmor_table['Header']['int_missing_value']),
                                        startime,
                                        endtime,
                                        startimebnds,
                                        endtimebnds)
        if varid == -1:
            msg = "Could not find variable {} in table {} ".format(variable_cmor_entry, cmip6_table)
            self.prepare_print(msg, 'FAIL', no_text_color, lines=True)
            self.prepare_print("└──> :: CV FAIL    :: {}".format(ncfile), 'FAIL', no_text_color)
            raise KeyboardInterrupt
        # -------------------------------------------------------------------
        # Check filename
        # -------------------------------------------------------------------
        if cmip6_cv.check_filename(table, varid, calendar, timeunits, filename) != 0:
            self.errors += 1
        # -------------------------------------------------------------------
        # Check variable attributes
        # -------------------------------------------------------------------
        cv_attrs = cmip6_cv.list_variable_attributes(varid)
        for key in cv_attrs:
            if key == "long_name":
                continue
            if key == "comment":
                continue
            if key == "cell_measures":
                if " OR " in cv_attrs[key] or "OPT" in cv_attrs[key] or "MODEL" in cv_attrs[key] or "UGRID" in cv_attrs[key]:
                    continue
            # Is this attribute in file?
            if key in list(self.dictVar.keys()):
                # Verify that attribute value is equal to file attribute
                table_value = cv_attrs[key]
                file_value = self.dictVar[key]
                # PrePARE accept units of 1 or 1.0 so adjust the table_value
                if key == "units":
                    if (table_value == "1") and (file_value == "1.0"):
                        table_value = "1.0"
                    if (table_value == "1.0") and (file_value == "1"):
                        table_value = "1"
                if isinstance(table_value, str) and isinstance(file_value, numpy.ndarray):
                    if numpy.array([int(value) for value in table_value.split()] == file_value).all():
                        file_value = True
                        table_value = True
                if isinstance(table_value, numpy.ndarray):
                    table_value = table_value[0]
                if isinstance(file_value, numpy.ndarray):
                    file_value = file_value[0]
                if isinstance(table_value, float):
                    if abs(table_value - file_value) <= 0.00001 * abs(table_value):
                        table_value = file_value
                if key == "cell_methods":
                    # Remove text that is inside parentheses i.e. comments
                    file_value = re.sub(r"\(.*\)", "", file_value)
                    table_value = re.sub(r"\(.*\)", "", table_value)
                    # Remove extra whitespace
                    file_value = " ".join(file_value.split())
                    table_value = " ".join(table_value.split())
                if key == "cell_measures":
                    # Check if area and volume values from the table's cell_measures are found in the file's external_variables
                    pattern = re.compile('(?:area|volume): (\w+)')
                    values = re.findall(pattern, table_value)
                    for v in values:
                        if 'external_variables' in list(self.dictGbl.keys()):
                            if not re.search(r"\b{}\b".format(v), self.dictGbl['external_variables']):
                                msg = "Your file contains external_variables = \"" + self.dictGbl['external_variables'] + "\", and"
                                if len(values) == 2:
                                    msg += "\nCMIP6 tables requires \"" + values[0] + "\" and \"" + values[1] + "\" in external_variables."
                                else:
                                    msg += "\nCMIP6 tables requires \"" + values[0] + "\" in external_variables."
                                self.prepare_print(msg, 'FAIL', no_text_color, lines=True)
                                self.errors += 1
                        else:
                            if len(values) == 2:
                                msg = "Your file contains \"" + values[0] + "\" and \"" + values[1] + "\" in cell_measures and"
                            else:
                                msg = "Your file contains \"" + values[0] + "\" in cell_measures and"
                            msg += "\nCMIP6 tables require attribute \"external_variables\" in global attributes."
                            self.prepare_print(msg, 'FAIL', no_text_color, lines=True)
                            self.errors += 1
                        continue

                if str(table_value) != str(file_value):
                    msg =  "Your file contains \"" + key + "\":\"" + str(file_value) + "\" and"
                    msg += "\nCMIP6 tables requires \"" + key + "\":\"" + str(table_value) + "\"."
                    self.prepare_print(msg, 'FAIL', no_text_color, lines=True)
                    self.errors += 1
            else:
                # That attribute is not in the file
                table_value = cv_attrs[key]
                if isinstance(table_value, numpy.ndarray):
                    table_value = table_value[0]
                if isinstance(table_value, float):
                    table_value = "{0:.2g}".format(table_value)
                msg = "CMIP6 variable " + variable + " requires \"" + key + "\":\"" + str(table_value) + "\"."
                self.prepare_print(msg, 'FAIL', no_text_color, lines=True)
                self.errors += 1
        # Check if cell_measures is defined in the file but not in the table
        if "cell_measures" in list(self.dictVar.keys()) and "cell_measures" not in cv_attrs:
                msg =  "Your file contains \"cell_measures\":\"" + str(self.dictVar["cell_measures"]) + "\" but"
                msg += "\nCMIP6 tables do not define \"cell_measures\"."
                self.prepare_print(msg, 'FAIL', no_text_color, lines=True)
                self.errors += 1
        # Print final message
        if self.errors != 0:
            self.prepare_print("└──> :: CV FAIL    :: {}".format(ncfile), 'FAIL', no_text_color)
            raise KeyboardInterrupt
        elif print_all:
            self.prepare_print("     :: CV SUCCESS :: {}".format(ncfile), 'OKGREEN', no_text_color)
Exemple #6
0
    def ControlVocab(self, ncfile, variable=None, print_all=True):
        """
        Check CMIP6 global attributes against Control Vocabulary file.

            1. Validate that required attributes are present, and some of their values.
            2. Validate registered institution and institution_id
            3. Validate registered source and source_id
            4. Validate experiment, experiment_id and all attributes associated with this experiment.
                   Make sure that all attributes associated with the experiment_id found in CMIP6_CV.json
                   are set to the appropriate values.
            5. Validate grid_label and grid_resolution
            6. Validate creation time in ISO format (YYYY-MM-DDTHH:MM:SS)
            7. Validate furtherinfourl from CV internal template
            8. Validate variable attributes with CMOR JSON table.
            9. Validate parent_* attribute
           10. Validate sub_experiment_* attributes.
           11. Validate that all *_index attributes are present and have the expected type.

        Arguments:
            ncfile:    path of the netCDF file to check.
            variable:  CMOR variable entry to use; when falsy, the variable deduced
                       from the file name is used instead.
            print_all: when true, print a success line if no error was counted.

        Side effects:
            Sets ``self.dictGbl`` (global attributes of the file) and ``self.dictVar``
            (attributes of the checked variable). The return value of every
            ``cmip6_cv.check_*`` call is added to ``self.errors`` and every failed
            local check adds one; ``self.errors`` is not reset by this method.

        Raises:
            KeyboardInterrupt: when the CMOR entry is not in the table, the variable
                is not in the file, ``cmip6_cv.setup_variable`` returns -1, or
                ``self.errors`` is non-zero once all checks have run.
        """
        filename = os.path.basename(ncfile)
        # Horizontal rule framing every failure banner printed by this method.
        ruler = "====================================================================================="
        # Table entries of the form "<param>: <val1> OR <val2>" in cell_measures.
        # Compiled once here instead of on every iteration of the attribute loop.
        cell_measures_choice = re.compile(r'(?P<param>[\w.-]+): (?P<val1>[\w.-]+) OR (?P<val2>[\w.-]+)')

        def print_failure(*lines):
            """Print each of ``lines`` on its own line inside the colored failure banner."""
            print(BCOLORS.FAIL)
            print(ruler)
            for line in lines:
                print(line)
            print(ruler)
            print(BCOLORS.ENDC)

        # -------------------------------------------------------------------
        #  Initialize arrays
        # -------------------------------------------------------------------
        # If table_path is the table directory
        # Deduce corresponding JSON from filename
        if os.path.isdir(self.cmip6_table_path):
            cmip6_table = '{}/CMIP6_{}.json'.format(
                self.cmip6_table_path, self._get_table_from_filename(filename))
        else:
            cmip6_table = self.cmip6_table_path
        # The table id is the second "_"-separated token of the JSON file name
        table_id = os.path.basename(os.path.splitext(cmip6_table)[0]).split('_')[1]
        # Check and get JSON table
        cmor_table = self._check_json_table(cmip6_table)
        # -------------------------------------------------------------------
        # Load CMIP6 table into memory
        # -------------------------------------------------------------------
        table = cmip6_cv.load_table(cmip6_table)
        # -------------------------------------------------------------------
        #  Deduce variable
        # -------------------------------------------------------------------
        # If variable can be deduced from the filename (Default)
        # If not variable submitted on command line with --variable is considered
        variable_id = self._get_variable_from_filename(filename)
        if not variable:
            variable = variable_id
        # -------------------------------------------------------------------
        #  Distinguish similar CMOR entries with the same out_name if exist
        # -------------------------------------------------------------------
        # Apply test on variable only if a particular treatment if required
        prepare_path = os.path.dirname(os.path.realpath(__file__))
        # Context manager so the JSON file handle is closed deterministically
        with open(os.path.join(prepare_path, 'out_names_tests.json')) as tests_file:
            out_names_tests = json.load(tests_file)
        # -------------------------------------------------------------------
        #  Open file in processing
        #  The file needs to be open before the calling the test.
        # -------------------------------------------------------------------
        # NOTE(review): infile is never closed in this method; confirm whether
        # self.dictGbl / self.dictVar must outlive it before adding a close().
        infile = Cdunif.CdunifFile(ncfile, "r")
        out_name_key = '{}_{}'.format(table_id, variable_id)
        variable_cmor_entry = None
        if out_name_key in out_names_tests:
            # Every listed test is evaluated; the outcome of the last one decides the entry
            for test, cmor_entry in out_names_tests[out_name_key].items():
                if getattr(self, test)(infile=infile, variable=variable, filename=filename):
                    # If test successfull, the CMOR entry to consider is given by the test
                    variable_cmor_entry = cmor_entry
                else:
                    # If not, CMOR entry to consider is the variable from filename or from input command-line
                    variable_cmor_entry = variable
        else:
            # By default, CMOR entry to consider is the variable from filename or from input command-line
            variable_cmor_entry = variable
        # -------------------------------------------------------------------
        #  Get variable out name in netCDF record
        #  -------------------------------------------------------------------
        # Variable record name should follow CMOR table out names
        if variable_cmor_entry not in cmor_table['variable_entry'].keys():
            # format() rather than "+" so a None entry is reported instead of raising TypeError
            print_failure("The entry {} could not be found in CMOR table".format(variable_cmor_entry))
            raise KeyboardInterrupt
        variable_record_name = cmor_table['variable_entry'][variable_cmor_entry]['out_name']
        # Variable id attribute should be the same as variable record name
        # in any case to be CF- and CMIP6-compliant
        variable_id = variable_record_name
        # -------------------------------------------------------------------
        # Create a dictionary of all global attributes
        # -------------------------------------------------------------------
        self.dictGbl = infile.__dict__
        for key, value in list(self.dictGbl.items()):
            cmip6_cv.set_cur_dataset_attribute(key, value)
        # Set member_id attribute depending on sub_experiment_id and variant_label
        member_id = ""
        if "sub_experiment_id" in self.dictGbl:
            if self.dictGbl["sub_experiment_id"] not in ['none']:
                member_id = '{}-{}'.format(self.dictGbl['sub_experiment_id'],
                                           self.dictGbl['variant_label'])
            else:
                member_id = self.dictGbl['variant_label']
        cmip6_cv.set_cur_dataset_attribute(cmip6_cv.GLOBAL_ATT_MEMBER_ID, member_id)
        # -------------------------------------------------------------------
        # Create a dictionary of attributes for the variable
        # -------------------------------------------------------------------
        try:
            self.dictVar = infile.variables[variable_record_name].__dict__
        except Exception:
            print_failure("The variable " + variable_record_name + " could not be found in file")
            raise KeyboardInterrupt

        # -------------------------------------------------------------------
        # Check global attributes
        # -------------------------------------------------------------------
        self.errors += cmip6_cv.check_requiredattributes(table)
        self.errors += cmip6_cv.check_institution(table)
        self.errors += cmip6_cv.check_sourceID(table)
        self.errors += cmip6_cv.check_experiment(table)
        self.errors += cmip6_cv.check_grids(table)
        self.errors += cmip6_cv.check_ISOTime()
        self.errors += cmip6_cv.check_furtherinfourl(table)
        self.errors += cmip6_cv.check_subExpID(table)
        for attr in ['branch_time_in_child', 'branch_time_in_parent']:
            if attr in self.dictGbl:
                self.set_double_value(attr)
                if not isinstance(self.dictGbl[attr], numpy.float64):
                    # Two spaces after the colon reproduce the historical two-argument print() output
                    print_failure("{} is not a double:  {}".format(attr, type(self.dictGbl[attr])))
                    self.errors += 1
        for attr in ['realization_index', 'initialization_index', 'physics_index', 'forcing_index']:
            try:
                index_value = self.dictGbl[attr]
            except KeyError:
                # Report a missing index like a missing table_id/variable_id instead of crashing
                print_failure("{} attribute is missing in global attributes".format(attr))
                self.errors += 1
                continue
            # NOTE(review): the test is for numpy.ndarray although the message says
            # "integer" - presumably numeric attributes are read as arrays; confirm.
            if not isinstance(index_value, numpy.ndarray):
                print_failure("{} is not an integer:  {}".format(attr, type(index_value)))
                self.errors += 1
        self.errors += cmip6_cv.check_parentExpID(table)
        # Explicit (name, expected) pairs instead of a locals() lookup
        for attr, expected in (('table_id', table_id), ('variable_id', variable_id)):
            try:
                found = self.dictGbl[attr]
            except KeyError:
                print_failure("{} attribute is missing in global attributes".format(attr))
                self.errors += 1
                continue
            if expected != found:
                print_failure("{} attribute is not consistent:  {}".format(attr, found))
                self.errors += 1
        # -------------------------------------------------------------------
        # Get time axis properties
        # -------------------------------------------------------------------
        # Get calendar and time units; fall back to defaults when either is unavailable
        try:
            calendar = infile.variables['time'].calendar
            timeunits = infile.variables['time'].units
        except Exception:
            calendar = "gregorian"
            timeunits = "days since ?"

        # Climatology files checked against an "Amon" table use the "<variable>Clim" name
        if self.is_climatology(filename) and 'Amon' in cmip6_table:
            variable = '{}Clim'.format(variable)

        # Get first and last time bounds; both default to 0 when they cannot be read
        try:
            time_attrs = infile.variables['time'].__dict__
            if 'bounds' in time_attrs:
                bndsvar = time_attrs['bounds']
            elif 'climatology' in time_attrs:
                bndsvar = time_attrs['climatology']
            else:
                bndsvar = 'time_bnds'
            startimebnds = infile.variables[bndsvar][0][0]
            endtimebnds = infile.variables[bndsvar][-1][1]
        except Exception:
            startimebnds = 0
            endtimebnds = 0

        # Get first and last time values; both default to 0 when they cannot be read
        try:
            startime = infile.variables['time'][0]
            endtime = infile.variables['time'][-1]
        except Exception:
            startime = 0
            endtime = 0

        # -------------------------------------------------------------------
        # Setup variable
        # -------------------------------------------------------------------
        varid = cmip6_cv.setup_variable(variable_cmor_entry,
                                        self.dictVar['units'],
                                        self.dictVar['_FillValue'][0],
                                        startime,
                                        endtime,
                                        startimebnds,
                                        endtimebnds)
        if varid == -1:
            print_failure("Could not find variable {} in table {} ".format(variable_cmor_entry, cmip6_table))
            raise KeyboardInterrupt
        # -------------------------------------------------------------------
        # Check filename
        # -------------------------------------------------------------------
        self.errors += cmip6_cv.check_filename(table,
                                               varid,
                                               calendar,
                                               timeunits,
                                               filename)
        # -------------------------------------------------------------------
        # Check variable attributes
        # -------------------------------------------------------------------
        cv_attrs = cmip6_cv.list_variable_attributes(varid)
        for key in cv_attrs:
            # long_name and comment are never compared
            if key in ("long_name", "comment"):
                continue
            if key == "cell_measures":
                # Table values containing OPT or MODEL are not enforced
                if "OPT" in cv_attrs[key] or "MODEL" in cv_attrs[key]:
                    continue
            # Is this attribute in file?
            if key in self.dictVar:
                # Verify that attribute value is equal to file attribute
                table_value = cv_attrs[key]
                file_value = self.dictVar[key]
                # PrePARE accept units of 1 or 1.0 so adjust the table_value
                if key == "units":
                    if (table_value == "1") and (file_value == "1.0"):
                        table_value = "1.0"
                    if (table_value == "1.0") and (file_value == "1"):
                        table_value = "1"
                # A whitespace-separated string of integers in the table matches an
                # equal array in the file: both sides are collapsed to True
                if isinstance(table_value, str) and isinstance(file_value, numpy.ndarray):
                    if numpy.array([int(value) for value in table_value.split()] == file_value).all():
                        file_value = True
                        table_value = True
                # Arrays are compared through their first element only
                if isinstance(table_value, numpy.ndarray):
                    table_value = table_value[0]
                if isinstance(file_value, numpy.ndarray):
                    file_value = file_value[0]
                # Floats are considered equal within a relative tolerance of 1e-5
                if isinstance(table_value, float):
                    if abs(table_value - file_value) <= 0.00001 * abs(table_value):
                        table_value = file_value
                if key == "cell_methods":
                    # Ignore everything from the first " (" of the file value onwards;
                    # the table value is cut at the same position
                    idx = file_value.find(" (")
                    if idx != -1:
                        file_value = file_value[:idx]
                        table_value = table_value[:idx]
                if key == "cell_measures":
                    values = cell_measures_choice.findall(table_value)
                    table_values = [""]  # Empty string is allowed in case of useless attribute
                    if values:
                        # Build every combination of the "val1 OR val2" alternatives
                        choices = dict()
                        for param, val1, val2 in values:
                            choices[param] = ['{}: {}'.format(param, val1), '{}: {}'.format(param, val2)]
                        table_values.extend(' '.join(combo) for combo in itertools.product(*choices.values()))
                        if str(file_value) not in table_values:
                            print_failure(
                                "Your file contains \"" + key + "\":\"" + str(file_value) + "\" and",
                                "CMIP6 tables requires \"" + key + "\":\"" + str(table_value) + "\".")
                            self.errors += 1
                        continue

                if str(table_value) != str(file_value):
                    print_failure(
                        "Your file contains \"" + key + "\":\"" + str(file_value) + "\" and",
                        "CMIP6 tables requires \"" + key + "\":\"" + str(table_value) + "\".")
                    self.errors += 1
            else:
                # That attribute is not in the file
                table_value = cv_attrs[key]
                if isinstance(table_value, numpy.ndarray):
                    table_value = table_value[0]
                if isinstance(table_value, float):
                    table_value = "{0:.2g}".format(table_value)
                print_failure(
                    "CMIP6 variable " + variable + " requires \"" + key + "\":\"" + str(table_value) + "\".")
                self.errors += 1
        # Print final message
        if self.errors != 0:
            print(BCOLORS.FAIL + "└──> :: CV FAIL    :: {}".format(ncfile) + BCOLORS.ENDC)
            raise KeyboardInterrupt
        elif print_all:
            print(BCOLORS.OKGREEN + "     :: CV SUCCESS :: {}".format(ncfile) + BCOLORS.ENDC)