def test_coverage_recovery(self):
        # Create the coverage
        dp_id, stream_id, route, stream_def_id, dataset_id = self.load_data_product()
        self.populate_dataset(dataset_id, 36)
        dset = self.dataset_management.read_dataset(dataset_id)
        dprod = self.dpsc_cli.read_data_product(dp_id)
        cov = DatasetManagementService._get_simplex_coverage(dataset_id)
        cov_pth = cov.persistence_dir
        cov.close()

        # Analyze the valid coverage
        dr = CoverageDoctor(cov_pth, dprod, dset)
        dr_result = dr.analyze()

        # Get original values (mock)
        orig_cov = AbstractCoverage.load(cov_pth)
        time_vals_orig = orig_cov.get_time_values()
        orig_cov.close()

        # TODO: Destroy the metadata files

        # TODO: RE-analyze coverage

        # TODO: Should be corrupt, take action to repair if so

        # Repair the metadata files
        dr.repair_metadata()

        # TODO: Re-analyze fixed coverage

        fixed_cov = AbstractCoverage.load(cov_pth)
        self.assertIsInstance(fixed_cov, AbstractCoverage)

        time_vals_fixed = fixed_cov.get_time_values()
        self.assertTrue(np.array_equiv(time_vals_orig, time_vals_fixed))
    def test_coverage_recovery(self):
        # Create the coverage
        dp_id, stream_id, route, stream_def_id, dataset_id = self.load_data_product()
        self.populate_dataset(dataset_id, 36)
        dset = self.dataset_management.read_dataset(dataset_id)
        dprod = self.dpsc_cli.read_data_product(dp_id)
        cov = DatasetManagementService._get_simplex_coverage(dataset_id)
        cov_pth = cov.persistence_dir
        cov.close()

        # Analyze the valid coverage
        dr = CoverageDoctor(cov_pth, dprod, dset)

        dr_result = dr.analyze()

        # TODO: Turn these into meaningful Asserts
        self.assertEqual(len(dr_result.get_brick_corruptions()), 0)
        self.assertEqual(len(dr_result.get_brick_size_ratios()), 8)
        self.assertEqual(len(dr_result.get_corruptions()), 0)
        self.assertEqual(len(dr_result.get_master_corruption()), 0)
        self.assertEqual(len(dr_result.get_param_corruptions()), 0)
        self.assertEqual(len(dr_result.get_param_size_ratios()), 64)
        self.assertEqual(len(dr_result.get_master_size_ratio()), 1)
        self.assertEqual(len(dr_result.get_size_ratios()), 73)
        self.assertEqual(dr_result.master_status[1], 'NORMAL')

        self.assertFalse(dr_result.is_corrupt)
        self.assertEqual(dr_result.param_file_count, 64)
        self.assertEqual(dr_result.brick_file_count, 8)
        self.assertEqual(dr_result.total_file_count, 73)

        # Get original values (mock)
        orig_cov = AbstractCoverage.load(cov_pth)
        time_vals_orig = orig_cov.get_time_values()
        orig_cov.close()

        # Corrupt the Master File
        fo = open(cov._persistence_layer.master_manager.file_path, "wb")
        fo.write('Junk')
        fo.close()
        # Corrupt the lon Parameter file
        fo = open(cov._persistence_layer.parameter_metadata['lon'].file_path, "wb")
        fo.write('Junk')
        fo.close()

        corrupt_res = dr.analyze(reanalyze=True)
        self.assertTrue(corrupt_res.is_corrupt)

        # Repair the metadata files
        dr.repair(reanalyze=True)

        fixed_res = dr.analyze(reanalyze=True)
        self.assertFalse(fixed_res.is_corrupt)

        fixed_cov = AbstractCoverage.load(cov_pth)
        self.assertIsInstance(fixed_cov, AbstractCoverage)

        time_vals_fixed = fixed_cov.get_time_values()
        fixed_cov.close()
        self.assertTrue(np.array_equiv(time_vals_orig, time_vals_fixed))
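# A minimal standalone sketch of the analyze/repair cycle exercised by the test above.
# It assumes an existing coverage directory (cov_pth) plus the associated data product
# and dataset objects (dprod, dset), and uses only the CoverageDoctor calls shown in this
# file; the function name is a placeholder.
def repair_if_corrupt(cov_pth, dprod, dset):
    dr = CoverageDoctor(cov_pth, dprod, dset)
    result = dr.analyze()
    if result.is_corrupt:
        # Attempt the metadata repair, then re-analyze to confirm it worked
        dr.repair(reanalyze=True)
        if dr.analyze(reanalyze=True).is_corrupt:
            raise RuntimeError('Coverage at %s is still corrupt after repair' % cov_pth)
    # Reload the (possibly repaired) coverage for use
    return AbstractCoverage.load(cov_pth)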
 def _splice_coverage(cls, dataset_id, scov):
     file_root = FileSystem.get_url(FS.CACHE, 'datasets')
     vcov = cls._get_coverage(dataset_id, mode='a')
     scov_pth = scov.persistence_dir
     if isinstance(vcov.reference_coverage, SimplexCoverage):
         ccov = ComplexCoverage(file_root, uuid4().hex, 'Complex coverage for %s' % dataset_id, 
                 reference_coverage_locs=[vcov.head_coverage_path,],
                 parameter_dictionary=ParameterDictionary(),
                 complex_type=ComplexCoverageType.TEMPORAL_AGGREGATION)
         log.info('Creating Complex Coverage: %s', ccov.persistence_dir)
         ccov.append_reference_coverage(scov_pth)
         ccov_pth = ccov.persistence_dir
         ccov.close()
         vcov.replace_reference_coverage(ccov_pth)
     elif isinstance(vcov.reference_coverage, ComplexCoverage):
         log.info('Appending simplex coverage to complex coverage')
         #vcov.reference_coverage.append_reference_coverage(scov_pth)
         dir_path = vcov.reference_coverage.persistence_dir
         vcov.close()
         ccov = AbstractCoverage.load(dir_path, mode='a')
         ccov.append_reference_coverage(scov_pth)
         ccov.refresh()
         ccov.close()
     vcov.refresh()
     vcov.close()
Example #4
 def _splice_coverage(cls, dataset_id, scov):
     file_root = FileSystem.get_url(FS.CACHE, 'datasets')
     vcov = cls._get_coverage(dataset_id, mode='a')
     scov_pth = scov.persistence_dir
     if isinstance(vcov.reference_coverage, SimplexCoverage):
         ccov = ComplexCoverage(
             file_root,
             uuid4().hex,
             'Complex coverage for %s' % dataset_id,
             reference_coverage_locs=[
                 vcov.head_coverage_path,
             ],
             parameter_dictionary=ParameterDictionary(),
             complex_type=ComplexCoverageType.TEMPORAL_AGGREGATION)
         log.info('Creating Complex Coverage: %s', ccov.persistence_dir)
         ccov.append_reference_coverage(scov_pth)
         ccov_pth = ccov.persistence_dir
         ccov.close()
         vcov.replace_reference_coverage(ccov_pth)
     elif isinstance(vcov.reference_coverage, ComplexCoverage):
         log.info('Appending simplex coverage to complex coverage')
         #vcov.reference_coverage.append_reference_coverage(scov_pth)
         dir_path = vcov.reference_coverage.persistence_dir
         vcov.close()
         ccov = AbstractCoverage.load(dir_path, mode='a')
         ccov.append_reference_coverage(scov_pth)
         ccov.refresh()
         ccov.close()
     vcov.refresh()
     vcov.close()
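# Hedged usage sketch (grounded in fill_temporal_gap further below; identifiers are
# placeholders): load a previously persisted simplex coverage and splice it into the
# dataset's view coverage.
# gap_cov = AbstractCoverage.load('/path/to/gap_coverage_dir')
# DatasetManagementService._splice_coverage(dataset_id, gap_cov)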
 def _get_nonview_coverage(cls, dataset_id, mode='r'):
     cov = cls._get_coverage(dataset_id, mode)
     if isinstance(cov, ViewCoverage):
         rcov = cov.reference_coverage
         pdir = rcov.persistence_dir
         rcov = None
         cov.close()
         cov = AbstractCoverage.load(pdir, mode=mode)
     return cov
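# Hypothetical usage sketch (assumed caller, not from the original source): dereference a
# ViewCoverage before writing, so the underlying non-view coverage is the one modified.
# cov = DatasetManagementService._get_nonview_coverage(dataset_id, mode='a')
# try:
#     ...  # operate on the non-view coverage
# finally:
#     cov.close()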
    def fill_temporal_gap(self, dataset_id, gap_coverage_path=None, gap_coverage_id=None):
        if gap_coverage_path is None and gap_coverage_id is None:
            raise ValueError('Must specify either \'gap_coverage_path\' or \'gap_coverage_id\'')

        if gap_coverage_path is None:
            gap_coverage_path = self.get_coverage_path(gap_coverage_id)

        from coverage_model import AbstractCoverage
        gap_cov = AbstractCoverage.load(gap_coverage_path)

        self.pause_ingestion(self.get_stream_id(dataset_id))
        DatasetManagementService._splice_coverage(dataset_id, gap_cov)
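# Hedged usage sketch (instance and identifiers are placeholders): splice a previously
# persisted gap coverage into a dataset, either by its directory path or by its id.
# tool.fill_temporal_gap(dataset_id, gap_coverage_path='/path/to/gap_coverage_dir')
# tool.fill_temporal_gap(dataset_id, gap_coverage_id=gap_coverage_id)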
 def _get_coverage(cls, dataset_id, mode='w'):
     file_root = FileSystem.get_url(FS.CACHE, 'datasets')
     coverage = AbstractCoverage.load(file_root, dataset_id, mode=mode)
     return coverage
    def get_dataset_xml(self,
                        coverage_path,
                        product_id,
                        product_name='',
                        available_fields=None):
        #http://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html
        result = ''
        paths = os.path.split(coverage_path)
        cov = AbstractCoverage.load(coverage_path)
        doc = xml.dom.minidom.Document()

        #erd_type_map = {'d':'double', 'f':"float", 'h':'short', 'i':'int', 'l':'int', 'q':'int', 'b':'byte', 'b':'char', 'S':'String'}

        #Get lists of variables with unique sets of dimensions.
        #Datasets can only have variables with the same sets of dimensions

        if not cov.list_parameters():
            raise BadRequest(
                'Attempting to register an empty dataset. The coverage (%s) has no definition.\n%s'
                % (coverage_path, cov))

        datasets = {}
        for key in cov.list_parameters():
            pc = cov.get_parameter_context(key)
            #if getattr(pc, 'visible', None):
            #    continue
            if available_fields and pc.name not in available_fields:
                continue
            #if not isinstance(pc.param_type, QuantityType):
            #    continue

            param = cov.get_parameter(key)
            dims = (cov.temporal_parameter_name, )
            if len(param.shape) == 2:
                dims = (cov.temporal_parameter_name,
                        cov.spatial_domain.shape.name)

            if not dims in datasets.keys():
                datasets[dims] = []

            datasets[dims].append(key)

        index = 0
        if not datasets:
            raise BadRequest(
                'Attempting to register a dimensionless dataset. The coverage (%s) has no dimension(s).\n%s'
                % (coverage_path, cov))

        for dims, vars in datasets.iteritems():
            erd_name_map = self.get_errdap_name_map(vars)

            if len(vars) == 1:
                raise BadRequest(
                    'A dataset needs a proper range, not just the temporal dimension. %s\n%s'
                    % (coverage_path, cov))

            dataset_element = doc.createElement('dataset')
            #dataset_element.setAttribute('type', 'EDDGridFromDap')
            dataset_element.setAttribute('type', 'EDDTableFromDapSequence')
            dataset_element.setAttribute('datasetID', 'data' + product_id)
            dataset_element.setAttribute('active', 'True')

            source_element = doc.createElement('sourceUrl')
            text_node = doc.createTextNode(self.pydap_url + paths[1])
            source_element.appendChild(text_node)
            dataset_element.appendChild(source_element)

            reload_element = doc.createElement('reloadEveryNMinutes')
            if self.CFG.get_safe('server.erddap.dataset_caching', True):
                text_node = doc.createTextNode('1440')
            else:
                text_node = doc.createTextNode('5')
            reload_element.appendChild(text_node)
            dataset_element.appendChild(reload_element)

            outer_element = doc.createElement('outerSequenceName')
            text_node = doc.createTextNode('data')
            outer_element.appendChild(text_node)
            dataset_element.appendChild(outer_element)

            default_element = doc.createElement('defaultDataQuery')
            text_node = doc.createTextNode('&time>=1970-01-01')
            default_element.appendChild(text_node)
            dataset_element.appendChild(default_element)

            # No longer applicable
            #if self.CFG.get_safe('server.erddap.dataset_caching',True):
            #refresh_interval = self.CFG.get_safe('server.erddap.refresh_interval', 30000)
            #update_element = doc.createElement('updateEveryNMillis')
            #text_node = doc.createTextNode(str(refresh_interval))
            #update_element.appendChild(text_node)
            #dataset_element.appendChild(update_element)

            add_attributes_element = doc.createElement('addAttributes')

            atts = {}
            atts['title'] = product_name or urllib.unquote(cov.name)
            atts['infoUrl'] = self.ux_url + 'DataProduct/face/' + product_id
            atts['institution'] = 'OOI'
            atts['Conventions'] = "COARDS, CF-1.6, Unidata Dataset Discovery v1.0"
            atts['license'] = '''These data were collected by the Ocean Observatory Initiative (OOI) project purely for internal system development purposes during the construction phase of the project and are offered for release to the public with no assurance of data quality, consistency, temporal continuity or additional support. The OOI Program assumes no liability resulting from the use of these data for other than the intended purpose. No data quality assurance steps have been implemented on this data to date.'''
            atts['summary'] = cov.name
            atts['cdm_data_type'] = 'Other'
            atts['standard_name_vocabulary'] = 'CF-12'

            for key, val in atts.iteritems():
                self.xml_attr(doc, add_attributes_element, key, val)

            if len(add_attributes_element.childNodes) > 0:
                dataset_element.appendChild(add_attributes_element)

            for var_name in vars:
                var = cov.get_parameter_context(var_name)
                if re.match(r'.*_[a-z0-9]{32}', var.name):
                    continue  # Let's not do this

                #if len(param.shape) >=1 and not param.is_coordinate: #dataVariable
                data_element = doc.createElement('dataVariable')
                source_name_element = doc.createElement('sourceName')
                text_node = doc.createTextNode(var.name)
                source_name_element.appendChild(text_node)
                data_element.appendChild(source_name_element)

                destination_name_element = doc.createElement('destinationName')
                text_node = doc.createTextNode(erd_name_map[var.name])
                destination_name_element.appendChild(text_node)
                data_element.appendChild(destination_name_element)

                add_attributes_element = doc.createElement('addAttributes')
                units = "unknown"
                if hasattr(var, 'uom') and var.uom:
                    units = var.uom
                self.xml_attr(doc, add_attributes_element, 'units', units)
                #if var.ATTRS is not None:
                #for key in var.ATTRS:
                #if not hasattr(var,key):
                #continue
                #val = getattr(var,key)
                #if not val:
                #val = ''
                #att_element = doc.createElement('att')
                #att_element.setAttribute('name', key)
                #text_node = doc.createTextNode(val)
                #att_element.appendChild(text_node)
                #add_attributes_element.appendChild(att_element)

                ioos_cat = self.get_ioos_category(var.name, units)
                self.xml_attr(doc, add_attributes_element, 'ioos_category',
                              ioos_cat)

                if hasattr(var,
                           'display_name') and var.display_name is not None:
                    self.xml_attr(doc, add_attributes_element, 'long_name',
                                  var.display_name)

                if hasattr(var,
                           'standard_name') and var.standard_name is not None:
                    self.xml_attr(doc, add_attributes_element, 'standard_name',
                                  var.standard_name)

                if 'seconds' in units and 'since' in units:
                    self.xml_attr(doc, add_attributes_element,
                                  'time_precision', '1970-01-01T00:00:00.000Z')

                if hasattr(var, 'ooi_short_name') and var.ooi_short_name:
                    sname = var.ooi_short_name
                    sname = re.sub('[\t\n ]+', ' ', sname)
                    self.xml_attr(doc, add_attributes_element,
                                  'ooi_short_name', sname)

                    m = re.match(r'[A-Z0-9]{7}', sname)
                    if m:
                        reference_url = 'https://confluence.oceanobservatories.org/display/instruments/' + m.group(
                        )
                        self.xml_attr(doc, add_attributes_element,
                                      'references', reference_url)

                    if 'L2' in var.ooi_short_name:
                        self.xml_attr(doc, add_attributes_element,
                                      'data_product_level', 'L2')
                        self.xml_attr(doc, add_attributes_element, 'source',
                                      'level 2 calibrated sensor observation')
                    elif 'L1' in var.ooi_short_name:
                        self.xml_attr(doc, add_attributes_element,
                                      'data_product_level', 'L1')
                        self.xml_attr(doc, add_attributes_element, 'source',
                                      'level 1 calibrated sensor observation')
                    elif 'L0' in var.ooi_short_name:
                        self.xml_attr(doc, add_attributes_element,
                                      'data_product_level', 'L0')
                        self.xml_attr(doc, add_attributes_element, 'source',
                                      'sensor observation')
                    elif 'QC' in var.ooi_short_name:
                        self.xml_attr(doc, add_attributes_element,
                                      'data_product_level', 'QC')

                elif not isinstance(var.param_type, ParameterFunctionType):
                    self.xml_attr(doc, add_attributes_element,
                                  'ooi_short_name', var.name)
                    if units == 'counts':
                        self.xml_attr(doc, add_attributes_element,
                                      'data_product_level', 'L0')
                        self.xml_attr(doc, add_attributes_element, 'source',
                                      'sensor observation')
                    elif 'seconds' in units and 'since' in units:
                        self.xml_attr(doc, add_attributes_element,
                                      'data_product_level', 'axis')
                    else:
                        self.xml_attr(doc, add_attributes_element,
                                      'data_product_level', 'unknown')

                if hasattr(var, 'reference_urls') and var.reference_urls:
                    if isinstance(var.reference_urls, list):
                        references = ','.join(var.reference_urls)
                    else:
                        references = var.reference_urls
                    self.xml_attr(doc, add_attributes_element,
                                  'instrument_type', references)

                if isinstance(var.param_type, ParameterFunctionType):
                    if isinstance(var.function, PythonFunction):
                        self.xml_attr(doc, add_attributes_element,
                                      'function_module', var.function.owner
                                      or '')
                        self.xml_attr(doc, add_attributes_element,
                                      'function_name', var.function.func_name
                                      or '')
                        if var.function.owner.startswith('ion_functions'):
                            s = var.function.owner
                            url = s.replace('.', '/') + '.py'
                            url = 'https://github.com/ooici/ion-functions/blob/master/' + url
                            self.xml_attr(doc, add_attributes_element,
                                          'function_url', url)
                        elif var.function.egg_uri:
                            self.xml_attr(doc, add_attributes_element,
                                          'function_url', var.function.egg_uri
                                          or '')
                    elif isinstance(var.function, NumexprFunction):
                        self.xml_attr(doc, add_attributes_element,
                                      'function_name', var.function.name or '')

                data_element.appendChild(add_attributes_element)
                dataset_element.appendChild(data_element)

            index += 1
            #bug with prettyxml
            #http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
            result += dataset_element.toprettyxml() + '\n'
            #result += dataset_element.toxml() + '\n'

        cov.close()

        if not result:
            log.error(
                "Attempted to register empty dataset\nDims: %s\nDatasets: %s",
                dims, datasets)

        return result
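# Hypothetical usage sketch (registrar instance, paths and ids are placeholders): build
# the ERDDAP datasets.xml fragment for a persisted coverage and write it where the server
# configuration expects it.
# fragment = registrar.get_dataset_xml('/path/to/coverage_dir', product_id,
#                                      product_name='Example Data Product')
# with open('/path/to/erddap/content/datasets.fragment.xml', 'w') as f:
#     f.write(fragment)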
 def _get_coverage(cls, dataset_id, mode='r'):
     file_root = FileSystem.get_url(FS.CACHE, 'datasets')
     coverage = AbstractCoverage.load(file_root, dataset_id, mode=mode)
     return coverage
    def get_dataset_xml(self, coverage_path, product_id, product_name='', available_fields=None):
        #http://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html
        result = ''
        paths = os.path.split(coverage_path)
        cov = AbstractCoverage.load(coverage_path)
        doc = xml.dom.minidom.Document()
        
        #erd_type_map = {'d':'double', 'f':"float", 'h':'short', 'i':'int', 'l':'int', 'q':'int', 'b':'byte', 'b':'char', 'S':'String'} 
        
        #Get lists of variables with unique sets of dimensions.
        #Datasets can only have variables with the same sets of dimensions

        if not cov.list_parameters():
            raise BadRequest('Attempting to register an empty dataset. The coverage (%s) has no definition.\n%s' %(coverage_path, cov))

        datasets = {}
        for key in cov.list_parameters():
            pc = cov.get_parameter_context(key)
            #if getattr(pc, 'visible', None):
            #    continue
            if available_fields and pc.name not in available_fields:
                continue
            #if not isinstance(pc.param_type, QuantityType):
            #    continue

            param = cov.get_parameter(key)
            dims = (cov.temporal_parameter_name,)
            if len(param.shape) == 2:
                dims = (cov.temporal_parameter_name, cov.spatial_domain.shape.name)

            if not dims in datasets.keys():
                datasets[dims] = []
            

            datasets[dims].append(key)
        

        index = 0
        if not datasets:
            raise BadRequest('Attempting to register a dimensionless dataset. The coverage (%s) has no dimension(s).\n%s' %( coverage_path, cov))
        
        for dims, vars in datasets.iteritems():
            erd_name_map = self.get_errdap_name_map(vars) 
            
            if len(vars)==1:
                raise BadRequest('A dataset needs a proper range, not just the temporal dimension. %s\n%s' %( coverage_path, cov))

            dataset_element = doc.createElement('dataset')
            #dataset_element.setAttribute('type', 'EDDGridFromDap')
            dataset_element.setAttribute('type', 'EDDTableFromDapSequence')
            dataset_element.setAttribute('datasetID', 'data' + product_id)
            dataset_element.setAttribute('active', 'True')

            source_element = doc.createElement('sourceUrl')
            text_node = doc.createTextNode(self.pydap_url + paths[1])
            source_element.appendChild(text_node)
            dataset_element.appendChild(source_element)

            reload_element = doc.createElement('reloadEveryNMinutes')
            if self.CFG.get_safe('server.erddap.dataset_caching',True):
                text_node = doc.createTextNode('1440')
            else:
                text_node = doc.createTextNode('5')
            reload_element.appendChild(text_node)
            dataset_element.appendChild(reload_element)
            
            outer_element = doc.createElement('outerSequenceName')
            text_node = doc.createTextNode('data')
            outer_element.appendChild(text_node)
            dataset_element.appendChild(outer_element)

            default_element = doc.createElement('defaultDataQuery')
            text_node = doc.createTextNode('&time>=1970-01-01')
            default_element.appendChild(text_node)
            dataset_element.appendChild(default_element)

            # No longer applicable
            #if self.CFG.get_safe('server.erddap.dataset_caching',True):
                #refresh_interval = self.CFG.get_safe('server.erddap.refresh_interval', 30000)
                #update_element = doc.createElement('updateEveryNMillis')
                #text_node = doc.createTextNode(str(refresh_interval))
                #update_element.appendChild(text_node)
                #dataset_element.appendChild(update_element)
            

            add_attributes_element = doc.createElement('addAttributes')

            atts = {}
            atts['title'] = product_name or urllib.unquote(cov.name)
            atts['infoUrl'] = self.ux_url + 'DataProduct/face/' + product_id
            atts['institution'] = 'OOI'
            atts['Conventions'] = "COARDS, CF-1.6, Unidata Dataset Discovery v1.0"
            atts['license'] = '''These data were collected by the Ocean Observatory Initiative (OOI) project purely for internal system development purposes during the construction phase of the project and are offered for release to the public with no assurance of data quality, consistency, temporal continuity or additional support. The OOI Program assumes no liability resulting from the use of these data for other than the intended purpose. No data quality assurance steps have been implemented on this data to date.'''
            atts['summary'] = cov.name
            atts['cdm_data_type'] = 'Other'
            atts['standard_name_vocabulary'] = 'CF-12'
            
            for key, val in atts.iteritems():
                self.xml_attr(doc, add_attributes_element, key, val)

            if len(add_attributes_element.childNodes) > 0:
                dataset_element.appendChild(add_attributes_element)

            for var_name in vars:
                var = cov.get_parameter_context(var_name)
                if re.match(r'.*_[a-z0-9]{32}', var.name):
                    continue # Let's not do this
                


                #if len(param.shape) >=1 and not param.is_coordinate: #dataVariable
                data_element = doc.createElement('dataVariable')
                source_name_element = doc.createElement('sourceName')
                text_node = doc.createTextNode(var.name)
                source_name_element.appendChild(text_node)
                data_element.appendChild(source_name_element)

                destination_name_element = doc.createElement('destinationName')
                text_node = doc.createTextNode(erd_name_map[var.name])
                destination_name_element.appendChild(text_node)
                data_element.appendChild(destination_name_element)
                
                add_attributes_element = doc.createElement('addAttributes')
                units = "unknown"
                if hasattr(var,'uom') and var.uom:
                    units = var.uom
                self.xml_attr(doc, add_attributes_element, 'units', units)
                #if var.ATTRS is not None:
                    #for key in var.ATTRS:
                        #if not hasattr(var,key):
                            #continue
                        #val = getattr(var,key)
                        #if not val:
                            #val = ''
                        #att_element = doc.createElement('att')
                        #att_element.setAttribute('name', key)
                        #text_node = doc.createTextNode(val)
                        #att_element.appendChild(text_node)
                        #add_attributes_element.appendChild(att_element)

                ioos_cat = self.get_ioos_category(var.name, units)
                self.xml_attr(doc, add_attributes_element, 'ioos_category', ioos_cat)

                if hasattr(var,'display_name') and var.display_name is not None:
                    self.xml_attr(doc, add_attributes_element, 'long_name', var.display_name)
                
                if hasattr(var,'standard_name') and var.standard_name is not None:
                    self.xml_attr(doc, add_attributes_element, 'standard_name', var.standard_name)

                if 'seconds' in units and 'since' in units:
                    self.xml_attr(doc, add_attributes_element, 'time_precision', '1970-01-01T00:00:00.000Z')

                if hasattr(var, 'ooi_short_name') and var.ooi_short_name:
                    sname = var.ooi_short_name
                    sname = re.sub('[\t\n ]+', ' ', sname)
                    self.xml_attr(doc, add_attributes_element, 'ooi_short_name', sname)

                    m = re.match(r'[A-Z0-9]{7}', sname)
                    if m:
                        reference_url = 'https://confluence.oceanobservatories.org/display/instruments/' + m.group()
                        self.xml_attr(doc, add_attributes_element, 'references', reference_url)



                    if 'L2' in var.ooi_short_name:
                        self.xml_attr(doc, add_attributes_element, 'data_product_level', 'L2')
                        self.xml_attr(doc, add_attributes_element, 'source', 'level 2 calibrated sensor observation')
                    elif 'L1' in var.ooi_short_name:
                        self.xml_attr(doc, add_attributes_element, 'data_product_level', 'L1')
                        self.xml_attr(doc, add_attributes_element, 'source', 'level 1 calibrated sensor observation')
                    elif 'L0' in var.ooi_short_name:
                        self.xml_attr(doc, add_attributes_element, 'data_product_level', 'L0')
                        self.xml_attr(doc, add_attributes_element, 'source', 'sensor observation')
                    elif 'QC' in var.ooi_short_name:
                        self.xml_attr(doc, add_attributes_element, 'data_product_level', 'QC')

                elif not isinstance(var.param_type, ParameterFunctionType):
                    self.xml_attr(doc, add_attributes_element, 'ooi_short_name', var.name)
                    if units == 'counts':
                        self.xml_attr(doc, add_attributes_element, 'data_product_level', 'L0')
                        self.xml_attr(doc, add_attributes_element, 'source', 'sensor observation')
                    elif 'seconds' in units and 'since' in units:
                        self.xml_attr(doc, add_attributes_element, 'data_product_level', 'axis')
                    else:
                        self.xml_attr(doc, add_attributes_element, 'data_product_level', 'unknown')

                  
                if hasattr(var, 'reference_urls') and var.reference_urls:
                    if isinstance(var.reference_urls, list):
                        references = ','.join(var.reference_urls)
                    else:
                        references = var.reference_urls
                    self.xml_attr(doc, add_attributes_element, 'instrument_type', references)


                if isinstance(var.param_type, ParameterFunctionType):
                    if isinstance(var.function, PythonFunction):
                        self.xml_attr(doc, add_attributes_element, 'function_module', var.function.owner or '')
                        self.xml_attr(doc, add_attributes_element, 'function_name', var.function.func_name or '')
                        if var.function.owner.startswith('ion_functions'):
                            s = var.function.owner
                            url = s.replace('.','/') + '.py'
                            url = 'https://github.com/ooici/ion-functions/blob/master/' + url
                            self.xml_attr(doc, add_attributes_element, 'function_url', url)
                        elif var.function.egg_uri:
                            self.xml_attr(doc, add_attributes_element, 'function_url', var.function.egg_uri or '')
                    elif isinstance(var.function, NumexprFunction):
                        self.xml_attr(doc, add_attributes_element, 'function_name', var.function.name or '')

                data_element.appendChild(add_attributes_element)
                dataset_element.appendChild(data_element)

            index += 1
            #bug with prettyxml
            #http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
            result += dataset_element.toprettyxml() + '\n'
            #result += dataset_element.toxml() + '\n'

        cov.close()

        if not result:
            log.error("Attempted to register empty dataset\nDims: %s\nDatasets: %s", dims, datasets)


        return result
Example #12
    def repair(
        self,
        backup=True,
        copy_over=True,
        keep_temp=False,
        reanalyze=False,
        analyze_bricks=False,
        detailed_analysis=False,
    ):
        """
        Heavy repair tool that recreates a blank persisted Coverage from the broken coverage's
        original construction parameters, then reconstructs the Master and Parameter metadata
        files by inspection of the ION objects and "valid" brick files.
        @return:
        """
        if self._ar is None or reanalyze:
            self._ar = self._do_analysis(analyze_bricks=analyze_bricks, detailed_analysis=detailed_analysis)

        if self._ar.is_corrupt:
            if len(self._ar.get_brick_corruptions()) > 0:
                raise NotImplementedError("Brick corruption.  Cannot repair at this time!!!")
            else:
                # Repair the Master and Parameter metadata files

                # Need the ParameterDictionary, TemporalDomain and SpatialDomain
                pdict = ParameterDictionary.load(self._dso.parameter_dictionary)
                tdom = GridDomain.load(self._dso.temporal_domain)
                sdom = GridDomain.load(self._dso.spatial_domain)

                # Set up the working directory for the recovered coverage
                tempcov_dir = tempfile.mkdtemp("covs")

                # Create the temporary Coverage
                tempcov = SimplexCoverage(
                    root_dir=tempcov_dir,
                    persistence_guid=self._guid,
                    name=self._guid,
                    parameter_dictionary=pdict,
                    spatial_domain=sdom,
                    temporal_domain=tdom,
                )
                # Handle to persistence layer for tempcov
                pl = tempcov._persistence_layer

                # Set up the original and temporary coverage path strings
                orig_dir = os.path.join(self.cov_pth, self._guid)
                temp_dir = os.path.join(tempcov.persistence_dir, tempcov.persistence_guid)

                # Insert same number of timesteps into temporary coverage as in broken coverage
                brick_domains_new, new_brick_list, brick_list_spans, tD, bD, min_data_bound, max_data_bound = self.inspect_bricks(
                    self.cov_pth, self._guid, "time"
                )
                empty_cov = brick_list_spans is None  # If None, there are no brick files --> no timesteps, empty coverage!
                if not empty_cov:
                    bls = [s.value for s in brick_list_spans]
                    maxes = [sum(b[3]) for b in new_brick_list.values()]
                    tempcov.insert_timesteps(sum(maxes))

                    # Replace metadata in the Master file
                    pl.master_manager.brick_domains = brick_domains_new
                    pl.master_manager.brick_list = new_brick_list

                    # Repair ExternalLinks to brick files
                    with HDFLockingFile(pl.master_manager.file_path, "r+") as f:
                        for param_name in pdict.keys():
                            del f[param_name]
                            f.create_group(param_name)
                    for param_name in pdict.keys():
                        for brick in bls:
                            link_path = "/{0}/{1}".format(param_name, brick[0])
                            brick_file_name = "{0}.hdf5".format(brick[0])
                            brick_rel_path = os.path.join(
                                pl.parameter_metadata[param_name].root_dir.replace(tempcov.persistence_dir, "."),
                                brick_file_name,
                            )
                            log.debug("link_path: %s", link_path)
                            log.debug("brick_rel_path: %s", brick_rel_path)
                            pl.master_manager.add_external_link(link_path, brick_rel_path, brick[0])

                pl.flush_values()
                pl.flush()
                tempcov.close()

                # Remove 'rtree' dataset from Master file if it already exists (post domain expansion)
                # to make way for reconstruction
                with HDFLockingFile(pl.master_manager.file_path, "r+") as f:
                    if "rtree" in f.keys():
                        del f["rtree"]

                # Reconstruct 'rtree' dataset
                # Open temporary Coverage and PersistenceLayer objects
                fixed_cov = AbstractCoverage.load(tempcov.persistence_dir, mode="r+")
                pl_fixed = fixed_cov._persistence_layer

                # Call update_rtree for each brick using PersistenceLayer builtin
                brick_count = 0

                if not empty_cov:
                    for brick in bls:
                        rtree_extents, brick_extents, brick_active_size = pl_fixed.calculate_extents(
                            brick[1][1], bD, tD
                        )
                        pl_fixed.master_manager.update_rtree(brick_count, rtree_extents, obj=brick[0])
                        brick_count += 1

                # Update parameter_bounds property based on each parameter's brick data using deep inspection
                valid_bounds_types = ["BooleanType", "ConstantType", "QuantityType", "ConstantRangeType"]

                if not empty_cov:
                    for param in pdict.keys():
                        if pdict.get_context(param).param_type.__class__.__name__ in valid_bounds_types:
                            brick_domains_new, new_brick_list, brick_list_spans, tD, bD, min_data_bound, max_data_bound = self.inspect_bricks(
                                self.cov_pth, self._guid, param
                            )
                            # Update the metadata
                            pl_fixed.update_parameter_bounds(param, [min_data_bound, max_data_bound])
                pl_fixed.flush()
                fixed_cov.close()

                # Create backup copy of original Master and Parameter files
                if backup:
                    import datetime

                    orig_master_file = os.path.join(self.cov_pth, "{0}_master.hdf5".format(self._guid))

                    # Generate the timestamp
                    tstamp_format = "%Y%m%d%H%M%S"
                    tstamp = datetime.datetime.now().strftime(tstamp_format)

                    backup_master_file = os.path.join(self.cov_pth, "{0}_master.{1}.hdf5".format(self._guid, tstamp))

                    shutil.copy2(orig_master_file, backup_master_file)

                    for param in pdict.keys():
                        param_orig = os.path.join(orig_dir, param, "{0}.hdf5".format(param))
                        param_backup = os.path.join(orig_dir, param, "{0}.{1}.hdf5".format(param, tstamp))
                        shutil.copy2(param_orig, param_backup)

                # Copy Master and Parameter metadata files back to original/broken coverage (cov_pth) location
                if copy_over:
                    shutil.copy2(
                        os.path.join(tempcov.persistence_dir, "{0}_master.hdf5".format(self._guid)),
                        os.path.join(self.cov_pth, "{0}_master.hdf5".format(self._guid)),
                    )
                    for param in pdict.keys():
                        shutil.copy2(
                            os.path.join(temp_dir, param, "{0}.hdf5".format(param)),
                            os.path.join(orig_dir, param, "{0}.hdf5".format(param)),
                        )

                # Reanalyze the repaired coverage
                self._ar = self._do_analysis(analyze_bricks=True)

                # Verify repair worked, clean up if not
                if self._ar.is_corrupt:
                    # If the files were backed up then revert
                    if backup:
                        # Remove backed up files and clean up the repair attempt
                        log.info("Repair attempt failed.  Reverting to pre-repair state.")
                        # Use backup copy to replace post-repair file.
                        shutil.copy2(backup_master_file, orig_master_file)
                        # Delete the backup
                        os.remove(backup_master_file)

                        # Iterate over parameters and revert to pre-repair state
                        for param in pdict.keys():
                            param_orig = os.path.join(orig_dir, param, "{0}.hdf5".format(param))
                            param_backup = os.path.join(orig_dir, param, "{0}.{1}.hdf5".format(param, tstamp))
                            # Use backup copy to replace post-repair file.
                            shutil.copy2(param_backup, param_orig)
                            # Delete the backup
                            os.remove(param_backup)

                    raise ValueError("Coverage repair failed! Revert to stored backup version, if possible.")

                # Remove temporary coverage
                if not keep_temp:
                    shutil.rmtree(tempcov_dir)
                else:
                    return tempcov_dir
        else:
            log.info("Coverage is not corrupt, nothing to repair!")
Example #13
    def repair(self,
               backup=True,
               copy_over=True,
               keep_temp=False,
               reanalyze=False,
               analyze_bricks=False,
               detailed_analysis=False):
        """
        Heavy repair tool that recreates a blank persisted Coverage from the broken coverage's
        original construction parameters, then reconstructs the Master and Parameter metadata
        files by inspection of the ION objects and "valid" brick files.
        @return:
        """
        if self._ar is None or reanalyze:
            self._ar = self._do_analysis(analyze_bricks=analyze_bricks,
                                         detailed_analysis=detailed_analysis)

        if self._ar.is_corrupt:
            if len(self._ar.get_brick_corruptions()) > 0:
                raise NotImplementedError(
                    'Brick corruption.  Cannot repair at this time!!!')
            else:
                # Repair the Master and Parameter metadata files

                # Need the ParameterDictionary, TemporalDomain and SpatialDomain
                pdict = ParameterDictionary.load(
                    self._dso.parameter_dictionary)
                tdom = GridDomain.load(self._dso.temporal_domain)
                sdom = GridDomain.load(self._dso.spatial_domain)

                # Set up the working directory for the recovered coverage
                tempcov_dir = tempfile.mkdtemp('covs')

                # Create the temporary Coverage
                tempcov = SimplexCoverage(root_dir=tempcov_dir,
                                          persistence_guid=self._guid,
                                          name=self._guid,
                                          parameter_dictionary=pdict,
                                          spatial_domain=sdom,
                                          temporal_domain=tdom)
                # Handle to persistence layer for tempcov
                pl = tempcov._persistence_layer

                # Set up the original and temporary coverage path strings
                orig_dir = os.path.join(self.cov_pth, self._guid)
                temp_dir = os.path.join(tempcov.persistence_dir,
                                        tempcov.persistence_guid)

                # Insert same number of timesteps into temporary coverage as in broken coverage
                brick_domains_new, new_brick_list, brick_list_spans, tD, bD, min_data_bound, max_data_bound = self.inspect_bricks(
                    self.cov_pth, self._guid, 'time')
                empty_cov = brick_list_spans is None  # If None, there are no brick files --> no timesteps, empty coverage!
                if not empty_cov:
                    bls = [s.value for s in brick_list_spans]
                    maxes = [sum(b[3]) for b in new_brick_list.values()]
                    tempcov.insert_timesteps(sum(maxes))

                    # Replace metadata in the Master file
                    pl.master_manager.brick_domains = brick_domains_new
                    pl.master_manager.brick_list = new_brick_list

                    # Repair ExternalLinks to brick files
                    f = h5py.File(pl.master_manager.file_path, 'a')
                    for param_name in pdict.keys():
                        del f[param_name]
                        f.create_group(param_name)
                        for brick in bls:
                            link_path = '/{0}/{1}'.format(param_name, brick[0])
                            brick_file_name = '{0}.hdf5'.format(brick[0])
                            brick_rel_path = os.path.join(
                                pl.parameter_metadata[param_name].root_dir.
                                replace(tempcov.persistence_dir,
                                        '.'), brick_file_name)
                            log.debug('link_path: %s', link_path)
                            log.debug('brick_rel_path: %s', brick_rel_path)
                            pl.master_manager.add_external_link(
                                link_path, brick_rel_path, brick[0])
                    # Close the master file handle once all external links are rebuilt
                    f.close()

                pl.flush_values()
                pl.flush()
                tempcov.close()

                # Remove 'rtree' dataset from Master file if it already exists (post domain expansion)
                # to make way for reconstruction
                f = h5py.File(pl.master_manager.file_path, 'a')
                if 'rtree' in f.keys():
                    del f['rtree']
                f.close()

                # Reconstruct 'rtree' dataset
                # Open temporary Coverage and PersistenceLayer objects
                fixed_cov = AbstractCoverage.load(tempcov.persistence_dir,
                                                  mode='a')
                pl_fixed = fixed_cov._persistence_layer

                # Call update_rtree for each brick using PersistenceLayer builtin
                brick_count = 0

                if not empty_cov:
                    for brick in bls:
                        rtree_extents, brick_extents, brick_active_size = pl_fixed.calculate_extents(
                            brick[1][1], bD, tD)
                        pl_fixed.master_manager.update_rtree(brick_count,
                                                             rtree_extents,
                                                             obj=brick[0])
                        brick_count += 1

                # Update parameter_bounds property based on each parameter's brick data using deep inspection
                valid_bounds_types = [
                    'BooleanType', 'ConstantType', 'QuantityType',
                    'ConstantRangeType'
                ]

                if not empty_cov:
                    for param in pdict.keys():
                        if pdict.get_context(
                                param
                        ).param_type.__class__.__name__ in valid_bounds_types:
                            brick_domains_new, new_brick_list, brick_list_spans, tD, bD, min_data_bound, max_data_bound = self.inspect_bricks(
                                self.cov_pth, self._guid, param)
                            # Update the metadata
                            pl_fixed.update_parameter_bounds(
                                param, [min_data_bound, max_data_bound])
                pl_fixed.flush()
                fixed_cov.close()

                # Create backup copy of original Master and Parameter files
                if backup:
                    import datetime
                    orig_master_file = os.path.join(
                        self.cov_pth, '{0}_master.hdf5'.format(self._guid))

                    # Generate the timestamp
                    tstamp_format = '%Y%m%d%H%M%S'
                    tstamp = datetime.datetime.now().strftime(tstamp_format)

                    backup_master_file = os.path.join(
                        self.cov_pth,
                        '{0}_master.{1}.hdf5'.format(self._guid, tstamp))

                    shutil.copy2(orig_master_file, backup_master_file)

                    for param in pdict.keys():
                        param_orig = os.path.join(orig_dir, param,
                                                  '{0}.hdf5'.format(param))
                        param_backup = os.path.join(
                            orig_dir, param,
                            '{0}.{1}.hdf5'.format(param, tstamp))
                        shutil.copy2(param_orig, param_backup)

                # Copy Master and Parameter metadata files back to original/broken coverage (cov_pth) location
                if copy_over:
                    shutil.copy2(
                        os.path.join(tempcov.persistence_dir,
                                     '{0}_master.hdf5'.format(self._guid)),
                        os.path.join(self.cov_pth,
                                     '{0}_master.hdf5'.format(self._guid)))
                    for param in pdict.keys():
                        shutil.copy2(
                            os.path.join(temp_dir, param,
                                         '{0}.hdf5'.format(param)),
                            os.path.join(orig_dir, param,
                                         '{0}.hdf5'.format(param)))

                # Reanalyze the repaired coverage
                self._ar = self._do_analysis(analyze_bricks=True)

                # Verify repair worked, clean up if not
                if self._ar.is_corrupt:
                    # If the files were backed up then revert
                    if backup:
                        # Remove backed up files and clean up the repair attempt
                        log.info(
                            'Repair attempt failed.  Reverting to pre-repair state.'
                        )
                        # Use backup copy to replace post-repair file.
                        shutil.copy2(backup_master_file, orig_master_file)
                        # Delete the backup
                        os.remove(backup_master_file)

                        # Iterate over parameters and revert to pre-repair state
                        for param in pdict.keys():
                            param_orig = os.path.join(orig_dir, param,
                                                      '{0}.hdf5'.format(param))
                            param_backup = os.path.join(
                                orig_dir, param,
                                '{0}.{1}.hdf5'.format(param, tstamp))
                            # Use backup copy to replace post-repair file.
                            shutil.copy2(param_backup, param_orig)
                            # Delete the backup
                            os.remove(param_backup)

                    raise ValueError(
                        'Coverage repair failed! Revert to stored backup version, if possible.'
                    )

                # Remove temporary coverage
                if not keep_temp:
                    shutil.rmtree(tempcov_dir)
                else:
                    return tempcov_dir
        else:
            log.info('Coverage is not corrupt, nothing to repair!')
Example #14
    def test_coverage_recovery(self):
        # Create the coverage
        dp_id, stream_id, route, stream_def_id, dataset_id = self.load_data_product()
        self.populate_dataset(dataset_id, 36)
        dset = self.dataset_management.read_dataset(dataset_id)
        dprod = self.dpsc_cli.read_data_product(dp_id)
        cov = DatasetManagementService._get_simplex_coverage(dataset_id)
        cov_pth = cov.persistence_dir
        cov.close()

        num_params = len(cov.list_parameters())
        num_bricks = 8
        total = num_params + num_bricks + 1

        # Analyze the valid coverage
        dr = CoverageDoctor(cov_pth, dprod, dset)

        dr_result = dr.analyze()

        # TODO: Turn these into meaningful Asserts
        self.assertEqual(len(dr_result.get_brick_corruptions()), 0)
        self.assertEqual(len(dr_result.get_brick_size_ratios()), num_bricks)
        self.assertEqual(len(dr_result.get_corruptions()), 0)
        self.assertEqual(len(dr_result.get_master_corruption()), 0)
        self.assertEqual(len(dr_result.get_param_corruptions()), 0)
        self.assertEqual(len(dr_result.get_param_size_ratios()), num_params)
        self.assertEqual(len(dr_result.get_master_size_ratio()), 1)
        self.assertEqual(len(dr_result.get_size_ratios()), total)
        self.assertEqual(dr_result.master_status[1], 'NORMAL')

        self.assertFalse(dr_result.is_corrupt)
        self.assertEqual(dr_result.param_file_count, num_params)
        self.assertEqual(dr_result.brick_file_count, num_bricks)
        self.assertEqual(dr_result.total_file_count, total)

        # Get original values (mock)
        orig_cov = AbstractCoverage.load(cov_pth)
        time_vals_orig = orig_cov.get_time_values()
        orig_cov.close()

        # Corrupt the Master File
        fo = open(cov._persistence_layer.master_manager.file_path, "wb")
        fo.write('Junk')
        fo.close()
        # Corrupt the lon Parameter file
        fo = open(cov._persistence_layer.parameter_metadata['lon'].file_path,
                  "wb")
        fo.write('Junk')
        fo.close()

        corrupt_res = dr.analyze(reanalyze=True)
        self.assertTrue(corrupt_res.is_corrupt)

        # Repair the metadata files
        dr.repair(reanalyze=True)

        fixed_res = dr.analyze(reanalyze=True)
        self.assertFalse(fixed_res.is_corrupt)

        fixed_cov = AbstractCoverage.load(cov_pth)
        self.assertIsInstance(fixed_cov, AbstractCoverage)

        time_vals_fixed = fixed_cov.get_time_values()
        fixed_cov.close()
        self.assertTrue(np.array_equiv(time_vals_orig, time_vals_fixed))
    def get_dataset_xml(self, coverage_path, product_id, product_name='', available_fields=None):
        #http://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html
        result = ''
        paths = os.path.split(coverage_path)
        cov = AbstractCoverage.load(coverage_path)
        doc = xml.dom.minidom.Document()
        
        #erd_type_map = {'d':'double', 'f':"float", 'h':'short', 'i':'int', 'l':'int', 'q':'int', 'b':'byte', 'b':'char', 'S':'String'} 
        
        #Get lists of variables with unique sets of dimensions.
        #Datasets can only have variables with the same sets of dimensions

        if not cov.list_parameters():
            raise BadRequest('Attempting to register an empty dataset. The coverage (%s) has no definition.\n%s' %(coverage_path, cov))

        datasets = {}
        for key in cov.list_parameters():
            pc = cov.get_parameter_context(key)
            #if getattr(pc, 'visible', None):
            #    continue
            if available_fields and pc.name not in available_fields:
                continue
            #if not isinstance(pc.param_type, QuantityType):
            #    continue

            param = cov.get_parameter(key)
            dims = (cov.temporal_parameter_name,)
            if len(param.shape) == 2:
                dims = (cov.temporal_parameter_name, cov.spatial_domain.shape.name)

            if not dims in datasets.keys():
                datasets[dims] = []
            

            datasets[dims].append(key)
        

        index = 0
        if not datasets:
            raise BadRequest('Attempting to register a dimensionless dataset. The coverage (%s) has no dimension(s).\n%s' %( coverage_path, cov))
        
        for dims, vars in datasets.iteritems():
            erd_name_map = self.get_errdap_name_map(vars) 
            
            if len(vars)==1:
                raise BadRequest('A dataset needs a proper range, not just the temporal dimension. %s\n%s' %( coverage_path, cov))

            if not (len(dims) == 1 and dims[0] == vars[0]):
                dataset_element = doc.createElement('dataset')
                dataset_element.setAttribute('type', 'EDDGridFromDap')
                dataset_element.setAttribute('datasetID', product_id)
                dataset_element.setAttribute('active', 'True')

                source_element = doc.createElement('sourceUrl')
                text_node = doc.createTextNode(self.pydap_url + paths[1])
                source_element.appendChild(text_node)
                dataset_element.appendChild(source_element)

                reload_element = doc.createElement('reloadEveryNMinutes')
                if self.CFG.get_safe('server.erddap.dataset_caching',True):
                    text_node = doc.createTextNode('1440')
                else:
                    text_node = doc.createTextNode('5')
                reload_element.appendChild(text_node)
                dataset_element.appendChild(reload_element)

                if self.CFG.get_safe('server.erddap.dataset_caching',True):
                    refresh_interval = self.CFG.get_safe('server.erddap.refresh_interval', 30000)
                    update_element = doc.createElement('updateEveryNMillis')
                    text_node = doc.createTextNode(str(refresh_interval))
                    update_element.appendChild(text_node)
                    dataset_element.appendChild(update_element)
                

                add_attributes_element = doc.createElement('addAttributes')

                atts = {}
                atts['title'] = product_name or urllib.unquote(cov.name)
                atts['infoUrl'] = self.pydap_url + paths[1]
                atts['institution'] = 'OOI'
                atts['Conventions'] = "COARDS, CF-1.6, Unidata Dataset Discovery v1.0"
                atts['license'] = '[standard]'
                atts['summary'] = cov.name
                atts['cdm_data_type'] = 'Grid'
                atts['standard_name_vocabulary'] = 'CF-12'
                
                for key, val in atts.iteritems():
                    att_element = doc.createElement('att')
                    att_element.setAttribute('name', key)
                    text_node = doc.createTextNode(val)
                    att_element.appendChild(text_node)
                    add_attributes_element.appendChild(att_element)

                if len(add_attributes_element.childNodes) > 0:
                    dataset_element.appendChild(add_attributes_element)

                for var_name in vars:
                    var = cov.get_parameter_context(var_name)
                    
                    units = "unknown"
                    if hasattr(var,'uom') and var.uom:
                        units = var.uom

                    #if len(param.shape) >=1 and not param.is_coordinate: #dataVariable
                    data_element = doc.createElement('dataVariable')
                    source_name_element = doc.createElement('sourceName')
                    text_node = doc.createTextNode(var.name)
                    source_name_element.appendChild(text_node)
                    data_element.appendChild(source_name_element)

                    destination_name_element = doc.createElement('destinationName')
                    text_node = doc.createTextNode(erd_name_map[var.name])
                    destination_name_element.appendChild(text_node)
                    data_element.appendChild(destination_name_element)
                    
                    add_attributes_element = doc.createElement('addAttributes')
                    if var.ATTRS is not None:
                        for key in var.ATTRS:
                            if not hasattr(var,key):
                                continue
                            val = getattr(var,key)
                            if not val:
                                val = ''
                            att_element = doc.createElement('att')
                            att_element.setAttribute('name', key)
                            text_node = doc.createTextNode(val)
                            att_element.appendChild(text_node)
                            add_attributes_element.appendChild(att_element)

                    att_element = doc.createElement('att')
                    att_element.setAttribute('name', 'ioos_category')
                    text_node = doc.createTextNode(self.get_ioos_category(var.name, units))
                    att_element.appendChild(text_node)
                    add_attributes_element.appendChild(att_element)

                    att_element = doc.createElement('att')
                    att_element.setAttribute('name', 'long_name')
                    long_name = ""
                    if hasattr(var,'display_name') and var.display_name is not None:
                        long_name = var.display_name
                        text_node = doc.createTextNode(long_name)
                        att_element.appendChild(text_node)
                        add_attributes_element.appendChild(att_element)
                    
                    att_element = doc.createElement('att')
                    standard_name = ""
                    if hasattr(var,'standard_name') and var.standard_name is not None:
                        standard_name = var.standard_name
                        att_element.setAttribute('name', 'standard_name')
                        text_node = doc.createTextNode(standard_name)
                        att_element.appendChild(text_node)
                        add_attributes_element.appendChild(att_element)
                    

                    att_element = doc.createElement('att')
                    att_element.setAttribute('name', 'units')
                    text_node = doc.createTextNode(units)
                    att_element.appendChild(text_node)
                    add_attributes_element.appendChild(att_element)

                    data_element.appendChild(add_attributes_element)
                    dataset_element.appendChild(data_element)

                index += 1
                #bug with prettyxml
                #http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
                #result += dataset_element.toprettyxml() + '\n'
                result += dataset_element.toxml() + '\n'

        cov.close()

        return result