Exemple #1
0
    def __init__(self, filename, gdal_dataset, metadata, quartile=0, *args, **kwargs):
        """Create a VRT with GCPs and GCMD metadata for an ASCAT product.

        Parameters
        ----------
        filename : str
            Path to the input file.
        gdal_dataset : gdal.Dataset
            GDAL dataset opened from filename.
        metadata : dict
            GDAL metadata dictionary (NC_GLOBAL#... attributes).
        quartile : int, optional
            Quartile index forwarded to the parent mapper.

        Raises
        ------
        WrongMapperError
            If NC_GLOBAL#source does not identify an ASCAT product.
        """
        # Idiom fix: use "not in" instead of "not x in y"
        if 'ascat' not in metadata.get('NC_GLOBAL#source', '').lower():
            raise WrongMapperError

        super(Mapper, self).__init__(filename, gdal_dataset, metadata, quartile=quartile, *args, **kwargs)

        # Re-read geolocation from the raster bands and normalize the
        # longitudes before attaching GCPs.
        lat = self.dataset.GetRasterBand(self._latitude_band_number(gdal_dataset)).ReadAsArray()
        lon = self.dataset.GetRasterBand(self._longitude_band_number(gdal_dataset)).ReadAsArray()
        lon = ScatterometryMapper.shift_longitudes(lon)
        self.set_gcps(lon, lat, gdal_dataset)

        # Get dictionary describing the instrument and platform according to
        # the GCMD keywords
        ii = pti.get_gcmd_instrument('ascat')
        pp = pti.get_gcmd_platform(metadata['NC_GLOBAL#source'].split(' ')[0])
        # The provider is looked up from the leading alphabetic token of the
        # institution attribute (e.g. "KNMI" from "KNMI/...").
        provider = pti.get_gcmd_provider(re.split('[^a-zA-Z]',
            metadata['NC_GLOBAL#institution'])[0])

        # TODO: Validate that the found instrument and platform are indeed what
        # we want....

        self.dataset.SetMetadataItem('instrument', json.dumps(ii))
        self.dataset.SetMetadataItem('platform', json.dumps(pp))
        self.dataset.SetMetadataItem('data_center', json.dumps(provider))
        self.dataset.SetMetadataItem('entry_title', metadata['NC_GLOBAL#title'])
        self.dataset.SetMetadataItem('ISO_topic_category',
                json.dumps(pti.get_iso19115_topic_category('Oceans')))
Exemple #2
0
    def __init__(self,
                 filename,
                 gdal_dataset,
                 gdal_metadata,
                 date=None,
                 ds=None,
                 bands=None,
                 cachedir=None,
                 *args,
                 **kwargs):
        """Create a VRT from a model netCDF file and set GCMD metadata items.

        Parameters
        ----------
        filename : str
            Path to the netCDF file.
        gdal_dataset : gdal.Dataset
            GDAL dataset opened from filename.
        gdal_metadata : dict
            GDAL metadata dictionary.
        date : datetime, optional
            Timestamp override; otherwise derived from the filename.

        Raises
        ------
        WrongMapperError
            If the file has no 'projection_3' variable, i.e. it is not the
            expected product.
        """
        self.test_mapper(filename)
        timestamp = date if date else self.get_date(filename)
        # NOTE(review): the ds argument is unconditionally shadowed here -
        # the dataset is always re-opened from filename.
        ds = Dataset(filename)
        try:
            self.srcDSProjection = NSR(ds.variables['projection_3'].proj4 +
                                       ' +to_meter=0.0174532925199 +wktext')
        except KeyError:
            raise WrongMapperError

        self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds,
                        bands, cachedir)

        self.dataset.SetMetadataItem(
            'instrument', json.dumps(pti.get_gcmd_instrument('Computer')))
        self.dataset.SetMetadataItem(
            'platform', json.dumps(pti.get_gcmd_platform('MODELS')))
        self.dataset.SetMetadataItem(
            'Data Center', json.dumps(pti.get_gcmd_provider('NO/MET')))
        self.dataset.SetMetadataItem('Entry Title', str(ds.getncattr('title')))
        # BUG FIX: this item was previously written under 'Entry Title',
        # silently overwriting the title set on the line above with the ISO
        # topic category JSON.
        self.dataset.SetMetadataItem(
            'ISO_topic_category',
            json.dumps(pti.get_iso19115_topic_category('Oceans')))
        self.dataset.SetMetadataItem(
            'gcmd_location', json.dumps(pti.get_gcmd_location('sea surface')))
Exemple #3
0
    def set_gcmd_dif_keywords(self):
        """Set GCMD DIF metadata items on self.dataset.

        Existing entry_title, data_center and ISO_topic_category values are
        preserved; instrument, platform and time coverage are always
        (re)written from self.ds and existing acquisition-time items.
        """
        if not self.dataset.GetMetadataItem('entry_title'):
            self.dataset.SetMetadataItem('entry_title', self.input_filename)
        if not self.dataset.GetMetadataItem('data_center'):
            provider = pti.get_gcmd_provider('NO/MET')
            self.dataset.SetMetadataItem('data_center', json.dumps(provider))
        if not self.dataset.GetMetadataItem('ISO_topic_category'):
            category = pti.get_iso19115_topic_category(
                'Imagery/Base Maps/Earth Cover')
            # Stored as the bare category string, not JSON
            self.dataset.SetMetadataItem('ISO_topic_category',
                                         category['iso_topic_category'])

        instrument = pti.get_gcmd_instrument('sar')
        # Any mission other than S1A is assumed to be Sentinel-1B
        if self.ds.MISSION_ID == 'S1A':
            platform = pti.get_gcmd_platform('sentinel-1a')
        else:
            platform = pti.get_gcmd_platform('sentinel-1b')
        self.dataset.SetMetadataItem('instrument', json.dumps(instrument))
        self.dataset.SetMetadataItem('platform', json.dumps(platform))

        # Copy acquisition times into the standard coverage items
        start = self.dataset.GetMetadataItem('ACQUISITION_START_TIME')
        stop = self.dataset.GetMetadataItem('ACQUISITION_STOP_TIME')
        self.dataset.SetMetadataItem('time_coverage_start', start)
        self.dataset.SetMetadataItem('time_coverage_end', stop)
    def __init__(self, filename, gdal_dataset, metadata, quartile=0, *args, **kwargs):
        """Create a VRT with GCPs and GCMD metadata for a QuikSCAT product.

        Parameters
        ----------
        filename : str
            Path to the input file.
        gdal_dataset : gdal.Dataset
            GDAL dataset opened from filename.
        metadata : dict
            GDAL metadata dictionary (NC_GLOBAL#... attributes).
        quartile : int, optional
            Quartile index forwarded to the parent mapper.

        Raises
        ------
        WrongMapperError
            If NC_GLOBAL#source does not identify a QuikSCAT product.
        """
        # Idiom fix: use "not in" instead of "not x in y"
        if 'quikscat' not in metadata.get('NC_GLOBAL#source', '').lower():
            raise WrongMapperError

        super(Mapper, self).__init__(filename, gdal_dataset, metadata, quartile=quartile, *args, **kwargs)

        # Re-read geolocation from the raster bands and normalize the
        # longitudes before attaching GCPs.
        lat = self.dataset.GetRasterBand(self._latitude_band_number(gdal_dataset)).ReadAsArray()
        lon = self.dataset.GetRasterBand(self._longitude_band_number(gdal_dataset)).ReadAsArray()
        lon = ScatterometryMapper.shift_longitudes(lon)
        self.set_gcps(lon, lat, gdal_dataset)

        # Get dictionary describing the instrument and platform according to
        # the GCMD keywords
        mm = pti.get_gcmd_instrument('seawinds')
        ee = pti.get_gcmd_platform('quikscat')
        provider = metadata['NC_GLOBAL#institution']
        # JPL is not a valid GCMD provider keyword on its own - map it to
        # the full NASA/JPL/QUIKSCAT provider
        if provider.lower() == 'jpl':
            provider = 'NASA/JPL/QUIKSCAT'
        provider = pti.get_gcmd_provider(provider)

        self.dataset.SetMetadataItem('instrument', json.dumps(mm))
        self.dataset.SetMetadataItem('platform', json.dumps(ee))
        self.dataset.SetMetadataItem('data_center', json.dumps(provider))
        self.dataset.SetMetadataItem('entry_title', metadata['NC_GLOBAL#title'])
        self.dataset.SetMetadataItem('ISO_topic_category',
                json.dumps(pti.get_iso19115_topic_category('Oceans')))
Exemple #5
0
    def __init__(self, filename, gdal_dataset, metadata, quartile=0, *args, **kwargs):
        """Create a VRT with GCPs and GCMD metadata for an ASCAT product.

        Parameters
        ----------
        filename : str
            Path to the input file.
        gdal_dataset : gdal.Dataset
            GDAL dataset opened from filename.
        metadata : dict
            GDAL metadata dictionary (NC_GLOBAL#... attributes).
        quartile : int, optional
            Quartile index forwarded to the parent mapper.

        Raises
        ------
        WrongMapperError
            If NC_GLOBAL#source does not identify an ASCAT product.
        """
        # Idiom fix: use "not in" instead of "not x in y"
        if 'ascat' not in metadata.get('NC_GLOBAL#source', '').lower():
            raise WrongMapperError

        super(Mapper, self).__init__(filename, gdal_dataset, metadata, quartile=quartile, *args, **kwargs)

        # Re-read geolocation from the raster bands and normalize the
        # longitudes before attaching GCPs.
        lat = self.dataset.GetRasterBand(self._latitude_band_number(gdal_dataset)).ReadAsArray()
        lon = self.dataset.GetRasterBand(self._longitude_band_number(gdal_dataset)).ReadAsArray()
        lon = ScatterometryMapper.shift_longitudes(lon)
        self.set_gcps(lon, lat, gdal_dataset)

        # Get dictionary describing the instrument and platform according to
        # the GCMD keywords
        ii = pti.get_gcmd_instrument('ascat')
        pp = pti.get_gcmd_platform(metadata['NC_GLOBAL#source'].split(' ')[0])
        # The provider is looked up from the leading alphabetic token of the
        # institution attribute (e.g. "KNMI" from "KNMI/...").
        provider = pti.get_gcmd_provider(re.split('[^a-zA-Z]',
            metadata['NC_GLOBAL#institution'])[0])

        # TODO: Validate that the found instrument and platform are indeed what
        # we want....

        self.dataset.SetMetadataItem('instrument', json.dumps(ii))
        self.dataset.SetMetadataItem('platform', json.dumps(pp))
        self.dataset.SetMetadataItem('data_center', json.dumps(provider))
        self.dataset.SetMetadataItem('entry_title', metadata['NC_GLOBAL#title'])
        self.dataset.SetMetadataItem('ISO_topic_category',
                json.dumps(pti.get_iso19115_topic_category('Oceans')))
    def __init__(self, filename, gdal_dataset, metadata, quartile=0, *args, **kwargs):
        """Create a VRT with GCPs and GCMD metadata for a QuikSCAT product.

        Parameters
        ----------
        filename : str
            Path to the input file.
        gdal_dataset : gdal.Dataset
            GDAL dataset opened from filename.
        metadata : dict
            GDAL metadata dictionary (NC_GLOBAL#... attributes).
        quartile : int, optional
            Quartile index forwarded to the parent mapper.

        Raises
        ------
        WrongMapperError
            If NC_GLOBAL#source does not identify a QuikSCAT product.
        """
        # Idiom fix: use "not in" instead of "not x in y"
        if 'quikscat' not in metadata.get('NC_GLOBAL#source', '').lower():
            raise WrongMapperError

        super(Mapper, self).__init__(filename, gdal_dataset, metadata, quartile=quartile, *args, **kwargs)

        # Re-read geolocation from the raster bands and normalize the
        # longitudes before attaching GCPs.
        lat = self.dataset.GetRasterBand(self._latitude_band_number(gdal_dataset)).ReadAsArray()
        lon = self.dataset.GetRasterBand(self._longitude_band_number(gdal_dataset)).ReadAsArray()
        lon = ScatterometryMapper.shift_longitudes(lon)
        self.set_gcps(lon, lat, gdal_dataset)

        # Get dictionary describing the instrument and platform according to
        # the GCMD keywords
        mm = pti.get_gcmd_instrument('seawinds')
        ee = pti.get_gcmd_platform('quikscat')
        provider = metadata['NC_GLOBAL#institution']
        # JPL is not a valid GCMD provider keyword on its own - map it to
        # the full NASA/JPL/QUIKSCAT provider
        if provider.lower() == 'jpl':
            provider = 'NASA/JPL/QUIKSCAT'
        provider = pti.get_gcmd_provider(provider)

        self.dataset.SetMetadataItem('instrument', json.dumps(mm))
        self.dataset.SetMetadataItem('platform', json.dumps(ee))
        self.dataset.SetMetadataItem('data_center', json.dumps(provider))
        self.dataset.SetMetadataItem('entry_title', metadata['NC_GLOBAL#title'])
        self.dataset.SetMetadataItem('ISO_topic_category',
                json.dumps(pti.get_iso19115_topic_category('Oceans')))
    def _init_empty(self, manifest_data, annotation_data):
        """ Fast initialization from minimum of information

        Parameters
        ----------
        manifest_data : dict
            data from the manifest file (time_coverage_start, etc)
        annotation_data : dict
            data from annotation file (longitude, latitude, x_size, etc)

        Note
        ----
            Calls VRT.__init__, Adds GCPs, metadata
        """
        # Create an empty VRT of the annotated raster size
        super(Mapper, self).__init__(annotation_data['x_size'], annotation_data['y_size'])
        # Attach GCPs built from the (corrected) geolocation grid
        gcps = Mapper.create_gcps(annotation_data['longitude'],
                                  annotation_data['latitude'],
                                  annotation_data['height'],
                                  annotation_data['pixel'],
                                  annotation_data['line'])
        self.dataset.SetGCPs(gcps, NSR().wkt)
        # Assemble the metadata items, then write them in one pass
        platform_name = (manifest_data['platform_family_name'] +
                         manifest_data['platform_number'])
        items = {
            'time_coverage_start': manifest_data['time_coverage_start'],
            'time_coverage_end': manifest_data['time_coverage_end'],
            'platform': json.dumps(pti.get_gcmd_platform(platform_name)),
            'instrument': json.dumps(pti.get_gcmd_instrument('SAR')),
            'entry_title': platform_name + ' SAR',
            'data_center': json.dumps(pti.get_gcmd_provider('ESA/EO')),
            'iso_topic_category': json.dumps(
                pti.get_iso19115_topic_category('Oceans')),
            'summary': platform_name + ' SAR data',
        }
        for key, value in items.items():
            self.dataset.SetMetadataItem(key, value)
        self.dataset.FlushCache()
    def get_or_create(self, uri, force):
        """Get or create a Dataset for the given URI.

        Parameters
        ----------
        uri : str
            URI to a file or stream openable by Nansat.
        force : bool
            If True and the URI is already ingested, delete the existing
            dataset and re-ingest it.

        Returns
        -------
        tuple
            (dataset, created) - the Dataset instance and the boolean
            created-flag from Dataset.objects.get_or_create.
        """
        # Validate uri - this should raise an exception if the uri doesn't
        # point to a valid file or stream
        validate_uri(uri)
        # Several datasets can refer to the same uri (e.g., scatterometers and svp drifters), so we
        # need to pass uri_filter_args
        uris = DatasetURI.objects.filter(uri=uri)
        # If the ingested uri is already in the database and not <force> ingestion then stop
        if uris.exists() and not force:
            return uris[0].dataset, False
        elif uris.exists() and force:
            uris[0].dataset.delete()
        # Open file with Nansat
        n = Nansat(nansat_filename(uri))
        # get metadata from Nansat and get objects from vocabularies
        n_metadata = n.get_metadata()
        # set compulsory metadata (source)
        platform, _ = Platform.objects.get_or_create(
            json.loads(n_metadata['platform']))
        instrument, _ = Instrument.objects.get_or_create(
            json.loads(n_metadata['instrument']))
        specs = n_metadata.get('specs', '')
        source, _ = Source.objects.get_or_create(platform=platform,
                                                 instrument=instrument,
                                                 specs=specs)
        footprint = Polygon(list(zip(*n.get_border())))
        geolocation = GeographicLocation.objects.get_or_create(
            geometry=footprint)[0]
        data_center = DataCenter.objects.get_or_create(
            json.loads(n_metadata['Data Center']))[0]
        iso_category = ISOTopicCategory.objects.get_or_create(
            pti.get_iso19115_topic_category('Oceans'))[0]
        location = Location.objects.get_or_create(
            json.loads(n_metadata['gcmd_location']))[0]
        # create dataset
        ds, created = Dataset.objects.get_or_create(
            time_coverage_start=make_aware(n.time_coverage_start),
            time_coverage_end=make_aware(
                n.time_coverage_start +
                timedelta(hours=23, minutes=59, seconds=59)),
            source=source,
            geographic_location=geolocation,
            ISO_topic_category=iso_category,
            data_center=data_center,
            summary='',
            gcmd_location=location,
            access_constraints='',
            # BUG FIX: a lambda was passed here, which would have stored the
            # function object instead of a generated id string.
            entry_id='NERSC_' + str(uuid.uuid4()))

        DatasetURI.objects.get_or_create(
            name=FILE_SERVICE_NAME,
            service=LOCAL_FILE_SERVICE,
            uri=uri,
            dataset=ds)
        return ds, created
    def _init_empty(self, manifest_data, annotation_data):
        """ Fast initialization from minimum of information

        Parameters
        ----------
        manifest_data : dict
            data from the manifest file (time_coverage_start, etc)
        annotation_data : dict
            data from annotation file (longitude, latitude, x_size, etc)

        Note
        ----
            Calls VRT.__init__, Adds GCPs, metadata
        """
        # Empty VRT of the annotated raster size
        super(Mapper, self).__init__(annotation_data['x_size'],
                                     annotation_data['y_size'])
        # GCPs from the (corrected) geolocation grid
        gcps = Mapper.create_gcps(annotation_data['longitude'],
                                  annotation_data['latitude'],
                                  annotation_data['height'],
                                  annotation_data['pixel'],
                                  annotation_data['line'])
        self.dataset.SetGCPs(gcps, NSR().wkt)
        # Write the metadata items; bind the setter once for brevity
        set_item = self.dataset.SetMetadataItem
        set_item('time_coverage_start', manifest_data['time_coverage_start'])
        set_item('time_coverage_end', manifest_data['time_coverage_end'])
        platform_name = (manifest_data['platform_family_name'] +
                         manifest_data['platform_number'])
        set_item('platform', json.dumps(pti.get_gcmd_platform(platform_name)))
        set_item('instrument', json.dumps(pti.get_gcmd_instrument('SAR')))
        set_item('entry_title', platform_name + ' SAR')
        set_item('data_center', json.dumps(pti.get_gcmd_provider('ESA/EO')))
        set_item('iso_topic_category',
                 json.dumps(pti.get_iso19115_topic_category('Oceans')))
        set_item('summary', platform_name + ' SAR data')
        self.dataset.FlushCache()
Exemple #10
0
    def __init__(self, filename, gdal_dataset, gdal_metadata, date=None,
                 ds=None, bands=None, cachedir=None, *args, **kwargs):
        """Create a VRT from the netCDF file and set GCMD metadata items.

        Parameters
        ----------
        filename : str
            Path to the netCDF file.
        gdal_dataset : gdal.Dataset
            GDAL dataset opened from filename.
        gdal_metadata : dict
            GDAL metadata dictionary.
        date : datetime, optional
            Timestamp override; otherwise derived from the filename.
        """
        self.test_mapper(filename)
        timestamp = date if date else self.get_date(filename)
        # NOTE(review): the ds argument is unconditionally shadowed here -
        # the dataset is always re-opened from filename.
        ds = Dataset(filename)
        self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds, bands, cachedir)
        self.dataset.SetMetadataItem('entry_title', str(ds.getncattr('title')))
        self.dataset.SetMetadataItem('data_center', json.dumps(pti.get_gcmd_provider('UK/MOD/MET')))
        self.dataset.SetMetadataItem('ISO_topic_category',
                pti.get_iso19115_topic_category('oceans')['iso_topic_category'])
        self.dataset.SetMetadataItem('gcmd_location', json.dumps(pti.get_gcmd_location('sea surface')))

        # A single platform/instrument pair does not apply here, so the full
        # list is stored instead. (Removed commented-out amsr-e/aqua code.)
        self.dataset.SetMetadataItem('platform/instrument',
                json.dumps(self.get_platform_and_instrument_list(ds)))
Exemple #11
0
    def __init__(self,
                 filename,
                 gdal_dataset,
                 gdal_metadata,
                 date=None,
                 ds=None,
                 bands=None,
                 cachedir=None,
                 *args,
                 **kwargs):
        """Create a VRT for a Sentinel-2 product and set GCMD metadata items.

        Parameters
        ----------
        filename : str
            Path to the input file.
        gdal_dataset : gdal.Dataset
            GDAL dataset opened from filename.
        gdal_metadata : dict
            GDAL metadata dictionary.
        date : datetime, optional
            Timestamp override; otherwise derived from the filename.
        """
        self.test_mapper(filename)
        timestamp = date if date else self.get_date(filename)

        self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds,
                        bands, cachedir)

        # Only fill in metadata items that are not already set
        mditem = 'entry_title'
        if not self.dataset.GetMetadataItem(mditem):
            try:
                self.dataset.SetMetadataItem(mditem,
                                             str(self.ds.getncattr('title')))
            except AttributeError:
                # No title attribute in the file - fall back to the filename
                self.dataset.SetMetadataItem(mditem, filename)
        mditem = 'data_center'
        if not self.dataset.GetMetadataItem(mditem):
            # Consistency fix: use mditem instead of repeating the literal key
            self.dataset.SetMetadataItem(
                mditem, json.dumps(pti.get_gcmd_provider('NO/MET')))
        mditem = 'ISO_topic_category'
        if not self.dataset.GetMetadataItem(mditem):
            self.dataset.SetMetadataItem(
                mditem,
                pti.get_iso19115_topic_category(
                    'Imagery/Base Maps/Earth Cover')['iso_topic_category'])

        mm = pti.get_gcmd_instrument('multi-spectral')
        ee = pti.get_gcmd_platform('sentinel-2')
        self.dataset.SetMetadataItem('instrument', json.dumps(mm))
        self.dataset.SetMetadataItem('platform', json.dumps(ee))
    def get_or_create(self,
                      uri,
                      n_points=10,
                      uri_filter_args=None,
                      uri_service_name=FILE_SERVICE_NAME,
                      uri_service_type=LOCAL_FILE_SERVICE,
                      *args,
                      **kwargs):
        """ Create dataset and corresponding metadata

        Parameters:
        ----------
            uri : str
                  URI to file or stream openable by Nansat
            n_points : int
                  Number of border points (default is 10)
            uri_filter_args : dict
                Extra DatasetURI filter arguments if several datasets can refer to the same URI
            uri_service_name : str
                name of the service which is used  ('dapService', 'fileService', 'http' or 'wms')
            uri_service_type : str
                type of the service which is used  ('OPENDAP', 'local', 'HTTPServer' or 'WMS')

        Returns:
        -------
            dataset and flag
        """
        if not uri_filter_args:
            uri_filter_args = {}

        # Validate uri - this should raise an exception if the uri doesn't point to a valid
        # file or stream
        validate_uri(uri)

        # Several datasets can refer to the same uri (e.g., scatterometers and svp drifters), so we
        # need to pass uri_filter_args
        uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
        if len(uris) > 0:
            return uris[0].dataset, False

        # Open file with Nansat
        n = Nansat(nansat_filename(uri), **kwargs)

        # get metadata from Nansat and get objects from vocabularies
        n_metadata = n.get_metadata()

        entry_id = n_metadata.get('entry_id', None)
        # set compulsory metadata (source)
        platform, _ = Platform.objects.get_or_create(
            json.loads(n_metadata['platform']))
        instrument, _ = Instrument.objects.get_or_create(
            json.loads(n_metadata['instrument']))
        specs = n_metadata.get('specs', '')
        source, _ = Source.objects.get_or_create(platform=platform,
                                                 instrument=instrument,
                                                 specs=specs)

        default_char_fields = {
            # Adding NERSC_ in front of the id violates the string representation of the uuid
            #'entry_id': lambda: 'NERSC_' + str(uuid.uuid4()),
            'entry_id': lambda: str(uuid.uuid4()),
            'entry_title': lambda: 'NONE',
            'summary': lambda: 'NONE',
        }

        # set optional CharField metadata from Nansat or from default_char_fields
        options = {}
        try:
            existing_ds = Dataset.objects.get(entry_id=entry_id)
        except Dataset.DoesNotExist:
            existing_ds = None
        for name in default_char_fields:
            if name not in n_metadata:
                warnings.warn('%s is not provided in Nansat metadata!' % name)
                # prevent overwriting of existing values by defaults
                if existing_ds:
                    options[name] = existing_ds.__getattribute__(name)
                else:
                    options[name] = default_char_fields[name]()
            else:
                options[name] = n_metadata[name]

        default_foreign_keys = {
            'gcmd_location': {
                'model': Location,
                'value': pti.get_gcmd_location('SEA SURFACE')
            },
            'data_center': {
                'model': DataCenter,
                'value': pti.get_gcmd_provider('NERSC')
            },
            'ISO_topic_category': {
                'model': ISOTopicCategory,
                'value': pti.get_iso19115_topic_category('Oceans')
            },
        }

        # set optional ForeignKey metadata from Nansat or from default_foreign_keys
        for name in default_foreign_keys:
            value = default_foreign_keys[name]['value']
            model = default_foreign_keys[name]['model']
            if name not in n_metadata:
                warnings.warn('%s is not provided in Nansat metadata!' % name)
            else:
                try:
                    value = json.loads(n_metadata[name])
                # BUG FIX: was a bare except, which also caught
                # KeyboardInterrupt/SystemExit; json.loads only raises
                # ValueError (JSONDecodeError) or TypeError.
                except (ValueError, TypeError):
                    warnings.warn(
                        '%s value of %s  metadata provided in Nansat is wrong!'
                        % (n_metadata[name], name))
            if existing_ds:
                options[name] = existing_ds.__getattribute__(name)
            else:
                options[name], _ = model.objects.get_or_create(value)

        # Find coverage to set number of points in the geolocation
        if len(n.vrt.dataset.GetGCPs()) > 0:
            n.reproject_gcps()
        geolocation = GeographicLocation.objects.get_or_create(
            geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

        # create dataset
        # - the get_or_create method should use get_or_create here as well,
        #   or its name should be changed - see issue #127
        ds, created = Dataset.objects.update_or_create(
            entry_id=options['entry_id'],
            defaults={
                'time_coverage_start': n.get_metadata('time_coverage_start'),
                'time_coverage_end': n.get_metadata('time_coverage_end'),
                'source': source,
                'geographic_location': geolocation,
                'gcmd_location': options["gcmd_location"],
                'ISO_topic_category': options["ISO_topic_category"],
                "data_center": options["data_center"],
                'entry_title': options["entry_title"],
                'summary': options["summary"]
            })

        # create parameter
        all_band_meta = n.bands()
        for band_id in range(1, len(all_band_meta) + 1):
            band_meta = all_band_meta[band_id]
            standard_name = band_meta.get('standard_name', None)
            short_name = band_meta.get('short_name', None)
            units = band_meta.get('units', None)
            # Skip coordinate bands and bands without a standard name
            if standard_name in ['latitude', 'longitude', None]:
                continue
            # Narrow down candidate parameters step by step
            params = Parameter.objects.filter(standard_name=standard_name)
            if params.count() > 1 and short_name is not None:
                params = params.filter(short_name=short_name)
            if params.count() > 1 and units is not None:
                params = params.filter(units=units)
            if params.count() >= 1:
                ds.parameters.add(params[0])

        # create dataset URI
        DatasetURI.objects.get_or_create(name=uri_service_name,
                                         service=uri_service_type,
                                         uri=uri,
                                         dataset=ds)

        return ds, created
 def test_get_iso19115_topic_category(self):
     """The ISO 19115 topic category lookup returns an OrderedDict."""
     result = pti.get_iso19115_topic_category('Oceans')
     self.assertIsInstance(result, collections.OrderedDict)
Exemple #14
0
 def get_iso_topic_category(self, raw_metadata):
     """Return the fixed 'Oceans' ISO 19115 topic category.

     raw_metadata is accepted for interface compatibility but is not used.
     """
     category = pti.get_iso19115_topic_category('Oceans')
     return category
    def _get_normalized_attributes(self, dataset_info, *args, **kwargs):
        """Gets dataset attributes using nansat

        Parameters
        ----------
        dataset_info : str
            URL or path to the dataset, openable by Nansat.

        Returns
        -------
        dict
            Normalized attributes (service, titles, time coverage, platform,
            instrument, provider, geometry, dataset parameters, ...).

        Raises
        ------
        ValueError
            If dataset_info points to an ftp resource.
        TypeError
            If the 'dataset_parameters' metadata is not a JSON list.
        """
        normalized_attributes = {}
        n_points = int(kwargs.get('n_points', 10))
        nansat_options = kwargs.get('nansat_options', {})
        # Pick the geospaas service from the URL scheme
        # (idiom fix: "x not in y" instead of "not x in y")
        url_scheme = urlparse(dataset_info).scheme
        if 'http' not in url_scheme and 'ftp' not in url_scheme:
            normalized_attributes['geospaas_service_name'] = FILE_SERVICE_NAME
            normalized_attributes['geospaas_service'] = LOCAL_FILE_SERVICE
        elif 'http' in url_scheme and 'ftp' not in url_scheme:
            normalized_attributes['geospaas_service_name'] = DAP_SERVICE_NAME
            normalized_attributes['geospaas_service'] = OPENDAP_SERVICE
        elif 'ftp' in url_scheme:
            raise ValueError(
                f"Can't ingest '{dataset_info}': nansat can't open remote ftp files"
            )

        # Open file with Nansat
        nansat_object = Nansat(nansat_filename(dataset_info),
                               log_level=self.LOGGER.getEffectiveLevel(),
                               **nansat_options)

        # get metadata from Nansat and get objects from vocabularies
        n_metadata = nansat_object.get_metadata()

        # set compulsory metadata (source)
        normalized_attributes['entry_title'] = n_metadata.get(
            'entry_title', 'NONE')
        normalized_attributes['summary'] = n_metadata.get('summary', 'NONE')
        # Time coverage is parsed and forced to UTC
        normalized_attributes['time_coverage_start'] = dateutil.parser.parse(
            n_metadata['time_coverage_start']).replace(tzinfo=tzutc())
        normalized_attributes['time_coverage_end'] = dateutil.parser.parse(
            n_metadata['time_coverage_end']).replace(tzinfo=tzutc())
        normalized_attributes['platform'] = json.loads(n_metadata['platform'])
        normalized_attributes['instrument'] = json.loads(
            n_metadata['instrument'])
        normalized_attributes['specs'] = n_metadata.get('specs', '')
        normalized_attributes['entry_id'] = n_metadata.get(
            'entry_id', 'NERSC_' + str(uuid.uuid4()))

        # set optional ForeignKey metadata from Nansat or from defaults
        normalized_attributes['gcmd_location'] = n_metadata.get(
            'gcmd_location', pti.get_gcmd_location('SEA SURFACE'))
        normalized_attributes['provider'] = pti.get_gcmd_provider(
            n_metadata.get('provider', 'NERSC'))
        normalized_attributes['iso_topic_category'] = n_metadata.get(
            'ISO_topic_category', pti.get_iso19115_topic_category('Oceans'))

        # Find coverage to set number of points in the geolocation
        if nansat_object.vrt.dataset.GetGCPs():
            nansat_object.reproject_gcps()
        normalized_attributes['location_geometry'] = GEOSGeometry(
            nansat_object.get_border_wkt(n_points=n_points), srid=4326)

        # dataset_parameters, when present, must be a JSON list
        json_dumped_dataset_parameters = n_metadata.get(
            'dataset_parameters', None)
        if json_dumped_dataset_parameters:
            json_loads_result = json.loads(json_dumped_dataset_parameters)
            if isinstance(json_loads_result, list):
                normalized_attributes['dataset_parameters'] = [
                    get_cf_or_wkv_standard_name(dataset_param)
                    for dataset_param in json_loads_result
                ]
            else:
                raise TypeError(
                    f"Can't ingest '{dataset_info}': the 'dataset_parameters' section of the "
                    "metadata returned by nansat is not a JSON list")
        else:
            normalized_attributes['dataset_parameters'] = []

        return normalized_attributes
Exemple #16
0
    def get_or_create(self,
                      uri,
                      n_points=10,
                      uri_filter_args=None,
                      *args,
                      **kwargs):
        ''' Create dataset and corresponding metadata

        Parameters:
        ----------
            uri : str
                  URI to file or stream openable by Nansat
            n_points : int
                  Number of border points (default is 10)
            uri_filter_args : dict
                Extra DatasetURI filter arguments if several datasets can refer to the same URI

        Returns:
        -------
            dataset and flag
        '''
        if not uri_filter_args:
            uri_filter_args = {}

        # Validate uri - this should raise an exception if the uri doesn't point to a valid
        # file or stream
        validate_uri(uri)

        # Several datasets can refer to the same uri (e.g., scatterometers and svp drifters), so we
        # need to pass uri_filter_args
        uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
        if len(uris) > 0:
            return uris[0].dataset, False

        # Open file with Nansat
        n = Nansat(nansat_filename(uri), **kwargs)

        # get metadata from Nansat and get objects from vocabularies
        n_metadata = n.get_metadata()

        # set compulsory metadata (source)
        platform, _ = Platform.objects.get_or_create(
            json.loads(n_metadata['platform']))
        instrument, _ = Instrument.objects.get_or_create(
            json.loads(n_metadata['instrument']))
        specs = n_metadata.get('specs', '')
        source, _ = Source.objects.get_or_create(platform=platform,
                                                 instrument=instrument,
                                                 specs=specs)

        default_char_fields = {
            'entry_id': lambda: 'NERSC_' + str(uuid.uuid4()),
            'entry_title': lambda: 'NONE',
            'summary': lambda: 'NONE',
        }

        # set optional CharField metadata from Nansat or from default_char_fields
        options = {}
        for name in default_char_fields:
            if name not in n_metadata:
                warnings.warn('%s is not provided in Nansat metadata!' % name)
                options[name] = default_char_fields[name]()
            else:
                options[name] = n_metadata[name]

        default_foreign_keys = {
            'gcmd_location': {
                'model': Location,
                'value': pti.get_gcmd_location('SEA SURFACE')
            },
            'data_center': {
                'model': DataCenter,
                'value': pti.get_gcmd_provider('NERSC')
            },
            'ISO_topic_category': {
                'model': ISOTopicCategory,
                'value': pti.get_iso19115_topic_category('Oceans')
            },
        }

        # set optional ForeignKey metadata from Nansat or from default_foreign_keys
        for name in default_foreign_keys:
            value = default_foreign_keys[name]['value']
            model = default_foreign_keys[name]['model']
            if name not in n_metadata:
                warnings.warn('%s is not provided in Nansat metadata!' % name)
            else:
                try:
                    value = json.loads(n_metadata[name])
                # BUG FIX: was a bare except, which also caught
                # KeyboardInterrupt/SystemExit; json.loads only raises
                # ValueError (JSONDecodeError) or TypeError.
                except (ValueError, TypeError):
                    warnings.warn(
                        '%s value of %s  metadata provided in Nansat is wrong!'
                        % (n_metadata[name], name))
            options[name], _ = model.objects.get_or_create(value)

        # Find coverage to set number of points in the geolocation
        if len(n.vrt.dataset.GetGCPs()) > 0:
            n.reproject_gcps()
        geolocation = GeographicLocation.objects.get_or_create(
            geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

        # create dataset
        ds, created = Dataset.objects.get_or_create(
            time_coverage_start=n.get_metadata('time_coverage_start'),
            time_coverage_end=n.get_metadata('time_coverage_end'),
            source=source,
            geographic_location=geolocation,
            **options)
        # create dataset URI
        ds_uri, _ = DatasetURI.objects.get_or_create(uri=uri, dataset=ds)

        return ds, created
Exemple #17
0
 def test_get_iso19115_topic_category(self):
     """The ISO 19115 topic category lookup returns an OrderedDict."""
     result = pti.get_iso19115_topic_category('Oceans')
     self.assertIsInstance(result, collections.OrderedDict)