Example 1
    def __init__(self, filename, gdal_dataset, metadata, quartile=0, *args, **kwargs):

        if 'ascat' not in metadata.get('NC_GLOBAL#source', '').lower():
            raise WrongMapperError

        super(Mapper, self).__init__(filename, gdal_dataset, metadata, quartile=quartile, *args, **kwargs)

        lat = self.dataset.GetRasterBand(self._latitude_band_number(gdal_dataset)).ReadAsArray()
        lon = self.dataset.GetRasterBand(self._longitude_band_number(gdal_dataset)).ReadAsArray()
        lon = ScatterometryMapper.shift_longitudes(lon)
        self.set_gcps(lon, lat, gdal_dataset)

        # Get dictionary describing the instrument and platform according to
        # the GCMD keywords
        ii = pti.get_gcmd_instrument('ascat')
        pp = pti.get_gcmd_platform(metadata['NC_GLOBAL#source'].split(' ')[0])
        provider = pti.get_gcmd_provider(re.split('[^a-zA-Z]',
            metadata['NC_GLOBAL#institution'])[0])

        # TODO: Validate that the found instrument and platform are indeed what
        # we want....

        self.dataset.SetMetadataItem('instrument', json.dumps(ii))
        self.dataset.SetMetadataItem('platform', json.dumps(pp))
        self.dataset.SetMetadataItem('data_center', json.dumps(provider))
        self.dataset.SetMetadataItem('entry_title', metadata['NC_GLOBAL#title'])
        self.dataset.SetMetadataItem('ISO_topic_category',
                json.dumps(pti.get_iso19115_topic_category('Oceans')))
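The TODO above can be closed with a cheap sanity check. A minimal sketch, assuming the pti dictionaries expose a 'Short_Name' key, as pythesint's GCMD vocabularies do:

    # Hedged sketch of the validation suggested by the TODO: reject the
    # file if the resolved instrument is not actually ASCAT.
    if ii.get('Short_Name', '').lower() != 'ascat':
        raise WrongMapperError('unexpected instrument: %r' % ii.get('Short_Name'))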
Example 2
    def __init__(self,
                 filename,
                 gdal_dataset,
                 gdal_metadata,
                 date=None,
                 ds=None,
                 bands=None,
                 cachedir=None,
                 *args,
                 **kwargs):

        self.test_mapper(filename)
        timestamp = date if date else self.get_date(filename)
        ds = Dataset(filename)
        try:
            self.srcDSProjection = NSR(ds.variables['projection_3'].proj4 +
                                       ' +to_meter=0.0174532925199 +wktext')
        except KeyError:
            raise WrongMapperError

        self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds,
                        bands, cachedir)

        self.dataset.SetMetadataItem(
            'instrument', json.dumps(pti.get_gcmd_instrument('Computer')))
        self.dataset.SetMetadataItem(
            'platform', json.dumps(pti.get_gcmd_platform('MODELS')))
        self.dataset.SetMetadataItem(
            'Data Center', json.dumps(pti.get_gcmd_provider('NO/MET')))
        self.dataset.SetMetadataItem('Entry Title', str(ds.getncattr('title')))
        self.dataset.SetMetadataItem(
            'ISO_topic_category',
            json.dumps(pti.get_iso19115_topic_category('Oceans')))
        self.dataset.SetMetadataItem(
            'gcmd_location', json.dumps(pti.get_gcmd_location('sea surface')))
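Both examples above follow the same contract: a mapper that cannot handle the input raises WrongMapperError, and the caller tries the next candidate. A self-contained sketch of that assumed selection loop (the names are illustrative, not nansat's actual internals):

    class WrongMapperError(Exception):
        """Raised by a mapper that cannot handle the given file."""

    def choose_mapper(candidate_mappers, filename, gdal_dataset, metadata):
        # Try each mapper in turn; the first one that does not raise wins.
        for mapper_class in candidate_mappers:
            try:
                return mapper_class(filename, gdal_dataset, metadata)
            except WrongMapperError:
                continue
        return None  # no mapper accepted the file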
Example 3
    def _init_empty(self, manifest_data, annotation_data):
        """ Fast initialization from minimum of information

        Parameters
        ----------
        manifest_data : dict
            data from the manifest file (time_coverage_start, etc)
        annotation_data : dict
            data from annotation file (longitude, latitude, x_size, etc)

        Note
        ----
            Calls VRT.__init__, adds GCPs and sets metadata
        """
        # init empty dataset
        super(Mapper, self).__init__(annotation_data['x_size'], annotation_data['y_size'])
        # add GCPs from (corrected) geolocation data
        gcps = Mapper.create_gcps(annotation_data['longitude'],
                                  annotation_data['latitude'],
                                  annotation_data['height'],
                                  annotation_data['pixel'],
                                  annotation_data['line'])
        self.dataset.SetGCPs(gcps, NSR().wkt)
        # set metadata
        self.dataset.SetMetadataItem('time_coverage_start', manifest_data['time_coverage_start'])
        self.dataset.SetMetadataItem('time_coverage_end', manifest_data['time_coverage_end'])
        platform_name = manifest_data['platform_family_name'] + manifest_data['platform_number']
        self.dataset.SetMetadataItem('platform', json.dumps(pti.get_gcmd_platform(platform_name)))
        self.dataset.SetMetadataItem('instrument', json.dumps(pti.get_gcmd_instrument('SAR')))
        self.dataset.SetMetadataItem('entry_title', platform_name + ' SAR')
        self.dataset.SetMetadataItem('data_center', json.dumps(pti.get_gcmd_provider('ESA/EO')))
        self.dataset.SetMetadataItem('iso_topic_category', json.dumps(pti.get_iso19115_topic_category('Oceans')))
        self.dataset.SetMetadataItem('summary', platform_name + ' SAR data')
        self.dataset.FlushCache()
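Mapper.create_gcps is not shown in the example; a plausible sketch, assuming it simply wraps each geolocation point in a gdal.GCP (GDAL's constructor takes x, y, z, pixel, line):

    import numpy as np
    from osgeo import gdal

    def create_gcps(longitude, latitude, height, pixel, line):
        # One gdal.GCP(x, y, z, pixel, line) per geolocation point.
        return [gdal.GCP(float(lon), float(lat), float(hgt), float(pix), float(lin))
                for lon, lat, hgt, pix, lin in zip(
                    np.ravel(longitude), np.ravel(latitude), np.ravel(height),
                    np.ravel(pixel), np.ravel(line))]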
Example 5
    def set_gcmd_dif_keywords(self):
        mditem = 'entry_title'
        if not self.dataset.GetMetadataItem(mditem):
            self.dataset.SetMetadataItem(mditem, self.input_filename)
        mditem = 'data_center'
        if not self.dataset.GetMetadataItem(mditem):
            self.dataset.SetMetadataItem(
                mditem, json.dumps(pti.get_gcmd_provider('NO/MET')))
        mditem = 'ISO_topic_category'
        if not self.dataset.GetMetadataItem(mditem):
            self.dataset.SetMetadataItem(
                mditem,
                pti.get_iso19115_topic_category(
                    'Imagery/Base Maps/Earth Cover')['iso_topic_category'])

        mm = pti.get_gcmd_instrument('sar')
        if self.ds.MISSION_ID == 'S1A':
            ee = pti.get_gcmd_platform('sentinel-1a')
        else:
            ee = pti.get_gcmd_platform('sentinel-1b')
        self.dataset.SetMetadataItem('instrument', json.dumps(mm))
        self.dataset.SetMetadataItem('platform', json.dumps(ee))

        self.dataset.SetMetadataItem(
            'time_coverage_start',
            self.dataset.GetMetadataItem('ACQUISITION_START_TIME'))
        self.dataset.SetMetadataItem(
            'time_coverage_end',
            self.dataset.GetMetadataItem('ACQUISITION_STOP_TIME'))
Example 6
    def __init__(self, filename, gdal_dataset, metadata, quartile=0, *args, **kwargs):

        if 'quikscat' not in metadata.get('NC_GLOBAL#source', '').lower():
            raise WrongMapperError

        super(Mapper, self).__init__(filename, gdal_dataset, metadata, quartile=quartile, *args, **kwargs)

        lat = self.dataset.GetRasterBand(self._latitude_band_number(gdal_dataset)).ReadAsArray()
        lon = self.dataset.GetRasterBand(self._longitude_band_number(gdal_dataset)).ReadAsArray()
        lon = ScatterometryMapper.shift_longitudes(lon)
        self.set_gcps(lon, lat, gdal_dataset)

        # Get dictionary describing the instrument and platform according to
        # the GCMD keywords
        mm = pti.get_gcmd_instrument('seawinds')
        ee = pti.get_gcmd_platform('quikscat')
        provider = metadata['NC_GLOBAL#institution']
        if provider.lower() == 'jpl':
            provider = 'NASA/JPL/QUIKSCAT'
        provider = pti.get_gcmd_provider(provider)

        self.dataset.SetMetadataItem('instrument', json.dumps(mm))
        self.dataset.SetMetadataItem('platform', json.dumps(ee))
        self.dataset.SetMetadataItem('data_center', json.dumps(provider))
        self.dataset.SetMetadataItem('entry_title', metadata['NC_GLOBAL#title'])
        self.dataset.SetMetadataItem('ISO_topic_category',
                json.dumps(pti.get_iso19115_topic_category('Oceans')))
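The JPL special case above illustrates a general pattern: free-text institution strings often need normalizing before pti.get_gcmd_provider can resolve them. A small sketch of that idea (the alias table is illustrative, not exhaustive):

    import pythesint as pti

    # Map raw institution strings to GCMD provider names pti can resolve.
    PROVIDER_ALIASES = {'jpl': 'NASA/JPL/QUIKSCAT'}

    def resolve_provider(institution):
        return pti.get_gcmd_provider(
            PROVIDER_ALIASES.get(institution.lower(), institution))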
Example 8
    def _init_empty(self, manifest_data, annotation_data):
        """ Fast initialization from minimum of information

        Parameters
        ----------
        manifest_data : dict
            data from the manifest file (time_coverage_start, etc)
        annotation_data : dict
            data from annotation file (longitude, latitude, x_size, etc)

        Note
        ----
            Calls VRT.__init__, adds GCPs and sets metadata
        """
        # init empty dataset
        super(Mapper, self).__init__(annotation_data['x_size'],
                                     annotation_data['y_size'])
        # add GCPs from (corrected) geolocation data
        gcps = Mapper.create_gcps(annotation_data['longitude'],
                                  annotation_data['latitude'],
                                  annotation_data['height'],
                                  annotation_data['pixel'],
                                  annotation_data['line'])
        self.dataset.SetGCPs(gcps, NSR().wkt)
        # set metadata
        self.dataset.SetMetadataItem('time_coverage_start',
                                     manifest_data['time_coverage_start'])
        self.dataset.SetMetadataItem('time_coverage_end',
                                     manifest_data['time_coverage_end'])
        platform_name = manifest_data['platform_family_name'] + manifest_data[
            'platform_number']
        self.dataset.SetMetadataItem(
            'platform', json.dumps(pti.get_gcmd_platform(platform_name)))
        self.dataset.SetMetadataItem(
            'instrument', json.dumps(pti.get_gcmd_instrument('SAR')))
        self.dataset.SetMetadataItem('entry_title', platform_name + ' SAR')
        self.dataset.SetMetadataItem(
            'data_center', json.dumps(pti.get_gcmd_provider('ESA/EO')))
        self.dataset.SetMetadataItem(
            'iso_topic_category',
            json.dumps(pti.get_iso19115_topic_category('Oceans')))
        self.dataset.SetMetadataItem('summary', platform_name + ' SAR data')
        self.dataset.FlushCache()
Example 9
    def __init__(self, filename, gdal_dataset, gdal_metadata, date=None,
                 ds=None, bands=None, cachedir=None, *args, **kwargs):

        self.test_mapper(filename)
        timestamp = date if date else self.get_date(filename)
        ds = Dataset(filename)
        self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds, bands, cachedir)
        self.dataset.SetMetadataItem('entry_title', str(ds.getncattr('title')))
        self.dataset.SetMetadataItem('data_center', json.dumps(pti.get_gcmd_provider('UK/MOD/MET')))
        self.dataset.SetMetadataItem('ISO_topic_category',
                pti.get_iso19115_topic_category('oceans')['iso_topic_category'])
        self.dataset.SetMetadataItem('gcmd_location', json.dumps(pti.get_gcmd_location('sea surface')))

        #mm = pti.get_gcmd_instrument('amsr-e')
        #ee = pti.get_gcmd_platform('aqua')
        #self.dataset.SetMetadataItem('instrument', json.dumps(mm))
        #self.dataset.SetMetadataItem('platform', json.dumps(ee))
        self.dataset.SetMetadataItem('platform/instrument',
                json.dumps(self.get_platform_and_instrument_list(ds)))
Example 10
    def __init__(self,
                 filename,
                 gdal_dataset,
                 gdal_metadata,
                 date=None,
                 ds=None,
                 bands=None,
                 cachedir=None,
                 *args,
                 **kwargs):

        self.test_mapper(filename)
        timestamp = date if date else self.get_date(filename)

        self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds,
                        bands, cachedir)

        mditem = 'entry_title'
        if not self.dataset.GetMetadataItem(mditem):
            try:
                self.dataset.SetMetadataItem(mditem,
                                             str(self.ds.getncattr('title')))
            except AttributeError:
                self.dataset.SetMetadataItem(mditem, filename)
        mditem = 'data_center'
        if not self.dataset.GetMetadataItem(mditem):
            self.dataset.SetMetadataItem(
                'data_center', json.dumps(pti.get_gcmd_provider('NO/MET')))
        mditem = 'ISO_topic_category'
        if not self.dataset.GetMetadataItem(mditem):
            self.dataset.SetMetadataItem(
                mditem,
                pti.get_iso19115_topic_category(
                    'Imagery/Base Maps/Earth Cover')['iso_topic_category'])

        mm = pti.get_gcmd_instrument('multi-spectral')
        ee = pti.get_gcmd_platform('sentinel-2')
        self.dataset.SetMetadataItem('instrument', json.dumps(mm))
        self.dataset.SetMetadataItem('platform', json.dumps(ee))
Example 11
    def __init__(self, filename, gdal_dataset, gdal_metadata, GCP_COUNT=10, timestamp=None, **kwargs):
        filename_name = os.path.split(filename)[-1].split('.')[0]
        # Check if correct mapper
        correct_mapper = False
        for location in self.SUPPORTED_LOCATIONS:
            # If the name matches one of the locations, break the loop and set the flag
            if filename_name.startswith(location):
                correct_mapper = True
                break
        if not correct_mapper:
            raise WrongMapperError

        # Import NetCDF4 dataset
        nc_dataset = Dataset(filename)
        # Define projection (depending on the HFR)
        if nc_dataset.getncattr('site') == 'TORU':
            proj4 = '+proj=utm +zone=32 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'
            GRID_PX_SIZE = 1500 # Final raster px size in meters
        elif nc_dataset.getncattr('site') == 'FRUH':
            proj4 = '+proj=utm +zone=34 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'
            GRID_PX_SIZE = 5000 # Final raster px size in meters
        elif nc_dataset.getncattr('site') == 'BERL':
            proj4 = '+proj=utm +zone=35 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'
            GRID_PX_SIZE = 5000 # Final raster px size in meters
        else:
            raise WrongMapperError

        srs = osr.SpatialReference()
        srs.ImportFromProj4(proj4)
        projection = srs.ExportToWkt()
        # Get x grid and y grid
        x_grd, y_grd = self.create_linear_grid(nc_dataset['x'][:], nc_dataset['y'][:], GRID_PX_SIZE)
        raster_x_size, raster_y_size = x_grd.shape
        # Define geotransform
        geotransform = (x_grd.min(), GRID_PX_SIZE, 0.0, y_grd.max(), 0.0, GRID_PX_SIZE * -1)
        # Define x and y size
        self._init_from_dataset_params(raster_x_size, raster_y_size, geotransform, projection)
        # If the required timestamp was not specified, extract the date from the filename and use the first time
        if timestamp is None:
            timestamp = self.date_from_filename(filename)
        # Convert time info from the dataset to datetime objects
        timestamps = num2date(nc_dataset['time'][:].data, nc_dataset['time'].units)
        # Find the band id for the required timestamp
        # Note: add 1 because GDAL band numbering starts from 1, not 0
        src_timestamp_id = np.where(timestamps == timestamp)[0][0] + 1
        # Iterate through all subdatasets and add bands to the dataset
        for subdataset in gdal_dataset.GetSubDatasets():
            # Get name of subdataset
            subdataset_name = subdataset[0].split(':')[2]
            # Check if the subdataset in the accepted 3D vars list
            if subdataset_name not in self.BAND_NAMES:
                continue
            gdal_subdataset = gdal.Open(subdataset[0])
            # needs to be float for the NaN replacement
            band_data = gdal_subdataset.GetRasterBand(int(src_timestamp_id)).ReadAsArray().astype('float')
            # remove fill value (replace with nan)
            fill_value = int(gdal_subdataset.GetMetadata_Dict()['#'.join([subdataset_name, '_FillValue'])])
            band_data[band_data == fill_value] = np.nan
            # Interpolate data on the regular grid
            band_grid_data = self.band2grid((nc_dataset['x'][:], nc_dataset['y'][:]),
                                            band_data, (x_grd, y_grd))
            # Create VRT for the regridded data
            band_vrt = VRT.from_array(band_grid_data)
            # Add VRT to the list of all dataset vrts
            self.band_vrts[subdataset_name + 'VRT'] = band_vrt
            # Add band to the dataset
            src = {'SourceFilename': self.band_vrts[subdataset_name + 'VRT'].filename, 
                   'SourceBand': 1}
            # Add band specific metadata
            dst = {'name': subdataset_name}
            for key in gdal_subdataset.GetMetadata_Dict().keys():
                if key.startswith(subdataset_name):
                    clean_metadata_name = key.split('#')[1]
                    dst[clean_metadata_name] = gdal_subdataset.GetMetadata_Dict()[key]
            # Create band
            self.create_band(src, dst)
            self.dataset.FlushCache()

        # Set GCMD metadata
        self.dataset.SetMetadataItem('instrument', json.dumps(pti.get_gcmd_instrument('SCR-HF')))
        self.dataset.SetMetadataItem('platform', json.dumps(pti.get_gcmd_platform('CODAR SeaSonde')))
        self.dataset.SetMetadataItem('Data Center', json.dumps(pti.get_gcmd_provider('NO/MET')))
        self.dataset.SetMetadataItem('Entry Title', 'Near-Real Time Surface Ocean Radial Velocity')
        self.dataset.SetMetadataItem('gcmd_location', json.dumps(pti.get_gcmd_location('NORTH SEA')))
        # Set time coverage metadata
        self.dataset.SetMetadataItem('time_coverage_start', timestamp.isoformat())
        self.dataset.SetMetadataItem('time_coverage_end',
                                     (timestamp + timedelta(minutes=59, seconds=59)).isoformat())
        # Set NetCDF dataset metadata
        for key, value in gdal_dataset.GetMetadata_Dict().items():
            self.dataset.SetMetadataItem(key.split('#')[1], value)
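create_linear_grid and band2grid are helpers of this mapper and are not shown; a hypothetical sketch of the grid construction, consistent with the geotransform above (x ascending, y descending):

    import numpy as np

    def create_linear_grid(x, y, px_size):
        # Hypothetical sketch: a regular grid covering the extent of the
        # (possibly irregular) source x/y coordinates.
        x_vec = np.arange(np.min(x), np.max(x) + px_size, px_size)
        y_vec = np.arange(np.max(y), np.min(y) - px_size, -px_size)
        return np.meshgrid(x_vec, y_vec)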
Example 12
    def get_or_create(self,
                      uri,
                      n_points=10,
                      uri_filter_args=None,
                      uri_service_name=FILE_SERVICE_NAME,
                      uri_service_type=LOCAL_FILE_SERVICE,
                      *args,
                      **kwargs):
        """ Create dataset and corresponding metadata

        Parameters
        ----------
            uri : str
                  URI to file or stream openable by Nansat
            n_points : int
                  Number of border points (default is 10)
            uri_filter_args : dict
                Extra DatasetURI filter arguments if several datasets can refer to the same URI
            uri_service_name : str
                name of the service which is used  ('dapService', 'fileService', 'http' or 'wms')
            uri_service_type : str
                type of the service which is used  ('OPENDAP', 'local', 'HTTPServer' or 'WMS')

        Returns
        -------
            dataset and flag
        """
        if not uri_filter_args:
            uri_filter_args = {}

        # Validate uri - this should raise an exception if the uri doesn't point to a valid
        # file or stream
        validate_uri(uri)

        # Several datasets can refer to the same uri (e.g., scatterometers and svp drifters), so we
        # need to pass uri_filter_args
        uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
        if len(uris) > 0:
            return uris[0].dataset, False

        # Open file with Nansat
        n = Nansat(nansat_filename(uri), **kwargs)

        # get metadata from Nansat and get objects from vocabularies
        n_metadata = n.get_metadata()

        entry_id = n_metadata.get('entry_id', None)
        # set compulsory metadata (source)
        platform, _ = Platform.objects.get_or_create(
            json.loads(n_metadata['platform']))
        instrument, _ = Instrument.objects.get_or_create(
            json.loads(n_metadata['instrument']))
        specs = n_metadata.get('specs', '')
        source, _ = Source.objects.get_or_create(platform=platform,
                                                 instrument=instrument,
                                                 specs=specs)

        default_char_fields = {
            # Adding NERSC_ in front of the id violates the string representation of the uuid
            #'entry_id': lambda: 'NERSC_' + str(uuid.uuid4()),
            'entry_id': lambda: str(uuid.uuid4()),
            'entry_title': lambda: 'NONE',
            'summary': lambda: 'NONE',
        }

        # set optional CharField metadata from Nansat or from default_char_fields
        options = {}
        try:
            existing_ds = Dataset.objects.get(entry_id=entry_id)
        except Dataset.DoesNotExist:
            existing_ds = None
        for name in default_char_fields:
            if name not in n_metadata:
                warnings.warn('%s is not provided in Nansat metadata!' % name)
                # prevent overwriting of existing values by defaults
                if existing_ds:
                    options[name] = getattr(existing_ds, name)
                else:
                    options[name] = default_char_fields[name]()
            else:
                options[name] = n_metadata[name]

        default_foreign_keys = {
            'gcmd_location': {
                'model': Location,
                'value': pti.get_gcmd_location('SEA SURFACE')
            },
            'data_center': {
                'model': DataCenter,
                'value': pti.get_gcmd_provider('NERSC')
            },
            'ISO_topic_category': {
                'model': ISOTopicCategory,
                'value': pti.get_iso19115_topic_category('Oceans')
            },
        }

        # set optional ForeignKey metadata from Nansat or from default_foreign_keys
        for name in default_foreign_keys:
            value = default_foreign_keys[name]['value']
            model = default_foreign_keys[name]['model']
            if name not in n_metadata:
                warnings.warn('%s is not provided in Nansat metadata!' % name)
            else:
                try:
                    value = json.loads(n_metadata[name])
                except (ValueError, TypeError):
                    warnings.warn(
                        '%s value of %s metadata provided in Nansat is wrong!'
                        % (n_metadata[name], name))
            if existing_ds:
                options[name] = getattr(existing_ds, name)
            else:
                options[name], _ = model.objects.get_or_create(value)

        # Find coverage to set number of points in the geolocation
        if len(n.vrt.dataset.GetGCPs()) > 0:
            n.reproject_gcps()
        geolocation = GeographicLocation.objects.get_or_create(
            geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

        # create dataset
        # - the get_or_create method should use get_or_create here as well,
        #   or its name should be changed - see issue #127
        ds, created = Dataset.objects.update_or_create(
            entry_id=options['entry_id'],
            defaults={
                'time_coverage_start': n.get_metadata('time_coverage_start'),
                'time_coverage_end': n.get_metadata('time_coverage_end'),
                'source': source,
                'geographic_location': geolocation,
                'gcmd_location': options["gcmd_location"],
                'ISO_topic_category': options["ISO_topic_category"],
                "data_center": options["data_center"],
                'entry_title': options["entry_title"],
                'summary': options["summary"]
            })

        # create parameter
        all_band_meta = n.bands()
        for band_id in range(1, len(all_band_meta) + 1):
            band_meta = all_band_meta[band_id]
            standard_name = band_meta.get('standard_name', None)
            short_name = band_meta.get('short_name', None)
            units = band_meta.get('units', None)
            if standard_name in ['latitude', 'longitude', None]:
                continue
            params = Parameter.objects.filter(standard_name=standard_name)
            if params.count() > 1 and short_name is not None:
                params = params.filter(short_name=short_name)
            if params.count() > 1 and units is not None:
                params = params.filter(units=units)
            if params.count() >= 1:
                ds.parameters.add(params[0])

        # create dataset URI
        DatasetURI.objects.get_or_create(name=uri_service_name,
                                         service=uri_service_type,
                                         uri=uri,
                                         dataset=ds)

        return ds, created
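A hedged usage sketch, assuming the manager above is installed as Dataset.objects; the URI is a placeholder:

    # Returns the Dataset row and a flag telling whether it was created
    # rather than fetched from the database.
    ds, created = Dataset.objects.get_or_create('file://localhost/path/to/data.nc')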
Example 13
    def get_provider(self, raw_metadata):
        return pti.get_gcmd_provider('UC-LONDON/CPOM')

    def test_get_gcmd_provider(self):
        item = 'NERSC'
        self.assertIsInstance(pti.get_gcmd_provider(item),
                              collections.OrderedDict)
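For orientation, a small sketch of what such a lookup returns. The key names follow pythesint's GCMD provider vocabulary; the exact set may vary with the vocabulary version:

    import pythesint as pti

    provider = pti.get_gcmd_provider('NERSC')
    # An OrderedDict with (roughly) the keys Bucket_Level0..Bucket_Level3,
    # Short_Name, Long_Name and Data_Center_URL.
    print(list(provider.keys()))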
Example 15
    def export2netcdf(self, n, ds, history_message=''):

        if not history_message:
            history_message = 'Export to netCDF [geospaas sar_doppler version %s]' % os.getenv(
                'GEOSPAAS_SAR_DOPPLER_VERSION', 'dev')

        ii = int(n.get_metadata('subswath'))

        date_created = datetime.now(timezone.utc)

        fn = self.nc_name(ds, ii)

        original = Nansat(n.get_metadata('Originating file'), subswath=ii)
        metadata = original.get_metadata()

        def pretty_print_gcmd_keywords(kw):
            retval = ''
            value_prev = ''
            for key, value in kw.items():
                if value:
                    if value_prev:
                        retval += ' > '
                    retval += value
                    value_prev = value
            return retval

        # Set global metadata
        metadata['Conventions'] = metadata['Conventions'] + ', ACDD-1.3'
        # id - the ID from the database should be registered in the file if it is not already there
        try:
            entry_id = n.get_metadata('entry_id')
        except ValueError:
            n.set_metadata(key='entry_id', value=ds.entry_id)
        try:
            id = n.get_metadata('id')
        except ValueError:
            n.set_metadata(key='id', value=ds.entry_id)
        metadata['date_created'] = date_created.strftime('%Y-%m-%d')
        metadata['date_created_type'] = 'Created'
        metadata['date_metadata_modified'] = date_created.strftime('%Y-%m-%d')
        metadata['processing_level'] = 'Scientific'
        metadata['creator_role'] = 'Investigator'
        metadata['creator_name'] = 'Morten Wergeland Hansen'
        metadata['creator_email'] = '*****@*****.**'
        metadata['creator_institution'] = pretty_print_gcmd_keywords(
            pti.get_gcmd_provider('NO/MET'))

        metadata['project'] = ('Norwegian Space Agency project JOP.06.20.2: '
                               'Reprocessing and analysis of historical data for '
                               'future operationalization of Doppler shifts from SAR')
        metadata['publisher_name'] = 'Morten Wergeland Hansen'
        metadata['publisher_url'] = 'https://www.met.no/'
        metadata['publisher_email'] = '*****@*****.**'

        metadata['references'] = 'https://github.com/mortenwh/openwind'

        metadata['dataset_production_status'] = 'Complete'

        # Get image boundary
        lon, lat = n.get_border()
        boundary = 'POLYGON (('
        for la, lo in list(zip(lat, lon)):
            boundary += '%.2f %.2f, ' % (la, lo)
        boundary = boundary[:-2] + '))'
        # Set bounds as (lat,lon) following ACDD convention and EPSG:4326
        metadata['geospatial_bounds'] = boundary
        metadata['geospatial_bounds_crs'] = 'EPSG:4326'

        # history
        try:
            history = n.get_metadata('history')
        except ValueError:
            metadata['history'] = date_created.isoformat() + ': ' + history_message
        else:
            metadata['history'] = (history + '\n' + date_created.isoformat()
                                   + ': ' + history_message)

        # Set metadata from dict (export2thredds could take it as input..)
        for key, val in metadata.items():
            n.set_metadata(key=key, value=val)

        # Export data to netcdf
        logging.info('Exporting %s to %s (subswath %d)' %
                     (n.filename, fn, ii + 1))
        n.export(filename=fn)
        #ww.export2thredds(thredds_fn, mask_name='swathmask', metadata=metadata, no_mask_value=1)

        # Clean netcdf attributes
        history = n.get_metadata('history')
        self.clean_nc_attrs(fn, history)

        # Add netcdf uri to DatasetURIs
        ncuri = 'file://localhost' + fn
        new_uri, created = DatasetURI.objects.get_or_create(uri=ncuri,
                                                            dataset=ds)
        connection.close()

        return new_uri, created
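pretty_print_gcmd_keywords above just joins the non-empty values of a GCMD entry with ' > '. A hedged one-liner equivalent; the printed hierarchy depends on the vocabulary version:

    import pythesint as pti

    # For 'NO/MET' this prints something like:
    # GOVERNMENT AGENCIES-NON-US > NORWAY > NO/MET > Norwegian Meteorological Institute, Norway
    print(' > '.join(v for v in pti.get_gcmd_provider('NO/MET').values() if v))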
Example 16
    def _get_normalized_attributes(self, dataset_info, *args, **kwargs):
        """Gets dataset attributes using nansat"""
        normalized_attributes = {}
        n_points = int(kwargs.get('n_points', 10))
        nansat_options = kwargs.get('nansat_options', {})
        url_scheme = urlparse(dataset_info).scheme
        if 'http' not in url_scheme and 'ftp' not in url_scheme:
            normalized_attributes['geospaas_service_name'] = FILE_SERVICE_NAME
            normalized_attributes['geospaas_service'] = LOCAL_FILE_SERVICE
        elif 'http' in url_scheme and 'ftp' not in url_scheme:
            normalized_attributes['geospaas_service_name'] = DAP_SERVICE_NAME
            normalized_attributes['geospaas_service'] = OPENDAP_SERVICE
        elif 'ftp' in url_scheme:
            raise ValueError(
                f"Can't ingest '{dataset_info}': nansat can't open remote ftp files"
            )

        # Open file with Nansat
        nansat_object = Nansat(nansat_filename(dataset_info),
                               log_level=self.LOGGER.getEffectiveLevel(),
                               **nansat_options)

        # get metadata from Nansat and get objects from vocabularies
        n_metadata = nansat_object.get_metadata()

        # set compulsory metadata (source)
        normalized_attributes['entry_title'] = n_metadata.get(
            'entry_title', 'NONE')
        normalized_attributes['summary'] = n_metadata.get('summary', 'NONE')
        normalized_attributes['time_coverage_start'] = dateutil.parser.parse(
            n_metadata['time_coverage_start']).replace(tzinfo=tzutc())
        normalized_attributes['time_coverage_end'] = dateutil.parser.parse(
            n_metadata['time_coverage_end']).replace(tzinfo=tzutc())
        normalized_attributes['platform'] = json.loads(n_metadata['platform'])
        normalized_attributes['instrument'] = json.loads(
            n_metadata['instrument'])
        normalized_attributes['specs'] = n_metadata.get('specs', '')
        normalized_attributes['entry_id'] = n_metadata.get(
            'entry_id', 'NERSC_' + str(uuid.uuid4()))

        # set optional ForeignKey metadata from Nansat or from defaults
        normalized_attributes['gcmd_location'] = n_metadata.get(
            'gcmd_location', pti.get_gcmd_location('SEA SURFACE'))
        normalized_attributes['provider'] = pti.get_gcmd_provider(
            n_metadata.get('provider', 'NERSC'))
        normalized_attributes['iso_topic_category'] = n_metadata.get(
            'ISO_topic_category', pti.get_iso19115_topic_category('Oceans'))

        # Find coverage to set number of points in the geolocation
        if nansat_object.vrt.dataset.GetGCPs():
            nansat_object.reproject_gcps()
        normalized_attributes['location_geometry'] = GEOSGeometry(
            nansat_object.get_border_wkt(n_points=n_points), srid=4326)

        json_dumped_dataset_parameters = n_metadata.get(
            'dataset_parameters', None)
        if json_dumped_dataset_parameters:
            json_loads_result = json.loads(json_dumped_dataset_parameters)
            if isinstance(json_loads_result, list):
                normalized_attributes['dataset_parameters'] = [
                    get_cf_or_wkv_standard_name(dataset_param)
                    for dataset_param in json_loads_result
                ]
            else:
                raise TypeError(
                    f"Can't ingest '{dataset_info}': the 'dataset_parameters' section of the "
                    "metadata returned by nansat is not a JSON list")
        else:
            normalized_attributes['dataset_parameters'] = []

        return normalized_attributes
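A hypothetical illustration of the 'dataset_parameters' convention this method expects, namely a JSON-encoded list (the parameter names here are made up):

    import json

    n_metadata = {'dataset_parameters': json.dumps(['wind_speed', 'wind_direction'])}
    dataset_parameters = json.loads(n_metadata['dataset_parameters'])
    assert isinstance(dataset_parameters, list)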
Example 17
 def get_provider(self, raw_metadata):
     return pti.get_gcmd_provider('CSA')
Example 18
    def process(self, uri, force=False, *args, **kwargs):
        fn = 'WIND_' + os.path.basename(uri)
        if DatasetURI.objects.filter(uri__contains=fn) and not force:
            wds = Dataset.objects.filter(dataseturi__uri__contains=fn)[0]
            return wds, False

        try:
            w = wind_from_sar_and_arome_forecast(uri)
        except (TooHighResolutionError, PolarizationError, ObjectDoesNotExist) as e:
            if isinstance(e, Dataset.DoesNotExist):
                warnings.warn(uri + ': ' + e.args[0])
            else:
                # ObjectDoesNotExist could happen if there is no overlap between SAR and model
                warnings.warn(e.file + ': ' + e.msg)
            return None, False

        metadata = w.get_metadata()

        # Direct reprojection fails - gdal can't read the bands if we do w.reproject...
        # Workaround: Export wind to temporary file
        #tmp_filename = os.path.join(settings.PRODUCTS_ROOT,'TMP_WIND_'+os.path.basename(uri))
        fd, tmp_filename = tempfile.mkstemp(suffix='.nc')
        os.close(fd) # Just in case - see https://www.logilab.org/blogentry/17873
        w.export(tmp_filename)

        # Read temporary file
        ww = Nansat(tmp_filename)

        # Reproject
        lon, lat = ww.get_geolocation_grids()
        #lon, lat = w.get_geolocation_grids()
        srs = '+proj=stere +datum=WGS84 +ellps=WGS84 +lat_0=%.2f +lon_0=%.2f +no_defs' \
              % (np.mean(lat), np.mean(lon))
        xmin = -haversine(np.mean(lon), np.mean(lat), np.min(lon), np.mean(lat))
        xmax = haversine(np.mean(lon), np.mean(lat), np.max(lon), np.mean(lat))
        ymin = -haversine(np.mean(lon), np.mean(lat), np.mean(lon), np.min(lat))
        ymax = haversine(np.mean(lon), np.mean(lat), np.mean(lon), np.max(lat))
        ext = '-te %d %d %d %d -tr 500 500' % (
            xmin - 10000, ymin - 10000, xmax + 10000, ymax + 10000)
        d = Domain(srs, ext)
        ww.reproject(d, tps=True)
        #w.reproject(d, tps=True)

        # Set global metadata
        metadata['data_center'] = json.dumps(pti.get_gcmd_provider(kwargs.pop('data_center', '')))
        metadata['naming_authority'] = kwargs.pop('naming_authority', '')
        metadata['project'] = 'SIOS InfraNor'
        metadata['entry_title'] = 'Wind field from ' + os.path.basename(uri)
        metadata.pop('file_creation_date')
        metadata['history'] = metadata['history'] + ' ' + timezone.now().isoformat() + \
                '. Calculated wind field from NRCS and Arome Arctic forecast wind directions.'
        metadata.pop('institution')
        metadata['keywords'] += ', ['
        for key, value in pti.get_gcmd_science_keyword('U/V WIND COMPONENTS').items():
            if value:
                metadata['keywords'] += value + ', '
        metadata['keywords'] += ']'
        metadata.pop('LINE_SPACING')
        metadata.pop('PIXEL_SPACING')
        metadata['summary'] = 'Near surface (10m) wind from Arome Arctic forecast wind and ' + metadata['summary']
        metadata['title'] = 'Near surface wind from ' + metadata['title']

        # Get or create data folder
        start_time = parse(metadata['time_coverage_start'])
        yfolder = os.path.join(settings.PRODUCTS_ROOT, '{:04d}'.format(start_time.year))
        mfolder = os.path.join(yfolder, '{:02d}'.format(start_time.month))
        dfolder = os.path.join(mfolder, '{:02d}'.format(start_time.day))
        if not os.path.isdir(yfolder):
            os.mkdir(yfolder)
        if not os.path.isdir(mfolder):
            os.mkdir(mfolder)
        if not os.path.isdir(dfolder):
            os.mkdir(dfolder)

        # Export
        thredds_fn = os.path.join(dfolder, fn)
        wind_uri = 'file://localhost' + thredds_fn
        ww.export2thredds(thredds_fn, mask_name='swathmask', metadata=metadata, no_mask_value=1)
        wds, cr = super(WindManager, self).get_or_create(wind_uri)
        
        os.unlink(tmp_filename)

        return wds, cr
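The haversine helper is not shown in this example; a standard implementation with the assumed (lon1, lat1, lon2, lat2) signature, returning metres:

    from math import asin, cos, radians, sin, sqrt

    def haversine(lon1, lat1, lon2, lat2, radius=6371000.0):
        # Great-circle distance between two points on a spherical Earth.
        dlon, dlat = radians(lon2 - lon1), radians(lat2 - lat1)
        a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
        return 2 * radius * asin(sqrt(a))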
Example 19
    def get_or_create(self,
                      uri,
                      n_points=10,
                      uri_filter_args=None,
                      *args,
                      **kwargs):
        ''' Create dataset and corresponding metadata

        Parameters
        ----------
            uri : str
                  URI to file or stream openable by Nansat
            n_points : int
                  Number of border points (default is 10)
            uri_filter_args : dict
                Extra DatasetURI filter arguments if several datasets can refer to the same URI

        Returns
        -------
            dataset and flag
        '''
        if not uri_filter_args:
            uri_filter_args = {}

        # Validate uri - this should raise an exception if the uri doesn't point to a valid
        # file or stream
        validate_uri(uri)

        # Several datasets can refer to the same uri (e.g., scatterometers and svp drifters), so we
        # need to pass uri_filter_args
        uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
        if len(uris) > 0:
            return uris[0].dataset, False

        # Open file with Nansat
        n = Nansat(nansat_filename(uri), **kwargs)

        # get metadata from Nansat and get objects from vocabularies
        n_metadata = n.get_metadata()

        # set compulsory metadata (source)
        platform, _ = Platform.objects.get_or_create(
            json.loads(n_metadata['platform']))
        instrument, _ = Instrument.objects.get_or_create(
            json.loads(n_metadata['instrument']))
        specs = n_metadata.get('specs', '')
        source, _ = Source.objects.get_or_create(platform=platform,
                                                 instrument=instrument,
                                                 specs=specs)

        default_char_fields = {
            'entry_id': lambda: 'NERSC_' + str(uuid.uuid4()),
            'entry_title': lambda: 'NONE',
            'summary': lambda: 'NONE',
        }

        # set optional CharField metadata from Nansat or from default_char_fields
        options = {}
        for name in default_char_fields:
            if name not in n_metadata:
                warnings.warn('%s is not provided in Nansat metadata!' % name)
                options[name] = default_char_fields[name]()
            else:
                options[name] = n_metadata[name]

        default_foreign_keys = {
            'gcmd_location': {
                'model': Location,
                'value': pti.get_gcmd_location('SEA SURFACE')
            },
            'data_center': {
                'model': DataCenter,
                'value': pti.get_gcmd_provider('NERSC')
            },
            'ISO_topic_category': {
                'model': ISOTopicCategory,
                'value': pti.get_iso19115_topic_category('Oceans')
            },
        }

        # set optional ForeignKey metadata from Nansat or from default_foreign_keys
        for name in default_foreign_keys:
            value = default_foreign_keys[name]['value']
            model = default_foreign_keys[name]['model']
            if name not in n_metadata:
                warnings.warn('%s is not provided in Nansat metadata!' % name)
            else:
                try:
                    value = json.loads(n_metadata[name])
                except (ValueError, TypeError):
                    warnings.warn(
                        '%s value of %s metadata provided in Nansat is wrong!'
                        % (n_metadata[name], name))
            options[name], _ = model.objects.get_or_create(value)

        # Find coverage to set number of points in the geolocation
        if len(n.vrt.dataset.GetGCPs()) > 0:
            n.reproject_gcps()
        geolocation = GeographicLocation.objects.get_or_create(
            geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

        # create dataset
        ds, created = Dataset.objects.get_or_create(
            time_coverage_start=n.get_metadata('time_coverage_start'),
            time_coverage_end=n.get_metadata('time_coverage_end'),
            source=source,
            geographic_location=geolocation,
            **options)
        # create dataset URI
        ds_uri, _ = DatasetURI.objects.get_or_create(uri=uri, dataset=ds)

        return ds, created