def __init__(self, filename, gdal_dataset, metadata, quartile=0, *args, **kwargs):
    if 'ascat' not in metadata.get('NC_GLOBAL#source', '').lower():
        raise WrongMapperError
    super(Mapper, self).__init__(filename, gdal_dataset, metadata, quartile=quartile,
                                 *args, **kwargs)
    lat = self.dataset.GetRasterBand(self._latitude_band_number(gdal_dataset)).ReadAsArray()
    lon = self.dataset.GetRasterBand(self._longitude_band_number(gdal_dataset)).ReadAsArray()
    lon = ScatterometryMapper.shift_longitudes(lon)
    self.set_gcps(lon, lat, gdal_dataset)
    # Get dictionaries describing the instrument and platform according to
    # the GCMD keywords
    ii = pti.get_gcmd_instrument('ascat')
    pp = pti.get_gcmd_platform(metadata['NC_GLOBAL#source'].split(' ')[0])
    provider = pti.get_gcmd_provider(re.split('[^a-zA-Z]',
                                              metadata['NC_GLOBAL#institution'])[0])
    # TODO: Validate that the found instrument and platform are indeed what we want...
    self.dataset.SetMetadataItem('instrument', json.dumps(ii))
    self.dataset.SetMetadataItem('platform', json.dumps(pp))
    self.dataset.SetMetadataItem('data_center', json.dumps(provider))
    self.dataset.SetMetadataItem('entry_title', metadata['NC_GLOBAL#title'])
    self.dataset.SetMetadataItem('ISO_topic_category',
                                 json.dumps(pti.get_iso19115_topic_category('Oceans')))

def __init__(self, filename, gdal_dataset, gdal_metadata, date=None, ds=None, bands=None,
             cachedir=None, *args, **kwargs):
    self.test_mapper(filename)
    timestamp = date if date else self.get_date(filename)
    ds = Dataset(filename)
    try:
        self.srcDSProjection = NSR(ds.variables['projection_3'].proj4 +
                                   ' +to_meter=0.0174532925199 +wktext')
    except KeyError:
        raise WrongMapperError
    self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds, bands, cachedir)

    self.dataset.SetMetadataItem('instrument',
                                 json.dumps(pti.get_gcmd_instrument('Computer')))
    self.dataset.SetMetadataItem('platform',
                                 json.dumps(pti.get_gcmd_platform('MODELS')))
    self.dataset.SetMetadataItem('Data Center',
                                 json.dumps(pti.get_gcmd_provider('NO/MET')))
    self.dataset.SetMetadataItem('Entry Title', str(ds.getncattr('title')))
    # The ISO topic category gets its own key; setting it under 'Entry Title'
    # would silently overwrite the title set just above.
    self.dataset.SetMetadataItem('ISO_topic_category',
                                 json.dumps(pti.get_iso19115_topic_category('Oceans')))
    self.dataset.SetMetadataItem('gcmd_location',
                                 json.dumps(pti.get_gcmd_location('sea surface')))

def _init_empty(self, manifest_data, annotation_data):
    """ Fast initialization from a minimum of information

    Parameters
    ----------
    manifest_data : dict
        data from the manifest file (time_coverage_start, etc)
    annotation_data : dict
        data from the annotation file (longitude, latitude, x_size, etc)

    Note
    ----
    Calls VRT.__init__, adds GCPs, sets metadata
    """
    # init empty dataset
    super(Mapper, self).__init__(annotation_data['x_size'], annotation_data['y_size'])
    # add GCPs from (corrected) geolocation data
    gcps = Mapper.create_gcps(annotation_data['longitude'],
                              annotation_data['latitude'],
                              annotation_data['height'],
                              annotation_data['pixel'],
                              annotation_data['line'])
    self.dataset.SetGCPs(gcps, NSR().wkt)
    # set metadata
    self.dataset.SetMetadataItem('time_coverage_start', manifest_data['time_coverage_start'])
    self.dataset.SetMetadataItem('time_coverage_end', manifest_data['time_coverage_end'])
    platform_name = manifest_data['platform_family_name'] + manifest_data['platform_number']
    self.dataset.SetMetadataItem('platform', json.dumps(pti.get_gcmd_platform(platform_name)))
    self.dataset.SetMetadataItem('instrument', json.dumps(pti.get_gcmd_instrument('SAR')))
    self.dataset.SetMetadataItem('entry_title', platform_name + ' SAR')
    self.dataset.SetMetadataItem('data_center', json.dumps(pti.get_gcmd_provider('ESA/EO')))
    self.dataset.SetMetadataItem('iso_topic_category',
                                 json.dumps(pti.get_iso19115_topic_category('Oceans')))
    self.dataset.SetMetadataItem('summary', platform_name + ' SAR data')
    self.dataset.FlushCache()

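# `create_gcps` above builds gdal.GCP objects from the annotation arrays. For
# reference, a single GCP ties an image position (pixel, line) to a ground
# position (lon, lat, height); a minimal sketch with illustrative numbers:
from osgeo import gdal

gcp = gdal.GCP(5.32, 60.39, 0.0, 100.5, 200.5)  # x=lon, y=lat, z, pixel, line
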
def set_gcmd_dif_keywords(self):
    mditem = 'entry_title'
    if not self.dataset.GetMetadataItem(mditem):
        self.dataset.SetMetadataItem(mditem, self.input_filename)
    mditem = 'data_center'
    if not self.dataset.GetMetadataItem(mditem):
        self.dataset.SetMetadataItem(mditem,
                                     json.dumps(pti.get_gcmd_provider('NO/MET')))
    mditem = 'ISO_topic_category'
    if not self.dataset.GetMetadataItem(mditem):
        self.dataset.SetMetadataItem(
            mditem,
            pti.get_iso19115_topic_category(
                'Imagery/Base Maps/Earth Cover')['iso_topic_category'])

    mm = pti.get_gcmd_instrument('sar')
    if self.ds.MISSION_ID == 'S1A':
        ee = pti.get_gcmd_platform('sentinel-1a')
    else:
        ee = pti.get_gcmd_platform('sentinel-1b')
    self.dataset.SetMetadataItem('instrument', json.dumps(mm))
    self.dataset.SetMetadataItem('platform', json.dumps(ee))

    self.dataset.SetMetadataItem('time_coverage_start',
                                 self.dataset.GetMetadataItem('ACQUISITION_START_TIME'))
    self.dataset.SetMetadataItem('time_coverage_end',
                                 self.dataset.GetMetadataItem('ACQUISITION_STOP_TIME'))

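# The "set only when missing" pattern in set_gcmd_dif_keywords repeats for
# every item. A minimal refactoring sketch; the helper name
# `_set_default_metadata` is hypothetical, not part of the mapper API:
def _set_default_metadata(self, key, value):
    """Set a metadata item only if the dataset does not already carry it."""
    if not self.dataset.GetMetadataItem(key):
        self.dataset.SetMetadataItem(key, value)

# usage with the same pythesint lookup as above:
# self._set_default_metadata('data_center', json.dumps(pti.get_gcmd_provider('NO/MET')))
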
def __init__(self, filename, gdal_dataset, metadata, quartile=0, *args, **kwargs):
    if 'quikscat' not in metadata.get('NC_GLOBAL#source', '').lower():
        raise WrongMapperError
    super(Mapper, self).__init__(filename, gdal_dataset, metadata, quartile=quartile,
                                 *args, **kwargs)
    lat = self.dataset.GetRasterBand(self._latitude_band_number(gdal_dataset)).ReadAsArray()
    lon = self.dataset.GetRasterBand(self._longitude_band_number(gdal_dataset)).ReadAsArray()
    lon = ScatterometryMapper.shift_longitudes(lon)
    self.set_gcps(lon, lat, gdal_dataset)
    # Get dictionaries describing the instrument and platform according to
    # the GCMD keywords
    mm = pti.get_gcmd_instrument('seawinds')
    ee = pti.get_gcmd_platform('quikscat')
    provider = metadata['NC_GLOBAL#institution']
    if provider.lower() == 'jpl':
        provider = 'NASA/JPL/QUIKSCAT'
    provider = pti.get_gcmd_provider(provider)
    self.dataset.SetMetadataItem('instrument', json.dumps(mm))
    self.dataset.SetMetadataItem('platform', json.dumps(ee))
    self.dataset.SetMetadataItem('data_center', json.dumps(provider))
    self.dataset.SetMetadataItem('entry_title', metadata['NC_GLOBAL#title'])
    self.dataset.SetMetadataItem('ISO_topic_category',
                                 json.dumps(pti.get_iso19115_topic_category('Oceans')))

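# Both scatterometer mappers above pass the longitude grid through
# ScatterometryMapper.shift_longitudes before setting GCPs. A minimal sketch of
# such a wrap, assuming the goal is to move longitudes from [0, 360) into
# [-180, 180) (check the actual ScatterometryMapper implementation before
# relying on this):
import numpy as np

def shift_longitudes(lon):
    """Wrap longitudes into the [-180, 180) range."""
    return np.mod(lon + 180.0, 360.0) - 180.0

# shift_longitudes(np.array([0., 90., 190., 359.])) -> [0., 90., -170., -1.]
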
def __init__(self, filename, gdal_dataset, gdal_metadata, date=None, ds=None, bands=None,
             cachedir=None, *args, **kwargs):
    self.test_mapper(filename)
    timestamp = date if date else self.get_date(filename)
    ds = Dataset(filename)
    self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds, bands, cachedir)

    self.dataset.SetMetadataItem('entry_title', str(ds.getncattr('title')))
    self.dataset.SetMetadataItem('data_center',
                                 json.dumps(pti.get_gcmd_provider('UK/MOD/MET')))
    self.dataset.SetMetadataItem('ISO_topic_category',
                                 pti.get_iso19115_topic_category('oceans')['iso_topic_category'])
    self.dataset.SetMetadataItem('gcmd_location',
                                 json.dumps(pti.get_gcmd_location('sea surface')))
    # mm = pti.get_gcmd_instrument('amsr-e')
    # ee = pti.get_gcmd_platform('aqua')
    # self.dataset.SetMetadataItem('instrument', json.dumps(mm))
    # self.dataset.SetMetadataItem('platform', json.dumps(ee))
    self.dataset.SetMetadataItem('platform/instrument',
                                 json.dumps(self.get_platform_and_instrument_list(ds)))

def __init__(self, filename, gdal_dataset, gdal_metadata, date=None, ds=None, bands=None,
             cachedir=None, *args, **kwargs):
    self.test_mapper(filename)
    timestamp = date if date else self.get_date(filename)
    self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds, bands, cachedir)

    mditem = 'entry_title'
    if not self.dataset.GetMetadataItem(mditem):
        try:
            self.dataset.SetMetadataItem(mditem, str(self.ds.getncattr('title')))
        except AttributeError:
            self.dataset.SetMetadataItem(mditem, filename)
    mditem = 'data_center'
    if not self.dataset.GetMetadataItem(mditem):
        self.dataset.SetMetadataItem('data_center',
                                     json.dumps(pti.get_gcmd_provider('NO/MET')))
    mditem = 'ISO_topic_category'
    if not self.dataset.GetMetadataItem(mditem):
        self.dataset.SetMetadataItem(
            mditem,
            pti.get_iso19115_topic_category(
                'Imagery/Base Maps/Earth Cover')['iso_topic_category'])

    mm = pti.get_gcmd_instrument('multi-spectral')
    ee = pti.get_gcmd_platform('sentinel-2')
    self.dataset.SetMetadataItem('instrument', json.dumps(mm))
    self.dataset.SetMetadataItem('platform', json.dumps(ee))

def __init__(self, filename, gdal_dataset, gdal_metadata, GCP_COUNT=10, timestamp=None,
             **kwargs):
    filename_name = os.path.split(filename)[-1].split('.')[0]
    # Check if this is the correct mapper
    correct_mapper = False
    for location in self.SUPPORTED_LOCATIONS:
        # If the filename matches one of the locations, flag True and break the loop
        if filename_name.startswith(location):
            correct_mapper = True
            break
    if not correct_mapper:
        raise WrongMapperError
    # Import NetCDF4 dataset
    nc_dataset = Dataset(filename)
    # Define projection (depending on the HFR site)
    if nc_dataset.getncattr('site') == 'TORU':
        proj4 = '+proj=utm +zone=32 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'
        GRID_PX_SIZE = 1500  # Final raster pixel size in meters
    elif nc_dataset.getncattr('site') == 'FRUH':
        proj4 = '+proj=utm +zone=34 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'
        GRID_PX_SIZE = 5000  # Final raster pixel size in meters
    elif nc_dataset.getncattr('site') == 'BERL':
        proj4 = '+proj=utm +zone=35 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'
        GRID_PX_SIZE = 5000  # Final raster pixel size in meters
    else:
        raise WrongMapperError
    srs = osr.SpatialReference()
    srs.ImportFromProj4(proj4)
    projection = srs.ExportToWkt()
    # Get x and y grids
    x_grd, y_grd = self.create_linear_grid(nc_dataset['x'][:], nc_dataset['y'][:],
                                           GRID_PX_SIZE)
    raster_x_size, raster_y_size = x_grd.shape
    # Define geotransform
    geotransform = (x_grd.min(), GRID_PX_SIZE, 0.0, y_grd.max(), 0.0, GRID_PX_SIZE * -1)
    # Initialize the dataset from size, geotransform and projection
    self._init_from_dataset_params(raster_x_size, raster_y_size, geotransform, projection)
    # If no timestamp was specified, extract the date from the filename and use the first time
    if timestamp is None:
        timestamp = self.date_from_filename(filename)
    # Convert time info from the dataset to datetime
    timestamps = num2date(nc_dataset['time'][:].data, nc_dataset['time'].units)
    # Find the band id for the required timestamp
    # Note: add 1 because GDAL band counting starts from 1, not 0
    src_timestamp_id = np.where(timestamps == timestamp)[0][0] + 1
    # Iterate through all subdatasets and add bands to the dataset
    for subdataset in gdal_dataset.GetSubDatasets():
        # Get the name of the subdataset
        subdataset_name = subdataset[0].split(':')[2]
        # Skip subdatasets that are not in the accepted 3D vars list
        if subdataset_name not in self.BAND_NAMES:
            continue
        gdal_subdataset = gdal.Open(subdataset[0])
        # Must be float for the NaN replacement below
        band_data = gdal_subdataset.GetRasterBand(
            int(src_timestamp_id)).ReadAsArray().astype('float')
        # Replace the fill value with NaN
        fill_value = int(gdal_subdataset.GetMetadata_Dict()[
            '#'.join([subdataset_name, '_FillValue'])])
        band_data[band_data == fill_value] = np.nan
        # Interpolate data onto the regular grid
        band_grid_data = self.band2grid((nc_dataset['x'][:], nc_dataset['y'][:]),
                                        band_data, (x_grd, y_grd))
        # Create a VRT for the regridded data
        band_vrt = VRT.from_array(band_grid_data)
        # Add the VRT to the list of all dataset VRTs
        self.band_vrts[subdataset_name + 'VRT'] = band_vrt
        # Add the band to the dataset
        src = {'SourceFilename': self.band_vrts[subdataset_name + 'VRT'].filename,
               'SourceBand': 1}
        # Add band-specific metadata
        dst = {'name': subdataset_name}
        for key in gdal_subdataset.GetMetadata_Dict().keys():
            if key.startswith(subdataset_name):
                clean_metadata_name = key.split('#')[1]
                dst[clean_metadata_name] = gdal_subdataset.GetMetadata_Dict()[key]
        # Create the band
        self.create_band(src, dst)
    self.dataset.FlushCache()
    # Set GCMD metadata
    self.dataset.SetMetadataItem('instrument',
                                 json.dumps(pti.get_gcmd_instrument('SCR-HF')))
    self.dataset.SetMetadataItem('platform',
                                 json.dumps(pti.get_gcmd_platform('CODAR SeaSonde')))
    self.dataset.SetMetadataItem('Data Center', json.dumps(pti.get_gcmd_provider('NO/MET')))
    self.dataset.SetMetadataItem('Entry Title', 'Near-Real Time Surface Ocean Radial Velocity')
    self.dataset.SetMetadataItem('gcmd_location',
                                 json.dumps(pti.get_gcmd_location('NORTH SEA')))
    # Set time coverage metadata
    self.dataset.SetMetadataItem('time_coverage_start', timestamp.isoformat())
    self.dataset.SetMetadataItem('time_coverage_end',
                                 (timestamp + timedelta(minutes=59, seconds=59)).isoformat())
    # Set NetCDF dataset metadata
    for key, value in gdal_dataset.GetMetadata_Dict().items():
        self.dataset.SetMetadataItem(key.split('#')[1], value)

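# The geotransform used above follows GDAL's six-element convention
# (origin_x, pixel_width, row_rotation, origin_y, column_rotation,
# negative pixel_height). A short illustration with made-up UTM numbers:
gt = (400000.0, 1500.0, 0.0, 6600000.0, 0.0, -1500.0)  # hypothetical origin
pixel, line = 10, 20
x = gt[0] + pixel * gt[1] + line * gt[2]  # 415000.0
y = gt[3] + pixel * gt[4] + line * gt[5]  # 6570000.0
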
def get_or_create(self, uri, n_points=10, uri_filter_args=None,
                  uri_service_name=FILE_SERVICE_NAME,
                  uri_service_type=LOCAL_FILE_SERVICE,
                  *args, **kwargs):
    """ Create a dataset and the corresponding metadata

    Parameters
    ----------
    uri : str
        URI to a file or stream openable by Nansat
    n_points : int
        Number of border points (default is 10)
    uri_filter_args : dict
        Extra DatasetURI filter arguments if several datasets can refer to the same URI
    uri_service_name : str
        name of the service which is used ('dapService', 'fileService', 'http' or 'wms')
    uri_service_type : str
        type of the service which is used ('OPENDAP', 'local', 'HTTPServer' or 'WMS')

    Returns
    -------
    dataset and flag
    """
    if not uri_filter_args:
        uri_filter_args = {}

    # Validate uri - this should raise an exception if the uri doesn't point to a valid
    # file or stream
    validate_uri(uri)

    # Several datasets can refer to the same uri (e.g., scatterometers and svp drifters),
    # so we need to pass uri_filter_args
    uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
    if len(uris) > 0:
        return uris[0].dataset, False

    # Open file with Nansat
    n = Nansat(nansat_filename(uri), **kwargs)

    # get metadata from Nansat and get objects from vocabularies
    n_metadata = n.get_metadata()

    entry_id = n_metadata.get('entry_id', None)

    # set compulsory metadata (source)
    platform, _ = Platform.objects.get_or_create(json.loads(n_metadata['platform']))
    instrument, _ = Instrument.objects.get_or_create(json.loads(n_metadata['instrument']))
    specs = n_metadata.get('specs', '')
    source, _ = Source.objects.get_or_create(platform=platform, instrument=instrument,
                                             specs=specs)

    default_char_fields = {
        # Adding NERSC_ in front of the id violates the string representation of the uuid
        # 'entry_id': lambda: 'NERSC_' + str(uuid.uuid4()),
        'entry_id': lambda: str(uuid.uuid4()),
        'entry_title': lambda: 'NONE',
        'summary': lambda: 'NONE',
    }

    # set optional CharField metadata from Nansat or from default_char_fields
    options = {}
    try:
        existing_ds = Dataset.objects.get(entry_id=entry_id)
    except Dataset.DoesNotExist:
        existing_ds = None
    for name in default_char_fields:
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
            # prevent overwriting of existing values by defaults
            if existing_ds:
                options[name] = existing_ds.__getattribute__(name)
            else:
                options[name] = default_char_fields[name]()
        else:
            options[name] = n_metadata[name]

    default_foreign_keys = {
        'gcmd_location': {'model': Location,
                          'value': pti.get_gcmd_location('SEA SURFACE')},
        'data_center': {'model': DataCenter,
                        'value': pti.get_gcmd_provider('NERSC')},
        'ISO_topic_category': {'model': ISOTopicCategory,
                               'value': pti.get_iso19115_topic_category('Oceans')},
    }

    # set optional ForeignKey metadata from Nansat or from default_foreign_keys
    for name in default_foreign_keys:
        value = default_foreign_keys[name]['value']
        model = default_foreign_keys[name]['model']
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
        else:
            try:
                value = json.loads(n_metadata[name])
            except (json.JSONDecodeError, TypeError):
                warnings.warn('%s value of %s metadata provided in Nansat is wrong!'
                              % (n_metadata[name], name))
        if existing_ds:
            options[name] = existing_ds.__getattribute__(name)
        else:
            options[name], _ = model.objects.get_or_create(value)

    # Find coverage to set number of points in the geolocation
    if len(n.vrt.dataset.GetGCPs()) > 0:
        n.reproject_gcps()
    geolocation = GeographicLocation.objects.get_or_create(
        geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

    # create dataset
    # - the get_or_create method should use get_or_create here as well,
    #   or its name should be changed - see issue #127
    ds, created = Dataset.objects.update_or_create(
        entry_id=options['entry_id'],
        defaults={
            'time_coverage_start': n.get_metadata('time_coverage_start'),
            'time_coverage_end': n.get_metadata('time_coverage_end'),
            'source': source,
            'geographic_location': geolocation,
            'gcmd_location': options['gcmd_location'],
            'ISO_topic_category': options['ISO_topic_category'],
            'data_center': options['data_center'],
            'entry_title': options['entry_title'],
            'summary': options['summary'],
        })

    # create parameters
    all_band_meta = n.bands()
    for band_id in range(1, len(all_band_meta) + 1):
        band_meta = all_band_meta[band_id]
        standard_name = band_meta.get('standard_name', None)
        short_name = band_meta.get('short_name', None)
        units = band_meta.get('units', None)
        if standard_name in ['latitude', 'longitude', None]:
            continue
        params = Parameter.objects.filter(standard_name=standard_name)
        if params.count() > 1 and short_name is not None:
            params = params.filter(short_name=short_name)
        if params.count() > 1 and units is not None:
            params = params.filter(units=units)
        if params.count() >= 1:
            ds.parameters.add(params[0])

    # create dataset URI
    DatasetURI.objects.get_or_create(name=uri_service_name,
                                     service=uri_service_type,
                                     uri=uri,
                                     dataset=ds)

    return ds, created

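# A minimal usage sketch for the manager method above, assuming it is attached
# to the Dataset model as `objects` (the URI is hypothetical):
ds, created = Dataset.objects.get_or_create('file://localhost/data/ascat_20200101.nc')
if created:
    print('registered new dataset', ds.entry_id)
else:
    print('dataset was already registered')
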
def get_provider(self, raw_metadata):
    return pti.get_gcmd_provider('UC-LONDON/CPOM')

def test_get_gcmd_provider(self):
    item = 'NERSC'
    self.assertIsInstance(pti.get_gcmd_provider(item), collections.OrderedDict)

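# For reference, pythesint returns GCMD vocabulary entries as ordered
# mappings. A sketch of the rough shape; the exact field values come from the
# GCMD vocabulary and may differ:
import collections
import pythesint as pti

provider = pti.get_gcmd_provider('NERSC')
assert isinstance(provider, collections.OrderedDict)
# typical keys: 'Bucket_Level0'..'Bucket_Level3', 'Short_Name', 'Long_Name',
# 'Data_Center_URL'
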
def export2netcdf(self, n, ds, history_message=''):
    if not history_message:
        history_message = 'Export to netCDF [geospaas sar_doppler version %s]' % os.getenv(
            'GEOSPAAS_SAR_DOPPLER_VERSION', 'dev')

    ii = int(n.get_metadata('subswath'))
    date_created = datetime.now(timezone.utc)
    fn = self.nc_name(ds, ii)

    original = Nansat(n.get_metadata('Originating file'), subswath=ii)
    metadata = original.get_metadata()

    def pretty_print_gcmd_keywords(kw):
        retval = ''
        value_prev = ''
        for key, value in kw.items():
            if value:
                if value_prev:
                    retval += ' > '
                retval += value
                value_prev = value
        return retval

    # Set global metadata
    metadata['Conventions'] = metadata['Conventions'] + ', ACDD-1.3'
    # id - the ID from the database should be registered in the file if it is not already there
    try:
        entry_id = n.get_metadata('entry_id')
    except ValueError:
        n.set_metadata(key='entry_id', value=ds.entry_id)
    try:
        id = n.get_metadata('id')
    except ValueError:
        n.set_metadata(key='id', value=ds.entry_id)
    metadata['date_created'] = date_created.strftime('%Y-%m-%d')
    metadata['date_created_type'] = 'Created'
    metadata['date_metadata_modified'] = date_created.strftime('%Y-%m-%d')
    metadata['processing_level'] = 'Scientific'
    metadata['creator_role'] = 'Investigator'
    metadata['creator_name'] = 'Morten Wergeland Hansen'
    metadata['creator_email'] = '*****@*****.**'
    metadata['creator_institution'] = pretty_print_gcmd_keywords(
        pti.get_gcmd_provider('NO/MET'))
    metadata['project'] = ('Norwegian Space Agency project JOP.06.20.2: '
                           'Reprocessing and analysis of historical data for future '
                           'operationalization of Doppler shifts from SAR')
    metadata['publisher_name'] = 'Morten Wergeland Hansen'
    metadata['publisher_url'] = 'https://www.met.no/'
    metadata['publisher_email'] = '*****@*****.**'
    metadata['references'] = 'https://github.com/mortenwh/openwind'
    metadata['dataset_production_status'] = 'Complete'

    # Get image boundary
    lon, lat = n.get_border()
    boundary = 'POLYGON (('
    for la, lo in list(zip(lat, lon)):
        boundary += '%.2f %.2f, ' % (la, lo)
    boundary = boundary[:-2] + '))'
    # Set bounds as (lat, lon) following the ACDD convention and EPSG:4326
    metadata['geospatial_bounds'] = boundary
    metadata['geospatial_bounds_crs'] = 'EPSG:4326'

    # history
    try:
        history = n.get_metadata('history')
    except ValueError:
        metadata['history'] = date_created.isoformat() + ': ' + history_message
    else:
        metadata['history'] = (history + '\n' + date_created.isoformat() + ': ' +
                               history_message)

    # Set metadata from dict (export2thredds could take it as input..)
    for key, val in metadata.items():
        n.set_metadata(key=key, value=val)

    # Export data to netCDF
    logging.info('Exporting %s to %s (subswath %d)' % (n.filename, fn, ii + 1))
    n.export(filename=fn)
    # ww.export2thredds(thredds_fn, mask_name='swathmask', metadata=metadata, no_mask_value=1)

    # Clean netCDF attributes
    history = n.get_metadata('history')
    self.clean_nc_attrs(fn, history)

    # Add netCDF uri to DatasetURIs
    ncuri = 'file://localhost' + fn
    new_uri, created = DatasetURI.objects.get_or_create(uri=ncuri, dataset=ds)
    connection.close()

    return new_uri, created

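# `pretty_print_gcmd_keywords` flattens a GCMD keyword dict into a
# ' > '-joined path, skipping empty fields. A sketch with a hand-written input
# (not the real 'NO/MET' vocabulary entry, whose fields may differ):
kw = {'Bucket_Level0': 'GOVERNMENT AGENCIES', 'Bucket_Level1': '',
      'Short_Name': 'NO/MET', 'Long_Name': 'Norwegian Meteorological Institute'}
pretty_print_gcmd_keywords(kw)
# -> 'GOVERNMENT AGENCIES > NO/MET > Norwegian Meteorological Institute'
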
def _get_normalized_attributes(self, dataset_info, *args, **kwargs):
    """Gets dataset attributes using nansat"""
    normalized_attributes = {}
    n_points = int(kwargs.get('n_points', 10))
    nansat_options = kwargs.get('nansat_options', {})
    url_scheme = urlparse(dataset_info).scheme
    if 'http' not in url_scheme and 'ftp' not in url_scheme:
        normalized_attributes['geospaas_service_name'] = FILE_SERVICE_NAME
        normalized_attributes['geospaas_service'] = LOCAL_FILE_SERVICE
    elif 'http' in url_scheme and 'ftp' not in url_scheme:
        normalized_attributes['geospaas_service_name'] = DAP_SERVICE_NAME
        normalized_attributes['geospaas_service'] = OPENDAP_SERVICE
    elif 'ftp' in url_scheme:
        raise ValueError(
            f"Can't ingest '{dataset_info}': nansat can't open remote ftp files")

    # Open file with Nansat
    nansat_object = Nansat(nansat_filename(dataset_info),
                           log_level=self.LOGGER.getEffectiveLevel(),
                           **nansat_options)

    # get metadata from Nansat and get objects from vocabularies
    n_metadata = nansat_object.get_metadata()

    # set compulsory metadata (source)
    normalized_attributes['entry_title'] = n_metadata.get('entry_title', 'NONE')
    normalized_attributes['summary'] = n_metadata.get('summary', 'NONE')
    normalized_attributes['time_coverage_start'] = dateutil.parser.parse(
        n_metadata['time_coverage_start']).replace(tzinfo=tzutc())
    normalized_attributes['time_coverage_end'] = dateutil.parser.parse(
        n_metadata['time_coverage_end']).replace(tzinfo=tzutc())
    normalized_attributes['platform'] = json.loads(n_metadata['platform'])
    normalized_attributes['instrument'] = json.loads(n_metadata['instrument'])
    normalized_attributes['specs'] = n_metadata.get('specs', '')
    normalized_attributes['entry_id'] = n_metadata.get('entry_id',
                                                       'NERSC_' + str(uuid.uuid4()))

    # set optional ForeignKey metadata from Nansat or from defaults
    normalized_attributes['gcmd_location'] = n_metadata.get(
        'gcmd_location', pti.get_gcmd_location('SEA SURFACE'))
    normalized_attributes['provider'] = pti.get_gcmd_provider(
        n_metadata.get('provider', 'NERSC'))
    normalized_attributes['iso_topic_category'] = n_metadata.get(
        'ISO_topic_category', pti.get_iso19115_topic_category('Oceans'))

    # Find coverage to set number of points in the geolocation
    if nansat_object.vrt.dataset.GetGCPs():
        nansat_object.reproject_gcps()
    normalized_attributes['location_geometry'] = GEOSGeometry(
        nansat_object.get_border_wkt(n_points=n_points), srid=4326)

    json_dumped_dataset_parameters = n_metadata.get('dataset_parameters', None)
    if json_dumped_dataset_parameters:
        json_loads_result = json.loads(json_dumped_dataset_parameters)
        if isinstance(json_loads_result, list):
            normalized_attributes['dataset_parameters'] = [
                get_cf_or_wkv_standard_name(dataset_param)
                for dataset_param in json_loads_result
            ]
        else:
            raise TypeError(
                f"Can't ingest '{dataset_info}': the 'dataset_parameters' section of the "
                "metadata returned by nansat is not a JSON list")
    else:
        normalized_attributes['dataset_parameters'] = []

    return normalized_attributes

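# The scheme check above routes local paths to the file service and HTTP(S)
# URLs to OPeNDAP, while rejecting ftp. For reference:
from urllib.parse import urlparse

urlparse('/data/file.nc').scheme             # ''      -> local file service
urlparse('https://host/dap/file.nc').scheme  # 'https' -> DAP service
urlparse('ftp://host/file.nc').scheme        # 'ftp'   -> ValueError
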
def get_provider(self, raw_metadata):
    return pti.get_gcmd_provider('CSA')

def process(self, uri, force=False, *args, **kwargs):
    fn = 'WIND_' + os.path.basename(uri)
    if DatasetURI.objects.filter(uri__contains=fn) and not force:
        wds = Dataset.objects.filter(dataseturi__uri__contains=fn)[0]
        return wds, False

    try:
        w = wind_from_sar_and_arome_forecast(uri)
    except (TooHighResolutionError, PolarizationError, ObjectDoesNotExist) as e:
        if type(e) == Dataset.DoesNotExist:
            warnings.warn(uri + ': ' + e.args[0])
        else:
            # ObjectDoesNotExist could happen if there is no overlap between SAR and model
            warnings.warn(e.file + ': ' + e.msg)
        return None, False

    metadata = w.get_metadata()

    # Direct reprojection fails - gdal can't read the bands if we do w.reproject...
    # Workaround: Export wind to a temporary file
    # tmp_filename = os.path.join(settings.PRODUCTS_ROOT, 'TMP_WIND_' + os.path.basename(uri))
    fd, tmp_filename = tempfile.mkstemp(suffix='.nc')
    os.close(fd)  # Just in case - see https://www.logilab.org/blogentry/17873
    w.export(tmp_filename)

    # Read temporary file
    ww = Nansat(tmp_filename)

    # Reproject
    lon, lat = ww.get_geolocation_grids()
    # lon, lat = w.get_geolocation_grids()
    srs = '+proj=stere +datum=WGS84 +ellps=WGS84 +lat_0=%.2f +lon_0=%.2f +no_defs' % (
        np.mean(lat), np.mean(lon))
    xmin = -haversine(np.mean(lon), np.mean(lat), np.min(lon), np.mean(lat))
    xmax = haversine(np.mean(lon), np.mean(lat), np.max(lon), np.mean(lat))
    ymin = -haversine(np.mean(lon), np.mean(lat), np.mean(lon), np.min(lat))
    ymax = haversine(np.mean(lon), np.mean(lat), np.mean(lon), np.max(lat))
    ext = '-te %d %d %d %d -tr 500 500' % (xmin - 10000, ymin - 10000,
                                           xmax + 10000, ymax + 10000)
    d = Domain(srs, ext)
    ww.reproject(d, tps=True)
    # w.reproject(d, tps=True)

    # Set global metadata
    metadata['data_center'] = json.dumps(pti.get_gcmd_provider(kwargs.pop('data_center', '')))
    metadata['naming_authority'] = kwargs.pop('naming_authority', '')
    metadata['project'] = 'SIOS InfraNor'
    metadata['entry_title'] = 'Wind field from ' + os.path.basename(uri)
    metadata.pop('file_creation_date')
    metadata['history'] = (metadata['history'] + ' ' + timezone.now().isoformat() +
                           '. Calculated wind field from NRCS and Arome Arctic forecast '
                           'wind directions.')
    metadata.pop('institution')
    metadata['keywords'] += ', ['
    for key, value in pti.get_gcmd_science_keyword('U/V WIND COMPONENTS').items():
        if value:
            metadata['keywords'] += value + ', '
    metadata['keywords'] += ']'
    metadata.pop('LINE_SPACING')
    metadata.pop('PIXEL_SPACING')
    metadata['summary'] = ('Near surface (10m) wind from Arome Arctic forecast wind and ' +
                           metadata['summary'])
    metadata['title'] = 'Near surface wind from ' + metadata['title']

    # Get or create data folder
    start_time = parse(metadata['time_coverage_start'])
    yfolder = os.path.join(settings.PRODUCTS_ROOT, '{:04d}'.format(start_time.year))
    mfolder = os.path.join(yfolder, '{:02d}'.format(start_time.month))
    dfolder = os.path.join(mfolder, '{:02d}'.format(start_time.day))
    if not os.path.isdir(yfolder):
        os.mkdir(yfolder)
    if not os.path.isdir(mfolder):
        os.mkdir(mfolder)
    if not os.path.isdir(dfolder):
        os.mkdir(dfolder)

    # Export
    thredds_fn = os.path.join(dfolder, fn)
    wind_uri = 'file://localhost' + thredds_fn
    ww.export2thredds(thredds_fn, mask_name='swathmask', metadata=metadata, no_mask_value=1)
    wds, cr = super(WindManager, self).get_or_create(wind_uri)

    os.unlink(tmp_filename)

    return wds, cr

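# The reprojection extent above is built from great-circle distances (meters)
# between the scene centre and its edges. A minimal haversine sketch with the
# same (lon1, lat1, lon2, lat2) argument order as used above; the project's
# own implementation may differ:
import numpy as np

def haversine(lon1, lat1, lon2, lat2, radius=6371000.0):
    """Great-circle distance in meters between two lon/lat points."""
    lon1, lat1, lon2, lat2 = map(np.radians, (lon1, lat1, lon2, lat2))
    a = (np.sin((lat2 - lat1) / 2.0) ** 2 +
         np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2.0) ** 2)
    return 2.0 * radius * np.arcsin(np.sqrt(a))
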
def get_or_create(self, uri, n_points=10, uri_filter_args=None, *args, **kwargs):
    ''' Create dataset and corresponding metadata

    Parameters
    ----------
    uri : str
        URI to file or stream openable by Nansat
    n_points : int
        Number of border points (default is 10)
    uri_filter_args : dict
        Extra DatasetURI filter arguments if several datasets can refer to the same URI

    Returns
    -------
    dataset and flag
    '''
    if not uri_filter_args:
        uri_filter_args = {}

    # Validate uri - this should raise an exception if the uri doesn't point to a valid
    # file or stream
    validate_uri(uri)

    # Several datasets can refer to the same uri (e.g., scatterometers and svp drifters),
    # so we need to pass uri_filter_args
    uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
    if len(uris) > 0:
        return uris[0].dataset, False

    # Open file with Nansat
    n = Nansat(nansat_filename(uri), **kwargs)

    # get metadata from Nansat and get objects from vocabularies
    n_metadata = n.get_metadata()

    # set compulsory metadata (source)
    platform, _ = Platform.objects.get_or_create(json.loads(n_metadata['platform']))
    instrument, _ = Instrument.objects.get_or_create(json.loads(n_metadata['instrument']))
    specs = n_metadata.get('specs', '')
    source, _ = Source.objects.get_or_create(platform=platform, instrument=instrument,
                                             specs=specs)

    default_char_fields = {
        'entry_id': lambda: 'NERSC_' + str(uuid.uuid4()),
        'entry_title': lambda: 'NONE',
        'summary': lambda: 'NONE',
    }

    # set optional CharField metadata from Nansat or from default_char_fields
    options = {}
    for name in default_char_fields:
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
            options[name] = default_char_fields[name]()
        else:
            options[name] = n_metadata[name]

    default_foreign_keys = {
        'gcmd_location': {'model': Location,
                          'value': pti.get_gcmd_location('SEA SURFACE')},
        'data_center': {'model': DataCenter,
                        'value': pti.get_gcmd_provider('NERSC')},
        'ISO_topic_category': {'model': ISOTopicCategory,
                               'value': pti.get_iso19115_topic_category('Oceans')},
    }

    # set optional ForeignKey metadata from Nansat or from default_foreign_keys
    for name in default_foreign_keys:
        value = default_foreign_keys[name]['value']
        model = default_foreign_keys[name]['model']
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
        else:
            try:
                value = json.loads(n_metadata[name])
            except (json.JSONDecodeError, TypeError):
                warnings.warn('%s value of %s metadata provided in Nansat is wrong!'
                              % (n_metadata[name], name))
        options[name], _ = model.objects.get_or_create(value)

    # Find coverage to set number of points in the geolocation
    if len(n.vrt.dataset.GetGCPs()) > 0:
        n.reproject_gcps()
    geolocation = GeographicLocation.objects.get_or_create(
        geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

    # create dataset
    ds, created = Dataset.objects.get_or_create(
        time_coverage_start=n.get_metadata('time_coverage_start'),
        time_coverage_end=n.get_metadata('time_coverage_end'),
        source=source,
        geographic_location=geolocation,
        **options)

    # create dataset URI
    ds_uri, _ = DatasetURI.objects.get_or_create(uri=uri, dataset=ds)

    return ds, created