def nc_name(self, ds, ii):
    # Filename of exported netcdf
    fn = os.path.join(
        product_path(self.module_name(),
                     nansat_filename(ds.dataseturi_set.get(uri__endswith='.gsar').uri)),
        os.path.basename(
            nansat_filename(ds.dataseturi_set.get(uri__endswith='.gsar').uri)
        ).split('.')[0] + 'subswath%s.nc' % ii)
    connection.close()
    return fn
def get_merged_swaths(self, ds, **kwargs):
    """Get merged swaths
    """
    try:
        uri = ds.dataseturi_set.get(uri__contains='merged')
    except DatasetURI.DoesNotExist:
        n = Nansat(nansat_filename(ds.dataseturi_set.get(uri__contains='subswath1').uri))
        if not n.has_band('Ur'):
            # Process dataset
            ds, processed = self.process(ds, **kwargs)
        else:
            m = self.create_merged_swaths(ds)
        uri = ds.dataseturi_set.get(uri__contains='merged')
    connection.close()
    m = Nansat(nansat_filename(uri.uri))
    return m
def get_or_create(self, uri, force):
    # Validate uri - this should raise an exception if the uri doesn't
    # point to a valid file or stream
    validate_uri(uri)

    # Several datasets can refer to the same uri (e.g., scatterometers
    # and svp drifters), so we need to pass uri_filter_args
    uris = DatasetURI.objects.filter(uri=uri)

    # If the ingested uri is already in the database and not <force> ingestion then stop
    if uris.exists() and not force:
        return uris[0].dataset, False
    elif uris.exists() and force:
        uris[0].dataset.delete()

    # Open file with Nansat
    n = Nansat(nansat_filename(uri))

    # get metadata from Nansat and get objects from vocabularies
    n_metadata = n.get_metadata()

    # set compulsory metadata (source)
    platform, _ = Platform.objects.get_or_create(json.loads(n_metadata['platform']))
    instrument, _ = Instrument.objects.get_or_create(json.loads(n_metadata['instrument']))
    specs = n_metadata.get('specs', '')
    source, _ = Source.objects.get_or_create(platform=platform,
                                             instrument=instrument,
                                             specs=specs)

    footprint = Polygon(list(zip(*n.get_border())))
    geolocation = GeographicLocation.objects.get_or_create(geometry=footprint)[0]
    data_center = DataCenter.objects.get_or_create(
        json.loads(n_metadata['Data Center']))[0]
    iso_category = ISOTopicCategory.objects.get_or_create(
        pti.get_iso19115_topic_category('Oceans'))[0]
    location = Location.objects.get_or_create(json.loads(n_metadata['gcmd_location']))[0]

    # create dataset
    ds, created = Dataset.objects.get_or_create(
        time_coverage_start=make_aware(n.time_coverage_start),
        time_coverage_end=make_aware(n.time_coverage_start
                                     + timedelta(hours=23, minutes=59, seconds=59)),
        source=source,
        geographic_location=geolocation,
        ISO_topic_category=iso_category,
        data_center=data_center,
        summary='',
        gcmd_location=location,
        access_constraints='',
        entry_id='NERSC_' + str(uuid.uuid4()))
    ds_uri, _ = DatasetURI.objects.get_or_create(name=FILE_SERVICE_NAME,
                                                 service=LOCAL_FILE_SERVICE,
                                                 uri=uri,
                                                 dataset=ds)

    return ds, created
def get_trajectory(self, start_time, end_time):
    if not type(start_time) == datetime.datetime:
        raise ValueError('Given times must be of type datetime.datetime')
    if not type(end_time) == datetime.datetime:
        raise ValueError('Given times must be of type datetime.datetime')

    # Could also take the trajectory directly from the geometry given 0.25
    # day sampling frequency...
    m = re.search(r'^.*drifter\s{1}no\.\s{1}(\d+)$', self.entry_title)
    id = int(m.group(1))
    uu = self.dataseturi_set.get(uri__contains='buoydata')
    fn = nansat_filename(uu.uri)

    # Get all drifter ID's
    ids = np.loadtxt(fn, usecols=(0, ))
    # Get indices of current drifter
    ind = np.where(ids == id)
    # Get year, month, day and hour of each sample
    year = np.loadtxt(fn, usecols=(3, ))[ind]
    month = np.loadtxt(fn, usecols=(1, ))[ind]
    day = np.loadtxt(fn, usecols=(2, ))[ind]
    hour = np.remainder(day, np.floor(day)) * 24
    # Get longitudes and latitudes
    lat = np.loadtxt(fn, usecols=(4, ))[ind]
    lon = np.loadtxt(fn, usecols=(5, ))[ind]

    # Pandas DataFrame
    df = pd.DataFrame({'year': year,
                       'month': month,
                       'day': np.floor(day),
                       'hour': hour})
    # Create datetime64 array
    datetimes = pd.to_datetime(df)

    # Pick indices of required trajectory
    t0_diff = np.min(np.abs(datetimes - start_time.replace(tzinfo=None)))
    t1_diff = np.min(np.abs(datetimes - end_time.replace(tzinfo=None)))
    indt0 = np.argmin(np.abs(datetimes - start_time.replace(tzinfo=None)))
    indt1 = np.argmin(np.abs(datetimes - end_time.replace(tzinfo=None)))

    # Return geometry of required trajectory
    return LineString(list(zip(lon[indt0:indt1], lat[indt0:indt1])))
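# Standalone illustration (not part of the manager, synthetic values) of how the
# fractional day numbers in the drifter files are turned into timestamps above:
# the integer part of 'day' is the day of month, the fractional part encodes the
# hour, and pandas assembles a datetime series from year/month/day/hour columns.
import numpy as np
import pandas as pd

day = np.array([14.25, 14.50])                    # 06:00 and 12:00 UTC on the 14th
hour = np.remainder(day, np.floor(day)) * 24
df = pd.DataFrame({'year': [2012, 2012],
                   'month': [7, 7],
                   'day': np.floor(day),
                   'hour': hour})
print(pd.to_datetime(df))                         # 2012-07-14 06:00:00, 2012-07-14 12:00:00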
def add_polarization(apps, schema_editor):
    ds_model = apps.get_model('sar_doppler', 'dataset')
    extra_model = apps.get_model('sar_doppler', 'sardopplerextrametadata')
    for ds in ds_model.objects.filter(dataseturi__uri__endswith='.gsar'):
        if ds.sardopplerextrametadata_set.all():
            # This should only happen if the migration is interrupted
            # No point in adding polarization if it was already added...
            continue
        fn = nansat_filename(ds.dataseturi_set.get(uri__endswith='.gsar').uri)
        if not os.path.isfile(fn):
            # a missing file will break the migration
            # remove the dataset in case the file doesn't exist
            ds.delete()
            continue
        n = Nansat(fn)
        # Store the polarization and associate the dataset
        extra = extra_model(dataset=ds, polarization=n.get_metadata('polarization'))
        extra.save()
        ds.sardopplerextrametadata_set.add(extra)
def get_or_create(self, uri, geometry, *args, **kwargs):
    filename = nansat_filename(uri)
    data = pd.read_csv(filename, header=0)
    data_center, iso, source = self.set_metadata()
    for i in range(len(data)):
        # Check if a buoy location is inside of a geometry (domain)
        buoy_location = Point(data[' LONGITUDE'][i], data[' LATITUDE'][i])
        # TODO: All <continue> should appear in/affect an output
        if isinstance(geometry, Polygon):
            if not geometry.contains(buoy_location):
                continue
        else:
            continue
        buoy_time = datetime.strptime(data['Data Date (UTC)'][i], '%Y-%m-%d %H:%M:%S')
        if kwargs['end'] < buoy_time or kwargs['start'] > buoy_time:
            continue
        buoy_time = self.TIMEZONE.localize(buoy_time)
        geoloc, geo_cr = GeographicLocation.objects.get_or_create(geometry=buoy_location)
        ds, ds_cr = Dataset.objects.get_or_create(
            entry_title='Lofoten experiment 2016 ',
            ISO_topic_category=iso,
            data_center=data_center,
            summary='',
            time_coverage_start=buoy_time,
            time_coverage_end=buoy_time,
            source=source,
            geographic_location=geoloc)
        if ds_cr:
            data_uri, duc = DatasetURI.objects.get_or_create(uri=uri, dataset=ds)
def get_or_create(self, uri, n_points=10, uri_filter_args=None, *args, **kwargs):
    ''' Create dataset and corresponding metadata

    Parameters:
    ----------
        uri : str
            URI to file or stream openable by Nansat
        n_points : int
            Number of border points (default is 10)
        uri_filter_args : dict
            Extra DatasetURI filter arguments if several datasets can refer to the same URI

    Returns:
    -------
        dataset and flag
    '''
    if not uri_filter_args:
        uri_filter_args = {}

    # Validate uri - this should raise an exception if the uri doesn't
    # point to a valid file or stream
    validate_uri(uri)

    # Several datasets can refer to the same uri (e.g., scatterometers
    # and svp drifters), so we need to pass uri_filter_args
    uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
    if len(uris) > 0:
        return uris[0].dataset, False

    # Open file with Nansat
    n = Nansat(nansat_filename(uri), **kwargs)

    # get metadata from Nansat and get objects from vocabularies
    n_metadata = n.get_metadata()

    # set compulsory metadata (source)
    platform, _ = Platform.objects.get_or_create(json.loads(n_metadata['platform']))
    instrument, _ = Instrument.objects.get_or_create(json.loads(n_metadata['instrument']))
    specs = n_metadata.get('specs', '')
    source, _ = Source.objects.get_or_create(platform=platform,
                                             instrument=instrument,
                                             specs=specs)

    default_char_fields = {
        'entry_id': lambda: 'NERSC_' + str(uuid.uuid4()),
        'entry_title': lambda: 'NONE',
        'summary': lambda: 'NONE',
    }

    # set optional CharField metadata from Nansat or from default_char_fields
    options = {}
    for name in default_char_fields:
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
            options[name] = default_char_fields[name]()
        else:
            options[name] = n_metadata[name]

    default_foreign_keys = {
        'gcmd_location': {'model': Location,
                          'value': pti.get_gcmd_location('SEA SURFACE')},
        'data_center': {'model': DataCenter,
                        'value': pti.get_gcmd_provider('NERSC')},
        'ISO_topic_category': {'model': ISOTopicCategory,
                               'value': pti.get_iso19115_topic_category('Oceans')},
    }

    # set optional ForeignKey metadata from Nansat or from default_foreign_keys
    for name in default_foreign_keys:
        value = default_foreign_keys[name]['value']
        model = default_foreign_keys[name]['model']
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
        else:
            try:
                value = json.loads(n_metadata[name])
            except:
                warnings.warn('%s value of %s metadata provided in Nansat is wrong!'
                              % (n_metadata[name], name))
        options[name], _ = model.objects.get_or_create(value)

    # Find coverage to set number of points in the geolocation
    if len(n.vrt.dataset.GetGCPs()) > 0:
        n.reproject_gcps()
    geolocation = GeographicLocation.objects.get_or_create(
        geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

    # create dataset
    ds, created = Dataset.objects.get_or_create(
        time_coverage_start=n.get_metadata('time_coverage_start'),
        time_coverage_end=n.get_metadata('time_coverage_end'),
        source=source,
        geographic_location=geolocation,
        **options)

    # create dataset URI
    ds_uri, _ = DatasetURI.objects.get_or_create(uri=uri, dataset=ds)

    return ds, created
def get_or_create(self, uri, n_points=10, uri_filter_args=None,
                  uri_service_name=FILE_SERVICE_NAME,
                  uri_service_type=LOCAL_FILE_SERVICE, *args, **kwargs):
    """ Create dataset and corresponding metadata

    Parameters:
    ----------
        uri : str
            URI to file or stream openable by Nansat
        n_points : int
            Number of border points (default is 10)
        uri_filter_args : dict
            Extra DatasetURI filter arguments if several datasets can refer to the same URI
        uri_service_name : str
            name of the service which is used ('dapService', 'fileService', 'http' or 'wms')
        uri_service_type : str
            type of the service which is used ('OPENDAP', 'local', 'HTTPServer' or 'WMS')

    Returns:
    -------
        dataset and flag
    """
    if not uri_filter_args:
        uri_filter_args = {}

    # Validate uri - this should raise an exception if the uri doesn't
    # point to a valid file or stream
    validate_uri(uri)

    # Several datasets can refer to the same uri (e.g., scatterometers
    # and svp drifters), so we need to pass uri_filter_args
    uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
    if len(uris) > 0:
        return uris[0].dataset, False

    # Open file with Nansat
    n = Nansat(nansat_filename(uri), **kwargs)

    # get metadata from Nansat and get objects from vocabularies
    n_metadata = n.get_metadata()

    entry_id = n_metadata.get('entry_id', None)

    # set compulsory metadata (source)
    platform, _ = Platform.objects.get_or_create(json.loads(n_metadata['platform']))
    instrument, _ = Instrument.objects.get_or_create(json.loads(n_metadata['instrument']))
    specs = n_metadata.get('specs', '')
    source, _ = Source.objects.get_or_create(platform=platform,
                                             instrument=instrument,
                                             specs=specs)

    default_char_fields = {
        # Adding NERSC_ in front of the id violates the string representation of the uuid
        # 'entry_id': lambda: 'NERSC_' + str(uuid.uuid4()),
        'entry_id': lambda: str(uuid.uuid4()),
        'entry_title': lambda: 'NONE',
        'summary': lambda: 'NONE',
    }

    # set optional CharField metadata from Nansat or from default_char_fields
    options = {}
    try:
        existing_ds = Dataset.objects.get(entry_id=entry_id)
    except Dataset.DoesNotExist:
        existing_ds = None
    for name in default_char_fields:
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
            # prevent overwriting of existing values by defaults
            if existing_ds:
                options[name] = existing_ds.__getattribute__(name)
            else:
                options[name] = default_char_fields[name]()
        else:
            options[name] = n_metadata[name]

    default_foreign_keys = {
        'gcmd_location': {'model': Location,
                          'value': pti.get_gcmd_location('SEA SURFACE')},
        'data_center': {'model': DataCenter,
                        'value': pti.get_gcmd_provider('NERSC')},
        'ISO_topic_category': {'model': ISOTopicCategory,
                               'value': pti.get_iso19115_topic_category('Oceans')},
    }

    # set optional ForeignKey metadata from Nansat or from default_foreign_keys
    for name in default_foreign_keys:
        value = default_foreign_keys[name]['value']
        model = default_foreign_keys[name]['model']
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
        else:
            try:
                value = json.loads(n_metadata[name])
            except:
                warnings.warn('%s value of %s metadata provided in Nansat is wrong!'
                              % (n_metadata[name], name))
        if existing_ds:
            options[name] = existing_ds.__getattribute__(name)
        else:
            options[name], _ = model.objects.get_or_create(value)

    # Find coverage to set number of points in the geolocation
    if len(n.vrt.dataset.GetGCPs()) > 0:
        n.reproject_gcps()
    geolocation = GeographicLocation.objects.get_or_create(
        geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

    # create dataset
    # - the get_or_create method should use get_or_create here as well,
    #   or its name should be changed - see issue #127
    ds, created = Dataset.objects.update_or_create(
        entry_id=options['entry_id'],
        defaults={
            'time_coverage_start': n.get_metadata('time_coverage_start'),
            'time_coverage_end': n.get_metadata('time_coverage_end'),
            'source': source,
            'geographic_location': geolocation,
            'gcmd_location': options['gcmd_location'],
            'ISO_topic_category': options['ISO_topic_category'],
            'data_center': options['data_center'],
            'entry_title': options['entry_title'],
            'summary': options['summary']
        })

    # create parameter
    all_band_meta = n.bands()
    for band_id in range(1, len(all_band_meta) + 1):
        band_meta = all_band_meta[band_id]
        standard_name = band_meta.get('standard_name', None)
        short_name = band_meta.get('short_name', None)
        units = band_meta.get('units', None)
        if standard_name in ['latitude', 'longitude', None]:
            continue
        params = Parameter.objects.filter(standard_name=standard_name)
        if params.count() > 1 and short_name is not None:
            params = params.filter(short_name=short_name)
        if params.count() > 1 and units is not None:
            params = params.filter(units=units)
        if params.count() >= 1:
            ds.parameters.add(params[0])

    # create dataset URI
    DatasetURI.objects.get_or_create(name=uri_service_name,
                                     service=uri_service_type,
                                     uri=uri,
                                     dataset=ds)

    return ds, created
def add_svp_drifters(self, uri_metadata, uri_data,
                     time_coverage_start=None, time_coverage_end=None,
                     maxnum=None, minlat=-90, maxlat=90, minlon=-180, maxlon=180):
    ''' Create all datasets from given file and add corresponding metadata

    Parameters:
    ----------
        uri_data : str
            URI to file
        uri_metadata : str
            URI to metadata file
        time_coverage_start : timezone.datetime object
            Optional start time for ingestion
        time_coverage_end : timezone.datetime object
            Optional end time for ingestion

    Returns:
    -------
        count : Number of ingested buoy datasets
    '''
    # set metadata
    pp = Platform.objects.get(short_name='BUOYS')
    ii = Instrument.objects.get(short_name='DRIFTING BUOYS')
    src = Source.objects.get_or_create(platform=pp, instrument=ii)[0]
    dc = DataCenter.objects.get(short_name='DOC/NOAA/OAR/AOML')
    iso = ISOTopicCategory.objects.get(name='Oceans')

    # Metadata file columns and an example record:
    # 'ID WMC_id experimentNumber BuoyType deploymentDate deploymetTime deplat deplon
    #  endDate endTime endlat endlon drogueLostDate drogueLostTime deathReason'
    # '11846540 4400770 2222 SVPB 2012/07/17 10:00 59.61 317.61 2015/11/29 15:00
    #  57.66 352.24 2012/11/11 04:04 1\n'
    # Death reasons: 0=buoy still alive, 1=buoy ran aground, 2=picked up by
    # vessel, 3=stop transmitting, 4=sporadic transmissions, 5=bad
    # batteries, 6=inactive status

    ## Get and loop drifter identification numbers
    #id = np.loadtxt(metafile, usecols=(0,))
    #buoyType = np.loadtxt(metafile, usecols=(3,))
    ## load drifter deployment date
    #dep_date = np.loadtxt(metafile, usecols=(4,), dtype='str')
    ## load drifter deployment time
    #dep_time = np.loadtxt(metafile, usecols=(5,), dtype='str')

    metafile = nansat_filename(uri_metadata)
    datafile = nansat_filename(uri_data)

    print('Reading large files ...')
    names = ['id', 'WMC_id', 'expNum', 'buoyType', 'depDate', 'depTime',
             'depLat', 'depLon', 'endDate', 'endTime', 'endLat', 'endLon',
             'drogueLostDate', 'drogueLostTime', 'deathReason']
    metadata = pd.read_csv(metafile, delim_whitespace=True, header=None,
                           names=names,
                           usecols=['id', 'buoyType', 'depDate', 'depTime',
                                    'endDate', 'endTime'],
                           parse_dates={'depDateTime': ['depDate', 'depTime'],
                                        'endDateTime': ['endDate', 'endTime']}).to_records()
    data = pd.read_csv(datafile, header=None, delim_whitespace=True,
                       usecols=[0, 1, 2, 3, 4, 5],
                       names=['id', 'month', 'day', 'year', 'latitude', 'longitude'],
                       ).to_records()
    longitude = np.mod(data['longitude'] + 180, 360) - 180.
    hour = np.remainder(data['day'], np.floor(data['day'])) * 24
    df = pd.DataFrame({'year': data['year'],
                       'month': data['month'],
                       'day': data['day'],
                       'hour': hour})
    dates = pd.to_datetime(df).to_numpy().astype('<M8[h]')
    print('OK!')

    # set time_coverage_start/end as np.datetime64
    if time_coverage_start is None:
        time_coverage_start = metadata['depDateTime'].min()
    else:
        time_coverage_start = np.datetime64(time_coverage_start)
    if time_coverage_end is None:
        time_coverage_end = metadata['endDateTime'].max()
    else:
        time_coverage_end = np.datetime64(time_coverage_end)

    # select drifters matching given time period, i.e. which are
    # NOT taken only before or only after the given period
    ids = metadata['id'][~((metadata['endDateTime'] < time_coverage_start) +
                           (metadata['depDateTime'] > time_coverage_end))]

    cnt = 0
    for i, drifter_id in enumerate(ids[:maxnum]):
        buoyType = metadata['buoyType'][metadata['id'] == drifter_id][0]
        # find all valid drifter records for given period
        # Longitudes are shifted from range [0, 360] to range [-180, 180] degrees
        gpi = ((data['id'] == drifter_id) *
               (longitude >= minlon) * (longitude <= maxlon) *
               (data['latitude'] >= minlat) * (data['latitude'] <= maxlat) *
               (dates >= time_coverage_start) * (dates <= time_coverage_end))
        if len(gpi[gpi]) < 2:
            continue

        chunk_dates = np.arange(dates[gpi][0], dates[gpi][-1], CHUNK_DURATION * 24)
        for j, chunk_date in enumerate(chunk_dates):
            print('Add drifter #%d (%d/%d) on %s (%d/%d)'
                  % (drifter_id, i, len(ids), str(chunk_date), j, len(chunk_dates)))
            chunk_gpi = ((dates[gpi] >= chunk_date) *
                         (dates[gpi] < (chunk_date + CHUNK_DURATION * 24)))
            if len(chunk_gpi[chunk_gpi]) < 2:
                continue
            chunk_lon = longitude[gpi][chunk_gpi]
            chunk_lat = data['latitude'][gpi][chunk_gpi]
            geometry = LineString(list(zip(chunk_lon, chunk_lat)))
            geoloc, geo_cr = GeographicLocation.objects.get_or_create(geometry=geometry)
            if not geo_cr:
                continue
            ds, ds_cr = Dataset.objects.get_or_create(
                entry_title='%s drifter no. %d' % (buoyType, drifter_id),
                ISO_topic_category=iso,
                data_center=dc,
                summary='',
                time_coverage_start=chunk_date.astype(datetime.datetime),
                time_coverage_end=(chunk_date
                                   + CHUNK_DURATION * 24).astype(datetime.datetime),
                source=src,
                geographic_location=geoloc)
            if ds_cr:
                cnt += 1
                meta_uri, muc = DatasetURI.objects.get_or_create(uri=uri_metadata,
                                                                 dataset=ds)
                data_uri, duc = DatasetURI.objects.get_or_create(uri=uri_data,
                                                                 dataset=ds)
    return cnt
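# Hedged, standalone sketch (hypothetical values) of the chunking logic used above:
# drifter timestamps are stored as numpy datetime64 with hour resolution, so an
# integer step of CHUNK_DURATION * 24 advances the chunk start in whole days.
# CHUNK_DURATION is assumed here; the real module-level constant is defined elsewhere.
import numpy as np

CHUNK_DURATION = 5  # days (assumed for this illustration)
dates = np.arange(np.datetime64('2012-07-17T10', 'h'),
                  np.datetime64('2012-08-03T00', 'h'))       # hourly samples
chunk_starts = np.arange(dates[0], dates[-1], CHUNK_DURATION * 24)
for chunk_date in chunk_starts:
    in_chunk = (dates >= chunk_date) & (dates < chunk_date + CHUNK_DURATION * 24)
    print(chunk_date, in_chunk.sum())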
def _get_normalized_attributes(self, dataset_info, *args, **kwargs):
    """Gets dataset attributes using nansat"""
    normalized_attributes = {}
    n_points = int(kwargs.get('n_points', 10))
    nansat_options = kwargs.get('nansat_options', {})
    url_scheme = urlparse(dataset_info).scheme
    if 'http' not in url_scheme and 'ftp' not in url_scheme:
        normalized_attributes['geospaas_service_name'] = FILE_SERVICE_NAME
        normalized_attributes['geospaas_service'] = LOCAL_FILE_SERVICE
    elif 'http' in url_scheme and 'ftp' not in url_scheme:
        normalized_attributes['geospaas_service_name'] = DAP_SERVICE_NAME
        normalized_attributes['geospaas_service'] = OPENDAP_SERVICE
    elif 'ftp' in url_scheme:
        raise ValueError(
            f"Can't ingest '{dataset_info}': nansat can't open remote ftp files")

    # Open file with Nansat
    nansat_object = Nansat(nansat_filename(dataset_info),
                           log_level=self.LOGGER.getEffectiveLevel(),
                           **nansat_options)

    # get metadata from Nansat and get objects from vocabularies
    n_metadata = nansat_object.get_metadata()

    # set compulsory metadata (source)
    normalized_attributes['entry_title'] = n_metadata.get('entry_title', 'NONE')
    normalized_attributes['summary'] = n_metadata.get('summary', 'NONE')
    normalized_attributes['time_coverage_start'] = dateutil.parser.parse(
        n_metadata['time_coverage_start']).replace(tzinfo=tzutc())
    normalized_attributes['time_coverage_end'] = dateutil.parser.parse(
        n_metadata['time_coverage_end']).replace(tzinfo=tzutc())
    normalized_attributes['platform'] = json.loads(n_metadata['platform'])
    normalized_attributes['instrument'] = json.loads(n_metadata['instrument'])
    normalized_attributes['specs'] = n_metadata.get('specs', '')
    normalized_attributes['entry_id'] = n_metadata.get('entry_id',
                                                       'NERSC_' + str(uuid.uuid4()))

    # set optional ForeignKey metadata from Nansat or from defaults
    normalized_attributes['gcmd_location'] = n_metadata.get(
        'gcmd_location', pti.get_gcmd_location('SEA SURFACE'))
    normalized_attributes['provider'] = pti.get_gcmd_provider(
        n_metadata.get('provider', 'NERSC'))
    normalized_attributes['iso_topic_category'] = n_metadata.get(
        'ISO_topic_category', pti.get_iso19115_topic_category('Oceans'))

    # Find coverage to set number of points in the geolocation
    if nansat_object.vrt.dataset.GetGCPs():
        nansat_object.reproject_gcps()
    normalized_attributes['location_geometry'] = GEOSGeometry(
        nansat_object.get_border_wkt(n_points=n_points), srid=4326)

    json_dumped_dataset_parameters = n_metadata.get('dataset_parameters', None)
    if json_dumped_dataset_parameters:
        json_loads_result = json.loads(json_dumped_dataset_parameters)
        if isinstance(json_loads_result, list):
            normalized_attributes['dataset_parameters'] = [
                get_cf_or_wkv_standard_name(dataset_param)
                for dataset_param in json_loads_result
            ]
        else:
            raise TypeError(
                f"Can't ingest '{dataset_info}': the 'dataset_parameters' section of the "
                "metadata returned by nansat is not a JSON list")
    else:
        normalized_attributes['dataset_parameters'] = []

    return normalized_attributes
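# Small illustration (standalone, example URLs only) of the URL-scheme dispatch
# above: urlparse() extracts the scheme, which decides whether the URI is served
# locally or through OPeNDAP, while ftp schemes are rejected.
from urllib.parse import urlparse

print(urlparse('file:///data/sar/file.nc').scheme)            # 'file'  -> local file service
print(urlparse('/data/sar/file.nc').scheme)                   # ''      -> also local
print(urlparse('https://server/opendap/file.nc').scheme)      # 'https' -> OPeNDAP service
print(urlparse('ftp://server/file.nc').scheme)                # 'ftp'   -> raises ValueError above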
def get_or_create(self, uri, *args, **kwargs):
    """ Ingest gsar file to geo-spaas db
    """
    ds, created = super(DatasetManager, self).get_or_create(uri, *args, **kwargs)
    connection.close()

    # TODO: Check if the following is necessary
    if not type(ds) == Dataset:
        return ds, False

    fn = nansat_filename(uri)
    n = Nansat(fn, subswath=0)

    # set Dataset entry_title
    ds.entry_title = n.get_metadata('title')
    ds.save()

    if created:
        from sar_doppler.models import SARDopplerExtraMetadata
        # Store the polarization and associate the dataset
        extra, _ = SARDopplerExtraMetadata.objects.get_or_create(
            dataset=ds, polarization=n.get_metadata('polarization'))
        if not _:
            raise ValueError(
                'Created new dataset but could not create instance of ExtraMetadata')
        ds.sardopplerextrametadata_set.add(extra)
    connection.close()

    gg = WKTReader().read(n.get_border_wkt())

    #lon, lat = n.get_border()
    #ind_near_range = 0
    #ind_far_range = int(lon.size/4)
    #import pyproj
    #geod = pyproj.Geod(ellps='WGS84')
    #angle1, angle2, img_width = geod.inv(lon[ind_near_range], lat[ind_near_range],
    #                                     lon[ind_far_range], lat[ind_far_range])

    # If the area of the dataset geometry is larger than the area of the subswath
    # border, it means that the dataset has already been created (the area should
    # be the total area of all subswaths)
    if np.floor(ds.geographic_location.geometry.area) > np.round(gg.area):
        return ds, False

    swath_data = {}
    lon = {}
    lat = {}
    astep = {}
    rstep = {}
    az_left_lon = {}
    ra_upper_lon = {}
    az_right_lon = {}
    ra_lower_lon = {}
    az_left_lat = {}
    ra_upper_lat = {}
    az_right_lat = {}
    ra_lower_lat = {}
    num_border_points = 10
    border = 'POLYGON(('

    for i in range(self.N_SUBSWATHS):
        # Read subswaths
        swath_data[i] = Nansat(fn, subswath=i)

        lon[i], lat[i] = swath_data[i].get_geolocation_grids()

        astep[i] = int(max(1, (lon[i].shape[0] / 2 * 2 - 1) / num_border_points))
        rstep[i] = int(max(1, (lon[i].shape[1] / 2 * 2 - 1) / num_border_points))

        az_left_lon[i] = lon[i][0:-1:astep[i], 0]
        az_left_lat[i] = lat[i][0:-1:astep[i], 0]

        az_right_lon[i] = lon[i][0:-1:astep[i], -1]
        az_right_lat[i] = lat[i][0:-1:astep[i], -1]

        ra_upper_lon[i] = lon[i][-1, 0:-1:rstep[i]]
        ra_upper_lat[i] = lat[i][-1, 0:-1:rstep[i]]

        ra_lower_lon[i] = lon[i][0, 0:-1:rstep[i]]
        ra_lower_lat[i] = lat[i][0, 0:-1:rstep[i]]

    lons = np.concatenate((az_left_lon[0], ra_upper_lon[0], ra_upper_lon[1],
                           ra_upper_lon[2], ra_upper_lon[3], ra_upper_lon[4],
                           np.flipud(az_right_lon[4]), np.flipud(ra_lower_lon[4]),
                           np.flipud(ra_lower_lon[3]), np.flipud(ra_lower_lon[2]),
                           np.flipud(ra_lower_lon[1]),
                           np.flipud(ra_lower_lon[0]))).round(decimals=3)

    # apply 180 degree correction to longitude - code copied from get_border_wkt...
    # TODO: simplify using np.mod? (see the quick check after this function)
    for ilon, llo in enumerate(lons):
        lons[ilon] = copysign(acos(cos(llo * pi / 180.)) / pi * 180,
                              sin(llo * pi / 180.))

    lats = np.concatenate((az_left_lat[0], ra_upper_lat[0], ra_upper_lat[1],
                           ra_upper_lat[2], ra_upper_lat[3], ra_upper_lat[4],
                           np.flipud(az_right_lat[4]), np.flipud(ra_lower_lat[4]),
                           np.flipud(ra_lower_lat[3]), np.flipud(ra_lower_lat[2]),
                           np.flipud(ra_lower_lat[1]),
                           np.flipud(ra_lower_lat[0]))).round(decimals=3)

    poly_border = ','.join(str(llo) + ' ' + str(lla) for llo, lla in zip(lons, lats))
    wkt = 'POLYGON((%s))' % poly_border
    new_geometry = WKTReader().read(wkt)

    # Get or create new geolocation of dataset
    # Returns False if it is the same as an already created one
    # (this may happen when a lot of data is processed)
    ds.geographic_location, cr = GeographicLocation.objects.get_or_create(
        geometry=new_geometry)
    connection.close()

    return ds, True
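# Quick check (illustration only, addressing the TODO above): the copysign/acos
# wrapping of longitudes into [-180, 180] agrees with the simpler np.mod expression,
# apart from the sign convention at exactly +/-180 degrees.
import numpy as np
from math import acos, copysign, cos, pi, sin

lons = np.array([-350.0, -190.0, -10.0, 10.0, 170.0, 190.0, 350.0])
wrapped_trig = np.array([copysign(acos(cos(llo * pi / 180.)) / pi * 180,
                                  sin(llo * pi / 180.)) for llo in lons])
wrapped_mod = np.mod(lons + 180., 360.) - 180.
print(np.allclose(wrapped_trig, wrapped_mod))   # True for these values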
def create_merged_swaths(self, ds, EPSG=4326, **kwargs):
    """Merge swaths, add dataseturi, and return Nansat object.

    EPSG options:
        - 4326: WGS 84 / longlat
        - 3995: WGS 84 / Arctic Polar Stereographic
    """
    nn = {}
    nn[0] = Doppler(nansat_filename(ds.dataseturi_set.get(uri__endswith='%d.nc' % 0).uri))
    lon0, lat0 = nn[0].get_geolocation_grids()
    nn[1] = Doppler(nansat_filename(ds.dataseturi_set.get(uri__endswith='%d.nc' % 1).uri))
    lon1, lat1 = nn[1].get_geolocation_grids()
    nn[2] = Doppler(nansat_filename(ds.dataseturi_set.get(uri__endswith='%d.nc' % 2).uri))
    lon2, lat2 = nn[2].get_geolocation_grids()
    nn[3] = Doppler(nansat_filename(ds.dataseturi_set.get(uri__endswith='%d.nc' % 3).uri))
    lon3, lat3 = nn[3].get_geolocation_grids()
    nn[4] = Doppler(nansat_filename(ds.dataseturi_set.get(uri__endswith='%d.nc' % 4).uri))
    lon4, lat4 = nn[4].get_geolocation_grids()
    connection.close()

    dlon = np.mean([np.abs(np.mean(np.gradient(lon0, axis=1))),
                    np.abs(np.mean(np.gradient(lon1, axis=1))),
                    np.abs(np.mean(np.gradient(lon2, axis=1))),
                    np.abs(np.mean(np.gradient(lon3, axis=1))),
                    np.abs(np.mean(np.gradient(lon4, axis=1)))])
    nx = len(np.arange(
        np.array([lon0.min(), lon1.min(), lon2.min(), lon3.min(), lon4.min()]).min(),
        np.array([lon0.max(), lon1.max(), lon2.max(), lon3.max(), lon4.max()]).max(),
        dlon))
    dlat = np.mean([np.abs(np.mean(np.gradient(lat0, axis=0))),
                    np.abs(np.mean(np.gradient(lat1, axis=0))),
                    np.abs(np.mean(np.gradient(lat2, axis=0))),
                    np.abs(np.mean(np.gradient(lat3, axis=0))),
                    np.abs(np.mean(np.gradient(lat4, axis=0)))])
    ny = len(np.arange(
        np.array([lat0.min(), lat1.min(), lat2.min(), lat3.min(), lat4.min()]).min(),
        np.array([lat0.max(), lat1.max(), lat2.max(), lat3.max(), lat4.max()]).max(),
        dlat))

    if ny is None:
        ny = np.array([nn[0].shape()[0], nn[1].shape()[0], nn[2].shape()[0],
                       nn[3].shape()[0], nn[4].shape()[0]]).max()

    ## THIS DOES NOT WORK..
    #sensor_view = np.sort(
    #    np.append(np.append(np.append(np.append(
    #        nn[0]['sensor_view'][0, :],
    #        nn[1]['sensor_view'][0, :]),
    #        nn[2]['sensor_view'][0, :]),
    #        nn[3]['sensor_view'][0, :]),
    #        nn[4]['sensor_view'][0, :]))
    #nx = sensor_view.size
    #x = np.arange(nx)
    #def func(x, a, b, c, d):
    #    return a*x**3 + b*x**2 + c*x + d
    #def linear_func(x, a, b):
    #    return a*x + b
    #azimuth_time = np.sort(
    #    np.append(np.append(np.append(np.append(
    #        nn[0].get_azimuth_time(),
    #        nn[1].get_azimuth_time()),
    #        nn[2].get_azimuth_time()),
    #        nn[3].get_azimuth_time()),
    #        nn[4].get_azimuth_time()))
    #dt = azimuth_time.max() - azimuth_time[0]
    #tt = np.arange(0, dt, dt/ny)
    #tt = np.append(np.array([-dt/ny], dtype='<m8[us]'), tt)
    #tt = np.append(tt, tt[-1] + np.array([dt/ny, 2*dt/ny], dtype='<m8[us]'))
    #ny = len(tt)
    ## AZIMUTH_TIME
    #azimuth_time = (np.datetime64(azimuth_time[0]) + tt).astype(datetime)
    #popt, pcov = curve_fit(func, x, sensor_view)
    ## SENSOR VIEW ANGLE
    #alpha = np.ones((ny, sensor_view.size))*np.deg2rad(func(x, *popt))
    #range_time = np.sort(
    #    np.append(np.append(np.append(np.append(
    #        nn[0].get_range_time(),
    #        nn[1].get_range_time()),
    #        nn[2].get_range_time()),
    #        nn[3].get_range_time()),
    #        nn[4].get_range_time()))
    #popt, pcov = curve_fit(linear_func, x, range_time)
    ## RANGE_TIME
    #range_time = linear_func(x, *popt)
    #ecefPos, ecefVel = Doppler.orbital_state_vectors(azimuth_time)
    #eciPos, eciVel = ecef2eci(ecefPos, ecefVel, azimuth_time)
    ## Get satellite hour angle
    #satHourAng = np.deg2rad(Doppler.satellite_hour_angle(azimuth_time, ecefPos, ecefVel))
    ## Get attitude from the Envisat yaw steering law
    #psi, gamma, phi = np.deg2rad(Doppler.orbital_attitude_vectors(azimuth_time, satHourAng))
    #U1, AX1, S1 = Doppler.step_one_calculations(alpha, psi, gamma, phi, eciPos)
    #S2, U2, AX2 = Doppler.step_two_calculations(satHourAng, S1, U1, AX1)
    #S3, U3, AX3 = Doppler.step_three_a_calculations(eciPos, eciVel, S2, U2, AX2)
    #U3g = Doppler.step_three_b_calculations(S3, U3, AX3)
    #P3, U3g, lookAng = Doppler.step_four_calculations(S3, U3g, AX3, range_time)
    #dcm = dcmeci2ecef(azimuth_time, 'IAU-2000/2006')
    #lat = np.zeros((ny, nx))
    #lon = np.zeros((ny, nx))
    #alt = np.zeros((ny, nx))
    #for i in range(P3.shape[1]):
    #    ecefPos = np.matmul(dcm[0], P3[:, i, :, 0, np.newaxis])
    #    lla = ecef2lla(ecefPos)
    #    lat[:, i] = lla[:, 0]
    #    lon[:, i] = lla[:, 1]
    #    alt[:, i] = lla[:, 2]
    #lon = lon.round(decimals=5)
    #lat = lat.round(decimals=5)

    # THIS WORKS:
    lonmin = np.array([lon0.min(), lon1.min(), lon2.min(), lon3.min(), lon4.min()]).min()
    lonmax = np.array([lon0.max(), lon1.max(), lon2.max(), lon3.max(), lon4.max()]).max()
    latmin = np.array([lat0.min(), lat1.min(), lat2.min(), lat3.min(), lat4.min()]).min()
    latmax = np.array([lat0.max(), lat1.max(), lat2.max(), lat3.max(), lat4.max()]).max()

    if nx is None:
        nx = nn[0].shape()[1] + nn[1].shape()[1] + nn[2].shape()[1] + \
             nn[3].shape()[1] + nn[4].shape()[1]

    # prepare geospatial grid
    merged = Nansat.from_domain(
        Domain(NSR(EPSG), '-lle %f %f %f %f -ts %d %d'
               % (lonmin, latmin, lonmax, latmax, nx, ny)))

    ## THIS DOES NOT WORK..
    #merged = Nansat.from_domain(Domain.from_lonlat(lon, lat, add_gcps=False))
    #merged.add_band(array=np.rad2deg(alpha), parameters={'wkv': 'sensor_view'})

    dfdg = np.ones((self.N_SUBSWATHS)) * 5  # Hz (5 Hz a priori)
    for i in range(self.N_SUBSWATHS):
        dfdg[i] = nn[i].get_uncertainty_of_fdg()
        nn[i].reproject(merged, tps=True, resample_alg=1, block_size=2)

    # Initialize band arrays
    inc = np.ones((self.N_SUBSWATHS, merged.shape()[0], merged.shape()[1])) * np.nan
    fdg = np.ones((self.N_SUBSWATHS, merged.shape()[0], merged.shape()[1])) * np.nan
    ur = np.ones((self.N_SUBSWATHS, merged.shape()[0], merged.shape()[1])) * np.nan
    valid_sea_dop = np.ones((self.N_SUBSWATHS, merged.shape()[0], merged.shape()[1])) * np.nan
    std_fdg = np.ones((self.N_SUBSWATHS, merged.shape()[0], merged.shape()[1])) * np.nan
    std_ur = np.ones((self.N_SUBSWATHS, merged.shape()[0], merged.shape()[1])) * np.nan

    for ii in range(self.N_SUBSWATHS):
        inc[ii] = nn[ii]['incidence_angle']
        fdg[ii] = nn[ii]['fdg']
        ur[ii] = nn[ii]['Ur']
        valid_sea_dop[ii] = nn[ii]['valid_sea_doppler']
        # uncertainty of fdg is a scalar
        std_fdg[ii][valid_sea_dop[ii] == 1] = dfdg[ii]
        # uncertainty of ur
        std_ur[ii] = nn[ii].get_uncertainty_of_radial_current(dfdg[ii])

    # Calculate incidence angle as a simple average
    mean_inc = np.nanmean(inc, axis=0)
    merged.add_band(array=mean_inc,
                    parameters={'name': 'incidence_angle',
                                'wkv': 'angle_of_incidence'})

    # Calculate fdg as weighted average (see the standalone sketch after this function)
    mean_fdg = nansumwrapper((fdg/np.square(std_fdg)).data, axis=0) / \
        nansumwrapper((1./np.square(std_fdg)).data, axis=0)
    merged.add_band(
        array=mean_fdg,
        parameters={
            'name': 'fdg',
            'wkv': 'surface_backwards_doppler_frequency_shift_of_radar_wave_due_to_surface_velocity'
        })
    # Standard deviation of fdg
    std_mean_fdg = np.sqrt(1. / nansumwrapper((1./np.square(std_fdg)).data, axis=0))
    merged.add_band(array=std_mean_fdg, parameters={'name': 'std_fdg'})

    # Calculate ur as weighted average
    mean_ur = nansumwrapper((ur/np.square(std_ur)).data, axis=0) / \
        nansumwrapper((1./np.square(std_ur)).data, axis=0)
    merged.add_band(array=mean_ur, parameters={'name': 'Ur'})
    # Standard deviation of Ur
    std_mean_ur = np.sqrt(1. / nansumwrapper((1./np.square(std_ur)).data, axis=0))
    merged.add_band(array=std_mean_ur, parameters={'name': 'std_ur'})

    # Band of valid pixels
    vsd = np.nanmin(valid_sea_dop, axis=0)
    merged.add_band(array=vsd, parameters={'name': 'valid_sea_doppler'})

    # Add file to db
    fn = os.path.join(
        product_path(self.module_name(),
                     nansat_filename(ds.dataseturi_set.get(uri__endswith='.gsar').uri)),
        os.path.basename(
            nansat_filename(ds.dataseturi_set.get(uri__endswith='.gsar').uri)
        ).split('.')[0] + '_merged.nc')
    merged.export(filename=fn)
    ncuri = 'file://localhost' + fn
    new_uri, created = DatasetURI.objects.get_or_create(uri=ncuri, dataset=ds)
    connection.close()

    return merged
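# Standalone sketch of the per-pixel merging used above: subswath estimates are
# combined as an inverse-variance weighted mean, and the uncertainty of the merged
# value is sqrt(1 / sum(1/sigma_i**2)). np.nansum stands in for nansumwrapper here
# (assumed to behave the same for this illustration).
import numpy as np

fdg = np.array([[10., 12., np.nan]])        # one pixel stack from three subswaths
std_fdg = np.array([[5., 2.5, 5.]])         # a priori uncertainties (Hz)
weights = 1. / np.square(std_fdg)
weights[np.isnan(fdg)] = np.nan             # ignore subswaths without a valid estimate
mean_fdg = np.nansum(fdg * weights, axis=1) / np.nansum(weights, axis=1)
std_mean_fdg = np.sqrt(1. / np.nansum(weights, axis=1))
print(mean_fdg, std_mean_fdg)               # ~[11.6] and ~[2.24]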
def process(self, ds, force=False, *args, **kwargs):
    """ Create data products

    Returns
    =======
    ds : geospaas.catalog.models.Dataset
    processed : Boolean
        Flag to indicate if the dataset was processed or not
    """
    swath_data = {}

    # Set media path (where images will be stored)
    mp = media_path(self.module_name(),
                    nansat_filename(ds.dataseturi_set.get(uri__endswith='.gsar').uri))
    # Set product path (where netcdf products will be stored)
    ppath = product_path(self.module_name(),
                         nansat_filename(ds.dataseturi_set.get(uri__endswith='.gsar').uri))

    # Read subswaths
    dss = {1: None, 2: None, 3: None, 4: None, 5: None}
    processed = [True, True, True, True, True]
    failing = [False, False, False, False, False]
    for i in range(self.N_SUBSWATHS):
        # Check if the data has already been processed
        try:
            fn = nansat_filename(ds.dataseturi_set.get(uri__endswith='%d.nc' % i).uri)
        except DatasetURI.DoesNotExist:
            processed[i] = False
        else:
            dd = Nansat(fn)
            try:
                std_Ur = dd['std_Ur']
            except ValueError:
                processed[i] = False
        if processed[i] and not force:
            continue
        # Process from scratch to avoid duplication of bands
        fn = nansat_filename(ds.dataseturi_set.get(uri__endswith='.gsar').uri)
        try:
            dd = Doppler(fn, subswath=i)
        except Exception as e:
            logging.error('%s (Filename, subswath [1-5]): (%s, %d)' % (str(e), fn, i + 1))
            failing[i] = True
            continue

        # Check if the file is corrupted
        try:
            inc = dd['incidence_angle']
        except Exception as e:
            logging.error('%s (Filename, subswath [1-5]): (%s, %d)' % (str(e), fn, i + 1))
            failing[i] = True
            continue

        dss[i + 1] = dd

    if all(processed) and not force:
        logging.info("%s: The dataset has already been processed."
                     % nansat_filename(ds.dataseturi_set.get(uri__endswith='.gsar').uri))
        return ds, False

    if all(failing):
        logging.error("Processing of all subswaths is failing: %s"
                      % nansat_filename(ds.dataseturi_set.get(uri__endswith='.gsar').uri))
        return ds, False

    if any(failing):
        logging.error("Some but not all subswaths processed: %s"
                      % nansat_filename(ds.dataseturi_set.get(uri__endswith='.gsar').uri))
        return ds, False

    logging.info("Processing %s"
                 % nansat_filename(ds.dataseturi_set.get(uri__endswith='.gsar').uri))

    # Loop subswaths, process each of them
    processed = False

    def get_overlap(d1, d2):
        b1 = d1.get_border_geometry()
        b2 = d2.get_border_geometry()
        intersection = b1.Intersection(b2)
        lo1, la1 = d1.get_geolocation_grids()
        overlap = np.zeros(lo1.shape)
        for i in range(lo1.shape[0]):
            for j in range(lo1.shape[1]):
                wkt_point = 'POINT(%.5f %.5f)' % (lo1[i, j], la1[i, j])
                overlap[i, j] = intersection.Contains(
                    ogr.CreateGeometryFromWkt(wkt_point))
        return overlap

    for uri in ds.dataseturi_set.filter(uri__endswith='.nc'):
        logging.debug("%s" % nansat_filename(uri.uri))

    # Find pixels in dss[1] which overlap with pixels in dss[2]
    overlap12 = get_overlap(dss[1], dss[2])
    # Find pixels in dss[2] which overlap with pixels in dss[1]
    overlap21 = get_overlap(dss[2], dss[1])
    # and so on..
    overlap23 = get_overlap(dss[2], dss[3])
    overlap32 = get_overlap(dss[3], dss[2])
    overlap34 = get_overlap(dss[3], dss[4])
    overlap43 = get_overlap(dss[4], dss[3])
    overlap45 = get_overlap(dss[4], dss[5])
    overlap54 = get_overlap(dss[5], dss[4])

    # Get range bias corrected Doppler
    fdg = {}
    fdg[1] = dss[1].anomaly() - dss[1].range_bias()
    fdg[2] = dss[2].anomaly() - dss[2].range_bias()
    fdg[3] = dss[3].anomaly() - dss[3].range_bias()
    fdg[4] = dss[4].anomaly() - dss[4].range_bias()
    fdg[5] = dss[5].anomaly() - dss[5].range_bias()

    # Get median values at overlapping borders
    median12 = np.nanmedian(fdg[1][np.where(overlap12)])
    median21 = np.nanmedian(fdg[2][np.where(overlap21)])
    median23 = np.nanmedian(fdg[2][np.where(overlap23)])
    median32 = np.nanmedian(fdg[3][np.where(overlap32)])
    median34 = np.nanmedian(fdg[3][np.where(overlap34)])
    median43 = np.nanmedian(fdg[4][np.where(overlap43)])
    median45 = np.nanmedian(fdg[4][np.where(overlap45)])
    median54 = np.nanmedian(fdg[5][np.where(overlap54)])

    # Adjust levels to align at subswath borders (see the sketch after this function)
    fdg[1] -= median12 - np.nanmedian(np.array([median12, median21]))
    fdg[2] -= median21 - np.nanmedian(np.array([median12, median21]))

    fdg[1] -= median23 - np.nanmedian(np.array([median23, median32]))
    fdg[2] -= median23 - np.nanmedian(np.array([median23, median32]))
    fdg[3] -= median32 - np.nanmedian(np.array([median23, median32]))

    fdg[1] -= median34 - np.nanmedian(np.array([median34, median43]))
    fdg[2] -= median34 - np.nanmedian(np.array([median34, median43]))
    fdg[3] -= median34 - np.nanmedian(np.array([median34, median43]))
    fdg[4] -= median43 - np.nanmedian(np.array([median34, median43]))

    fdg[1] -= median45 - np.nanmedian(np.array([median45, median54]))
    fdg[2] -= median45 - np.nanmedian(np.array([median45, median54]))
    fdg[3] -= median45 - np.nanmedian(np.array([median45, median54]))
    fdg[4] -= median45 - np.nanmedian(np.array([median45, median54]))
    fdg[5] -= median54 - np.nanmedian(np.array([median45, median54]))

    # Correct by land or mean fww
    try:
        wind_fn = nansat_filename(
            Dataset.objects.get(
                source__platform__short_name='ERA15DAS',
                time_coverage_start__lte=ds.time_coverage_end,
                time_coverage_end__gte=ds.time_coverage_start).dataseturi_set.get().uri)
    except Exception as e:
        logging.error("%s - in search for ERA15DAS data (%s, %s, %s) "
                      % (str(e),
                         nansat_filename(ds.dataseturi_set.get(uri__endswith=".gsar").uri),
                         ds.time_coverage_start,
                         ds.time_coverage_end))
        return ds, False
    connection.close()

    land = np.array([])
    fww = np.array([])
    offset_corrected = 0
    for key in dss.keys():
        land = np.append(land,
                         fdg[key][dss[key]['valid_land_doppler'] == 1].flatten())

    if land.any():
        logging.info('Using land for bias corrections')
        land_bias = np.nanmedian(land)
        offset_corrected = 1
    else:
        logging.info('Using CDOP wind-waves Doppler for bias corrections')
        # correct by mean wind doppler
        for key in dss.keys():
            ff = fdg[key].copy()
            # do CDOP correction
            ff[dss[key]['valid_sea_doppler'] == 1] = \
                ff[dss[key]['valid_sea_doppler'] == 1] \
                - dss[key].wind_waves_doppler(wind_fn)[0][dss[key]['valid_sea_doppler'] == 1]
            ff[dss[key]['valid_doppler'] == 0] = np.nan
            fww = np.append(fww, ff.flatten())
        land_bias = np.nanmedian(fww)
        if np.isnan(land_bias):
            offset_corrected = 0
            raise Exception('land bias is NaN...')
        else:
            offset_corrected = 1

    for key in dss.keys():
        fdg[key] -= land_bias
        # Set unrealistically high/low values to NaN (ref issue #4 and #5)
        fdg[key][fdg[key] < -100] = np.nan
        fdg[key][fdg[key] > 100] = np.nan
        # Add fdg[key] as band
        dss[key].add_band(
            array=fdg[key],
            parameters={
                'wkv': 'surface_backwards_doppler_frequency_shift_of_radar_wave_due_to_surface_velocity',
                'offset_corrected': str(offset_corrected)
            })

        # Add Doppler anomaly
        dss[key].add_band(
            array=dss[key].anomaly(),
            parameters={
                'wkv': 'anomaly_of_surface_backwards_doppler_centroid_frequency_shift_of_radar_wave'
            })

        # Add wind doppler and its uncertainty as bands
        fww, dfww = dss[key].wind_waves_doppler(wind_fn)
        dss[key].add_band(
            array=fww,
            parameters={
                'wkv': 'surface_backwards_doppler_frequency_shift_of_radar_wave_due_to_wind_waves'
            })
        dss[key].add_band(array=dfww, parameters={'name': 'std_fww'})

        # Calculate range current velocity component
        v_current, std_v, offset_corrected = \
            dss[key].surface_radial_doppler_sea_water_velocity(wind_fn, fdg=fdg[key])
        dss[key].add_band(
            array=v_current,
            parameters={
                'wkv': 'surface_radial_doppler_sea_water_velocity',
                'offset_corrected': str(offset_corrected)
            })
        dss[key].add_band(array=std_v, parameters={'name': 'std_Ur'})

        # Set satellite pass
        lon, lat = dss[key].get_geolocation_grids()
        gg = np.gradient(lat, axis=0)
        dss[key].add_band(
            array=gg,
            parameters={
                'name': 'sat_pass',
                'comment': 'ascending pass is >0, descending pass is <0'
            })

        history_message = ('sar_doppler.models.Dataset.objects.process("%s") '
                           '[geospaas sar_doppler version %s]'
                           % (ds, os.getenv('GEOSPAAS_SAR_DOPPLER_VERSION', 'dev')))
        new_uri, created = self.export2netcdf(dss[key], ds,
                                              history_message=history_message)

    processed = True

    m = self.create_merged_swaths(ds)

    return ds, processed
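# Minimal illustration (synthetic arrays, not part of the manager) of the
# inter-subswath levelling above: the overlapping border pixels of two neighbouring
# subswaths are compared, and both are shifted so their medians meet halfway, which
# removes step discontinuities in fdg without changing the combined median level.
import numpy as np

fdg1 = np.array([4.0, 5.0, 6.0])            # border pixels of subswath 1
fdg2 = np.array([9.0, 10.0, 11.0])          # same area seen by subswath 2
median12 = np.nanmedian(fdg1)
median21 = np.nanmedian(fdg2)
target = np.nanmedian(np.array([median12, median21]))
fdg1 -= median12 - target
fdg2 -= median21 - target
print(np.nanmedian(fdg1), np.nanmedian(fdg2))   # both 7.5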