def get_or_create(self, uri, n_points=10, uri_filter_args=None, *args, **kwargs):
    """Create dataset and corresponding metadata.

    Parameters
    ----------
    uri : str
        URI to file or stream openable by Nansat
    n_points : int
        Number of border points (default is 10)
    uri_filter_args : dict
        Extra DatasetURI filter arguments if several datasets can refer to
        the same URI
    *args
        Accepted for interface compatibility; not used in this method.
    **kwargs
        Forwarded to the Nansat constructor.

    Returns
    -------
    dataset and flag
        ``(dataset, False)`` when a DatasetURI matching ``uri`` (and
        ``uri_filter_args``) already exists; otherwise the result of
        ``Dataset.objects.get_or_create``.
    """
    if not uri_filter_args:
        uri_filter_args = {}

    # Validate uri - this should raise an exception if the uri doesn't point
    # to a valid file or stream
    validate_uri(uri)

    # Several datasets can refer to the same uri (e.g., scatterometers and
    # svp drifters), so we need to pass uri_filter_args
    uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
    if len(uris) > 0:
        return uris[0].dataset, False

    # Open file with Nansat
    n = Nansat(nansat_filename(uri), **kwargs)

    # get metadata from Nansat and get objects from vocabularies
    n_metadata = n.get_metadata()

    # set compulsory metadata (source); a missing 'platform' or 'instrument'
    # key raises KeyError here
    platform, _ = Platform.objects.get_or_create(
        json.loads(n_metadata['platform']))
    instrument, _ = Instrument.objects.get_or_create(
        json.loads(n_metadata['instrument']))
    specs = n_metadata.get('specs', '')
    source, _ = Source.objects.get_or_create(
        platform=platform, instrument=instrument, specs=specs)

    # Defaults are callables so that a fresh uuid is only generated when it
    # is actually needed.
    default_char_fields = {
        'entry_id': lambda: 'NERSC_' + str(uuid.uuid4()),
        'entry_title': lambda: 'NONE',
        'summary': lambda: 'NONE',
    }

    # set optional CharField metadata from Nansat or from default_char_fields
    options = {}
    for name, make_default in default_char_fields.items():
        if name in n_metadata:
            options[name] = n_metadata[name]
        else:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
            options[name] = make_default()

    default_foreign_keys = {
        'gcmd_location': {
            'model': Location,
            'value': pti.get_gcmd_location('SEA SURFACE')
        },
        'data_center': {
            'model': DataCenter,
            'value': pti.get_gcmd_provider('NERSC')
        },
        'ISO_topic_category': {
            'model': ISOTopicCategory,
            'value': pti.get_iso19115_topic_category('Oceans')
        },
    }

    # set optional ForeignKey metadata from Nansat or from default_foreign_keys
    for name, spec in default_foreign_keys.items():
        value = spec['value']
        model = spec['model']
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
        else:
            try:
                value = json.loads(n_metadata[name])
            except (TypeError, ValueError):
                # Only decoding problems fall back to the default value;
                # json.loads raises ValueError (or its JSONDecodeError
                # subclass) for malformed text and TypeError for non-text.
                warnings.warn(
                    '%s value of %s metadata provided in Nansat is wrong!'
                    % (n_metadata[name], name))
        options[name], _ = model.objects.get_or_create(value)

    # Find coverage to set number of points in the geolocation
    if len(n.vrt.dataset.GetGCPs()) > 0:
        n.reproject_gcps()
    geolocation = GeographicLocation.objects.get_or_create(
        geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

    # create dataset
    ds, created = Dataset.objects.get_or_create(
        time_coverage_start=n.get_metadata('time_coverage_start'),
        time_coverage_end=n.get_metadata('time_coverage_end'),
        source=source,
        geographic_location=geolocation,
        **options)

    # create dataset URI
    DatasetURI.objects.get_or_create(uri=uri, dataset=ds)

    return ds, created
def get_or_create(self, uri, n_points=10, uri_filter_args=None,
                  uri_service_name=FILE_SERVICE_NAME,
                  uri_service_type=LOCAL_FILE_SERVICE,
                  *args, **kwargs):
    """Create dataset and corresponding metadata.

    Parameters
    ----------
    uri : str
        URI to file or stream openable by Nansat
    n_points : int
        Number of border points (default is 10)
    uri_filter_args : dict
        Extra DatasetURI filter arguments if several datasets can refer to
        the same URI
    uri_service_name : str
        name of the service which is used ('dapService', 'fileService',
        'http' or 'wms')
    uri_service_type : str
        type of the service which is used ('OPENDAP', 'local', 'HTTPServer'
        or 'WMS')
    *args
        Accepted for interface compatibility; not used in this method.
    **kwargs
        Forwarded to the Nansat constructor.

    Returns
    -------
    dataset and flag
        ``(dataset, False)`` when a DatasetURI matching ``uri`` (and
        ``uri_filter_args``) already exists; otherwise the result of
        ``Dataset.objects.update_or_create`` keyed on ``entry_id``.
    """
    if not uri_filter_args:
        uri_filter_args = {}

    # Validate uri - this should raise an exception if the uri doesn't point
    # to a valid file or stream
    validate_uri(uri)

    # Several datasets can refer to the same uri (e.g., scatterometers and
    # svp drifters), so we need to pass uri_filter_args
    uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
    if len(uris) > 0:
        return uris[0].dataset, False

    # Open file with Nansat
    n = Nansat(nansat_filename(uri), **kwargs)

    # get metadata from Nansat and get objects from vocabularies
    n_metadata = n.get_metadata()
    entry_id = n_metadata.get('entry_id', None)

    # set compulsory metadata (source); a missing 'platform' or 'instrument'
    # key raises KeyError here
    platform, _ = Platform.objects.get_or_create(
        json.loads(n_metadata['platform']))
    instrument, _ = Instrument.objects.get_or_create(
        json.loads(n_metadata['instrument']))
    specs = n_metadata.get('specs', '')
    source, _ = Source.objects.get_or_create(
        platform=platform, instrument=instrument, specs=specs)

    default_char_fields = {
        # Adding NERSC_ in front of the id violates the string
        # representation of the uuid, so the bare uuid is used.
        'entry_id': lambda: str(uuid.uuid4()),
        'entry_title': lambda: 'NONE',
        'summary': lambda: 'NONE',
    }

    # Look up a dataset already stored under this entry_id so that its
    # values are not overwritten by defaults below.
    try:
        existing_ds = Dataset.objects.get(entry_id=entry_id)
    except Dataset.DoesNotExist:
        existing_ds = None

    # set optional CharField metadata from Nansat or from default_char_fields
    options = {}
    for name, make_default in default_char_fields.items():
        if name in n_metadata:
            options[name] = n_metadata[name]
            continue
        warnings.warn('%s is not provided in Nansat metadata!' % name)
        # prevent overwriting of existing values by defaults
        if existing_ds:
            options[name] = getattr(existing_ds, name)
        else:
            options[name] = make_default()

    default_foreign_keys = {
        'gcmd_location': {
            'model': Location,
            'value': pti.get_gcmd_location('SEA SURFACE')
        },
        'data_center': {
            'model': DataCenter,
            'value': pti.get_gcmd_provider('NERSC')
        },
        'ISO_topic_category': {
            'model': ISOTopicCategory,
            'value': pti.get_iso19115_topic_category('Oceans')
        },
    }

    # set optional ForeignKey metadata from Nansat or from default_foreign_keys
    for name, spec in default_foreign_keys.items():
        value = spec['value']
        model = spec['model']
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
        else:
            try:
                value = json.loads(n_metadata[name])
            except (TypeError, ValueError):
                # Only decoding problems fall back to the default value;
                # json.loads raises ValueError (or its JSONDecodeError
                # subclass) for malformed text and TypeError for non-text.
                warnings.warn(
                    '%s value of %s metadata provided in Nansat is wrong!'
                    % (n_metadata[name], name))
        # An existing dataset keeps its stored relation, whatever the file
        # metadata says; otherwise the vocabulary entry is fetched/created.
        if existing_ds:
            options[name] = getattr(existing_ds, name)
        else:
            options[name], _ = model.objects.get_or_create(value)

    # Find coverage to set number of points in the geolocation
    if len(n.vrt.dataset.GetGCPs()) > 0:
        n.reproject_gcps()
    geolocation = GeographicLocation.objects.get_or_create(
        geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

    # create dataset
    # - the get_or_create method should use get_or_create here as well,
    #   or its name should be changed - see issue #127
    ds, created = Dataset.objects.update_or_create(
        entry_id=options['entry_id'],
        defaults={
            'time_coverage_start': n.get_metadata('time_coverage_start'),
            'time_coverage_end': n.get_metadata('time_coverage_end'),
            'source': source,
            'geographic_location': geolocation,
            'gcmd_location': options['gcmd_location'],
            'ISO_topic_category': options['ISO_topic_category'],
            'data_center': options['data_center'],
            'entry_title': options['entry_title'],
            'summary': options['summary'],
        })

    # create parameter: attach the first Parameter matching each band,
    # narrowing by short_name and then units only while the match is ambiguous
    all_band_meta = n.bands()
    for band_id in range(1, len(all_band_meta) + 1):
        band_meta = all_band_meta[band_id]
        standard_name = band_meta.get('standard_name', None)
        short_name = band_meta.get('short_name', None)
        units = band_meta.get('units', None)
        if standard_name in ['latitude', 'longitude', None]:
            continue
        params = Parameter.objects.filter(standard_name=standard_name)
        if params.count() > 1 and short_name is not None:
            params = params.filter(short_name=short_name)
        if params.count() > 1 and units is not None:
            params = params.filter(units=units)
        if params.count() >= 1:
            ds.parameters.add(params[0])

    # create dataset URI
    DatasetURI.objects.get_or_create(
        name=uri_service_name, service=uri_service_type, uri=uri, dataset=ds)

    return ds, created
def get_or_create(self, uri, reprocess=False, *args, **kwargs):
    """Ingest a SAR NRCS file and create a PNG visualization per NRCS band.

    Parameters
    ----------
    uri : str
        URI of the file; passed to the parent manager and to
        ``nansat_filename``.
    reprocess : bool
        Currently unused (see the note in the body).
    *args, **kwargs
        Forwarded to the parent manager's ``get_or_create``.

    Returns
    -------
    dataset and flag
        The flag is always True, regardless of whether the parent manager
        created the dataset.
    """
    # ingest file to db
    ds, _ = super(DatasetManager, self).get_or_create(uri, *args, **kwargs)

    # set Dataset entry_title
    ds.entry_title = 'SAR NRCS'
    ds.save()

    # Unless reprocess==True, we may not need to do the following... (see
    # managers.py in sar doppler processor). A check for already existing
    # visualization(s) is not implemented yet.

    n = Nansat(nansat_filename(uri))
    n.reproject_GCPs()
    n.resize(pixelsize=500)
    lon, lat = n.get_corners()
    # cap the northern limit at 85 degrees for the EPSG:3857 target domain
    lat_max = min(lat.max(), 85)
    d = Domain(
        NSR(3857),
        '-lle %f %f %f %f -ts %d %d' % (lon.min(), lat.min(), lon.max(),
                                        lat_max, n.shape()[1], n.shape()[0]))

    # Get all NRCS bands; bands without a 'standard_name' entry are skipped.
    # items() instead of iteritems() so this runs on Python 2 and 3.
    s0bands = [
        key for key, value in n.bands().items()
        if 'standard_name' in value and value['standard_name'] == standard_name
    ]

    # Create data products
    mm = self.__module__.split('.')
    module = '%s.%s' % (mm[0], mm[1])
    mp = media_path(module, n.fileName)

    # Create png's for each band
    for band in s0bands:
        # Formatted string prints the same text with the Python 2 print
        # statement and the Python 3 print function.
        print('Visualize %s' % (band,))
        s0_tmp = n[band]
        n_tmp = Nansat(domain=n, array=s0_tmp)
        n_tmp.reproject_GCPs()
        n_tmp.reproject(d)
        s0 = n_tmp[1]
        n_tmp = None  # drop the temporary Nansat object
        # mask is 0 where the value is NaN or non-positive, 1 elsewhere
        mask = np.ones(s0.shape, np.uint8)
        mask[np.isnan(s0) | (s0 <= 0)] = 0
        # convert to decibel
        s0 = np.log10(s0) * 10.

        meta = n.bands()[band]
        product_filename = '%s_%s.png' % (meta['short_name'],
                                          meta['polarization'])

        nansatFigure(s0, mask,
                     polarization_clims[meta['polarization']][0],
                     polarization_clims[meta['polarization']][1],
                     mp, product_filename)

        # Get DatasetParameter
        param = Parameter.objects.get(short_name=meta['short_name'])
        dsp, _ = DatasetParameter.objects.get_or_create(
            dataset=ds, parameter=param)

        # Create Visualization
        geom, _ = GeographicLocation.objects.get_or_create(
            geometry=WKTReader().read(n.get_border_wkt()))
        vv, _ = Visualization.objects.get_or_create(
            uri='file://localhost%s/%s' % (mp, product_filename),
            title='%s %s polarization' % (param.standard_name,
                                          meta['polarization']),
            geographic_location=geom)

        # Create VisualizationParameter
        VisualizationParameter.objects.get_or_create(
            visualization=vv, ds_parameter=dsp)

    return ds, True
def get_or_create(self, uri, reprocess=False, *args, **kwargs):
    """Ingest a wind file and write a PNG visualization of the wind speed.

    Parameters
    ----------
    uri : str
        URI of the file; passed to the parent manager and to
        ``nansat_filename``.
    reprocess : bool
        Not used by this method.
    *args, **kwargs
        Forwarded to the parent manager's ``get_or_create``.

    Returns
    -------
    dataset and flag
        The flag is always True.
    """
    # ingest file to db
    dataset, _ = super(DatasetManager, self).get_or_create(
        uri, *args, **kwargs)

    wind = Nansat(nansat_filename(uri))

    # Reproject to leaflet projection
    corner_lon, corner_lat = wind.get_corners()
    srs = NSR(3857)
    extent = '-lle %f %f %f %f -tr 1000 1000' % (
        corner_lon.min(), corner_lat.min(),
        corner_lon.max(), corner_lat.max())
    wind.reproject(Domain(srs, extent))

    # Get band numbers of required bands according to standard names
    speed_band = wind._get_band_number({'standard_name': 'wind_speed'})
    direction_band = wind._get_band_number(
        {'standard_name': 'wind_from_direction'})

    # Get numpy arrays of the bands. Neither array is used further down
    # (a quiver plot with Nansatmap was considered but probably won't work);
    # the reads are kept as they were.
    speed = wind[speed_band]
    direction = wind[direction_band]

    # Set paths - this code should be inherited but there is an issue in
    # generalising the first line that defines the current module
    package_parts = self.__module__.split('.')
    module = '%s.%s' % (package_parts[0], package_parts[1])
    media_dir = media_path(module, wind.fileName)
    # result unused; NOTE(review): kept in case product_path has side
    # effects (e.g. creating the directory) - confirm
    product_dir = product_path(module, wind.fileName)

    # <first>.<second>.png from the first two dot-separated name parts
    name_parts = os.path.basename(wind.fileName).split('.')
    filename = name_parts[0] + '.' + name_parts[1] + '.png'

    # check uniqueness of parameter
    speed_param = Parameter.objects.get(
        standard_name=wind.get_metadata(bandID=speed_band,
                                        key='standard_name'))
    direction_param = Parameter.objects.get(
        standard_name=wind.get_metadata(bandID=direction_band,
                                        key='standard_name'))

    wind.write_figure(
        os.path.join(media_dir, filename),
        bands=speed_band,
        mask_array=wind['swathmask'],
        mask_lut={0: [128, 128, 128]},
        transparency=[128, 128, 128])

    # Get DatasetParameter
    ds_parameter, _ = DatasetParameter.objects.get_or_create(
        dataset=dataset, parameter=speed_param)

    # Create Visualization
    border = WKTReader().read(wind.get_border_wkt())
    location, _ = GeographicLocation.objects.get_or_create(geometry=border)
    visualization, _ = Visualization.objects.get_or_create(
        uri='file://localhost%s/%s' % (media_dir, filename),
        title='%s' % (speed_param.standard_name),
        geographic_location=location)

    # Create VisualizationParameter
    VisualizationParameter.objects.get_or_create(
        visualization=visualization, ds_parameter=ds_parameter)

    return dataset, True
def get_or_create(self, uri, *args, **kwargs):
    """Ingest gsar file to geo-spaas db.

    After the parent manager has ingested the file, the dataset border is
    replaced by a polygon enclosing all ``self.N_SUBSWATHS`` subswaths.

    Parameters
    ----------
    uri : str
        URI of the gsar file.
    *args, **kwargs
        Forwarded to the parent manager's ``get_or_create``.

    Returns
    -------
    dataset and flag
        The flag is True only when the stored border geometry was changed
        by this call.
    """
    ds, created = super(DatasetManager, self).get_or_create(
        uri, *args, **kwargs)

    # TODO: Check if the following is necessary
    if not type(ds) == Dataset:
        return ds, False

    # set Dataset entry_title
    ds.entry_title = 'SAR Doppler'
    ds.save()

    fn = nansat_filename(uri)
    n = Nansat(fn, subswath=0)
    gg = WKTReader().read(n.get_border_wkt())

    # A stored geometry larger than the border of subswath 0 is taken to
    # mean that the combined border has already been stored.
    if ds.geographic_location.geometry.area > gg.area:
        return ds, False

    # Update dataset border geometry
    # This must be done every time a Doppler file is processed. It is time
    # consuming but apparently the only way to do it. Could be checked
    # though...
    num_border_points = 10
    az_left_lon, az_left_lat = [], []
    az_right_lon, az_right_lat = [], []
    ra_upper_lon, ra_upper_lat = [], []
    ra_lower_lon, ra_lower_lat = [], []

    for i in range(self.N_SUBSWATHS):
        # Read subswaths
        # Should use nansat.domain.get_border - see nansat issue #166
        # (https://github.com/nansencenter/nansat/issues/166)
        lon, lat = Nansat(fn, subswath=i).get_geolocation_grids()

        # Floor division keeps the steps integral on Python 3 as well;
        # a float step would make the slices below raise TypeError.
        astep = max(1, (lon.shape[0] // 2 * 2 - 1) // num_border_points)
        rstep = max(1, (lon.shape[1] // 2 * 2 - 1) // num_border_points)

        az_left_lon.append(lon[0:-1:astep, 0])
        az_left_lat.append(lat[0:-1:astep, 0])
        az_right_lon.append(lon[0:-1:astep, -1])
        az_right_lat.append(lat[0:-1:astep, -1])
        ra_upper_lon.append(lon[-1, 0:-1:rstep])
        ra_upper_lat.append(lat[-1, 0:-1:rstep])
        ra_lower_lon.append(lon[0, 0:-1:rstep])
        ra_lower_lat.append(lat[0, 0:-1:rstep])

    # Walk around the combined swath: first column of the first subswath,
    # last row of every subswath in order, reversed last column of the last
    # subswath, then the reversed first row of every subswath in reverse
    # order.
    lons = np.concatenate(
        [az_left_lon[0]]
        + ra_upper_lon
        + [np.flipud(az_right_lon[-1])]
        + [np.flipud(edge) for edge in reversed(ra_lower_lon)])

    # apply 180 degree correction to longitude - code copied from
    # get_border_wkt...
    # TODO: simplify using np.mod?
    for ilon, llo in enumerate(lons):
        lons[ilon] = copysign(acos(cos(llo * pi / 180.)) / pi * 180,
                              sin(llo * pi / 180.))

    lats = np.concatenate(
        [az_left_lat[0]]
        + ra_upper_lat
        + [np.flipud(az_right_lat[-1])]
        + [np.flipud(edge) for edge in reversed(ra_lower_lat)])

    poly_border = ','.join(
        str(llo) + ' ' + str(lla) for llo, lla in zip(lons, lats))
    wkt = 'POLYGON((%s))' % poly_border
    new_geometry = WKTReader().read(wkt)

    # Get geolocation of dataset - this must be updated
    geoloc = ds.geographic_location

    # Check geometry, return if it is the same as the stored one
    created = False
    if geoloc.geometry != new_geometry:
        # Change the dataset geolocation to cover all subswaths
        geoloc.geometry = new_geometry
        geoloc.save()
        created = True

    return ds, created
def get_or_create(self, uri, *args, **kwargs):
    """Ingest gsar file to geo-spaas db.

    After the parent manager has ingested the file, the entry title is
    taken from the file metadata, the polarization is stored for newly
    created datasets, and the dataset geolocation is set to a polygon
    enclosing all ``self.N_SUBSWATHS`` subswaths.

    Parameters
    ----------
    uri : str
        URI of the gsar file.
    *args, **kwargs
        Forwarded to the parent manager's ``get_or_create``.

    Returns
    -------
    dataset and flag
        ``(ds, False)`` when ``ds`` is not a ``Dataset`` or when the stored
        geometry is already larger than the border of subswath 0; otherwise
        ``(ds, True)``.

    Raises
    ------
    ValueError
        If the dataset was newly created but a matching
        SARDopplerExtraMetadata row already existed.
    """
    ds, created = super(DatasetManager, self).get_or_create(
        uri, *args, **kwargs)
    connection.close()

    # TODO: Check if the following is necessary
    if not type(ds) == Dataset:
        return ds, False

    fn = nansat_filename(uri)
    n = Nansat(fn, subswath=0)

    # set Dataset entry_title
    ds.entry_title = n.get_metadata('title')
    ds.save()

    if created:
        # imported here rather than at module level, as in the original
        from sar_doppler.models import SARDopplerExtraMetadata
        # Store the polarization and associate the dataset
        extra, extra_created = SARDopplerExtraMetadata.objects.get_or_create(
            dataset=ds, polarization=n.get_metadata('polarization'))
        if not extra_created:
            raise ValueError(
                'Created new dataset but could not create instance of ExtraMetadata'
            )
        ds.sardopplerextrametadata_set.add(extra)
        connection.close()

    gg = WKTReader().read(n.get_border_wkt())

    # If the area of the dataset geometry is larger than the area of the
    # subswath border, it means that the dataset has already been created
    # (the area should be the total area of all subswaths)
    if np.floor(ds.geographic_location.geometry.area) > np.round(gg.area):
        return ds, False

    num_border_points = 10
    az_left_lon, az_left_lat = [], []
    az_right_lon, az_right_lat = [], []
    ra_upper_lon, ra_upper_lat = [], []
    ra_lower_lon, ra_lower_lat = [], []

    for i in range(self.N_SUBSWATHS):
        # Read subswaths
        lon, lat = Nansat(fn, subswath=i).get_geolocation_grids()

        # NOTE(review): with true division this evaluates to
        # int((size - 1) / num_border_points); with Python 2 integer
        # division the size was first rounded down to an even number.
        # The expression is kept unchanged.
        astep = int(
            max(1, (lon.shape[0] / 2 * 2 - 1) / num_border_points))
        rstep = int(
            max(1, (lon.shape[1] / 2 * 2 - 1) / num_border_points))

        az_left_lon.append(lon[0:-1:astep, 0])
        az_left_lat.append(lat[0:-1:astep, 0])
        az_right_lon.append(lon[0:-1:astep, -1])
        az_right_lat.append(lat[0:-1:astep, -1])
        ra_upper_lon.append(lon[-1, 0:-1:rstep])
        ra_upper_lat.append(lat[-1, 0:-1:rstep])
        ra_lower_lon.append(lon[0, 0:-1:rstep])
        ra_lower_lat.append(lat[0, 0:-1:rstep])

    # Walk around the combined swath: first column of the first subswath,
    # last row of every subswath in order, reversed last column of the last
    # subswath, then the reversed first row of every subswath in reverse
    # order. Built from the loop results instead of fixed indices 0..4.
    lons = np.concatenate(
        [az_left_lon[0]]
        + ra_upper_lon
        + [np.flipud(az_right_lon[-1])]
        + [np.flipud(edge) for edge in reversed(ra_lower_lon)]
    ).round(decimals=3)

    # apply 180 degree correction to longitude - code copied from
    # get_border_wkt...
    # TODO: simplify using np.mod?
    for ilon, llo in enumerate(lons):
        lons[ilon] = copysign(
            acos(cos(llo * pi / 180.)) / pi * 180, sin(llo * pi / 180.))

    lats = np.concatenate(
        [az_left_lat[0]]
        + ra_upper_lat
        + [np.flipud(az_right_lat[-1])]
        + [np.flipud(edge) for edge in reversed(ra_lower_lat)]
    ).round(decimals=3)

    poly_border = ','.join(
        str(llo) + ' ' + str(lla) for llo, lla in zip(lons, lats))
    wkt = 'POLYGON((%s))' % poly_border
    new_geometry = WKTReader().read(wkt)

    # Get or create new geolocation of dataset. The created-flag is False
    # if it is the same as an already created one (this may happen when a
    # lot of data is processed); it is not needed here.
    # NOTE(review): ds is not saved after this assignment in this method -
    # confirm that the caller persists the new geographic_location.
    ds.geographic_location, _ = GeographicLocation.objects.get_or_create(
        geometry=new_geometry)

    connection.close()

    return ds, True