def __init__(self, filename, gdal_dataset, metadata, quartile=0, *args, **kwargs):
    """Mapper for ASCAT scatterometry products.

    Parameters
    ----------
    filename : str
        Path to the input file.
    gdal_dataset : gdal.Dataset
        Opened GDAL dataset for the file.
    metadata : dict
        GDAL metadata dictionary (NC_GLOBAL#... attributes).
    quartile : int
        Quartile of the swath to map (passed to the parent mapper).

    Raises
    ------
    WrongMapperError
        If the NC_GLOBAL#source attribute does not identify an ASCAT product.
    """
    # Idiomatic membership test ('x not in y' rather than 'not x in y')
    if 'ascat' not in metadata.get('NC_GLOBAL#source', '').lower():
        raise WrongMapperError
    super(Mapper, self).__init__(filename, gdal_dataset, metadata,
                                 quartile=quartile, *args, **kwargs)
    # Read geolocation arrays and register them as GCPs; longitudes are
    # shifted to the mapper's expected range first.
    lat = self.dataset.GetRasterBand(
        self._latitude_band_number(gdal_dataset)).ReadAsArray()
    lon = self.dataset.GetRasterBand(
        self._longitude_band_number(gdal_dataset)).ReadAsArray()
    lon = ScatterometryMapper.shift_longitudes(lon)
    self.set_gcps(lon, lat, gdal_dataset)
    # Get dictionary describing the instrument and platform according to
    # the GCMD keywords
    ii = pti.get_gcmd_instrument('ascat')
    pp = pti.get_gcmd_platform(metadata['NC_GLOBAL#source'].split(' ')[0])
    provider = pti.get_gcmd_provider(
        re.split('[^a-zA-Z]', metadata['NC_GLOBAL#institution'])[0])
    # TODO: Validate that the found instrument and platform are indeed what
    # we want....
    self.dataset.SetMetadataItem('instrument', json.dumps(ii))
    self.dataset.SetMetadataItem('platform', json.dumps(pp))
    self.dataset.SetMetadataItem('data_center', json.dumps(provider))
    self.dataset.SetMetadataItem('entry_title', metadata['NC_GLOBAL#title'])
    self.dataset.SetMetadataItem(
        'ISO_topic_category',
        json.dumps(pti.get_iso19115_topic_category('Oceans')))
def __init__(self, filename, gdal_dataset, gdal_metadata, date=None,
             ds=None, bands=None, cachedir=None, *args, **kwargs):
    """Mapper for a model product served as NetCDF (NO/MET).

    Raises
    ------
    WrongMapperError
        If the file has no 'projection_3' variable with a proj4 attribute.
    """
    self.test_mapper(filename)
    timestamp = date if date else self.get_date(filename)
    ds = Dataset(filename)
    try:
        # Projection is stored in degrees; append to_meter to convert
        self.srcDSProjection = NSR(ds.variables['projection_3'].proj4 +
                                   ' +to_meter=0.0174532925199 +wktext')
    except KeyError:
        raise WrongMapperError
    self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds,
                    bands, cachedir)
    self.dataset.SetMetadataItem(
        'instrument', json.dumps(pti.get_gcmd_instrument('Computer')))
    self.dataset.SetMetadataItem(
        'platform', json.dumps(pti.get_gcmd_platform('MODELS')))
    self.dataset.SetMetadataItem(
        'Data Center', json.dumps(pti.get_gcmd_provider('NO/MET')))
    self.dataset.SetMetadataItem('Entry Title', str(ds.getncattr('title')))
    # BUG FIX: the ISO topic category was previously stored under
    # 'Entry Title', overwriting the title set just above. Store it under
    # its own key, consistent with the other mappers.
    self.dataset.SetMetadataItem(
        'ISO_topic_category',
        json.dumps(pti.get_iso19115_topic_category('Oceans')))
    self.dataset.SetMetadataItem(
        'gcmd_location', json.dumps(pti.get_gcmd_location('sea surface')))
def set_gcmd_dif_keywords(self):
    """Populate GCMD DIF metadata on the dataset.

    Fills entry_title, data_center and ISO_topic_category only when they
    are not already set; always (re)sets instrument, platform and the
    time coverage taken from the acquisition time metadata.
    """
    # Fallback values for items that may already have been set upstream.
    fallbacks = {
        'entry_title': lambda: self.input_filename,
        'data_center': lambda: json.dumps(pti.get_gcmd_provider('NO/MET')),
        'ISO_topic_category': lambda: pti.get_iso19115_topic_category(
            'Imagery/Base Maps/Earth Cover')['iso_topic_category'],
    }
    for key, make_value in fallbacks.items():
        if not self.dataset.GetMetadataItem(key):
            self.dataset.SetMetadataItem(key, make_value())
    # SAR instrument; the platform depends on the Sentinel-1 mission id.
    instrument = pti.get_gcmd_instrument('sar')
    if self.ds.MISSION_ID == 'S1A':
        platform = pti.get_gcmd_platform('sentinel-1a')
    else:
        platform = pti.get_gcmd_platform('sentinel-1b')
    self.dataset.SetMetadataItem('instrument', json.dumps(instrument))
    self.dataset.SetMetadataItem('platform', json.dumps(platform))
    self.dataset.SetMetadataItem(
        'time_coverage_start',
        self.dataset.GetMetadataItem('ACQUISITION_START_TIME'))
    self.dataset.SetMetadataItem(
        'time_coverage_end',
        self.dataset.GetMetadataItem('ACQUISITION_STOP_TIME'))
def __init__(self, filename, gdal_dataset, metadata, quartile=0, *args, **kwargs):
    """Mapper for QuikSCAT/SeaWinds scatterometry products.

    Raises
    ------
    WrongMapperError
        If NC_GLOBAL#source does not identify a QuikSCAT product.
    """
    # Idiomatic membership test ('x not in y' rather than 'not x in y')
    if 'quikscat' not in metadata.get('NC_GLOBAL#source', '').lower():
        raise WrongMapperError
    super(Mapper, self).__init__(filename, gdal_dataset, metadata,
                                 quartile=quartile, *args, **kwargs)
    # Read geolocation arrays and register them as GCPs
    lat = self.dataset.GetRasterBand(
        self._latitude_band_number(gdal_dataset)).ReadAsArray()
    lon = self.dataset.GetRasterBand(
        self._longitude_band_number(gdal_dataset)).ReadAsArray()
    lon = ScatterometryMapper.shift_longitudes(lon)
    self.set_gcps(lon, lat, gdal_dataset)
    # Get dictionary describing the instrument and platform according to
    # the GCMD keywords
    mm = pti.get_gcmd_instrument('seawinds')
    ee = pti.get_gcmd_platform('quikscat')
    provider = metadata['NC_GLOBAL#institution']
    # JPL is registered in GCMD under the full NASA/JPL/QUIKSCAT keyword
    if provider.lower() == 'jpl':
        provider = 'NASA/JPL/QUIKSCAT'
    provider = pti.get_gcmd_provider(provider)
    self.dataset.SetMetadataItem('instrument', json.dumps(mm))
    self.dataset.SetMetadataItem('platform', json.dumps(ee))
    self.dataset.SetMetadataItem('data_center', json.dumps(provider))
    self.dataset.SetMetadataItem('entry_title', metadata['NC_GLOBAL#title'])
    self.dataset.SetMetadataItem(
        'ISO_topic_category',
        json.dumps(pti.get_iso19115_topic_category('Oceans')))
def __init__(self, filename, gdal_dataset, metadata, quartile=0, *args, **kwargs):
    """Mapper for ASCAT scatterometry products.

    Raises
    ------
    WrongMapperError
        If NC_GLOBAL#source does not identify an ASCAT product.
    """
    # Idiomatic membership test ('x not in y' rather than 'not x in y')
    if 'ascat' not in metadata.get('NC_GLOBAL#source', '').lower():
        raise WrongMapperError
    super(Mapper, self).__init__(filename, gdal_dataset, metadata,
                                 quartile=quartile, *args, **kwargs)
    # Read geolocation arrays and register them as GCPs
    lat = self.dataset.GetRasterBand(
        self._latitude_band_number(gdal_dataset)).ReadAsArray()
    lon = self.dataset.GetRasterBand(
        self._longitude_band_number(gdal_dataset)).ReadAsArray()
    lon = ScatterometryMapper.shift_longitudes(lon)
    self.set_gcps(lon, lat, gdal_dataset)
    # Get dictionary describing the instrument and platform according to
    # the GCMD keywords
    ii = pti.get_gcmd_instrument('ascat')
    pp = pti.get_gcmd_platform(metadata['NC_GLOBAL#source'].split(' ')[0])
    provider = pti.get_gcmd_provider(
        re.split('[^a-zA-Z]', metadata['NC_GLOBAL#institution'])[0])
    # TODO: Validate that the found instrument and platform are indeed what
    # we want....
    self.dataset.SetMetadataItem('instrument', json.dumps(ii))
    self.dataset.SetMetadataItem('platform', json.dumps(pp))
    self.dataset.SetMetadataItem('data_center', json.dumps(provider))
    self.dataset.SetMetadataItem('entry_title', metadata['NC_GLOBAL#title'])
    self.dataset.SetMetadataItem(
        'ISO_topic_category',
        json.dumps(pti.get_iso19115_topic_category('Oceans')))
def __init__(self, filename, gdal_dataset, metadata, quartile=0, *args, **kwargs):
    """Mapper for QuikSCAT/SeaWinds scatterometry products.

    Raises
    ------
    WrongMapperError
        If NC_GLOBAL#source does not identify a QuikSCAT product.
    """
    # Idiomatic membership test ('x not in y' rather than 'not x in y')
    if 'quikscat' not in metadata.get('NC_GLOBAL#source', '').lower():
        raise WrongMapperError
    super(Mapper, self).__init__(filename, gdal_dataset, metadata,
                                 quartile=quartile, *args, **kwargs)
    # Read geolocation arrays and register them as GCPs
    lat = self.dataset.GetRasterBand(
        self._latitude_band_number(gdal_dataset)).ReadAsArray()
    lon = self.dataset.GetRasterBand(
        self._longitude_band_number(gdal_dataset)).ReadAsArray()
    lon = ScatterometryMapper.shift_longitudes(lon)
    self.set_gcps(lon, lat, gdal_dataset)
    # Get dictionary describing the instrument and platform according to
    # the GCMD keywords
    mm = pti.get_gcmd_instrument('seawinds')
    ee = pti.get_gcmd_platform('quikscat')
    provider = metadata['NC_GLOBAL#institution']
    # JPL is registered in GCMD under the full NASA/JPL/QUIKSCAT keyword
    if provider.lower() == 'jpl':
        provider = 'NASA/JPL/QUIKSCAT'
    provider = pti.get_gcmd_provider(provider)
    self.dataset.SetMetadataItem('instrument', json.dumps(mm))
    self.dataset.SetMetadataItem('platform', json.dumps(ee))
    self.dataset.SetMetadataItem('data_center', json.dumps(provider))
    self.dataset.SetMetadataItem('entry_title', metadata['NC_GLOBAL#title'])
    self.dataset.SetMetadataItem(
        'ISO_topic_category',
        json.dumps(pti.get_iso19115_topic_category('Oceans')))
def _init_empty(self, manifest_data, annotation_data):
    """ Fast initialization from minimum of information

    Parameters
    ----------
    manifest_data : dict
        data from the manifest file (time_coverage_start, etc)
    annotation_data : dict
        data from annotation file (longitude, latitude, x_size, etc)

    Note
    ----
        Calls VRT.__init__, Adds GCPs, metadata
    """
    # Create an empty dataset of the annotated raster size
    super(Mapper, self).__init__(annotation_data['x_size'],
                                 annotation_data['y_size'])
    # Build GCPs from the (corrected) geolocation grids and attach them
    gcp_list = Mapper.create_gcps(annotation_data['longitude'],
                                  annotation_data['latitude'],
                                  annotation_data['height'],
                                  annotation_data['pixel'],
                                  annotation_data['line'])
    self.dataset.SetGCPs(gcp_list, NSR().wkt)
    # Fill in GCMD-style metadata
    set_item = self.dataset.SetMetadataItem
    platform_name = (manifest_data['platform_family_name'] +
                     manifest_data['platform_number'])
    set_item('time_coverage_start', manifest_data['time_coverage_start'])
    set_item('time_coverage_end', manifest_data['time_coverage_end'])
    set_item('platform', json.dumps(pti.get_gcmd_platform(platform_name)))
    set_item('instrument', json.dumps(pti.get_gcmd_instrument('SAR')))
    set_item('entry_title', platform_name + ' SAR')
    set_item('data_center', json.dumps(pti.get_gcmd_provider('ESA/EO')))
    set_item('iso_topic_category',
             json.dumps(pti.get_iso19115_topic_category('Oceans')))
    set_item('summary', platform_name + ' SAR data')
    self.dataset.FlushCache()
def get_or_create(self, uri, force):
    """Get or create a Dataset record for the file at *uri*.

    Parameters
    ----------
    uri : str
        URI to a file or stream openable by Nansat.
    force : bool
        If True and the URI is already ingested, delete the existing
        dataset and re-ingest.

    Returns
    -------
    (Dataset, bool)
        The dataset and a flag telling whether it was created.
    """
    # Validate uri - this should raise an exception if the uri doesn't
    # point to a valid file or stream
    validate_uri(uri)

    # Several datasets can refer to the same uri (e.g., scatterometers and svp drifters), so we
    # need to pass uri_filter_args
    uris = DatasetURI.objects.filter(uri=uri)
    # If the ingested uri is already in the database and not <force> ingestion then stop
    if uris.exists() and not force:
        return uris[0].dataset, False
    elif uris.exists() and force:
        uris[0].dataset.delete()

    # Open file with Nansat
    n = Nansat(nansat_filename(uri))

    # get metadata from Nansat and get objects from vocabularies
    n_metadata = n.get_metadata()

    # set compulsory metadata (source)
    platform, _ = Platform.objects.get_or_create(
        json.loads(n_metadata['platform']))
    instrument, _ = Instrument.objects.get_or_create(
        json.loads(n_metadata['instrument']))
    specs = n_metadata.get('specs', '')
    source, _ = Source.objects.get_or_create(platform=platform,
                                             instrument=instrument,
                                             specs=specs)

    footprint = Polygon(list(zip(*n.get_border())))
    geolocation = GeographicLocation.objects.get_or_create(
        geometry=footprint)[0]
    data_center = DataCenter.objects.get_or_create(
        json.loads(n_metadata['Data Center']))[0]
    iso_category = ISOTopicCategory.objects.get_or_create(
        pti.get_iso19115_topic_category('Oceans'))[0]
    location = Location.objects.get_or_create(
        json.loads(n_metadata['gcmd_location']))[0]

    # create dataset
    # BUG FIX: entry_id was previously assigned a lambda object instead of
    # a string; the callable was never invoked by get_or_create.
    ds, created = Dataset.objects.get_or_create(
        time_coverage_start=make_aware(n.time_coverage_start),
        time_coverage_end=make_aware(
            n.time_coverage_start + timedelta(hours=23, minutes=59,
                                              seconds=59)),
        source=source,
        geographic_location=geolocation,
        ISO_topic_category=iso_category,
        data_center=data_center,
        summary='',
        gcmd_location=location,
        access_constraints='',
        entry_id='NERSC_' + str(uuid.uuid4()))
    ds_uri, _ = DatasetURI.objects.get_or_create(
        name=FILE_SERVICE_NAME,
        service=LOCAL_FILE_SERVICE,
        uri=uri,
        dataset=ds)

    return ds, created
def _init_empty(self, manifest_data, annotation_data):
    """ Fast initialization from minimum of information

    Parameters
    ----------
    manifest_data : dict
        data from the manifest file (time_coverage_start, etc)
    annotation_data : dict
        data from annotation file (longitude, latitude, x_size, etc)

    Note
    ----
        Calls VRT.__init__, Adds GCPs, metadata
    """
    # Initialize an empty dataset of the annotated raster size
    x_size = annotation_data['x_size']
    y_size = annotation_data['y_size']
    super(Mapper, self).__init__(x_size, y_size)

    # Attach GCPs built from the (corrected) geolocation grids
    gcps = Mapper.create_gcps(annotation_data['longitude'],
                              annotation_data['latitude'],
                              annotation_data['height'],
                              annotation_data['pixel'],
                              annotation_data['line'])
    self.dataset.SetGCPs(gcps, NSR().wkt)

    # Platform name, e.g. family + number, drives several metadata items
    platform_name = (manifest_data['platform_family_name'] +
                     manifest_data['platform_number'])

    # Fill in GCMD-style metadata items
    self.dataset.SetMetadataItem('time_coverage_start',
                                 manifest_data['time_coverage_start'])
    self.dataset.SetMetadataItem('time_coverage_end',
                                 manifest_data['time_coverage_end'])
    self.dataset.SetMetadataItem(
        'platform', json.dumps(pti.get_gcmd_platform(platform_name)))
    self.dataset.SetMetadataItem(
        'instrument', json.dumps(pti.get_gcmd_instrument('SAR')))
    self.dataset.SetMetadataItem('entry_title', platform_name + ' SAR')
    self.dataset.SetMetadataItem(
        'data_center', json.dumps(pti.get_gcmd_provider('ESA/EO')))
    self.dataset.SetMetadataItem(
        'iso_topic_category',
        json.dumps(pti.get_iso19115_topic_category('Oceans')))
    self.dataset.SetMetadataItem('summary', platform_name + ' SAR data')
    self.dataset.FlushCache()
def __init__(self, filename, gdal_dataset, gdal_metadata, date=None,
             ds=None, bands=None, cachedir=None, *args, **kwargs):
    """Mapper for a UK Met Office NetCDF product.

    Builds the VRT, then sets entry-level GCMD metadata; the
    platform/instrument list is derived from the file contents.
    """
    self.test_mapper(filename)
    timestamp = date if date else self.get_date(filename)
    ds = Dataset(filename)
    self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds,
                    bands, cachedir)

    # Entry-level metadata, assigned in one pass
    items = {
        'entry_title': str(ds.getncattr('title')),
        'data_center': json.dumps(pti.get_gcmd_provider('UK/MOD/MET')),
        'ISO_topic_category': pti.get_iso19115_topic_category(
            'oceans')['iso_topic_category'],
        'gcmd_location': json.dumps(pti.get_gcmd_location('sea surface')),
        # Several platform/instrument pairs may contribute to this product
        'platform/instrument': json.dumps(
            self.get_platform_and_instrument_list(ds)),
    }
    for key, value in items.items():
        self.dataset.SetMetadataItem(key, value)
def __init__(self, filename, gdal_dataset, gdal_metadata, date=None,
             ds=None, bands=None, cachedir=None, *args, **kwargs):
    """Mapper for Sentinel-2 multi-spectral imagery.

    Sets entry_title, data_center and ISO_topic_category only when they
    are not already present; always tags instrument and platform.
    """
    self.test_mapper(filename)
    timestamp = date if date else self.get_date(filename)
    self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds,
                    bands, cachedir)

    if not self.dataset.GetMetadataItem('entry_title'):
        # Prefer the NetCDF title attribute; fall back to the filename
        try:
            title = str(self.ds.getncattr('title'))
        except AttributeError:
            title = filename
        self.dataset.SetMetadataItem('entry_title', title)

    if not self.dataset.GetMetadataItem('data_center'):
        self.dataset.SetMetadataItem(
            'data_center', json.dumps(pti.get_gcmd_provider('NO/MET')))

    if not self.dataset.GetMetadataItem('ISO_topic_category'):
        self.dataset.SetMetadataItem(
            'ISO_topic_category',
            pti.get_iso19115_topic_category(
                'Imagery/Base Maps/Earth Cover')['iso_topic_category'])

    # Always tag as Sentinel-2 multi-spectral imagery
    self.dataset.SetMetadataItem(
        'instrument', json.dumps(pti.get_gcmd_instrument('multi-spectral')))
    self.dataset.SetMetadataItem(
        'platform', json.dumps(pti.get_gcmd_platform('sentinel-2')))
def get_or_create(self, uri, n_points=10, uri_filter_args=None,
                  uri_service_name=FILE_SERVICE_NAME,
                  uri_service_type=LOCAL_FILE_SERVICE,
                  *args, **kwargs):
    """ Create dataset and corresponding metadata

    Parameters:
    ----------
        uri : str
            URI to file or stream openable by Nansat
        n_points : int
            Number of border points (default is 10)
        uri_filter_args : dict
            Extra DatasetURI filter arguments if several datasets can refer to the same URI
        uri_service_name : str
            name of the service which is used ('dapService', 'fileService', 'http' or 'wms')
        uri_service_type : str
            type of the service which is used ('OPENDAP', 'local', 'HTTPServer' or 'WMS')

    Returns:
    -------
        dataset and flag
    """
    if not uri_filter_args:
        uri_filter_args = {}

    # Validate uri - this should raise an exception if the uri doesn't point to a valid
    # file or stream
    validate_uri(uri)

    # Several datasets can refer to the same uri (e.g., scatterometers and svp drifters), so we
    # need to pass uri_filter_args
    uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
    if len(uris) > 0:
        return uris[0].dataset, False

    # Open file with Nansat
    n = Nansat(nansat_filename(uri), **kwargs)

    # get metadata from Nansat and get objects from vocabularies
    n_metadata = n.get_metadata()

    entry_id = n_metadata.get('entry_id', None)

    # set compulsory metadata (source)
    platform, _ = Platform.objects.get_or_create(
        json.loads(n_metadata['platform']))
    instrument, _ = Instrument.objects.get_or_create(
        json.loads(n_metadata['instrument']))
    specs = n_metadata.get('specs', '')
    source, _ = Source.objects.get_or_create(platform=platform,
                                             instrument=instrument,
                                             specs=specs)

    default_char_fields = {
        # Adding NERSC_ in front of the id violates the string representation of the uuid
        # 'entry_id': lambda: 'NERSC_' + str(uuid.uuid4()),
        'entry_id': lambda: str(uuid.uuid4()),
        'entry_title': lambda: 'NONE',
        'summary': lambda: 'NONE',
    }

    # set optional CharField metadata from Nansat or from default_char_fields
    options = {}
    try:
        existing_ds = Dataset.objects.get(entry_id=entry_id)
    except Dataset.DoesNotExist:
        existing_ds = None

    for name in default_char_fields:
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
            # prevent overwriting of existing values by defaults
            if existing_ds:
                options[name] = existing_ds.__getattribute__(name)
            else:
                options[name] = default_char_fields[name]()
        else:
            options[name] = n_metadata[name]

    default_foreign_keys = {
        'gcmd_location': {'model': Location,
                          'value': pti.get_gcmd_location('SEA SURFACE')},
        'data_center': {'model': DataCenter,
                        'value': pti.get_gcmd_provider('NERSC')},
        'ISO_topic_category': {'model': ISOTopicCategory,
                               'value': pti.get_iso19115_topic_category('Oceans')},
    }

    # set optional ForeignKey metadata from Nansat or from default_foreign_keys
    for name in default_foreign_keys:
        value = default_foreign_keys[name]['value']
        model = default_foreign_keys[name]['model']
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
        else:
            try:
                value = json.loads(n_metadata[name])
            # BUG FIX: was a bare 'except:' which would also swallow
            # SystemExit/KeyboardInterrupt; narrowed to Exception.
            except Exception:
                warnings.warn(
                    '%s value of %s metadata provided in Nansat is wrong!'
                    % (n_metadata[name], name))
        if existing_ds:
            options[name] = existing_ds.__getattribute__(name)
        else:
            options[name], _ = model.objects.get_or_create(value)

    # Find coverage to set number of points in the geolocation
    if len(n.vrt.dataset.GetGCPs()) > 0:
        n.reproject_gcps()
    geolocation = GeographicLocation.objects.get_or_create(
        geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

    # create dataset
    # - the get_or_create method should use get_or_create here as well,
    #   or its name should be changed - see issue #127
    ds, created = Dataset.objects.update_or_create(
        entry_id=options['entry_id'],
        defaults={
            'time_coverage_start': n.get_metadata('time_coverage_start'),
            'time_coverage_end': n.get_metadata('time_coverage_end'),
            'source': source,
            'geographic_location': geolocation,
            'gcmd_location': options["gcmd_location"],
            'ISO_topic_category': options["ISO_topic_category"],
            "data_center": options["data_center"],
            'entry_title': options["entry_title"],
            'summary': options["summary"]
        })

    # create parameter
    all_band_meta = n.bands()
    for band_id in range(1, len(all_band_meta) + 1):
        band_meta = all_band_meta[band_id]
        standard_name = band_meta.get('standard_name', None)
        short_name = band_meta.get('short_name', None)
        units = band_meta.get('units', None)
        if standard_name in ['latitude', 'longitude', None]:
            continue
        params = Parameter.objects.filter(standard_name=standard_name)
        # Progressively narrow the candidate parameters when ambiguous
        if params.count() > 1 and short_name is not None:
            params = params.filter(short_name=short_name)
        if params.count() > 1 and units is not None:
            params = params.filter(units=units)
        if params.count() >= 1:
            ds.parameters.add(params[0])

    # create dataset URI
    DatasetURI.objects.get_or_create(name=uri_service_name,
                                     service=uri_service_type,
                                     uri=uri,
                                     dataset=ds)

    return ds, created
def test_get_iso19115_topic_category(self):
    """The ISO 19115 lookup should return an OrderedDict for 'Oceans'."""
    result = pti.get_iso19115_topic_category('Oceans')
    self.assertIsInstance(result, collections.OrderedDict)
def get_iso_topic_category(self, raw_metadata):
    """Get the ISO topic category from the raw metadata"""
    # raw_metadata is intentionally ignored: this source is always 'Oceans'
    return pti.get_iso19115_topic_category('Oceans')
def _get_normalized_attributes(self, dataset_info, *args, **kwargs):
    """Gets dataset attributes using nansat

    Parameters
    ----------
    dataset_info : str
        URI of the dataset to normalize.

    Returns
    -------
    dict
        Normalized attributes ready for ingestion.

    Raises
    ------
    ValueError
        If the URI points to a remote FTP file (nansat cannot open those).
    """
    normalized_attributes = {}
    n_points = int(kwargs.get('n_points', 10))
    nansat_options = kwargs.get('nansat_options', {})
    url_scheme = urlparse(dataset_info).scheme
    # Idiomatic membership tests ('x not in y' rather than 'not x in y')
    if 'http' not in url_scheme and 'ftp' not in url_scheme:
        normalized_attributes['geospaas_service_name'] = FILE_SERVICE_NAME
        normalized_attributes['geospaas_service'] = LOCAL_FILE_SERVICE
    elif 'http' in url_scheme and 'ftp' not in url_scheme:
        normalized_attributes['geospaas_service_name'] = DAP_SERVICE_NAME
        normalized_attributes['geospaas_service'] = OPENDAP_SERVICE
    elif 'ftp' in url_scheme:
        raise ValueError(
            f"Can't ingest '{dataset_info}': nansat can't open remote ftp files"
        )

    # Open file with Nansat
    nansat_object = Nansat(nansat_filename(dataset_info),
                           log_level=self.LOGGER.getEffectiveLevel(),
                           **nansat_options)

    # get metadata from Nansat and get objects from vocabularies
    n_metadata = nansat_object.get_metadata()

    # set compulsory metadata (source)
    normalized_attributes['entry_title'] = n_metadata.get('entry_title', 'NONE')
    normalized_attributes['summary'] = n_metadata.get('summary', 'NONE')
    normalized_attributes['time_coverage_start'] = dateutil.parser.parse(
        n_metadata['time_coverage_start']).replace(tzinfo=tzutc())
    normalized_attributes['time_coverage_end'] = dateutil.parser.parse(
        n_metadata['time_coverage_end']).replace(tzinfo=tzutc())
    normalized_attributes['platform'] = json.loads(n_metadata['platform'])
    normalized_attributes['instrument'] = json.loads(n_metadata['instrument'])
    normalized_attributes['specs'] = n_metadata.get('specs', '')
    normalized_attributes['entry_id'] = n_metadata.get(
        'entry_id', 'NERSC_' + str(uuid.uuid4()))

    # set optional ForeignKey metadata from Nansat or from defaults
    normalized_attributes['gcmd_location'] = n_metadata.get(
        'gcmd_location', pti.get_gcmd_location('SEA SURFACE'))
    normalized_attributes['provider'] = pti.get_gcmd_provider(
        n_metadata.get('provider', 'NERSC'))
    normalized_attributes['iso_topic_category'] = n_metadata.get(
        'ISO_topic_category', pti.get_iso19115_topic_category('Oceans'))

    # Find coverage to set number of points in the geolocation
    if nansat_object.vrt.dataset.GetGCPs():
        nansat_object.reproject_gcps()
    normalized_attributes['location_geometry'] = GEOSGeometry(
        nansat_object.get_border_wkt(n_points=n_points), srid=4326)

    json_dumped_dataset_parameters = n_metadata.get('dataset_parameters', None)
    if json_dumped_dataset_parameters:
        json_loads_result = json.loads(json_dumped_dataset_parameters)
        if isinstance(json_loads_result, list):
            normalized_attributes['dataset_parameters'] = [
                get_cf_or_wkv_standard_name(dataset_param)
                for dataset_param in json_loads_result
            ]
        else:
            raise TypeError(
                f"Can't ingest '{dataset_info}': the 'dataset_parameters' section of the "
                "metadata returned by nansat is not a JSON list")
    else:
        normalized_attributes['dataset_parameters'] = []

    return normalized_attributes
def get_or_create(self, uri, n_points=10, uri_filter_args=None,
                  *args, **kwargs):
    ''' Create dataset and corresponding metadata

    Parameters:
    ----------
        uri : str
            URI to file or stream openable by Nansat
        n_points : int
            Number of border points (default is 10)
        uri_filter_args : dict
            Extra DatasetURI filter arguments if several datasets can refer to the same URI

    Returns:
    -------
        dataset and flag
    '''
    if not uri_filter_args:
        uri_filter_args = {}

    # Validate uri - this should raise an exception if the uri doesn't point to a valid
    # file or stream
    validate_uri(uri)

    # Several datasets can refer to the same uri (e.g., scatterometers and svp drifters), so we
    # need to pass uri_filter_args
    uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
    if len(uris) > 0:
        return uris[0].dataset, False

    # Open file with Nansat
    n = Nansat(nansat_filename(uri), **kwargs)

    # get metadata from Nansat and get objects from vocabularies
    n_metadata = n.get_metadata()

    # set compulsory metadata (source)
    platform, _ = Platform.objects.get_or_create(
        json.loads(n_metadata['platform']))
    instrument, _ = Instrument.objects.get_or_create(
        json.loads(n_metadata['instrument']))
    specs = n_metadata.get('specs', '')
    source, _ = Source.objects.get_or_create(platform=platform,
                                             instrument=instrument,
                                             specs=specs)

    default_char_fields = {
        'entry_id': lambda: 'NERSC_' + str(uuid.uuid4()),
        'entry_title': lambda: 'NONE',
        'summary': lambda: 'NONE',
    }

    # set optional CharField metadata from Nansat or from default_char_fields
    options = {}
    for name in default_char_fields:
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
            options[name] = default_char_fields[name]()
        else:
            options[name] = n_metadata[name]

    default_foreign_keys = {
        'gcmd_location': {'model': Location,
                          'value': pti.get_gcmd_location('SEA SURFACE')},
        'data_center': {'model': DataCenter,
                        'value': pti.get_gcmd_provider('NERSC')},
        'ISO_topic_category': {'model': ISOTopicCategory,
                               'value': pti.get_iso19115_topic_category('Oceans')},
    }

    # set optional ForeignKey metadata from Nansat or from default_foreign_keys
    for name in default_foreign_keys:
        value = default_foreign_keys[name]['value']
        model = default_foreign_keys[name]['model']
        if name not in n_metadata:
            warnings.warn('%s is not provided in Nansat metadata!' % name)
        else:
            try:
                value = json.loads(n_metadata[name])
            # BUG FIX: was a bare 'except:' which would also swallow
            # SystemExit/KeyboardInterrupt; narrowed to Exception.
            except Exception:
                warnings.warn(
                    '%s value of %s metadata provided in Nansat is wrong!'
                    % (n_metadata[name], name))
        options[name], _ = model.objects.get_or_create(value)

    # Find coverage to set number of points in the geolocation
    if len(n.vrt.dataset.GetGCPs()) > 0:
        n.reproject_gcps()
    geolocation = GeographicLocation.objects.get_or_create(
        geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

    # create dataset
    ds, created = Dataset.objects.get_or_create(
        time_coverage_start=n.get_metadata('time_coverage_start'),
        time_coverage_end=n.get_metadata('time_coverage_end'),
        source=source,
        geographic_location=geolocation,
        **options)

    # create dataset URI
    ds_uri, _ = DatasetURI.objects.get_or_create(uri=uri, dataset=ds)

    return ds, created
def test_get_iso19115_topic_category(self):
    """The ISO 19115 lookup should return an OrderedDict for 'Oceans'."""
    category = pti.get_iso19115_topic_category('Oceans')
    self.assertIsInstance(category, collections.OrderedDict)