def read_data(self, tile_specifications, file_path, output_tile):
    with Dataset(file_path) as ds:
        for section_spec, dimtoslice in tile_specifications:
            tile = nexusproto.TimeSeriesTile()

            instance_dimension = next(
                iter([dim for dim in ds[self.variable_to_read].dimensions if dim != self.time]))

            tile.latitude.CopyFrom(
                to_shaped_array(numpy.ma.filled(ds[self.latitude][dimtoslice[instance_dimension]], numpy.NaN)))

            tile.longitude.CopyFrom(
                to_shaped_array(numpy.ma.filled(ds[self.longitude][dimtoslice[instance_dimension]], numpy.NaN)))

            # Before we read the data we need to make sure the dimensions are in the proper order
            # so we don't have any indexing issues
            ordered_slices = get_ordered_slices(ds, self.variable_to_read, dimtoslice)

            # Read data using the ordered slices, replacing masked values with NaN
            data_array = numpy.ma.filled(ds[self.variable_to_read][tuple(ordered_slices.values())], numpy.NaN)

            tile.variable_data.CopyFrom(to_shaped_array(data_array))

            if self.metadata is not None:
                tile.meta_data.add().CopyFrom(
                    to_metadata(self.metadata, ds[self.metadata][tuple(ordered_slices.values())]))

            tile.time.CopyFrom(
                to_shaped_array(numpy.ma.filled(ds[self.time][dimtoslice[self.time]], numpy.NaN)))

            output_tile.tile.time_series_tile.CopyFrom(tile)

            yield output_tile
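# NOTE: get_ordered_slices is called above but not defined in this section. A minimal
# sketch of the assumed behavior, inferred from how the result is consumed: the slices
# must come back keyed in the variable's own dimension order so that
# tuple(ordered_slices.values()) indexes the array correctly. This sketch covers the
# netCDF4.Dataset case shown here; the xarray-based readers below would consult
# ds[variable].dims instead of .dimensions.
from collections import OrderedDict


def get_ordered_slices(ds, variable, dimension_to_slice):
    ordered_slices = OrderedDict()
    for dimension in ds[variable].dimensions:
        ordered_slices[str(dimension)] = dimension_to_slice[str(dimension)]
    return ordered_slices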
def test_run_chain_partial_empty(self):
    processor_list = [
        {
            'name': 'GridReadingProcessor',
            'config': {
                'latitude': 'lat',
                'longitude': 'lon',
                'time': 'time',
                'variable_to_read': 'analysed_sst'
            }
        },
        {'name': 'EmptyTileFilter', 'config': {}},
        {'name': 'KelvinToCelsius', 'config': {}},
        {'name': 'TileSummarizingProcessor', 'config': {}}
    ]
    processorchain = ProcessorChain(processor_list)

    test_file = path.join(path.dirname(__file__), 'datafiles', 'partial_empty_mur.nc4')

    input_tile = nexusproto.NexusTile()
    tile_summary = nexusproto.TileSummary()
    tile_summary.granule = "file:%s" % test_file
    tile_summary.section_spec = "time:0:1,lat:489:499,lon:0:10"
    input_tile.summary.CopyFrom(tile_summary)

    results = list(processorchain.process(input_tile))
    self.assertEqual(1, len(results))
    tile = results[0]
    self.assertTrue(tile.summary.HasField('bbox'), "bbox is missing")

    input_tile = nexusproto.NexusTile()
    tile_summary = nexusproto.TileSummary()
    tile_summary.granule = "file:%s" % test_file
    tile_summary.section_spec = "time:0:1,lat:0:10,lon:0:10"
    input_tile.summary.CopyFrom(tile_summary)

    results = list(processorchain.process(input_tile))
    self.assertEqual(0, len(results))
def test_read_not_empty_mur(self):
    test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_mur.nc4')

    input_tile = nexusproto.NexusTile()
    tile_summary = nexusproto.TileSummary()
    tile_summary.granule = "file:%s" % test_file
    tile_summary.section_spec = "time:0:1,lat:0:10,lon:0:10"
    input_tile.summary.CopyFrom(tile_summary)

    results = list(self.module.process(input_tile))

    self.assertEqual(1, len(results))

    tile1_data = np.ma.masked_invalid(from_shaped_array(results[0].tile.grid_tile.variable_data))
    self.assertEqual((1, 10, 10), tile1_data.shape)
    self.assertEqual(100, np.ma.count(tile1_data))
def process_nexus_tile(self, nexus_tile):
    the_tile_type = nexus_tile.tile.WhichOneof("tile_type")
    the_tile_data = getattr(nexus_tile.tile, the_tile_type)

    latitudes = numpy.ma.masked_invalid(from_shaped_array(the_tile_data.latitude))
    longitudes = numpy.ma.masked_invalid(from_shaped_array(the_tile_data.longitude))
    data = from_shaped_array(the_tile_data.variable_data)

    if nexus_tile.HasField("summary"):
        tilesummary = nexus_tile.summary
    else:
        tilesummary = nexusproto.TileSummary()

    tilesummary.bbox.lat_min = numpy.nanmin(latitudes).item()
    tilesummary.bbox.lat_max = numpy.nanmax(latitudes).item()
    tilesummary.bbox.lon_min = numpy.nanmin(longitudes).item()
    tilesummary.bbox.lon_max = numpy.nanmax(longitudes).item()

    tilesummary.stats.min = numpy.nanmin(data).item()
    tilesummary.stats.max = numpy.nanmax(data).item()

    # To calculate an accurate average, weight the data by the cosine of its latitude.
    # This is handled slightly differently for swath vs. grid data.
    if the_tile_type == 'swath_tile':
        # For swath tiles, len(data) == len(latitudes) == len(longitudes),
        # so we can simply weight each element in the data array
        tilesummary.stats.mean = numpy.ma.average(numpy.ma.masked_invalid(data),
                                                  weights=numpy.cos(numpy.radians(latitudes))).item()
    elif the_tile_type == 'grid_tile':
        # Grid tiles need to repeat the weight for every longitude
        # TODO This assumes data axes are ordered as latitude x longitude
        tilesummary.stats.mean = numpy.ma.average(
            numpy.ma.masked_invalid(data).flatten(),
            weights=numpy.cos(numpy.radians(numpy.repeat(latitudes, len(longitudes))))).item()
    else:
        # Default to a simple average with no weighting
        tilesummary.stats.mean = numpy.nanmean(data).item()

    tilesummary.stats.count = data.size - numpy.count_nonzero(numpy.isnan(data))

    try:
        min_time, max_time = find_time_min_max(the_tile_data)
        tilesummary.stats.min_time = min_time
        tilesummary.stats.max_time = max_time
    except NoTimeException:
        pass

    try:
        tilesummary.data_var_name = self.stored_var_name
    except TypeError:
        pass

    nexus_tile.summary.CopyFrom(tilesummary)

    yield nexus_tile
def test_run_chain_promote_var(self):
    processor_list = [
        {
            'name': 'GridReadingProcessor',
            'config': {
                'latitude': 'lat',
                'longitude': 'lon',
                'time': 'time',
                'variable_to_read': 'analysed_sst'
            }
        },
        {'name': 'EmptyTileFilter', 'config': {}},
        {'name': 'KelvinToCelsius', 'config': {}},
        {
            'name': 'PromoteVariableToGlobalAttribute',
            'config': {
                'attribute_name': 'time_i',
                'variable_name': 'time',
                'dimensioned_by.0': 'time'
            }
        },
        {'name': 'TileSummarizingProcessor', 'config': {}}
    ]
    processorchain = ProcessorChain(processor_list)

    test_file = path.join(path.dirname(__file__), 'datafiles', 'partial_empty_mur.nc4')

    input_tile = nexusproto.NexusTile()
    tile_summary = nexusproto.TileSummary()
    tile_summary.granule = "file:%s" % test_file
    tile_summary.section_spec = "time:0:1,lat:489:499,lon:0:10"
    input_tile.summary.CopyFrom(tile_summary)

    results = list(processorchain.process(input_tile))
    self.assertEqual(1, len(results))
    tile = results[0]
    self.assertEqual("1104483600", tile.summary.global_attributes[0].values[0])
def test_read_not_empty_wswm(self):
    test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_wswm.nc')

    wswm_reader = sdap.processors.TimeSeriesReadingProcessor('Qout', 'lat', 'lon', 'time')

    input_tile = nexusproto.NexusTile()
    tile_summary = nexusproto.TileSummary()
    tile_summary.granule = "file:%s" % test_file
    tile_summary.section_spec = "time:0:5832,rivid:0:1"
    input_tile.summary.CopyFrom(tile_summary)

    results = list(wswm_reader.process(input_tile))

    self.assertEqual(1, len(results))

    for nexus_tile in results:
        self.assertTrue(nexus_tile.HasField('tile'))
        self.assertTrue(nexus_tile.tile.HasField('time_series_tile'))

        tile = nexus_tile.tile.time_series_tile
        self.assertEqual(1, from_shaped_array(tile.latitude).size)
        self.assertEqual(1, from_shaped_array(tile.longitude).size)
        self.assertEqual((5832, 1), from_shaped_array(tile.variable_data).shape)

    tile1_data = np.ma.masked_invalid(from_shaped_array(results[0].tile.time_series_tile.variable_data))
    self.assertEqual(5832, np.ma.count(tile1_data))
    self.assertAlmostEqual(45.837,
                           np.ma.min(np.ma.masked_invalid(
                               from_shaped_array(results[0].tile.time_series_tile.latitude))),
                           places=3)
    self.assertAlmostEqual(-122.789,
                           np.ma.max(np.ma.masked_invalid(
                               from_shaped_array(results[0].tile.time_series_tile.longitude))),
                           places=3)

    tile1_times = from_shaped_array(results[0].tile.time_series_tile.time)
    self.assertEqual(852098400, tile1_times[0])
    self.assertEqual(915073200, tile1_times[-1])
    self.assertAlmostEqual(1.473,
                           np.ma.masked_invalid(
                               from_shaped_array(results[0].tile.time_series_tile.variable_data))[0, 0],
                           places=3)
def test_read_not_empty_smap(self):
    test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_smap.h5')

    swath_reader = sdap.processors.SwathReadingProcessor('smap_sss', 'lat', 'lon',
                                                         time='row_time',
                                                         glblattr_day='REV_START_TIME',
                                                         glblattr_day_format='%Y-%jT%H:%M:%S.%f')

    input_tile = nexusproto.NexusTile()
    tile_summary = nexusproto.TileSummary()
    tile_summary.granule = "file:%s" % test_file
    tile_summary.section_spec = "phony_dim_0:0:76,phony_dim_1:0:1"
    input_tile.summary.CopyFrom(tile_summary)

    results = list(swath_reader.process(input_tile))

    self.assertEqual(1, len(results))

    for nexus_tile in results:
        self.assertTrue(nexus_tile.HasField('tile'))
        self.assertTrue(nexus_tile.tile.HasField('swath_tile'))
        self.assertEqual(0, len(nexus_tile.tile.swath_tile.meta_data))

        tile = nexus_tile.tile.swath_tile
        self.assertEqual(76, from_shaped_array(tile.latitude).size)
        self.assertEqual(76, from_shaped_array(tile.longitude).size)

    tile1_data = np.ma.masked_invalid(from_shaped_array(results[0].tile.swath_tile.variable_data))
    self.assertEqual((76, 1), tile1_data.shape)
    self.assertEqual(43, np.ma.count(tile1_data))
    self.assertAlmostEqual(-50.056,
                           np.ma.min(np.ma.masked_invalid(
                               from_shaped_array(results[0].tile.swath_tile.latitude))),
                           places=3)
    self.assertAlmostEqual(-47.949,
                           np.ma.max(np.ma.masked_invalid(
                               from_shaped_array(results[0].tile.swath_tile.latitude))),
                           places=3)
    self.assertEqual(1427820162, np.ma.masked_invalid(from_shaped_array(results[0].tile.swath_tile.time))[0])
def process_nexus_tile(self, input_tile):
    tile_specifications, file_path = parse_input(input_tile, self.temp_dir)

    output_tile = nexusproto.NexusTile()
    output_tile.CopyFrom(input_tile)

    for tile in self.read_data(tile_specifications, file_path, output_tile):
        yield tile

    # If a temp dir is defined, delete the temporary file
    if self.temp_dir is not None:
        remove(file_path)
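# NOTE: parse_input is called above but not defined in this section. A minimal sketch of
# the assumed behavior, inferred from its call sites: it resolves the "file:" granule URL
# in the tile summary to a local path and expands the section spec (e.g.
# "time:0:1,lat:0:10,lon:0:10", with ';' separating multiple specs) into
# (spec_string, {dimension: slice}) pairs. Remote-granule handling via temp_dir is
# omitted here.
def parse_input(input_tile, temp_dir=None):
    file_path = input_tile.summary.granule[len('file:'):]  # assumes a file: URL

    tile_specifications = []
    for spec in input_tile.summary.section_spec.split(';'):
        dimension_to_slice = {}
        for dimension_spec in spec.split(','):
            name, start, stop = dimension_spec.split(':')
            dimension_to_slice[name] = slice(int(start), int(stop))
        tile_specifications.append((spec, dimension_to_slice))

    return tile_specifications, file_path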
def to_shaped_array(data_array):
    shaped_array = nexusproto.ShapedArray()

    shaped_array.shape.extend([dimension_size for dimension_size in data_array.shape])
    shaped_array.dtype = str(data_array.dtype)

    # numpy.save writes binary data, so the in-memory buffer must be a BytesIO;
    # the original StringIO would fail under Python 3
    memfile = BytesIO()
    numpy.save(memfile, data_array)
    shaped_array.array_data = memfile.getvalue()
    memfile.close()

    return shaped_array
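# NOTE: the inverse helper, from_shaped_array, is used throughout the tests in this
# section but not defined here. A minimal sketch of the assumed round trip:
from io import BytesIO

import numpy


def from_shaped_array(shaped_array):
    # Reconstruct the numpy array serialized by to_shaped_array
    memfile = BytesIO(shaped_array.array_data)
    data_array = numpy.load(memfile)
    memfile.close()
    return data_array


# Hypothetical usage showing the round trip:
# original = numpy.arange(6, dtype='float64').reshape(2, 3)
# assert numpy.array_equal(original, from_shaped_array(to_shaped_array(original)))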
def read_data(self, tile_specifications, file_path, output_tile):
    with xr.decode_cf(xr.open_dataset(file_path, decode_cf=False), decode_times=False) as ds:
        for section_spec, dimtoslice in tile_specifications:
            tile = nexusproto.SwathTile()

            # Time, lat, lon, data, and metadata should all be indexed by the same
            # dimensions, so order the incoming spec once using the data variable
            ordered_slices = get_ordered_slices(ds, self.variable_to_read, dimtoslice)

            tile.latitude.CopyFrom(
                to_shaped_array(
                    numpy.ma.filled(ds[self.latitude].data[tuple(ordered_slices.values())], numpy.NaN)))
            tile.longitude.CopyFrom(
                to_shaped_array(
                    numpy.ma.filled(ds[self.longitude].data[tuple(ordered_slices.values())], numpy.NaN)))

            timetile = ds[self.time][
                tuple([ordered_slices[time_dim] for time_dim in ds[self.time].dims])
            ].astype('float64', casting='same_kind', copy=False)
            timeunits = ds[self.time].attrs['units']
            try:
                start_of_day_date = datetime.datetime.strptime(ds.attrs[self.start_of_day],
                                                               self.start_of_day_pattern)
            except Exception:
                start_of_day_date = None

            for index in numpy.ndindex(timetile.shape):
                timetile[index] = to_seconds_from_epoch(timetile[index].item(),
                                                        timeunits=timeunits,
                                                        start_day=start_of_day_date,
                                                        timeoffset=self.time_offset)

            tile.time.CopyFrom(to_shaped_array(timetile))

            # Read the data, converting masked values to NaN
            data_array = numpy.ma.filled(
                ds[self.variable_to_read].data[tuple(ordered_slices.values())], numpy.NaN)
            tile.variable_data.CopyFrom(to_shaped_array(data_array))

            if self.metadata is not None:
                tile.meta_data.add().CopyFrom(
                    to_metadata(self.metadata, ds[self.metadata].data[tuple(ordered_slices.values())]))

            output_tile.tile.swath_tile.CopyFrom(tile)

            yield output_tile
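# NOTE: to_seconds_from_epoch is called above but not defined in this section. A minimal
# sketch of the assumed behavior using netCDF4.num2date; the real implementation likely
# handles more unit formats. start_day supports granules whose time variable counts
# seconds from the start-of-day timestamp named in a global attribute; timeoffset shifts
# the result by a fixed number of seconds.
import calendar

from netCDF4 import num2date


def to_seconds_from_epoch(time_value, timeunits='', start_day=None, timeoffset=None):
    if start_day is not None:
        # time_value is assumed to be an offset from the granule's start-of-day timestamp
        seconds = calendar.timegm(start_day.timetuple()) + int(time_value)
    else:
        # Interpret time_value according to its CF units string (e.g. "seconds since ...")
        seconds = calendar.timegm(num2date(time_value, units=timeunits).timetuple())
    if timeoffset is not None:
        seconds += timeoffset
    return seconds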
def process(self, tile, dataset, *args, **kwargs):
    tile_type = tile.tile.WhichOneof("tile_type")
    tile_data = getattr(tile.tile, tile_type)

    latitudes = numpy.ma.masked_invalid(from_shaped_array(tile_data.latitude))
    longitudes = numpy.ma.masked_invalid(from_shaped_array(tile_data.longitude))
    data = from_shaped_array(tile_data.variable_data)

    tile_summary = tile.summary if tile.HasField("summary") else nexusproto.TileSummary()

    tile_summary.dataset_name = self._dataset_name
    tile_summary.bbox.lat_min = numpy.nanmin(latitudes).item()
    tile_summary.bbox.lat_max = numpy.nanmax(latitudes).item()
    tile_summary.bbox.lon_min = numpy.nanmin(longitudes).item()
    tile_summary.bbox.lon_max = numpy.nanmax(longitudes).item()
    tile_summary.stats.min = numpy.nanmin(data).item()
    tile_summary.stats.max = numpy.nanmax(data).item()
    tile_summary.stats.count = data.size - numpy.count_nonzero(numpy.isnan(data))

    # To calculate an accurate average, weight the data by the cosine of its latitude.
    # This is handled slightly differently for swath vs. grid data.
    if tile_type == 'swath_tile':
        # For swath tiles, len(data) == len(latitudes) == len(longitudes),
        # so we can simply weight each element in the data array
        tile_summary.stats.mean = type(self).calculate_mean_for_swath_tile(data, latitudes)
    elif tile_type == 'grid_tile':
        # Grid tiles need to repeat the weight for every longitude
        # TODO This assumes data axes are ordered as latitude x longitude
        tile_summary.stats.mean = type(self).calculate_mean_for_grid_tile(data, latitudes, longitudes)
    else:
        # Default to a simple average with no weighting
        tile_summary.stats.mean = numpy.nanmean(data).item()

    try:
        min_time, max_time = find_time_min_max(tile_data)
        tile_summary.stats.min_time = min_time
        tile_summary.stats.max_time = max_time
    except NoTimeException:
        pass

    standard_name = dataset.variables[tile_summary.data_var_name].attrs.get('standard_name')
    if standard_name:
        tile_summary.standard_name = standard_name

    tile.summary.CopyFrom(tile_summary)

    return tile
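# NOTE: the two class-level helpers referenced above are not defined in this section.
# Minimal sketches, mirroring the inline cosine-of-latitude weighting used by
# process_nexus_tile earlier in this section:
@staticmethod
def calculate_mean_for_swath_tile(data, latitudes):
    # Each data element pairs 1:1 with a latitude, so weight elementwise
    return numpy.ma.average(numpy.ma.masked_invalid(data),
                            weights=numpy.cos(numpy.radians(latitudes))).item()


@staticmethod
def calculate_mean_for_grid_tile(data, latitudes, longitudes):
    # Repeat each latitude's weight across every longitude
    # (assumes data axes are ordered latitude x longitude)
    return numpy.ma.average(
        numpy.ma.masked_invalid(data).flatten(),
        weights=numpy.cos(numpy.radians(numpy.repeat(latitudes, len(longitudes))))).item()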
def test_read_not_empty_avhrr(self):
    test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_avhrr.nc4')

    avhrr_reader = sdap.processors.GridReadingProcessor('analysed_sst', 'lat', 'lon', time='time')

    input_tile = nexusproto.NexusTile()
    tile_summary = nexusproto.TileSummary()
    tile_summary.granule = "file:%s" % test_file
    tile_summary.section_spec = "time:0:1,lat:0:10,lon:0:10"
    input_tile.summary.CopyFrom(tile_summary)

    results = list(avhrr_reader.process(input_tile))

    self.assertEqual(1, len(results))

    for nexus_tile in results:
        self.assertTrue(nexus_tile.HasField('tile'))
        self.assertTrue(nexus_tile.tile.HasField('grid_tile'))

        tile = nexus_tile.tile.grid_tile
        self.assertEqual(10, from_shaped_array(tile.latitude).size)
        self.assertEqual(10, from_shaped_array(tile.longitude).size)
        self.assertEqual((1, 10, 10), from_shaped_array(tile.variable_data).shape)

    tile1_data = np.ma.masked_invalid(from_shaped_array(results[0].tile.grid_tile.variable_data))
    self.assertEqual(100, np.ma.count(tile1_data))
    self.assertAlmostEqual(-39.875,
                           np.ma.min(np.ma.masked_invalid(
                               from_shaped_array(results[0].tile.grid_tile.latitude))),
                           places=3)
    self.assertAlmostEqual(-37.625,
                           np.ma.max(np.ma.masked_invalid(
                               from_shaped_array(results[0].tile.grid_tile.latitude))),
                           places=3)
    self.assertEqual(1462060800, results[0].tile.grid_tile.time)
    self.assertAlmostEqual(289.71,
                           np.ma.masked_invalid(
                               from_shaped_array(results[0].tile.grid_tile.variable_data))[0, 0, 0],
                           places=3)
def test_read_not_empty_ccmp(self):
    test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_ccmp.nc')

    ccmp_reader = sdap.processors.GridReadingProcessor('uwnd', 'latitude', 'longitude',
                                                       time='time', meta='vwnd')

    input_tile = nexusproto.NexusTile()
    tile_summary = nexusproto.TileSummary()
    tile_summary.granule = "file:%s" % test_file
    tile_summary.section_spec = "time:0:1,longitude:0:87,latitude:0:38"
    input_tile.summary.CopyFrom(tile_summary)

    results = list(ccmp_reader.process(input_tile))

    self.assertEqual(1, len(results))

    for nexus_tile in results:
        self.assertTrue(nexus_tile.HasField('tile'))
        self.assertTrue(nexus_tile.tile.HasField('grid_tile'))
        self.assertEqual(1, len(nexus_tile.tile.grid_tile.meta_data))

        tile = nexus_tile.tile.grid_tile
        self.assertEqual(38, from_shaped_array(tile.latitude).size)
        self.assertEqual(87, from_shaped_array(tile.longitude).size)
        self.assertEqual((1, 38, 87), from_shaped_array(tile.variable_data).shape)

    tile1_data = np.ma.masked_invalid(from_shaped_array(results[0].tile.grid_tile.variable_data))
    self.assertEqual(3306, np.ma.count(tile1_data))
    self.assertAlmostEqual(-78.375,
                           np.ma.min(np.ma.masked_invalid(
                               from_shaped_array(results[0].tile.grid_tile.latitude))),
                           places=3)
    self.assertAlmostEqual(-69.125,
                           np.ma.max(np.ma.masked_invalid(
                               from_shaped_array(results[0].tile.grid_tile.latitude))),
                           places=3)
    self.assertEqual(1451606400, results[0].tile.grid_tile.time)
def test_build_solr_doc_no_standard_name(self):
    """
    When TileSummary.standard_name isn't available, the solr field
    tile_var_name_s should use TileSummary.data_var_name
    """
    tile = nexusproto.NexusTile()
    tile.summary.tile_id = 'test_id'
    tile.summary.data_var_name = 'test_variable'
    tile.tile.ecco_tile.depth = 10.5

    metadata_store = SolrStore()
    solr_doc = metadata_store._build_solr_doc(tile)

    self.assertEqual('test_variable', solr_doc['tile_var_name_s'])
def test_process(self):
    processor = GenerateTileId()

    tile = nexusproto.NexusTile()
    tile.summary.granule = 'test_dir/test_granule.nc'
    tile.summary.data_var_name = 'test_variable'
    tile.summary.section_spec = 'i:0:90,j:0:90,k:8:9,nv:0:2,tile:4:5,time:8:9'

    expected_id = uuid.uuid3(uuid.NAMESPACE_DNS,
                             'test_granule.nc' + 'test_variable' + 'i:0:90,j:0:90,k:8:9,nv:0:2,tile:4:5,time:8:9')

    self.assertEqual(str(expected_id), processor.process(tile).summary.tile_id)
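# NOTE: GenerateTileId.process is exercised above but not defined in this section. A
# minimal sketch matching the expected_id computation in the test: the tile id is a
# deterministic UUIDv3 over granule basename + variable name + section spec, so
# re-ingesting the same slice of the same granule always yields the same id.
import uuid
from os import path


class GenerateTileId:
    def process(self, tile):
        granule = path.basename(tile.summary.granule)
        tile.summary.tile_id = str(uuid.uuid3(uuid.NAMESPACE_DNS,
                                              granule
                                              + tile.summary.data_var_name
                                              + tile.summary.section_spec))
        return tile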
def process(self, tile, dataset: xr.Dataset, *args, **kwargs):
    try:
        dimensions_to_slices = self._convert_spec_to_slices(tile.summary.section_spec)

        output_tile = nexusproto.NexusTile()
        output_tile.CopyFrom(tile)
        output_tile.summary.data_var_name = self.variable

        return self._generate_tile(dataset, dimensions_to_slices, output_tile)
    except Exception as e:
        raise TileProcessingError(
            f"Could not generate tiles from the granule because of the following error: {e}.")
def test_read_not_empty_ascatb_meta(self):
    test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_ascatb.nc4')

    swath_reader = sdap.processors.SwathReadingProcessor('wind_speed', 'lat', 'lon',
                                                         time='time', meta='wind_dir')

    input_tile = nexusproto.NexusTile()
    tile_summary = nexusproto.TileSummary()
    tile_summary.granule = "file:%s" % test_file
    tile_summary.section_spec = "NUMROWS:0:1,NUMCELLS:0:82"
    input_tile.summary.CopyFrom(tile_summary)

    results = list(swath_reader.process(input_tile))

    self.assertEqual(1, len(results))

    for nexus_tile in results:
        self.assertTrue(nexus_tile.HasField('tile'))
        self.assertTrue(nexus_tile.tile.HasField('swath_tile'))
        self.assertLess(0, len(nexus_tile.tile.swath_tile.meta_data))

    self.assertEqual(1, len(results[0].tile.swath_tile.meta_data))
    tile1_meta_data = np.ma.masked_invalid(from_shaped_array(results[0].tile.swath_tile.meta_data[0].meta_data))
    self.assertEqual((1, 82), tile1_meta_data.shape)
    self.assertEqual(82, np.ma.count(tile1_meta_data))
def read_data(self, tile_specifications, file_path, output_tile):
    # Time is optional for Grid data
    time = self.environ['TIME']

    with xr.decode_cf(xr.open_dataset(file_path, decode_cf=False), decode_times=False) as ds:
        for section_spec, dimtoslice in tile_specifications:
            tile = nexusproto.GridTile()

            tile.latitude.CopyFrom(
                to_shaped_array(numpy.ma.filled(ds[self.latitude].data[dimtoslice[self.y_dim]], numpy.NaN)))
            tile.longitude.CopyFrom(
                to_shaped_array(numpy.ma.filled(ds[self.longitude].data[dimtoslice[self.x_dim]], numpy.NaN)))

            # Before we read the data we need to make sure the dimensions are in the proper order
            # so we don't have any indexing issues
            ordered_slices = get_ordered_slices(ds, self.variable_to_read, dimtoslice)

            # Read data using the ordered slices, replacing masked values with NaN
            data_array = numpy.ma.filled(
                ds[self.variable_to_read].data[tuple(ordered_slices.values())], numpy.NaN)

            tile.variable_data.CopyFrom(to_shaped_array(data_array))

            if self.metadata is not None:
                tile.meta_data.add().CopyFrom(
                    to_metadata(self.metadata, ds[self.metadata].data[tuple(ordered_slices.values())]))

            if time is not None:
                timevar = ds[time]
                # Note: the assumption is that the time index is the start value in dimtoslice
                tile.time = to_seconds_from_epoch(timevar.data[dimtoslice[time].start],
                                                  timeunits=timevar.attrs['units'],
                                                  timeoffset=self.time_offset)

            output_tile.tile.grid_tile.CopyFrom(tile)

            yield output_tile
def test_build_solr_doc(self):
    tile = nexusproto.NexusTile()
    tile.summary.tile_id = 'test_id'
    tile.summary.dataset_name = 'test_dataset'
    tile.summary.dataset_uuid = 'test_dataset_id'
    tile.summary.data_var_name = 'test_variable'
    tile.summary.granule = 'test_granule_path'
    tile.summary.section_spec = 'time:0:1,j:0:20,i:200:240'
    tile.summary.bbox.lat_min = -180.1
    tile.summary.bbox.lat_max = 180.2
    tile.summary.bbox.lon_min = -90.5
    tile.summary.bbox.lon_max = 90.0
    tile.summary.stats.min = -10.0
    tile.summary.stats.max = 25.5
    tile.summary.stats.mean = 12.5
    tile.summary.stats.count = 100
    tile.summary.stats.min_time = 694224000
    tile.summary.stats.max_time = 694310400
    tile.summary.standard_name = 'sea_surface_temperature'
    tile.tile.ecco_tile.depth = 10.5

    metadata_store = SolrStore()
    solr_doc = metadata_store._build_solr_doc(tile)

    self.assertEqual('sea_surface_temp', solr_doc['table_s'])
    self.assertEqual(
        'POLYGON((-90.500 -180.100, 90.000 -180.100, 90.000 180.200, -90.500 180.200, -90.500 -180.100))',
        solr_doc['geo'])
    self.assertEqual('test_id', solr_doc['id'])
    self.assertEqual('test_dataset!test_id', solr_doc['solr_id_s'])
    self.assertEqual('time:0:1,j:0:20,i:200:240', solr_doc['sectionSpec_s'])
    self.assertEqual('test_granule_path', solr_doc['granule_s'])
    self.assertEqual('sea_surface_temperature', solr_doc['tile_var_name_s'])
    self.assertAlmostEqual(-90.5, solr_doc['tile_min_lon'])
    self.assertAlmostEqual(90.0, solr_doc['tile_max_lon'])
    self.assertAlmostEqual(-180.1, solr_doc['tile_min_lat'], delta=1E-5)
    self.assertAlmostEqual(180.2, solr_doc['tile_max_lat'], delta=1E-5)
    self.assertEqual('1992-01-01T00:00:00Z', solr_doc['tile_min_time_dt'])
    self.assertEqual('1992-01-02T00:00:00Z', solr_doc['tile_max_time_dt'])
    self.assertAlmostEqual(-10.0, solr_doc['tile_min_val_d'])
    self.assertAlmostEqual(25.5, solr_doc['tile_max_val_d'])
    self.assertAlmostEqual(12.5, solr_doc['tile_avg_val_d'])
    self.assertEqual(100, solr_doc['tile_count_i'])
    self.assertAlmostEqual(10.5, solr_doc['tile_depth'])
def test_read_not_empty_wswm(self):
    reading_processor = TimeSeriesReadingProcessor('Qout', 'lat', 'lon', time='time')
    granule_path = path.join(path.dirname(__file__), '../granules/not_empty_wswm.nc')

    input_tile = nexusproto.NexusTile()
    input_tile.summary.granule = granule_path

    dimensions_to_slices = {
        'time': slice(0, 5832),
        'rivid': slice(0, 1),
    }

    with xr.open_dataset(granule_path) as ds:
        output_tile = reading_processor._generate_tile(ds, dimensions_to_slices, input_tile)

        self.assertEqual(granule_path, output_tile.summary.granule, granule_path)
        self.assertEqual([5832], output_tile.tile.time_series_tile.time.shape)
        self.assertEqual([5832, 1], output_tile.tile.time_series_tile.variable_data.shape)
        self.assertEqual([1], output_tile.tile.time_series_tile.latitude.shape)
        self.assertEqual([1], output_tile.tile.time_series_tile.longitude.shape)
def _generate_tile(self, ds: xr.Dataset, dimensions_to_slices: Dict[str, slice], input_tile):
    new_tile = nexusproto.GridTile()

    lat_subset = ds[self.latitude][type(self)._slices_for_variable(ds[self.latitude], dimensions_to_slices)]
    lon_subset = ds[self.longitude][type(self)._slices_for_variable(ds[self.longitude], dimensions_to_slices)]
    lat_subset = np.ma.filled(np.squeeze(lat_subset), np.NaN)
    lon_subset = np.ma.filled(np.squeeze(lon_subset), np.NaN)

    data_subset = ds[self.variable][type(self)._slices_for_variable(ds[self.variable], dimensions_to_slices)]
    data_subset = np.ma.filled(np.squeeze(data_subset), np.NaN)

    if self.depth:
        depth_dim, depth_slice = list(type(self)._slices_for_variable(ds[self.depth],
                                                                      dimensions_to_slices).items())[0]
        depth_slice_len = depth_slice.stop - depth_slice.start
        if depth_slice_len > 1:
            raise RuntimeError("Depth slices must have length 1, but '{dim}' has length {dim_len}."
                               .format(dim=depth_dim, dim_len=depth_slice_len))
        new_tile.depth = ds[self.depth][depth_slice].item()

    if self.time:
        time_slice = dimensions_to_slices[self.time]
        time_slice_len = time_slice.stop - time_slice.start
        if time_slice_len > 1:
            raise RuntimeError("Time slices must have length 1, but '{dim}' has length {dim_len}."
                               .format(dim=self.time, dim_len=time_slice_len))
        # Convert nanoseconds since epoch (xarray's datetime64 representation) to seconds
        new_tile.time = int(ds[self.time][time_slice.start].item() / 1e9)

    new_tile.latitude.CopyFrom(to_shaped_array(lat_subset))
    new_tile.longitude.CopyFrom(to_shaped_array(lon_subset))
    new_tile.variable_data.CopyFrom(to_shaped_array(data_subset))

    input_tile.tile.grid_tile.CopyFrom(new_tile)
    return input_tile
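# NOTE: _slices_for_variable is referenced above but not defined in this section. A
# minimal sketch of the assumed behavior: keep only the slices for the dimensions the
# given variable actually has, keyed by dimension name so the result can be passed
# directly to xarray's dict-based positional indexing (da[{dim: slice}]).
@classmethod
def _slices_for_variable(cls, variable, dimensions_to_slices):
    return {dimension: dimensions_to_slices[dimension] for dimension in variable.dims}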
def read_tile(self):
    reading_processor = GridReadingProcessor('B03', 'lat', 'lon', time='time')
    granule_path = path.join(path.dirname(__file__), '../granules/HLS.S30.T11SPC.2020001.v1.4.hdf.nc')

    input_tile = nexusproto.NexusTile()
    input_tile.summary.granule = granule_path

    dimensions_to_slices = {
        'time': slice(0, 1),
        'lat': slice(0, 30),
        'lon': slice(0, 30)
    }

    with xr.open_dataset(granule_path) as ds:
        return reading_processor._generate_tile(ds, dimensions_to_slices, input_tile)
def to_metadata(name, data_array):
    metadata = nexusproto.MetaData()
    metadata.name = name
    metadata.meta_data.CopyFrom(to_shaped_array(data_array))

    return metadata