def test_includes_provided_regridded_subsetted_ext(self):
    # Exercises the regridded / subsetted flag combinations, with the
    # requested extension replacing the one found in the input URL.
    source_url = 'https://example.com/fake-path/abc.123.nc/?query=true'
    extension = 'zarr'
    no_variables = []

    # Each row pairs the keyword arguments for one call with the filename
    # that call is expected to produce.  Rows run in order and the first
    # mismatch fails the test.
    cases = [
        ({}, 'abc.123.zarr'),
        ({'is_subsetted': True}, 'abc.123_subsetted.zarr'),
        ({'is_regridded': True}, 'abc.123_regridded.zarr'),
        ({'is_subsetted': True, 'is_regridded': True},
         'abc.123_regridded_subsetted.zarr'),
        # An empty variable list adds no variable name to the output
        ({'variable_subset': no_variables, 'is_subsetted': True, 'is_regridded': True},
         'abc.123_regridded_subsetted.zarr'),
    ]
    for kwargs, expected in cases:
        self.assertEqual(
            util.generate_output_filename(source_url, extension, **kwargs),
            expected)
def test_avoids_overwriting_single_suffixes(self):
    # The input filename here already ends in "_regridded"; the expected
    # outputs contain that suffix exactly once.
    extension = 'zarr'
    single_variable = ['VarA']
    suffixed_url = 'https://example.com/fake-path/abc.123_regridded.zarr'

    # Only the subsetted flag is set
    subsetted_name = util.generate_output_filename(
        suffixed_url, extension, is_subsetted=True)
    self.assertEqual(subsetted_name, 'abc.123_regridded_subsetted.zarr')

    # Both flags set, plus a single variable
    full_name = util.generate_output_filename(
        suffixed_url,
        extension,
        variable_subset=single_variable,
        is_subsetted=True,
        is_regridded=True)
    self.assertEqual(full_name, 'abc.123_VarA_regridded_subsetted.zarr')
def test_allows_variable_objects(self):
    # The variable subset is given as a Variable object rather than a plain
    # string; the URL itself carries no suffixes.
    extension = 'zarr'
    variable_objects = [Variable({'name': 'VarA'})]
    plain_url = 'https://example.com/fake-path/abc.123.zarr'

    generated = util.generate_output_filename(
        plain_url, extension, variable_subset=variable_objects)
    self.assertEqual(generated, 'abc.123_VarA.zarr')
def test_excludes_multiple_variable(self):
    # With more than one variable requested, no variable name is expected
    # in the generated filename.
    source_url = 'https://example.com/fake-path/abc.123.nc/?query=true'
    extension = 'zarr'
    two_variables = ['VarA', 'VarB']
    expected = 'abc.123_regridded_subsetted.zarr'

    # First without a variable subset, then with the two-variable subset;
    # both calls are expected to yield the same name.
    cases = [
        {'is_subsetted': True, 'is_regridded': True},
        {'variable_subset': two_variables, 'is_subsetted': True, 'is_regridded': True},
    ]
    for kwargs in cases:
        self.assertEqual(
            util.generate_output_filename(source_url, extension, **kwargs),
            expected)
def process_item(self, item, source):
    """
    Converts an input STAC Item's data into Zarr, returning an output STAC item

    Parameters
    ----------
    item : pystac.Item
        the item that should be converted
    source : harmony.message.Source
        the input source defining the variables, if any, to subset from the item
        (not read by this method)

    Returns
    -------
    pystac.Item
        a STAC item containing the Zarr output

    Raises
    ------
    ZarrException
        if opening the store or converting the file fails; the underlying error
        is logged and chained as the cause
    StopIteration
        if the item has no asset with the 'data' role
    """
    result = item.clone()
    result.assets = {}

    # Create a temporary dir for processing we may do
    workdir = mkdtemp()
    try:
        # Get the data file: the first asset whose roles include 'data'
        asset = next(a for a in item.assets.values() if 'data' in (a.roles or []))
        input_filename = download(
            asset.href,
            workdir,
            logger=self.logger,
            access_token=self.message.accessToken,
            cfg=self.config)

        # The Zarr store is written under the staging location
        name = generate_output_filename(asset.href, ext='.zarr')
        root = self.message.stagingLocation + name

        try:
            store = self.s3.get_mapper(root=root, check=False, create=True)
            netcdf_to_zarr(input_filename, store)
        except Exception as e:
            # Print the real error and convert to user-facing error that's more digestible
            self.logger.error(e, exc_info=1)
            filename = asset.href.split('?')[0].rstrip('/').split('/')[-1]
            # Chain explicitly so the original failure is kept as __cause__
            raise ZarrException('Could not convert file to Zarr: %s' % (filename)) from e

        # Update the STAC record
        result.assets['data'] = Asset(
            root, title=name, media_type='application/x-zarr', roles=['data'])

        # Return the STAC record
        return result
    finally:
        # Clean up any intermediate resources
        shutil.rmtree(workdir)
def test_includes_single_variable_name_replacing_slashes(self):
    # The variable name is a full path; the expected filename has every
    # '/' in it replaced with '_'.
    source_url = 'https://example.com/fake-path/abc.123.nc/?query=true'
    extension = 'zarr'
    path_variable = ['/path/to/VarB']

    generated = util.generate_output_filename(
        source_url,
        extension,
        variable_subset=path_variable,
        is_subsetted=True,
        is_regridded=True)
    self.assertEqual(generated, 'abc.123__path_to_VarB_regridded_subsetted.zarr')
def test_includes_single_variable(self):
    # When exactly one variable is requested, its name is expected in the
    # generated filename, ahead of any regridded / subsetted suffixes.
    source_url = 'https://example.com/fake-path/abc.123.nc/?query=true'
    extension = 'zarr'
    single_variable = ['VarA']

    # Each row pairs the keyword arguments for one call with the filename
    # that call is expected to produce.  Rows run in order and the first
    # mismatch fails the test.
    cases = [
        ({}, 'abc.123.zarr'),
        ({'is_subsetted': True, 'is_regridded': True},
         'abc.123_regridded_subsetted.zarr'),
        ({'variable_subset': single_variable}, 'abc.123_VarA.zarr'),
        ({'variable_subset': single_variable, 'is_subsetted': True, 'is_regridded': True},
         'abc.123_VarA_regridded_subsetted.zarr'),
    ]
    for kwargs, expected in cases:
        self.assertEqual(
            util.generate_output_filename(source_url, extension, **kwargs),
            expected)
def process_item(self, item, source):
    """
    Handles one input item and produces the corresponding output item.

    Services that work on a single file at a time should implement this
    method in preference to #invoke.  This example implementation copies
    the input file to the output unchanged and marks the "dpi" and
    "variables" message attributes as processed.

    Parameters
    ----------
    item : pystac.Item
        the item that should be processed
    source : harmony.message.Source
        the input source defining the variables, if any, to subset from the item

    Returns
    -------
    pystac.Item
        a STAC catalog whose metadata and assets describe the service output
    """
    output_item = item.clone()
    output_item.assets = {}

    # Scratch directory for downloads and intermediate files
    scratch_dir = mkdtemp()
    try:
        # Locate the first asset carrying the 'data' role and fetch it
        data_asset = next(
            candidate
            for _, candidate in item.assets.items()
            if 'data' in (candidate.roles or [])
        )
        downloaded_path = download(
            data_asset.href,
            scratch_dir,
            logger=self.logger,
            access_token=self.message.accessToken)

        # Mark any fields the service processes so later services do not repeat work
        dpi = self.message.format.process('dpi')
        # Variable subsetting
        requested_variables = source.process('variables')

        # Do the work here!
        joined_names = ', '.join([variable.name for variable in requested_variables])
        print('Processing item %s, DPI=%d, vars=[%s]' % (item.id, dpi, joined_names))
        copied_path = os.path.join(scratch_dir, 'tmp.txt')
        shutil.copyfile(downloaded_path, copied_path)

        # Stage the output file with a conventional filename
        staged_name = generate_output_filename(
            data_asset.href,
            ext=None,
            variable_subset=None,
            is_regridded=False,
            is_subsetted=False)
        staged_url = stage(
            copied_path,
            staged_name,
            'text/plain',
            location=self.message.stagingLocation,
            logger=self.logger)

        # Update the STAC record
        output_item.assets['data'] = Asset(
            staged_url, title=staged_name, media_type='text/plain', roles=['data'])

        # Other metadata updates may be appropriate, such as result.bbox and
        # result.geometry if a spatial subset was performed

        # Return the STAC record
        return output_item
    finally:
        # Clean up any intermediate resources
        shutil.rmtree(scratch_dir)