def test_includes_provided_regridded_subsetted_ext(self):
        """Check the name produced for each combination of the subsetted/regridded flags.

        Each case pairs the keyword arguments passed to
        ``util.generate_output_filename`` with the filename the test expects.
        The last case also passes an empty variable list and expects the same
        name as the case without one.
        """
        source_url = 'https://example.com/fake-path/abc.123.nc/?query=true'
        extension = 'zarr'
        no_variables = []

        # Basic cases: (keyword arguments, expected filename)
        cases = (
            ({}, 'abc.123.zarr'),
            ({'is_subsetted': True}, 'abc.123_subsetted.zarr'),
            ({'is_regridded': True}, 'abc.123_regridded.zarr'),
            ({'is_subsetted': True, 'is_regridded': True},
             'abc.123_regridded_subsetted.zarr'),
            ({'variable_subset': no_variables,
              'is_subsetted': True,
              'is_regridded': True},
             'abc.123_regridded_subsetted.zarr'),
        )

        for options, expected in cases:
            actual = util.generate_output_filename(source_url,
                                                   extension,
                                                   **options)
            self.assertEqual(actual, expected)
    def test_avoids_overwriting_single_suffixes(self):
        """Check names generated from a URL whose filename already ends in ``_regridded``.

        The expected outputs contain ``_regridded`` exactly once, whether or not
        the regridded flag is passed again.
        """
        already_regridded = 'https://example.com/fake-path/abc.123_regridded.zarr'
        extension = 'zarr'
        single_variable = ['VarA']

        # Requesting only a subset appends "_subsetted" after the existing suffix
        subsetted_name = util.generate_output_filename(already_regridded,
                                                       extension,
                                                       is_subsetted=True)
        self.assertEqual(subsetted_name, 'abc.123_regridded_subsetted.zarr')

        # With a single variable and both flags, the variable name precedes the suffixes
        full_name = util.generate_output_filename(
            already_regridded,
            extension,
            variable_subset=single_variable,
            is_subsetted=True,
            is_regridded=True,
        )
        self.assertEqual(full_name, 'abc.123_VarA_regridded_subsetted.zarr')
 def test_allows_variable_objects(self):
     """Check that a ``Variable`` object can be passed in ``variable_subset``.

     The expected filename carries the variable's name ("VarA") as a suffix.
     """
     source_url = 'https://example.com/fake-path/abc.123.zarr'
     extension = 'zarr'
     # The subset is given as a Variable object rather than a plain string
     variable_objects = [Variable({'name': 'VarA'})]

     generated = util.generate_output_filename(
         source_url,
         extension,
         variable_subset=variable_objects,
     )
     self.assertEqual(generated, 'abc.123_VarA.zarr')
    def test_excludes_multiple_variable(self):
        """Check that no variable name appears in the filename when two variables are given.

        The expected name is the same with and without the two-variable subset.
        """
        source_url = 'https://example.com/fake-path/abc.123.nc/?query=true'
        extension = 'zarr'
        expected = 'abc.123_regridded_subsetted.zarr'

        # Multiple variable cases (no variable name in suffix)
        two_variables = ['VarA', 'VarB']

        without_variables = util.generate_output_filename(source_url,
                                                          extension,
                                                          is_subsetted=True,
                                                          is_regridded=True)
        self.assertEqual(without_variables, expected)

        with_variables = util.generate_output_filename(
            source_url,
            extension,
            variable_subset=two_variables,
            is_subsetted=True,
            is_regridded=True,
        )
        self.assertEqual(with_variables, expected)
    def process_item(self, item, source):
        """
        Converts an input STAC Item's data into Zarr, returning an output STAC item

        Parameters
        ----------
        item : pystac.Item
            the item that should be converted
        source : harmony.message.Source
            the input source defining the variables, if any, to subset from the item
            (not read by this implementation)

        Returns
        -------
        pystac.Item
            a clone of the input item whose only asset is the Zarr output

        Raises
        ------
        StopIteration
            if the item has no asset with the 'data' role
        ZarrException
            if the Zarr store cannot be opened or the conversion fails; the
            underlying error is logged and attached as the exception's cause
        """
        result = item.clone()
        result.assets = {}

        # Create a temporary dir for processing we may do
        workdir = mkdtemp()
        try:
            # Get the data file: the first asset whose roles include 'data'
            asset = next(v for _, v in item.assets.items()
                         if 'data' in (v.roles or []))
            input_filename = download(asset.href,
                                      workdir,
                                      logger=self.logger,
                                      access_token=self.message.accessToken,
                                      cfg=self.config)

            # The output location is the staging location followed by the generated name
            name = generate_output_filename(asset.href, ext='.zarr')
            root = self.message.stagingLocation + name

            try:
                store = self.s3.get_mapper(root=root, check=False, create=True)
                netcdf_to_zarr(input_filename, store)
            except Exception as e:
                # Print the real error and convert to user-facing error that's more digestible
                self.logger.error(e, exc_info=1)
                # Report only the last path segment of the href, without query string
                filename = asset.href.split('?')[0].rstrip('/').split('/')[-1]
                # Chain explicitly so the original failure stays attached as __cause__
                raise ZarrException('Could not convert file to Zarr: %s' %
                                    (filename)) from e

            # Update the STAC record
            result.assets['data'] = Asset(root,
                                          title=name,
                                          media_type='application/x-zarr',
                                          roles=['data'])

            # Return the STAC record
            return result
        finally:
            # Clean up any intermediate resources
            shutil.rmtree(workdir)
    def test_includes_single_variable_name_replacing_slashes(self):
        """Check the filename produced for a single variable given as a full path.

        The expected name contains the variable path with every '/' replaced
        by '_', followed by the regridded and subsetted suffixes.
        """
        source_url = 'https://example.com/fake-path/abc.123.nc/?query=true'
        extension = 'zarr'

        # Variable name contains full path with '/' ('/' replaced with '_')
        nested_variable = ['/path/to/VarB']

        generated = util.generate_output_filename(
            source_url,
            extension,
            variable_subset=nested_variable,
            is_subsetted=True,
            is_regridded=True,
        )
        self.assertEqual(generated,
                         'abc.123__path_to_VarB_regridded_subsetted.zarr')
    def test_includes_single_variable(self):
        """Check that a single-variable subset adds the variable name to the filename.

        Calls without ``variable_subset`` are expected to produce names with no
        variable component; calls with it are expected to place "VarA" before
        any regridded/subsetted suffixes.
        """
        source_url = 'https://example.com/fake-path/abc.123.nc/?query=true'
        extension = 'zarr'

        # Single variable cases
        single_variable = ['VarA']

        # Without a variable subset
        bare_name = util.generate_output_filename(source_url, extension)
        self.assertEqual(bare_name, 'abc.123.zarr')

        flagged_name = util.generate_output_filename(source_url,
                                                     extension,
                                                     is_subsetted=True,
                                                     is_regridded=True)
        self.assertEqual(flagged_name, 'abc.123_regridded_subsetted.zarr')

        # With the single-variable subset
        variable_name = util.generate_output_filename(
            source_url, extension, variable_subset=single_variable)
        self.assertEqual(variable_name, 'abc.123_VarA.zarr')

        variable_flagged_name = util.generate_output_filename(
            source_url,
            extension,
            variable_subset=single_variable,
            is_subsetted=True,
            is_regridded=True,
        )
        self.assertEqual(variable_flagged_name,
                         'abc.123_VarA_regridded_subsetted.zarr')
Beispiel #8
0
    def process_item(self, item, source):
        """
        Processes one input item by copying its data file to the staging location.

        Services that do not aggregate multiple input files should prefer to
        implement this method rather than #invoke.  This example downloads the
        item's data asset, marks the "dpi" and "variables" message attributes as
        processed, prints a summary line, and stages an unmodified copy of the
        downloaded file as the output.

        Parameters
        ----------
        item : pystac.Item
            the item that should be processed
        source : harmony.message.Source
            the input source defining the variables, if any, to subset from the item

        Returns
        -------
        pystac.Item
            a clone of the input item whose only asset describes the staged output
        """
        output_item = item.clone()
        output_item.assets = {}

        # Scratch space for the download and the working copy
        scratch_dir = mkdtemp()
        try:
            # Locate the first asset carrying the 'data' role and fetch it
            data_assets = (candidate
                           for _, candidate in item.assets.items()
                           if 'data' in (candidate.roles or []))
            asset = next(data_assets)
            downloaded_path = download(asset.href,
                                       scratch_dir,
                                       logger=self.logger,
                                       access_token=self.message.accessToken)

            # Mark the fields this service handles so later services do not repeat work
            dpi = self.message.format.process('dpi')
            variables = source.process('variables')

            # Do the work here!
            joined_names = ', '.join([variable.name for variable in variables])
            summary = 'Processing item %s, DPI=%d, vars=[%s]' % (
                item.id, dpi, joined_names)
            print(summary)
            working_copy = os.path.join(scratch_dir, 'tmp.txt')
            shutil.copyfile(downloaded_path, working_copy)

            # Stage the output file with a conventional filename
            staged_name = generate_output_filename(
                asset.href,
                ext=None,
                variable_subset=None,
                is_regridded=False,
                is_subsetted=False,
            )
            staged_url = stage(working_copy,
                               staged_name,
                               'text/plain',
                               location=self.message.stagingLocation,
                               logger=self.logger)

            # Update the STAC record; other metadata updates may be appropriate,
            # such as the bbox and geometry if a spatial subset was performed
            output_item.assets['data'] = Asset(staged_url,
                                               title=staged_name,
                                               media_type='text/plain',
                                               roles=['data'])

            # Return the STAC record
            return output_item
        finally:
            # Clean up any intermediate resources
            shutil.rmtree(scratch_dir)