Code example #1
File: ozone.py Project: ASVincent/swfo
def convert(indir, out_fname, compression, filter_opts):
    """
    Convert GA's ozone TIFFs to HDF5.
    The TIFFs will be converted into HDF5 IMAGE Datasets,
    and contained within a single HDF5 file.
    """
    # convert to Path object
    indir = Path(indir)

    # create empty or copy the user supplied filter options
    if not filter_opts:
        filter_opts = dict()
    else:
        filter_opts = filter_opts.copy()

    with h5py.File(str(out_fname), 'w') as fid:
        for fname in indir.glob('*.tif'):
            with rasterio.open(str(fname)) as rds:
                # the files have small dimensions, so store as a single chunk
                if 'chunks' not in filter_opts:
                    filter_opts['chunks'] = (rds.height, rds.width)

                attrs = {
                    'description':
                    'Ozone data compiled by Geoscience Australia',
                    'geotransform': rds.transform.to_gdal(),
                    'crs_wkt': rds.crs.wkt
                }

                # output
                dname = fname.stem
                write_h5_image(rds.read(1), dname, fid, compression, attrs,
                               filter_opts)
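
A hypothetical invocation of the converter above is sketched below. The input directory, output filename, and the import location of H5CompressionFilter are illustrative assumptions, not taken from the project; the LZF default and the *shuffle* option are the ones named in the docstrings of later examples.

from wagl.hdf5 import H5CompressionFilter  # assumed import location

# placeholder paths; convert() globs indir for *.tif and writes one HDF5 file
convert(
    indir="/data/ancillary/ozone",
    out_fname="ozone.h5",
    compression=H5CompressionFilter.LZF,   # default noted in later examples
    filter_opts={"shuffle": True},         # 'chunks' defaults to the full image size
)
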
Code example #2
    def test_write_h5_image_minmax(self):
        """
        Test the IMAGE_MINMAXRANGE attribute is correct.
        """
        minmax = numpy.array([self.image_data.min(), self.image_data.max()])

        fname = "test_write_h5_image_minmax.h5"
        with h5py.File(fname, "w", **self.memory_kwargs) as fid:
            hdf5.write_h5_image(self.image_data, "image", fid)

            test = fid["image"].attrs["IMAGE_MINMAXRANGE"]

            self.assertTrue((minmax == test).all())
Code example #3
def convert(indir, out_h5: h5py.Group, compression, filter_opts):
    """
    Convert GA's ozone TIFFs to HDF5.
    The TIFFs will be converted into HDF5 IMAGE Datasets,
    and contained within a single HDF5 file.
    """
    # convert to Path object
    indir = Path(indir)
    dataset_names = []
    metadata = []

    # create empty or copy the user supplied filter options
    if not filter_opts:
        filter_opts = dict()
    else:
        filter_opts = filter_opts.copy()

    for fname in sorted(indir.glob("*.tif"), key=_month_sort):
        with rasterio.open(str(fname)) as rds:
            # the files have small dimensions, so store as a single chunk
            if "chunks" not in filter_opts:
                filter_opts["chunks"] = (rds.height, rds.width)

            attrs = {
                "description": "Ozone data compiled by Geoscience Australia",
                "geotransform": rds.transform.to_gdal(),
                "crs_wkt": rds.crs.wkt,
            }

            # output
            dname = fname.stem
            write_h5_image(rds.read(1), dname, out_h5, compression, attrs,
                           filter_opts)

            # src checksum; used to help derive fallback uuid
            with fname.open("rb") as src:
                src_checksum = generate_md5sum(src).hexdigest()

            dataset_names.append(dname)
            metadata.append({
                "id":
                str(
                    generate_fallback_uuid(PRODUCT_HREF,
                                           path=str(dname),
                                           md5=src_checksum))
            })

    return metadata, dataset_names
Code example #4
 def test_write_h5_image(self):
     """
     Test the write_h5_image function.
     """
     data = self.image_data
     fname = "test_write_h5_image.h5"
     with h5py.File(fname, "w", **self.memory_kwargs) as fid:
         self.assertIsNone(hdf5.write_h5_image(data, "image", fid))
Code example #5
    def test_write_h5_image_attributes(self):
        """
        Test the image attributes of the write_h5_image function.
        """
        attrs = {'CLASS': 'IMAGE',
                 'IMAGE_VERSION': '1.2',
                 'DISPLAY_ORIGIN': 'UL'}

        fname = 'test_write_h5_image_attributes.h5'
        with h5py.File(fname, **self.memory_kwargs) as fid:
            hdf5.write_h5_image(self.image_data, 'image', fid)
            test = {k: v for k, v in fid['image'].attrs.items()}

            # assertDictEqual can't compare a numpy array, so test elsewhere
            del test['IMAGE_MINMAXRANGE']

            self.assertDictEqual(test, attrs)
Code example #6
    def save_as_h5_dataset(self, out_group, acq, product,
                           compression=H5CompressionFilter.LZF,
                           filter_opts=None):
        """
        Save the PQ result and attribute information in an HDF5
        `IMAGE` Class dataset.
        """
        if filter_opts is None:
            fopts = {}
        else:
            fopts = filter_opts.copy()

        fopts['chunks'] = acq.tile_size
        attrs = self.aux_data.copy()
        attrs['crs_wkt'] = self.geobox.crs.ExportToWkt()
        attrs['geotransform'] = self.geobox.transform.to_gdal()
        dname = DatasetName.PQ_FMT.value.format(product=product.value)
        write_h5_image(self.array, dname, out_group, compression, attrs, fopts)
Code example #7
    def test_write_h5_image_attributes(self):
        """
        Test the image attributes of the write_h5_image function.
        """
        attrs = {
            "CLASS": "IMAGE",
            "IMAGE_VERSION": "1.2",
            "DISPLAY_ORIGIN": "UL"
        }

        fname = "test_write_h5_image_attributes.h5"
        with h5py.File(fname, "w", **self.memory_kwargs) as fid:
            hdf5.write_h5_image(self.image_data, "image", fid)
            test = {k: v for k, v in fid["image"].attrs.items()}

            # assertDictEqual can't compare a numpy array, so test elsewhere
            del test["IMAGE_MINMAXRANGE"]

            self.assertDictEqual(test, attrs)
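
Taken together, the tests above pin down the attributes that write_h5_image attaches to a single-band dataset. The following is a minimal h5py-only sketch of that attribute layout, written purely for illustration; it is not the project's implementation and it omits compression and chunking.

import numpy
import h5py


def write_h5_image_sketch(data, dset_name, group):
    """Illustrative stand-in for write_h5_image (attributes only)."""
    dset = group.create_dataset(dset_name, data=data)

    # HDF5 IMAGE Class attributes asserted by the tests above
    dset.attrs["CLASS"] = "IMAGE"
    dset.attrs["IMAGE_VERSION"] = "1.2"
    dset.attrs["DISPLAY_ORIGIN"] = "UL"

    # single-band data also records its data range
    dset.attrs["IMAGE_MINMAXRANGE"] = numpy.array([data.min(), data.max()])


# exercise it against an in-memory file, mirroring the tests' memory_kwargs
with h5py.File("sketch.h5", "w", driver="core", backing_store=False) as fid:
    write_h5_image_sketch(numpy.arange(100, dtype="int16").reshape(10, 10), "image", fid)
    print(dict(fid["image"].attrs))
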
Code example #8
def _convert_2d(rds, fid, dataset_name, compression, filter_opts):
    """
    Private routine for converting the 2D GRIB file to HDF5.
    """
    attrs = {
        'geotransform': rds.transform.to_gdal(),
        'crs_wkt': rds.crs.wkt,
        'history': 'Converted to HDF5'
    }
    data = rds.read(1)
    write_h5_image(data, dataset_name, fid, compression, attrs, filter_opts)

    # add dimension labels, but should we also include dimension scales?
    dataset = fid[dataset_name]
    dataset.dims[0].label = 'Y'
    dataset.dims[1].label = 'X'

    # metadata
    metadata = metadata_dataframe(rds)
    write_dataframe(metadata, 'METADATA', fid, compression)
Code example #9
def _convert_2d(rds, fid, dataset_name, compression, filter_opts):
    """
    Private routine for converting the 2D GRIB file to HDF5.
    """
    attrs = {
        "geotransform": rds.transform.to_gdal(),
        "crs_wkt": rds.crs.wkt,
        "history": "Converted to HDF5",
    }
    data = rds.read(1)
    write_h5_image(data, dataset_name, fid, compression, attrs, filter_opts)

    # add dimension labels, but should we also include dimension scales?
    dataset = fid[dataset_name]
    dataset.dims[0].label = "Y"
    dataset.dims[1].label = "X"

    # metadata
    metadata = metadata_dataframe(rds)
    write_dataframe(metadata, "METADATA", fid, compression)
Code example #10
File: brdf.py Project: ASVincent/wagl
    def convert_format(self,
                       dataset_name,
                       group,
                       attrs=None,
                       compression=H5CompressionFilter.LZF,
                       filter_opts=None):
        """
        Convert the HDF4 file to an HDF5 dataset.
        """
        if attrs is None:
            attrs = {}

        # Get the co-ordinate of the UL corner of the UL pixel
        ul_lon = self.ul[0]
        ul_lat = self.ul[1]

        # pixel size x & y
        pixsz_x = self.delta_lon
        pixsz_y = self.delta_lat

        # Set up the projection; assuming geographic WGS84
        # (tests have shown that this appears to be the case;
        # unfortunately it is not explicitly defined in the HDF file)
        sr = osr.SpatialReference()
        sr.SetWellKnownGeogCS("WGS84")
        prj = sr.ExportToWkt()

        # Setup the geobox
        dims = self.data[0].shape
        res = (abs(pixsz_x), abs(pixsz_y))
        geobox = GriddedGeoBox(shape=dims,
                               origin=(ul_lon, ul_lat),
                               pixelsize=res,
                               crs=prj)

        # Write the dataset
        attrs['description'] = 'Converted BRDF data from H4 to H5.'
        attrs['crs_wkt'] = prj
        attrs['geotransform'] = geobox.transform.to_gdal()
        write_h5_image(self.data[0], dataset_name, group, compression, attrs,
                       filter_opts)
Code example #11
    def test_write_h5_image_multiband(self):
        """
        Test the {BAND}_MINMAXRANGE attribute is correct.
        """
        band_names = ["ISO", "VOL", "GEO"]
        dtype = numpy.dtype([(bname, "int16") for bname in band_names])

        dataset = numpy.ndarray(shape=self.image_data.shape, dtype=dtype)
        for bname in band_names:
            dataset[bname] = self.image_data
        minmax = numpy.array([self.image_data.min(), self.image_data.max()])

        fname = "test_write_h5_multi.h5"
        with h5py.File(fname, "w", **self.memory_kwargs) as fid:
            hdf5.write_h5_image(dataset, "image", fid)

            self.assertFalse("IMAGE_MINMAXRANGE" in fid["image"].attrs)

            for bname in band_names:
                test = fid["image"].attrs["{}_MINMAXRANGE".format(bname)]
                self.assertTrue((minmax == test).all())
Code example #12
File: test_hdf5.py Project: truth-quark/wagl
    def test_write_h5_image_multiband(self):
        """
        Test the {BAND}_MINMAXRANGE attribute is correct.
        """
        band_names = ['ISO', 'VOL', 'GEO']
        dtype = numpy.dtype([(bname, 'int16') for bname in band_names])

        dataset = numpy.ndarray(shape=self.image_data.shape, dtype=dtype)
        for bname in band_names:
            dataset[bname] = self.image_data
        minmax = numpy.array([self.image_data.min(), self.image_data.max()])

        fname = 'test_write_h5_multi.h5'
        with h5py.File(fname, **self.memory_kwargs) as fid:
            hdf5.write_h5_image(dataset, 'image', fid)

            self.assertFalse('IMAGE_MINMAXRANGE' in fid['image'].attrs)

            for bname in band_names:
                test = fid['image'].attrs['{}_MINMAXRANGE'.format(bname)]
                self.assertTrue((minmax == test).all())
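
For structured (multi-band) arrays, the two tests above expect one {BAND}_MINMAXRANGE attribute per named field and no IMAGE_MINMAXRANGE. The short sketch below shows only that attribute layout, for illustration; it is not the project's implementation.

import numpy
import h5py

band_names = ["ISO", "VOL", "GEO"]
dtype = numpy.dtype([(name, "int16") for name in band_names])
data = numpy.zeros((16, 16), dtype=dtype)

with h5py.File("multi.h5", "w", driver="core", backing_store=False) as fid:
    # a compound numpy dtype maps to an HDF5 compound dataset
    dset = fid.create_dataset("image", data=data)

    # per-band range attributes; no single IMAGE_MINMAXRANGE is written
    for name in band_names:
        dset.attrs["{}_MINMAXRANGE".format(name)] = numpy.array(
            [data[name].min(), data[name].max()]
        )
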
Code example #13
def prwtr_average(indir,
                  outdir,
                  compression=H5CompressionFilter.LZF,
                  filter_opts=None):
    """
    Take the 4-hourly daily average from all files.
    """
    df = build_index(indir)

    # grouping
    groups = df.groupby([df.index.month, df.index.day, df.index.hour])

    # create directories as needed
    out_fname = Path(outdir).joinpath("pr_wtr_average.h5")
    if not out_fname.parent.exists():
        out_fname.parent.mkdir(parents=True)

    # create output file
    with h5py.File(str(out_fname), 'w') as fid:

        # the data is ordered so we can safely use BAND-1 = Jan-1
        for band_index, item in enumerate(groups):
            grp_name, grp_df = item

            # synthesised leap year timestamp (use year 2000)
            fmt = "2000 {:02d} {:02d} {:02d}"
            dtime = datetime.strptime(fmt.format(*grp_name), "%Y %m %d %H")

            # mean
            mean, geobox, chunks = calculate_average(grp_df)

            # dataset name format "%B-%d/%H%M" eg FEBRUARY-06/1800 for Feb 6th 1800 hrs
            dname = "AVERAGE/{}".format(dtime.strftime("%B-%d/%H%M").upper())

            # dataset description
            description = ("Average data for {year_month} {hour}00 hours, "
                           "over the time period {dt_min} to {dt_max}")
            description = description.format(
                year_month=dtime.strftime("%B-%d"),
                hour=dtime.strftime("%H"),
                dt_min=grp_df.index.min(),
                dt_max=grp_df.index.max())

            # dataset attributes
            attrs = {
                "description": description,
                "timestamp": dtime,
                "date_format": "2000 %B-%d/%H%M",
                "band_name": "BAND-{}".format(band_index + 1),
                "geotransform": geobox.transform.to_gdal(),
                "crs_wkt": geobox.crs.ExportToWkt()
            }

            # create empty or copy the user supplied filter options
            if not filter_opts:
                f_opts = dict()
            else:
                f_opts = filter_opts.copy()

            # use original chunks if none are provided
            if 'chunks' not in f_opts:
                f_opts['chunks'] = chunks

            # write
            write_h5_image(mean,
                           dname,
                           fid,
                           attrs=attrs,
                           compression=compression,
                           filter_opts=f_opts)
Code example #14
def convert_tile(fname, out_fname, compression, filter_opts):
    """
    Convert a MCD43A1 HDF4 tile into HDF5.
    Global and dataset level metadata are copied across.

    :param fname:
        A str containing the MCD43A1 filename.

    :param out_fname:
        A str containing the output filename for the HDF5 file.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to disk.
    """
    with h5py.File(out_fname, 'w') as fid:
        with rasterio.open(fname) as ds:
            # global attributes
            attach_attributes(fid, ds.tags())

            # find and convert every subdataset (sds)
            for sds_name in ds.subdatasets:
                with rasterio.open(sds_name) as sds:
                    ds_name = Path(sds_name.replace(':', '/')).name

                    # create empty or copy the user supplied filter options
                    if not filter_opts:
                        f_opts = dict()
                    else:
                        f_opts = filter_opts.copy()

                    # use sds native chunks if none are provided
                    if 'chunks' not in f_opts:
                        f_opts['chunks'] = list(sds.block_shapes[0])

                    # modify to have 3D chunks if we have a multiband sds
                    if sds.count == 3:
                        # something could go wrong if a user supplies
                        # a 3D chunk eg (2, 256, 340)
                        f_opts['chunks'].insert(0, 1)
                        f_opts['chunks'] = tuple(f_opts['chunks'])
                    else:
                        f_opts['chunks'] = tuple(f_opts['chunks'])

                    # subdataset attributes and spatial attributes
                    attrs = sds.tags()
                    attrs['geotransform'] = sds.transform.to_gdal()
                    attrs['crs_wkt'] = sds.crs.wkt

                    # ensure a single band sds is read as 2D, not 3D
                    data = sds.read() if sds.count == 3 else sds.read(1)

                    # write to disk as an IMAGE Class Dataset
                    write_h5_image(data,
                                   ds_name,
                                   fid,
                                   attrs=attrs,
                                   compression=compression,
                                   filter_opts=f_opts)
Code example #15
File: mcd43a1.py Project: sixy6e/swfo
def convert_tile(fname, out_h5: h5py.Group, compression, filter_opts):
    """
    Convert a MCD43A1 HDF4 tile into HDF5.
    Global and dataset level metadata are copied across.

    :param fname:
        A str containing the MCD43A1 filename.

    :param out_h5:
        A h5py.Group to write the output data to

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to disk.
    """
    # read the geo-spatial information beforehand
    # relying on gdal to parse it
    geospatial = {}
    with rasterio.open(fname) as ds:
        for sds_name in ds.subdatasets:
            with rasterio.open(sds_name) as sds:
                band_name = sds_name.split(":")[-1]
                geospatial[band_name] = {
                    "geotransform": sds.transform.to_gdal(),
                    "crs_wkt": sds.crs.wkt,
                }

    # convert data
    with netCDF4.Dataset(fname) as ds:
        ds.set_auto_scale(False)

        # global attributes
        global_attrs = {key: ds.getncattr(key) for key in ds.ncattrs()}
        attach_attributes(out_h5, global_attrs)

        # find and convert every subdataset (sds)
        for sds_name in sorted(ds.variables, key=_brdf_netcdf_band_orderer):
            sds = ds.variables[sds_name]

            # create empty or copy the user supplied filter options
            if not filter_opts:
                f_opts = dict()
            else:
                f_opts = filter_opts.copy()

            # Recreate datasets as 2-dimensional dataset
            dim1, dim2, *_ = sds.shape
            if "chunks" not in f_opts:
                assert dim1 == 2400 and dim2 == 2400
                f_opts["chunks"] = (240, 240)
            else:
                # keep only the first two values of any user supplied chunks
                f_opts["chunks"] = (f_opts["chunks"][0], f_opts["chunks"][1])

            # subdataset attributes and spatial attributes
            attrs = {key: sds.getncattr(key) for key in sds.ncattrs()}
            # attrs['geotransform'] = sds.transform.to_gdal()
            # attrs['crs_wkt'] = sds.crs.wkt
            attrs.update(geospatial[sds_name])

            in_arr = sds[:]
            if len(in_arr.shape) == 3:
                data = numpy.ndarray(shape=(dim1, dim2), dtype=OUT_DTYPE)
                for idx, band_name in enumerate(OUT_DTYPE.names):
                    data[band_name] = in_arr[:, :, idx]
            else:
                data = in_arr

            # write to disk as an IMAGE Class Dataset
            write_h5_image(
                data,
                sds_name,
                out_h5,
                attrs=attrs,
                compression=compression,
                filter_opts=f_opts,
            )
Code example #16
File: prwtr.py Project: sixy6e/swfo
def fallback(indir,
             outdir,
             compression=H5CompressionFilter.LZF,
             filter_opts: Optional[Dict] = None):
    """
    Take the 4-hourly daily average from all files.
    """
    df = _build_index(indir)

    # grouping
    groups = df.groupby([df.index.month, df.index.day, df.index.hour])

    # create directories as needed
    out_fname = Path(outdir).joinpath("pr_wtr.eatm.average.h5")
    out_fname.parent.mkdir(exist_ok=True, parents=True)

    # Set one creation datetime for all datasets
    creation_dt = datetime.utcnow().replace(tzinfo=timezone.utc)

    # create output file
    with h5utils.atomic_h5_write(out_fname, "w", track_order=True) as fid:

        # the data is ordered so we can safely use BAND-1 = Jan-1
        for band_index, item in enumerate(groups):
            grp_name, grp_df = item

            # synthesised leap year timestamp (use year 2000)
            fmt = "2000 {:02d} {:02d} {:02d} +0000"
            dtime = datetime.strptime(fmt.format(*grp_name), "%Y %m %d %H %z")

            # mean
            mean, geobox, chunks = _average(grp_df)

            # dataset name format "%B-%d/%H%M" eg FEBRUARY-06/1800 for Feb 6th 1800 hrs
            dname = "AVERAGE/{}".format(dtime.strftime("%B-%d/%H%M").upper())

            # dataset description
            description = ("Average data for {year_month} {hour} hours, "
                           "over the time period {dt_min} to {dt_max}")
            description = description.format(
                year_month=dtime.strftime("%B-%d"),
                hour=dtime.strftime("%H%M"),
                dt_min=grp_df.index.min().date(),
                dt_max=grp_df.index.max().date(),
            )

            # dataset attributes
            attrs = {
                "description": description,
                "timestamp": dtime,
                "date_format": "2000 %B-%d/%H%M",
                "band_name": "BAND-{}".format(band_index + 1),
                "geotransform": geobox.transform.to_gdal(),
                "crs_wkt": geobox.crs.ExportToWkt(),
            }

            # create empty or copy the user supplied filter options
            if not filter_opts:
                f_opts = dict()
            else:
                f_opts = filter_opts.copy()

            # use original chunks if none are provided
            if "chunks" not in f_opts:
                f_opts["chunks"] = chunks

            # write
            h5utils.create_groups(fid,
                                  dname.rsplit("/", 1)[0],
                                  track_order=True)
            write_h5_image(mean,
                           dname,
                           fid,
                           attrs=attrs,
                           compression=compression,
                           filter_opts=f_opts)
            # Generate metadata
            lineage_ids, src_md = _get_lineage_md(grp_df)
            md = generate_fallback_metadata(
                dname,
                lineage_ids=lineage_ids,
                obs_dt=dtime,
                start_dt=grp_df.index.min().replace(tzinfo=timezone.utc),
                end_dt=grp_df.index.max().replace(tzinfo=timezone.utc),
                creation_dt=creation_dt,
                src_md=src_md,
            )
            h5utils.write_h5_md(fid, [md], [dname], track_order=True)
Code example #17
File: prwtr.py Project: sixy6e/swfo
def convert_file(fname,
                 out_h5: h5py.Group,
                 compression,
                 filter_opts: Optional[Dict] = None):
    """
    Convert a PR_WTR NetCDF file into HDF5.

    :param fname:
        A str containing the PR_WTR filename.

    :param out_h5:
        A h5py.Group to write output datasets to.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to disk.
    """
    with rasterio.open(fname) as ds:
        name_fmt = "BAND-{}"

        # global attributes
        # TODO update the history attrs
        # TODO remove the NC_GLOBAL str and just have plain attr names
        g_attrs = ds.tags()

        # get timestamp info
        origin = g_attrs.pop("time#units").replace("hours since ", "")
        hours = json.loads(
            g_attrs.pop("NETCDF_DIM_time_VALUES").replace("{", "[").replace(
                "}", "]"))
        df = pandas.DataFrame({
            "timestamp":
            pandas.to_datetime(hours, unit="h", origin=origin),
            "band_name": [name_fmt.format(i + 1) for i in range(ds.count)],
        })
        df["dataset_name"] = df.timestamp.dt.strftime("%Y/%B-%d/%H%M")
        df["dataset_name"] = df["dataset_name"].str.upper()

        # create a timestamp and band name index table dataset
        desc = "Timestamp and Band Name index information."
        attrs = {"description": desc}
        write_dataframe(df, "INDEX", out_h5, compression, attrs=attrs)

        attach_attributes(out_h5, g_attrs)

        # process every band
        for i in range(1, ds.count + 1):
            ds_name = df.iloc[i - 1].dataset_name

            # create empty or copy the user supplied filter options
            if not filter_opts:
                f_opts = dict()
            else:
                f_opts = filter_opts.copy()

            # band attributes
            # TODO remove NETCDF tags
            # TODO add fillvalue attr
            attrs = ds.tags(i)
            attrs["timestamp"] = df.iloc[i - 1]["timestamp"].replace(
                tzinfo=timezone.utc)
            attrs["band_name"] = df.iloc[i - 1]["band_name"]
            attrs["geotransform"] = ds.transform.to_gdal()
            attrs["crs_wkt"] = CRS.ExportToWkt()

            # use ds native chunks if none are provided
            if "chunks" not in f_opts:
                try:
                    f_opts["chunks"] = ds.block_shapes[i]
                except IndexError:
                    print("Chunk error: {}".format(fname))
                    f_opts["chunks"] = (73, 144)

            # write to disk as an IMAGE Class Dataset
            write_h5_image(
                ds.read(i),
                ds_name,
                out_h5,
                attrs=attrs,
                compression=compression,
                filter_opts=f_opts,
            )
Code example #18
def convert_file(fname, out_fname, compression, filter_opts):
    """
    Convert a PR_WTR NetCDF file into HDF5.

    :param fname:
        A str containing the PR_WTR filename.

    :param out_fname:
        A str containing the output filename for the HDF5 file.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to disk.
    """
    with h5py.File(out_fname, 'w') as fid:
        with rasterio.open(fname) as ds:
            name_fmt = 'BAND-{}'

            # global attributes
            # TODO update the history attrs
            # TODO remove the NC_GLOBAL str and just have plain attr names
            g_attrs = ds.tags()

            # get timestamp info
            origin = g_attrs.pop('time#units').replace('hours since ', '')
            hours = json.loads(
                g_attrs.pop('NETCDF_DIM_time_VALUES').replace('{', '[').replace('}', ']')
            )
            df = pandas.DataFrame(
                {
                    'timestamp': pandas.to_datetime(hours, unit='h', origin=origin),
                    'band_name': [name_fmt.format(i+1) for i in range(ds.count)]
                }
            )
            df['dataset_name'] = df.timestamp.dt.strftime('%Y/%B-%d/%H%M')
            df['dataset_name'] = df['dataset_name'].str.upper()

            # create a timestamp and band name index table dataset
            desc = "Timestamp and Band Name index information."
            attrs = {
                'description': desc
            }
            write_dataframe(df, 'INDEX', fid, compression, attrs=attrs)

            attach_attributes(fid, g_attrs)

            # process every band
            for i in range(1, ds.count + 1):
                ds_name = df.iloc[i-1].dataset_name

                # create empty or copy the user supplied filter options
                if not filter_opts:
                    f_opts = dict()
                else:
                    f_opts = filter_opts.copy()


                # band attributes
                # TODO remove NETCDF tags
                # TODO add fillvalue attr
                attrs = ds.tags(i)
                attrs['timestamp'] = df.iloc[i-1]['timestamp']
                attrs['band_name'] = df.iloc[i-1]['band_name']
                attrs['geotransform'] = ds.transform.to_gdal()
                attrs['crs_wkt'] = CRS.ExportToWkt()

                # use ds native chunks if none are provided
                if 'chunks' not in f_opts:
                    try:
                        f_opts['chunks'] = ds.block_shapes[i]
                    except IndexError:
                        print("Chunk error: {}".format(fname))
                        f_opts['chunks'] = (73, 144)

                # write to disk as an IMAGE Class Dataset
                write_h5_image(ds.read(i), ds_name, fid, attrs=attrs,
                               compression=compression, filter_opts=f_opts)
Code example #19
def interpolate(acq, coefficient, ancillary_group, satellite_solar_group,
                coefficients_group, out_group=None,
                compression=H5CompressionFilter.LZF, filter_opts=None,
                method=Method.SHEARB):
    # TODO: more docstrings
    """Perform interpolation."""
    if method not in Method:
        msg = 'Interpolation method {} not available.'
        raise Exception(msg.format(method.name))

    geobox = acq.gridded_geo_box()
    cols, rows = geobox.get_shape_xy()

    # read the relevant tables into DataFrames
    coordinator = read_h5_table(ancillary_group, DatasetName.COORDINATOR.value)
    boxline = read_h5_table(satellite_solar_group, DatasetName.BOXLINE.value)

    if coefficient in Workflow.NBAR.atmos_coefficients:
        dataset_name = DatasetName.NBAR_COEFFICIENTS.value
    elif coefficient in Workflow.SBT.atmos_coefficients:
        dataset_name = DatasetName.SBT_COEFFICIENTS.value
    else:
        msg = "Factor name not found in available coefficients: {}"
        raise ValueError(msg.format(Workflow.STANDARD.atmos_coefficients))

    coefficients = read_h5_table(coefficients_group, dataset_name)

    coord = np.zeros((coordinator.shape[0], 2), dtype='int')
    map_x = coordinator.map_x.values
    map_y = coordinator.map_y.values
    coord[:, 1], coord[:, 0] = (map_x, map_y) * ~geobox.transform
    centre = boxline.bisection_index.values
    start = boxline.start_index.values
    end = boxline.end_index.values

    band_records = coefficients.band_name == acq.band_name
    samples = coefficients[coefficient.value][band_records].values

    func_map = {Method.BILINEAR: sheared_bilinear_interpolate,
                Method.FBILINEAR: fortran_bilinear_interpolate,
                Method.SHEAR: sheared_bilinear_interpolate,
                Method.SHEARB: sheared_bilinear_interpolate,
                Method.RBF: rbf_interpolate}

    args = [cols, rows, coord, samples, start, end, centre]
    if method == Method.BILINEAR:
        args.extend([False, False])
    elif method == Method.SHEARB:
        args.extend([True, True])
    else:
        pass

    result = func_map[method](*args)

    # setup the output file/group as needed
    if out_group is None:
        fid = h5py.File('interpolated-coefficients.h5', driver='core',
                        backing_store=False)
    else:
        fid = out_group

    if GroupName.INTERP_GROUP.value not in fid:
        fid.create_group(GroupName.INTERP_GROUP.value)

    if filter_opts is None:
        filter_opts = {}
    else:
        filter_opts = filter_opts.copy()
    filter_opts['chunks'] = acq.tile_size

    group = fid[GroupName.INTERP_GROUP.value]

    fmt = DatasetName.INTERPOLATION_FMT.value
    dset_name = fmt.format(coefficient=coefficient.value, band_name=acq.band_name)
    no_data = -999
    attrs = {'crs_wkt': geobox.crs.ExportToWkt(),
             'geotransform': geobox.transform.to_gdal(),
             'no_data_value': no_data,
             'interpolation_method': method.name,
             'band_id': acq.band_id,
             'band_name': acq.band_name,
             'alias': acq.alias,
             'coefficient': coefficient.value}
    desc = ("Contains the interpolated result of coefficient {} "
            "for band {} from sensor {}.")
    attrs['description'] = desc.format(coefficient.value, acq.band_id,
                                       acq.sensor_id)

    # convert any NaNs to -999 (for float data, NaN would be more ideal ...)
    result[~np.isfinite(result)] = no_data
    write_h5_image(result, dset_name, group, compression, attrs, filter_opts)

    if out_group is None:
        return fid
Code example #20
def image_residual(ref_fid,
                   test_fid,
                   pathname,
                   out_fid,
                   compression=H5CompressionFilter.LZF,
                   save_inputs=False,
                   filter_opts=None):
    """
    Undertake residual analysis for IMAGE CLASS Datasets.
    A histogram and a cumulative histogram of the residuals are
    calculated and recorded as TABLE CLASS Datasets.
    Any NaNs in IMAGE datasets will be handled automatically.

    :param ref_fid:
        A h5py file object (essentially the root Group), containing
        the reference data.

    :param test_fid:
        A h5py file object (essentially the root Group), containing
        the test data.

    :param pathname:
        A `str` containing the pathname to the IMAGE Dataset.

    :param out_fid:
        A h5py file object (essentially the root Group), opened for
        writing the output data.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param save_inputs:
        A `bool` indicating whether or not to save the input datasets
        used for evaluating the residuals alongside the results.
        Default is False.

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None; this routine only returns None (it may print a message),
        which is essential for the HDF5 visit routine.
    """
    def evaluate(ref_dset, test_dset):
        """
        Evaluate the image residual.
        Caters for boolean types.
        TODO: geobox intersection if dimensions are different.
        TODO: handle no data values
        TODO: handle classification datasets
        TODO: handle bitwise datasets
        """
        if ref_dset.dtype.name == 'bool':
            result = numpy.logical_xor(ref_dset, test_dset).astype('uint8')
        else:
            result = ref_dset[:] - test_dset
        return result

    class_name = 'IMAGE'
    ref_dset = ref_fid[pathname]
    test_dset = test_fid[pathname]

    # ignore no data values for the time being
    residual = evaluate(ref_dset, test_dset)
    min_residual = numpy.nanmin(residual)
    max_residual = numpy.nanmax(residual)
    pct_difference = (residual != 0).sum() / residual.size * 100

    if filter_opts is None:
        fopts = {}
    else:
        fopts = filter_opts.copy()
    fopts['chunks'] = ref_dset.chunks

    geobox = GriddedGeoBox.from_dataset(ref_dset)

    # output residual
    attrs = {
        'crs_wkt': geobox.crs.ExportToWkt(),
        'geotransform': geobox.transform.to_gdal(),
        'description': 'Residual',
        'min_residual': min_residual,
        'max_residual': max_residual,
        'percent_difference': pct_difference
    }

    base_dname = pbasename(pathname)
    group_name = ref_dset.parent.name.strip('/')
    dname = ppjoin('RESULTS', class_name, 'RESIDUALS', group_name, base_dname)
    write_h5_image(residual, dname, out_fid, compression, attrs, fopts)

    # residuals distribution
    h = distribution(residual)
    hist = h['histogram']

    attrs = {
        'description': 'Frequency distribution of the residuals',
        'omin': h['omin'],
        'omax': h['omax']
    }
    dtype = numpy.dtype([('bin_locations', h['loc'].dtype.name),
                         ('residuals_distribution', hist.dtype.name)])
    table = numpy.zeros(hist.shape, dtype=dtype)
    table['bin_locations'] = h['loc']
    table['residuals_distribution'] = hist

    # output
    del fopts['chunks']
    dname = ppjoin('RESULTS', class_name, 'FREQUENCY-DISTRIBUTIONS',
                   group_name, base_dname)
    write_h5_table(table,
                   dname,
                   out_fid,
                   compression,
                   attrs=attrs,
                   filter_opts=fopts)

    # cumulative distribution
    h = distribution(numpy.abs(residual))
    hist = h['histogram']
    cdf = numpy.cumsum(hist / hist.sum())

    attrs = {
        'description': 'Cumulative distribution of the residuals',
        'omin': h['omin'],
        'omax': h['omax'],
        '90th_percentile': h['loc'][numpy.searchsorted(cdf, 0.9)],
        '99th_percentile': h['loc'][numpy.searchsorted(cdf, 0.99)]
    }
    dtype = numpy.dtype([('bin_locations', h['loc'].dtype.name),
                         ('cumulative_distribution', cdf.dtype.name)])
    table = numpy.zeros(cdf.shape, dtype=dtype)
    table['bin_locations'] = h['loc']
    table['cumulative_distribution'] = cdf

    # output
    dname = ppjoin('RESULTS', class_name, 'CUMULATIVE-DISTRIBUTIONS',
                   group_name, base_dname)
    write_h5_table(table,
                   dname,
                   out_fid,
                   compression=compression,
                   attrs=attrs,
                   filter_opts=fopts)

    if save_inputs:
        # copy the reference data
        out_grp = out_fid.require_group(ppjoin('REFERENCE-DATA', group_name))
        ref_fid.copy(ref_dset, out_grp)

        # copy the test data
        out_grp = out_fid.require_group(ppjoin('TEST-DATA', group_name))
        test_fid.copy(test_dset, out_grp)