Ejemplo n.º 1
0
def test_fractional_cover_lazy(sr_filepath, fc_filepath):
    """Check that ``fractional_cover`` stays lazy on chunked input and matches the reference.

    The dataset at ``sr_filepath`` is opened in 50x50 ``x``/``y`` chunks and passed to
    ``fractional_cover``; the loaded result is compared with the dataset stored at
    ``fc_filepath``.
    """
    sr_dataset = open_dataset(sr_filepath, chunks={'x': 50, 'y': 50})

    measurements = [
        Measurement(name='PV', dtype='int8', nodata=-1, units='percent'),
        Measurement(name='NPV', dtype='int8', nodata=-1, units='percent'),
        Measurement(name='BS', dtype='int8', nodata=-1, units='percent'),
        Measurement(name='UE', dtype='int8', nodata=-1, units='1'),
    ]

    fc_dataset = fractional_cover(sr_dataset, measurements)

    # Before loading, every output band must still carry a non-empty dask graph.
    assert fc_dataset.PV.data.dask
    assert fc_dataset.NPV.data.dask
    assert fc_dataset.BS.data.dask
    assert fc_dataset.UE.data.dask

    fc_dataset.load()

    assert set(fc_dataset.data_vars.keys()) == {m['name'] for m in measurements}

    validation_ds = open_dataset(fc_filepath)

    # NOTE: assuming these are xarray datasets, ``validation_ds == fc_dataset`` is an
    # element-wise comparison whose truth value only reflects whether the result has
    # data variables, so asserting on it proves nothing. ``equals`` is the real check.
    assert validation_ds.equals(fc_dataset)
Ejemplo n.º 2
0
 def measurements(self, input_measurements):
     """Describe the three output bands, keyed by band name.

     The bands are ``self.var_name`` (float32), ``water`` (int16) and
     ``pmask`` (bool). ``input_measurements`` is not consulted.
     """
     primary = self.var_name

     bands = {}
     bands[primary] = Measurement(name=primary,
                                  dtype='float32',
                                  nodata=-9999,
                                  units='1')
     bands['water'] = Measurement(name='water', dtype='int16', nodata=0, units='1')
     bands['pmask'] = Measurement(name='pmask', dtype='bool', nodata=False, units='1')
     return bands
Ejemplo n.º 3
0
 def measurements(self, m):
     """Return the ``sdev``, ``edev`` and ``bcdev`` output bands.

     All three are float32 with NaN as nodata and units ``"1"``; the
     argument ``m`` is not consulted. The list is logged at debug level.
     """
     mm = [
         Measurement(name=band, dtype="float32", nodata=np.nan, units="1")
         for band in ("sdev", "edev", "bcdev")
     ]
     LOG.debug("Returning measurements: %s", mm)
     return mm
Ejemplo n.º 4
0
def fuse_measurement(dest: np.ndarray,
                     datasets: List[Dataset],
                     geobox: GeoBox,
                     measurement: Measurement,
                     mk_new: Callable[[BandInfo], DataSource],
                     skip_broken_datasets: bool = False):
    """Hand one band of ``datasets`` to ``reproject_and_fuse`` with ``dest`` as target.

    :param dest: array passed through as the destination; its dtype is used to cast nodata
    :param datasets: datasets from which one source per dataset is built via ``mk_new``
    :param geobox: passed through unchanged to ``reproject_and_fuse``
    :param measurement: supplies the band name, nodata value and the optional
                        ``resampling_method`` (default ``'nearest'``) and ``fuser`` entries
    :param mk_new: factory turning a ``BandInfo`` into a data source
    :param skip_broken_datasets: passed through unchanged to ``reproject_and_fuse``
    """
    sources = [mk_new(BandInfo(ds, measurement.name)) for ds in datasets]

    # Cast nodata to the destination's scalar type so the fill value matches ``dest``.
    fill_value = dest.dtype.type(measurement.nodata)

    reproject_and_fuse(
        sources,
        dest,
        geobox,
        fill_value,
        resampling=measurement.get('resampling_method', 'nearest'),
        fuse_func=measurement.get('fuser', None),
        skip_broken_datasets=skip_broken_datasets,
    )
Ejemplo n.º 5
0
    def output_measurements(
            self,
            product_definitions: Dict[str,
                                      DatasetType]) -> Dict[str, Measurement]:
        """Map output measurement names to ``Measurement`` objects for this product.

        :param product_definitions: product definitions keyed by product name;
                                    must contain ``self._product``
        :return: all measurements of the product when no ``measurements`` setting
                 is present, otherwise only the requested ones, keyed by the
                 requested name and resolved through ``product.canonical_measurement``
        :raises VirtualProductException: if a requested measurement cannot be found
        """
        self._assert(
            self._product in product_definitions,
            "product {} not found in definitions".format(self._product))

        product = product_definitions[self._product]
        measurement_dicts = {
            measurement['name']: Measurement(**measurement)
            for measurement in product.definition['measurements']
        }

        if self.get('measurements') is None:
            return measurement_dicts

        try:
            return {
                name: measurement_dicts[product.canonical_measurement(name)]
                for name in self['measurements']
            }
        except KeyError as ke:
            # Chain explicitly so the traceback marks the KeyError as the direct cause.
            raise VirtualProductException(
                "could not find measurement: {}".format(ke.args)) from ke
0
    def output_measurements(
            self,
            product_definitions: Dict[str,
                                      DatasetType]) -> Dict[str, Measurement]:
        """Return the measurements shared by all children, plus the optional index band.

        Every child must report the same set of measurement names. When the
        ``index_measurement_name`` setting is present, an int8 band of that name
        (nodata -1) is added to the first child's mapping, which is then returned.
        """
        per_child = [
            child.output_measurements(product_definitions)
            for child in self._children
        ]
        first, *rest = per_child

        expected_names = set(first)
        for other in rest:
            self._assert(
                set(other) == expected_names,
                "child datasets do not all have the same set of measurements")

        name = self.get('index_measurement_name')
        if name is not None:
            self._assert(
                name not in first,
                "source index measurement '{}' already present".format(name))

            index_band = Measurement(name=name, dtype='int8', nodata=-1, units='1')
            first.update({name: index_band})

        return first
Ejemplo n.º 7
0
 def measurements(self, input_measurements):
     """Return the single ``smad`` output band (float32, NaN nodata).

     ``input_measurements`` is not consulted.
     """
     smad = Measurement(name='smad', dtype='float32', nodata=np.nan, units='1')
     return [smad]
Ejemplo n.º 8
0
def mk_sample_xr_dataset(crs="EPSG:3578",
                         shape=(33, 74),
                         resolution=None,
                         xy=(0, 0),
                         time='2020-02-13T11:12:13.1234567Z',
                         name='band',
                         dtype='int16',
                         nodata=-999,
                         units='1'):
    """ Build a single-band sample dataset via ``Datacube.create_storage``.

        crs: CRS object, ``None``, or a string (strings are wrapped in ``CRS``)
        shape (height, width)
        resolution (y: float, x: float) - in YX order, to match GeoBox/shape notation;
                   defaults to (-10, 10) when crs is None or projected, else (-0.01, 0.01)
        xy (x: float, y: float) -- location of the top-left corner of the top-left pixel in CRS units
        time: single timestamp used for the ``time`` coordinate; ``None`` omits it
        name, dtype, nodata, units: fields of the one ``Measurement`` created
    """
    if isinstance(crs, str):
        crs = CRS(crs)

    if resolution is None:
        if crs is None or crs.projected:
            resolution = (-10, 10)
        else:
            resolution = (-0.01, 0.01)

    t_coords = {} if time is None else {'time': mk_time_coord([time])}

    # Resolution is (y, x) but Affine.scale expects (x, y), hence the reversal.
    pixel_scale = Affine.scale(*resolution[::-1])
    transform = Affine.translation(*xy)*pixel_scale

    h, w = shape
    geobox = GeoBox(w, h, transform, crs)

    band = Measurement(name=name, dtype=dtype, nodata=nodata, units=units)
    return Datacube.create_storage(t_coords, geobox, [band])
Ejemplo n.º 9
0
        def fetch_child(child, source_index, r):
            """Fetch raster ``r`` from ``child``, optionally tagging it with its source index.

            Returns ``None`` when any extent of ``r.box.shape`` is zero. When the
            ``index_measurement_name`` setting is present, an int8 band of that name,
            filled with ``source_index``, is added to the fetched result.
            """
            if any(extent == 0 for extent in r.box.shape):
                # empty raster
                return None

            result = child.fetch(r, **load_settings)
            name = self.get('index_measurement_name')
            if name is None:
                return result

            # implication for dask?
            measurement = Measurement(name=name,
                                      dtype='int8',
                                      nodata=-1,
                                      units='1')

            # All bands are expected to share one shape; select_unique picks it.
            band_shapes = [result[band].shape for band in result.data_vars]
            shape = select_unique(band_shapes)
            index_values = numpy.full(shape, source_index, dtype=measurement.dtype)

            # Borrow dims and coords from the first data variable.
            template = result[list(result.data_vars)[0]]
            index_band = xarray.DataArray(index_values,
                                          dims=template.dims,
                                          coords=template.coords,
                                          name=name)
            result[name] = index_band.assign_attrs(units=measurement.units,
                                                   nodata=measurement.nodata)
            return result
Ejemplo n.º 10
0
def odc_style_xr_dataset():
    """Build a storage dataset with one int16 variable 'B10' and no time dimension.

    The geobox is 100x100 pixels with a 0.1 pixel scale, in the CRS given by
    ``GEO_PROJ`` (described as EPSG:4326 by the original docstring)."""
    affine = Affine.scale(0.1, 0.1) * Affine.translation(20, 30)
    crs = geometry.CRS(GEO_PROJ)
    geobox = geometry.GeoBox(100, 100, affine, crs)

    band = Measurement(name='B10', dtype='int16', nodata=0, units='1')
    return Datacube.create_storage({}, geobox, [band])
Ejemplo n.º 11
0
    def measurements(self, input_measurements):
        """Return per-percentile copies of the inputs, extended by ``PerBandIndexStat``.

        For each ``q`` in ``self.qs`` and each input measurement, a copy named
        ``<name>_PC_<q>`` is created; the renamed list is then passed to
        ``PerBandIndexStat.measurements``.
        """
        renamed = []
        for q in self.qs:
            for m in input_measurements:
                # NOTE(review): vars(m) reads the instance __dict__; confirm that is
                # where this Measurement type keeps its fields.
                fields = dict(vars(m))
                fields['name'] = m.name + '_PC_' + str(q)
                renamed.append(Measurement(**fields))

        index_stat = PerBandIndexStat(per_pixel_metadata=self.per_pixel_metadata)
        return index_stat.measurements(renamed)
Ejemplo n.º 12
0
 def measurements(self, input_measurements):
     """Return nine float32 bands: three statistics for each of three indices.

     Names are ``<stat>_<index>`` for stat in pct_exceedance, mean, std and
     index in brightness, greenness, wetness. ``input_measurements`` is not
     consulted.
     """
     stats = ('pct_exceedance', 'mean', 'std')
     indices = ('brightness', 'greenness', 'wetness')

     bands = []
     for stat in stats:
         for index in indices:
             m_name = '{}_{}'.format(stat, index)
             bands.append(
                 Measurement(name=m_name, dtype='float32', nodata=-1, units='1'))
     return bands
Ejemplo n.º 13
0
    def morph_measurement(src_measurements, spec):
        """Build an output ``Measurement`` from a source measurement and a spec.

        The source variable is named by ``spec['src_varname']``, falling back to
        ``spec['name']``. The ``name``, ``nodata`` and ``dtype`` keys of ``spec``,
        when present, override the source values.

        :param src_measurements: mapping of source variable name to measurement mapping
        :param spec: mapping describing the output measurement
        :raises ValueError: if the source variable is not in ``src_measurements``
        """
        src_varname = spec.get('src_varname',
                               spec.get('name', None))
        assert src_varname is not None

        source = src_measurements.get(src_varname, None)
        if source is None:
            raise ValueError("No such variable in the source product: {}".format(src_varname))

        # Work on a copy: updating the entry held by ``src_measurements`` in place
        # would leak this spec's overrides into any later spec that reads the same
        # source variable.
        merged = dict(source)
        merged.update({k: spec.get(k, source[k]) for k in ('name', 'nodata', 'dtype')})
        return Measurement(**merged)
Ejemplo n.º 14
0
    def measurements(self, input_measurements):
        """Return the output bands; requires a ``water`` band among the inputs.

        With ``self.freq_only`` set only ``frequency`` (float32) is returned,
        otherwise ``count_wet`` and ``count_clear`` (int16) followed by ``frequency``.
        """
        assert 'water' in {m.name for m in input_measurements}

        def band(name, dtype):
            # Every output shares nodata -1 and units '1'.
            return Measurement(name=name, dtype=dtype, nodata=-1, units='1')

        frequency = band('frequency', 'float32')
        if self.freq_only:
            return [frequency]

        return [band('count_wet', 'int16'), band('count_clear', 'int16'), frequency]
Ejemplo n.º 15
0
def test_normalised_difference_stats(dataset, output_name):
    """Exercise ``NormalisedDifferenceStats.compute`` and ``.measurements`` on a two-band dataset."""
    var1, var2 = list(dataset.data_vars)
    ndstat = NormalisedDifferenceStats(var1, var2, output_name)
    expected_output_varnames = {f'{output_name}_{stat_name}' for stat_name in ndstat.stats}

    # The computed result is a time-less Dataset in the input CRS with the expected bands
    result = ndstat.compute(dataset)
    assert isinstance(result, xr.Dataset)
    assert 'time' not in result.dims
    assert dataset.crs == result.crs
    assert set(result.data_vars) == expected_output_varnames

    # measurements() must reject input_measurements that lack the configured bands
    with pytest.raises(StatsConfigurationError):
        ndstat.measurements([Measurement(name='foo', **FAKE_MEASUREMENT_INFO)])

    # measurements() must name its outputs after the expected variables
    valid_inputs = [Measurement(name=band, **FAKE_MEASUREMENT_INFO) for band in (var1, var2)]
    reported_names = {m.name for m in ndstat.measurements(valid_inputs)}
    assert expected_output_varnames == reported_names
Ejemplo n.º 16
0
    def measurements(self, input_measurements):
        """Return the parent's measurements plus the requested per-pixel metadata bands.

        For each key found in ``self.per_pixel_metadata`` one extra band per input
        measurement is appended: ``source`` adds ``<name>_source`` (int8),
        ``observed`` adds ``<name>_observed`` (float64) and ``observed_date`` adds
        ``<name>_observed_date`` (int32).
        """
        def suffixed(suffix, dtype, nodata, units):
            return [
                Measurement(name=measurement.name + suffix,
                            dtype=dtype,
                            nodata=nodata,
                            units=units)
                for measurement in input_measurements
            ]

        # All three lists are built up front, before the parent is consulted.
        extras_by_key = (
            ('source', suffixed('_source', 'int8', -1, '1')),
            ('observed', suffixed('_observed', 'float64', 0,
                                  'seconds since 1970-01-01 00:00:00')),
            ('observed_date', suffixed('_observed_date', 'int32', 0,
                                       'Date as YYYYMMDD')),
        )

        all_measurements = super(PerBandIndexStat,
                                 self).measurements(input_measurements)

        metadata = self.per_pixel_metadata
        for key, extra_bands in extras_by_key:
            if key in metadata:
                all_measurements += extra_bands

        return all_measurements
Ejemplo n.º 17
0
    def measurements(self, input_measurements):
        """Return one float32 band per statistic, named ``<self.name>_<stat>``.

        :raises StatsConfigurationError: if ``self.band1`` or ``self.band2`` is
                                         not among the input measurement names
        """
        available = {m.name for m in input_measurements}
        if not (self.band1 in available and self.band2 in available):
            raise StatsConfigurationError(
                'Input measurements for %s must include "%s" and "%s"' %
                (self.name, self.band1, self.band2))

        outputs = []
        for stat in self.stats:
            band_name = '_'.join([self.name, stat])
            outputs.append(
                Measurement(name=band_name, dtype='float32', nodata=-1, units='1'))
        return outputs
Ejemplo n.º 18
0
def test_fractional_cover(sr_filepath, fc_filepath):
    """Check that ``fractional_cover`` of the input at ``sr_filepath`` matches the reference.

    The result must contain exactly the requested bands and equal the dataset
    stored at ``fc_filepath``.
    """
    sr_dataset = open_dataset(sr_filepath)

    measurements = [
        Measurement(name='PV', dtype='int8', nodata=-1, units='percent'),
        Measurement(name='NPV', dtype='int8', nodata=-1, units='percent'),
        Measurement(name='BS', dtype='int8', nodata=-1, units='percent'),
        Measurement(name='UE', dtype='int8', nodata=-1, units='1'),
    ]

    fc_dataset = fractional_cover(sr_dataset, measurements)

    assert set(fc_dataset.data_vars.keys()) == {m['name'] for m in measurements}

    validation_ds = open_dataset(fc_filepath)

    # NOTE: assuming these are xarray datasets, ``validation_ds == fc_dataset`` is an
    # element-wise comparison whose truth value only reflects whether the result has
    # data variables, so asserting on it proves nothing. ``equals`` is the real check.
    assert validation_ds.equals(fc_dataset)
Ejemplo n.º 19
0
    def __init__(self,
                 metadata_type,
                 product_type,
                 input_measurements,
                 storage,
                 name,
                 file_path_template,
                 stat_name,
                 statistic,
                 output_params=None,
                 extras=None,
                 stats_metadata=None,
                 custom_metadata=None):
        """Record the output settings and create the product for one statistic.

        ``input_measurements`` is an iterable of mappings, each expanded into a
        ``Measurement``; ``statistic.measurements`` turns those into the output
        measurements used to create the product. ``stats_metadata``,
        ``custom_metadata`` and ``extras`` default to empty dicts when falsy.
        """
        #: The product name.
        self.name = name

        self.file_path_template = file_path_template

        #: The name of the statistic. Eg, mean, max, medoid, percentile_10
        self.stat_name = stat_name

        #: The implementation of a statistic. See :class:`Statistic`.
        #: Will provide `compute` and `measurements` functions.
        self.statistic = statistic

        inputs = []
        for measurement in input_measurements:
            inputs.append(Measurement(**measurement))

        # Plain-dict copies of whatever the statistic reports as its outputs.
        self.data_measurements = list(map(dict, statistic.measurements(inputs)))

        #: The ODC Product (formerly DatasetType)
        self.product = self._create_product(
            metadata_type,
            product_type,
            self.data_measurements,
            storage,
            stats_metadata=stats_metadata or {},
            custom_metadata=custom_metadata or {},
        )

        self.output_params = output_params

        #: A dictionary of extra arguments to be used through the processing chain
        #: Will be available as named argument when producing the output filename
        self.extras = extras or {}
Ejemplo n.º 20
0
    def measurements(self, input_measurements):
        """Return per-percentile copies of the inputs, extended by ``PerBandIndexStat``.

        Each input measurement yields one copy per ``q`` in ``self.qs``, named
        ``<name>_PC_<q>``; an ``int8`` dtype is replaced by ``int16``, any other
        dtype is kept. The renamed list is passed to ``PerBandIndexStat.measurements``.
        """
        def output_dtype(m):
            return 'int16' if m.dtype == 'int8' else m.dtype

        renamed = [
            Measurement(**{
                **m,
                'name': m.name + '_PC_' + str(q),
                'dtype': output_dtype(m),
            })
            for m in input_measurements
            for q in self.qs
        ]

        index_stat = PerBandIndexStat(per_pixel_metadata=self.per_pixel_metadata)
        return index_stat.measurements(renamed)
Ejemplo n.º 21
0
def test_wofs_stats(dataset):
    """Exercise ``WofsStats.measurements`` and ``WofsStats.compute``."""
    expected_vars = {'count_wet', 'count_clear', 'frequency'}
    wofsstat = WofsStats()

    # measurements() reports exactly the three expected bands for a 'water' input
    water = Measurement(name='water', **FAKE_MEASUREMENT_INFO)
    output_measurements = wofsstat.measurements([water])
    assert len(output_measurements) == 3
    assert expected_vars == {m.name for m in output_measurements}

    # compute() returns a time-less Dataset over a subset of the input dimensions
    result = wofsstat.compute(dataset)

    assert isinstance(result, xr.Dataset)
    assert 'time' not in result.dims
    assert all(result_dim in dataset.dims for result_dim in result.dims)
    assert dataset.crs == result.crs

    assert set(result.data_vars) == expected_vars
Ejemplo n.º 22
0
    def measurements(self, input_measurements):
        """Prepare the flag maskers and return one int16 band per configured variable.

        Flag definitions come from the first input measurement. As a side effect
        this sets ``self._valid_pq_mask`` (when ``self._nodata_flags`` is not None)
        and stores ``'_mask'`` and ``'mask'`` on every entry of ``self._vars``.
        """
        bit_defs = input_measurements[0].flags_definition

        nodata_flags = self._nodata_flags
        if nodata_flags is not None:
            nodata_mask = create_mask_value(bit_defs, **nodata_flags)
            self._valid_pq_mask = mk_masker(*nodata_mask, invert=True)

        # First pass: attach the mask value and masker to every variable.
        for v in self._vars:
            v['_mask'] = create_mask_value(bit_defs, **v['flags'])
            v['mask'] = mk_masker(*v['_mask'])

        # Second pass: describe the outputs.
        outputs = []
        for v in self._vars:
            outputs.append(
                Measurement(name=v['name'], dtype='int16', units='1', nodata=-1))
        return outputs
Ejemplo n.º 23
0
def test_measurement():
    """Exercise construction, attribute access, DataArray attrs and copying of ``Measurement``."""
    base_attrs = {'nodata': 255, 'units': '1'}

    # Construction with all required keys succeeds
    meas = Measurement(name='t', dtype='uint8', nodata=255, units='1')

    # The required fields are readable as attributes
    assert meas.name == 't'
    assert meas.dtype == 'uint8'
    assert meas.nodata == 255
    assert meas.units == '1'

    # Only nodata and units are reported for filling a DataArray
    assert meas.dataarray_attrs() == base_attrs

    # A key added by item assignment is readable as an attribute and shows up in the DataArray attrs
    meas['bob'] = 10
    assert meas.bob == 10
    assert meas.dataarray_attrs() == {'nodata': 255, 'units': '1', 'bob': 10}

    meas['none'] = None
    assert meas.none is None

    # The resampling method is special: it is *not* part of the DataArray attrs
    meas['resampling_method'] = 'cubic'
    assert 'resampling_method' not in meas.dataarray_attrs()

    # A copy carries the same keys and the same representation
    duplicate = meas.copy()
    assert duplicate.bob == 10
    assert duplicate.dataarray_attrs() == meas.dataarray_attrs()

    assert repr(duplicate) == repr(meas)

    # *All* required keys (name, dtype, nodata and units) must be given
    with pytest.raises(ValueError) as excinfo:
        Measurement(name='x', units='1', nodata=0)

    message = str(excinfo.value)
    assert 'required keys missing:' in message
    assert 'dtype' in message
Ejemplo n.º 24
0
 def measurements(self, input_measurements):
     """Return one int16 band (nodata 0, units '1') per name in ``self.bands``.

     ``input_measurements`` is not consulted.
     """
     outputs = []
     for band in self.bands:
         outputs.append(Measurement(name=band, dtype='int16', nodata=0, units='1'))
     return outputs
Ejemplo n.º 25
0
    'swir1': [0.00256, 0.99467],
    'swir2': [-0.00327, 1.02551]
}

# Fixed metadata entries: product type, output format name and an empty
# source-dataset lineage.
dataset_constants = dict(
    product_type='ls8_usgs_fc_scene',
    format=dict(name='GeoTIFF'),
    lineage=dict(source_datasets={}),
)

# The four int16 bands (nodata -1): BS, PV and NPV in percent, UE unitless.
fc_measurements = [
    Measurement(name=band, units=band_units, dtype='int16', nodata=-1)
    for band, band_units in (
        ('BS', 'percent'),
        ('PV', 'percent'),
        ('NPV', 'percent'),
        ('UE', '1'),
    )
]


def fishnet(geometry, threshold):
    bounds = geometry.bounds
    xmin = int(bounds[0] // threshold)
    xmax = int(bounds[2] // threshold)
    ymin = int(bounds[1] // threshold)
    ymax = int(bounds[3] // threshold)
    ncols = int(xmax - xmin + 1)
    nrows = int(ymax - ymin + 1)
    result = []
Ejemplo n.º 26
0
 def merge_measurement(measurement, spec):
     """Apply the ``nodata`` and ``dtype`` overrides from ``spec`` to ``measurement``.

     ``measurement`` is updated in place and a ``Measurement`` built from it is
     returned. A key that is missing from ``spec`` or set to ``None`` keeps the
     existing value.
     """
     overrides = {}
     for k in ('nodata', 'dtype'):
         value = spec.get(k)
         # Compare against None rather than relying on truthiness, so that a
         # legitimate falsy override such as ``nodata: 0`` is not discarded.
         overrides[k] = measurement[k] if value is None else value

     measurement.update(overrides)
     return Measurement(**measurement)
Ejemplo n.º 27
0
 def measurement(self):
     """Return the int8 band named ``self._var_name`` (nodata -1, units '1')."""
     fields = dict(name=self._var_name, dtype='int8', nodata=-1, units='1')
     return Measurement(**fields)
Ejemplo n.º 28
0
 def measurement(self):
     """Return the int32 band named ``self._var_name`` with units 'Date as YYYYMMDD'."""
     fields = dict(name=self._var_name,
                   dtype='int32',
                   nodata=0,
                   units='Date as YYYYMMDD')
     return Measurement(**fields)
Ejemplo n.º 29
0
 def measurement(self):
     """Return the int16 band named ``self._var_name``, in days since ``self._since``."""
     var_name = self._var_name
     units = 'days since {:%Y-%m-%d %H:%M:%S}'.format(self._since)
     return Measurement(name=var_name, dtype='int16', nodata=0, units=units)
Ejemplo n.º 30
0
    def output_measurements(self, product_definitions):
        # type: (Dict[str, Dict]) -> Dict[str, Measurement]
        """
        A dictionary mapping names to measurement metadata.

        The result depends on which key this virtual product carries:

        - ``product``: the measurements of the named product, restricted to the
          ``measurements`` setting when one is given
        - ``transform``: the transformation's measurements for the input's output
        - ``collate``: the measurements common to all children, plus the optional
          ``index_measurement_name`` band (int8, nodata -1)
        - ``juxtapose``: the union of the children's measurements, which must not overlap

        :param product_definitions: a dictionary mapping product names to definitions
        :raises VirtualProductException: if a requested measurement cannot be found,
                                         or if none of the keys above is present
        """
        get = self.get

        if 'product' in self:
            self._assert(
                self._product in product_definitions,
                "product {} not found in definitions".format(self._product))

            measurement_docs = product_definitions[
                self._product]['measurements']
            measurements = {
                measurement['name']: Measurement(**measurement)
                for measurement in measurement_docs
            }

            if get('measurements') is None:
                return measurements

            try:
                return {
                    name: measurements[name]
                    for name in get('measurements')
                }
            except KeyError as ke:
                # Chain explicitly so the traceback marks the KeyError as the direct cause.
                raise VirtualProductException(
                    "could not find measurement: {}".format(ke.args)) from ke

        elif 'transform' in self:
            input_measurements = self._input.output_measurements(
                product_definitions)

            return self._transformation.measurements(input_measurements)

        elif 'collate' in self:
            input_measurements = [
                child.output_measurements(product_definitions)
                for child in self._children
            ]

            first, *rest = input_measurements

            for child in rest:
                self._assert(
                    set(child) == set(first),
                    "child datasets do not all have the same set of measurements"
                )

            name = get('index_measurement_name')
            if name is None:
                return first

            self._assert(
                name not in first,
                "source index measurement '{}' already present".format(name))

            first.update({
                name:
                Measurement(name=name, dtype='int8', nodata=-1, units='1')
            })
            return first

        elif 'juxtapose' in self:
            input_measurements = [
                child.output_measurements(product_definitions)
                for child in self._children
            ]

            result = {}
            for measurements in input_measurements:
                # Children must contribute disjoint measurement names.
                common = set(result) & set(measurements)
                self._assert(
                    not common,
                    "common measurements {} between children".format(common))

                result.update(measurements)

            return result

        else:
            raise VirtualProductException("virtual product was not validated")