Example #1
def test_query_kwargs():
    from mock import MagicMock

    mock_index = MagicMock()
    mock_index.datasets.get_field_names = lambda: {'product', 'lat', 'sat_path', 'type_id', 'time', 'lon',
                                                   'orbit', 'instrument', 'sat_row', 'platform', 'metadata_type',
                                                   'gsi', 'type', 'id'}

    query = Query(index=mock_index, product='ls5_nbar_albers')
    assert str(query)
    assert query.product == 'ls5_nbar_albers'
    assert query.search_terms['product'] == 'ls5_nbar_albers'

    query = Query(index=mock_index, latitude=(-35, -36), longitude=(148, 149))
    assert query.geopolygon
    assert 'lat' in query.search_terms
    assert 'lon' in query.search_terms

    query = Query(index=mock_index, latitude=-35, longitude=148)
    assert query.geopolygon
    assert 'lat' in query.search_terms
    assert 'lon' in query.search_terms

    query = Query(index=mock_index, y=(-4174726, -4180011), x=(1515184, 1523263), crs='EPSG:3577')
    assert query.geopolygon
    assert 'lat' in query.search_terms
    assert 'lon' in query.search_terms

    query = Query(index=mock_index, y=-4174726, x=1515184, crs='EPSG:3577')
    assert query.geopolygon
    assert 'lat' in query.search_terms
    assert 'lon' in query.search_terms

    query = Query(index=mock_index, time='2001')
    assert 'time' in query.search

    query = Query(index=mock_index, time=('2001', '2002'))
    assert 'time' in query.search

    with pytest.raises(ValueError):
        Query(index=mock_index,
              y=-4174726, coordinate_reference_system='WGS84',
              x=1515184, crs='EPSG:3577')

    with pytest.raises(LookupError):
        Query(index=mock_index, y=-4174726, x=1515184, crs='EPSG:3577', made_up_key='NotReal')

    with pytest.raises(LookupError):
        query_group_by(group_by='magic')

    gb = query_group_by('time')
    assert isinstance(gb, GroupBy)
    assert query_group_by(group_by=gb) is gb
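
For reference, query_group_by maps a grouping name to the GroupBy descriptor that Datacube.group_datasets consumes, and passes an existing GroupBy through unchanged. A minimal sketch of the two spellings used throughout these examples (assuming only datacube.api.query is importable):

from datacube.api.query import query_group_by

time_gb = query_group_by('time')                  # group observations by acquisition timestamp
solar_gb = query_group_by(group_by='solar_day')   # group scenes into local solar days instead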
Example #2
def check_legacy_open(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    data_array = dc.load(product='ls5_nbar_albers',
                         measurements=['blue'],
                         time='1992-03-23T23:14:25.500000',
                         use_threads=True)
    assert data_array['blue'].shape[0] == 1
    assert (data_array.blue != -999).any()

    # force fusing load by duplicating dataset
    dss = dc.find_datasets(product='ls5_nbar_albers',
                           time='1992-03-23T23:14:25.500000')

    assert len(dss) == 1

    dss = dss*2
    sources = dc.group_datasets(dss, query_group_by('time'))

    gbox = data_array.geobox
    mm = [dss[0].type.measurements['blue']]
    xx = dc.load_data(sources, gbox, mm)
    assert (xx == data_array).all()

    with rasterio.Env():
        xx_lazy = dc.load_data(sources, gbox, mm, dask_chunks={'time': 1})
        assert xx_lazy['blue'].data.dask
        assert xx_lazy.blue[0, :, :].equals(xx.blue[0, :, :])
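
Most snippets on this page follow the same find_datasets -> group_datasets -> load_data pattern shown above. A condensed sketch of that pattern, assuming a configured datacube environment with an 'ls5_nbar_albers' product indexed (the geobox parameters are illustrative):

from affine import Affine
from datacube import Datacube
from datacube.api.query import query_group_by
from datacube.utils import geometry

dc = Datacube()  # assumes datacube config/index are available

geobox = geometry.GeoBox(200, 200,
                         Affine(25, 0.0, 1500000, 0.0, -25, -3900000),
                         geometry.CRS('EPSG:3577'))
product = dc.index.products.get_by_name('ls5_nbar_albers')
observations = dc.find_datasets(product='ls5_nbar_albers', geopolygon=geobox.extent)
sources = dc.group_datasets(observations, query_group_by('time'))
data = dc.load_data(sources, geobox, product.measurements.values())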
Example #3
def check_data_with_api(index, time_slices):
    """Chek retrieved data for specific values.

    We scale down by 100 and check for predefined values in the
    corners.
    """
    from datacube import Datacube
    dc = Datacube(index=index)

    # Make the retrieved data 100 times less granular
    shape_x = int(GEOTIFF['shape']['x'] / 100.0)
    shape_y = int(GEOTIFF['shape']['y'] / 100.0)
    pixel_x = int(GEOTIFF['pixel_size']['x'] * 100)
    pixel_y = int(GEOTIFF['pixel_size']['y'] * 100)

    input_type_name = 'ls5_nbar_albers'
    input_type = dc.index.products.get_by_name(input_type_name)
    geobox = geometry.GeoBox(
        shape_x + 1, shape_y + 1,
        Affine(pixel_x, 0.0, GEOTIFF['ul']['x'], 0.0, pixel_y,
               GEOTIFF['ul']['y']), geometry.CRS(GEOTIFF['crs']))
    observations = dc.find_datasets(product='ls5_nbar_albers',
                                    geopolygon=geobox.extent)
    group_by = query_group_by('time')
    sources = dc.group_datasets(observations, group_by)
    data = dc.load_data(sources, geobox, input_type.measurements.values())
    assert hashlib.md5(
        data.green.data).hexdigest() == '7f5ace486e88d33edf3512e8de6b6996'
    assert hashlib.md5(
        data.blue.data).hexdigest() == 'b58204f1e10dd678b292df188c242c7e'
    for time_slice in range(time_slices):
        assert data.blue.values[time_slice][-1, -1] == -999
Example #4
    def __call__(self, index, product, time, group_by) -> Tile:
        # Do for a specific poly whose boundary is known
        output_crs = CRS(self.storage['crs'])
        filtered_items = [
            'geopolygon', 'lon', 'lat', 'longitude', 'latitude', 'x', 'y'
        ]
        filtered_dict = {
            k: v
            for k, v in self.input_region.items() if k in filtered_items
        }
        if self.feature is not None:
            filtered_dict['geopolygon'] = self.feature.geopolygon
            geopoly = filtered_dict['geopolygon']
        else:
            geopoly = query_geopolygon(**self.input_region)

        dc = Datacube(index=index)
        datasets = dc.find_datasets(product=product,
                                    time=time,
                                    group_by=group_by,
                                    **filtered_dict)
        group_by = query_group_by(group_by=group_by)
        sources = dc.group_datasets(datasets, group_by)
        output_resolution = [
            self.storage['resolution'][dim] for dim in output_crs.dimensions
        ]
        geopoly = geopoly.to_crs(output_crs)
        geobox = GeoBox.from_geopolygon(geopoly, resolution=output_resolution)

        return Tile(sources, geobox)
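
Note that output_resolution is ordered to match output_crs.dimensions, which for a projected CRS is ('y', 'x'). A minimal sketch of the GeoBox construction step in isolation, assuming the datacube.utils.geometry helpers (the extent values are illustrative):

from datacube.utils.geometry import CRS, GeoBox, box

output_crs = CRS('EPSG:3577')
geopoly = box(1500000, -3905000, 1505000, -3900000, crs=output_crs)  # hypothetical region
# resolution follows output_crs.dimensions order, i.e. (y, x) -> (-25, 25) metres
geobox = GeoBox.from_geopolygon(geopoly.to_crs(output_crs), resolution=(-25, 25))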
Example #5
    def group(self, datasets: VirtualDatasetBag, **group_settings: Dict[str, Any]) -> VirtualDatasetBox:
        geopolygon = datasets.geopolygon
        selected = list(datasets.bag)

        # geobox
        merged = merge_search_terms(self, group_settings)

        try:
            geobox = output_geobox(datasets=selected,
                                   grid_spec=datasets.product_definitions[self._product].grid_spec,
                                   geopolygon=geopolygon, **select_keys(merged, self._GEOBOX_KEYS))
            load_natively = False

        except ValueError:
            # we are not calculating geoboxes here for the moment
            # since it may require filesystem access
            # in ODC 2.0 the dataset should know the information required
            geobox = None
            load_natively = True

        # group by time
        group_query = query_group_by(**select_keys(merged, self._GROUPING_KEYS))

        # information needed for Datacube.load_data
        return VirtualDatasetBox(Datacube.group_datasets(selected, group_query),
                                 geobox,
                                 load_natively,
                                 datasets.product_definitions,
                                 geopolygon=None if not load_natively else geopolygon)
Example #6
        def _product_group_():
            # select only those inside the ROI
            # ROI could be smaller than the query for the `query` method

            if query_geopolygon(**search_terms) is not None:
                geopolygon = query_geopolygon(**search_terms)
                selected = list(
                    select_datasets_inside_polygon(datasets.pile, geopolygon))
            else:
                geopolygon = datasets.geopolygon
                selected = list(datasets.pile)

            # geobox
            merged = merge_search_terms(
                select_keys(self, self._NON_SPATIAL_KEYS),
                select_keys(search_terms, self._NON_SPATIAL_KEYS))

            geobox = output_geobox(datasets=selected,
                                   grid_spec=datasets.grid_spec,
                                   geopolygon=geopolygon,
                                   **select_keys(merged, self._GEOBOX_KEYS))

            # group by time
            group_query = query_group_by(
                **select_keys(merged, self._GROUPING_KEYS))

            # information needed for Datacube.load_data
            return VirtualDatasetBox(
                Datacube.group_datasets(selected, group_query), geobox,
                datasets.product_definitions)
Example #7
    def __call__(self, product, time, group_by) -> Tile:
        # Do for a specific poly whose boundary is known
        output_crs = CRS(self.storage['crs'])
        filtered_item = [
            'geopolygon', 'lon', 'lat', 'longitude', 'latitude', 'x', 'y'
        ]
        filtered_dict = {
            k: v
            for k, v in filter(lambda t: t[0] in filtered_item,
                               self.input_region.items())
        }
        if 'feature_id' in self.input_region:
            filtered_dict['geopolygon'] = Geometry(
                self.input_region['geom_feat'],
                CRS(self.input_region['crs_txt']))
            geopoly = filtered_dict['geopolygon']
        else:
            geopoly = query_geopolygon(**self.input_region)
        datasets = self.dc.find_datasets(product=product,
                                         time=time,
                                         group_by=group_by,
                                         **filtered_dict)
        group_by = query_group_by(group_by=group_by)
        sources = self.dc.group_datasets(datasets, group_by)
        output_resolution = [
            self.storage['resolution'][dim] for dim in output_crs.dimensions
        ]
        geopoly = geopoly.to_crs(output_crs)
        geobox = GeoBox.from_geopolygon(geopoly, resolution=output_resolution)

        return Tile(sources, geobox)
Example #8
    def __init__(self, cache, group_by='time', key_fmt=None, grid_spec=None):
        from datacube.api.query import query_group_by

        self._cache = cache
        self._grouper = query_group_by(group_by=group_by)
        self._grid_spec = gs_albers() if grid_spec is None else grid_spec
        self._key_fmt = 'albers/{:03d}_{:03d}' if key_fmt is None else key_fmt
Example #9
def test_load_data(tmpdir):
    tmpdir = Path(str(tmpdir))

    group_by = query_group_by('time')
    spatial = dict(resolution=(15, -15),
                   offset=(11230, 1381110),)

    nodata = -999
    aa = mk_test_image(96, 64, 'int16', nodata=nodata)

    ds, gbox = gen_tiff_dataset([SimpleNamespace(name='aa', values=aa, nodata=nodata)],
                                tmpdir,
                                prefix='ds1-',
                                timestamp='2018-07-19',
                                **spatial)
    assert ds.time is not None

    ds2, _ = gen_tiff_dataset([SimpleNamespace(name='aa', values=aa, nodata=nodata)],
                              tmpdir,
                              prefix='ds2-',
                              timestamp='2018-07-19',
                              **spatial)
    assert ds2.time is not None
    assert ds.time == ds2.time

    sources = Datacube.group_datasets([ds], 'time')
    sources2 = Datacube.group_datasets([ds, ds2], group_by)

    mm = ['aa']
    mm = [ds.type.measurements[k] for k in mm]

    ds_data = Datacube.load_data(sources, gbox, mm)
    assert ds_data.aa.nodata == nodata
    np.testing.assert_array_equal(aa, ds_data.aa.values[0])

    custom_fuser_call_count = 0

    def custom_fuser(dest, delta):
        nonlocal custom_fuser_call_count
        custom_fuser_call_count += 1
        dest[:] += delta

    progress_call_data = []

    def progress_cbk(n, nt):
        progress_call_data.append((n, nt))

    ds_data = Datacube.load_data(sources2, gbox, mm, fuse_func=custom_fuser,
                                 progress_cbk=progress_cbk)
    assert ds_data.aa.nodata == nodata
    assert custom_fuser_call_count > 0
    np.testing.assert_array_equal(nodata + aa + aa, ds_data.aa.values[0])

    assert progress_call_data == [(1, 2), (2, 2)]
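
The fuse_func contract is an in-place merge: when several datasets fall into one time slice, load_data reads them one at a time and calls fuse_func(dest, delta) to fold the freshly read slice into the accumulated array. The additive fuser above exists only to prove it was called; a more typical fuser (a sketch, not the library default) would fill gaps:

def nodata_fuser(dest, delta, nodata=-999):
    # copy pixels from the new slice only where the destination is still nodata
    empty = dest == nodata
    dest[empty] = delta[empty]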
Example #10
def check_open_with_api(driver_manager, time_slices):
    from datacube import Datacube
    dc = Datacube(driver_manager=driver_manager)

    input_type_name = 'ls5_nbar_albers'
    input_type = dc.index.products.get_by_name(input_type_name)
    geobox = geometry.GeoBox(200, 200, Affine(25, 0.0, 638000, 0.0, -25, 6276000), geometry.CRS('EPSG:28355'))
    observations = dc.find_datasets(product='ls5_nbar_albers', geopolygon=geobox.extent)
    group_by = query_group_by('time')
    sources = dc.group_datasets(observations, group_by)
    data = dc.load_data(sources, geobox, input_type.measurements.values(), driver_manager=driver_manager)
    assert data.blue.shape == (time_slices, 200, 200)
Example #11
def check_open_with_api(index):
    from datacube import Datacube
    dc = Datacube(index=index)

    input_type_name = 'ls5_nbar_albers'
    input_type = dc.index.products.get_by_name(input_type_name)

    geobox = GeoBox(200, 200, Affine(25, 0.0, 1500000, 0.0, -25, -3900000),
                    CRS('EPSG:3577'))
    observations = dc.find_datasets(product='ls5_nbar_albers',
                                    geopolygon=geobox.extent)
    group_by = query_group_by('time')
    sources = dc.group_datasets(observations, group_by)
    data = dc.load_data(sources, geobox, input_type.measurements.values())
    assert data.blue.shape == (1, 200, 200)
Example #12
def list_gqa_filtered_cells(index, gw, pix_th=None, cell_index=None, **indexers):
    geobox = gw.grid_spec.tile_geobox(cell_index)
    query = Query(index=index, geopolygon=None, **indexers)
    observations = index.datasets.search_eager(**query.search_terms)
    # filter now with pixel threshold value
    datasets = {}
    if pix_th is None:
        pix_th = 1
    print ("pix_th value", str(pix_th))
    for dataset in observations:                                                          
        if check_intersect(geobox.extent, dataset.extent.to_crs(gw.grid_spec.crs)):
            if get_gqa(index, dataset.id) < pix_th:                                  
                #datasets.append(dataset)
                datasets.setdefault(cell_index,{'datasets': [],
                                    'geobox': geobox})['datasets'].append(dataset)
    return gw.cell_sources(datasets, query_group_by(**indexers))
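
A hypothetical call, assuming an index, a preconfigured GridWorkflow gw, and an indexed product (the product name and cell index are placeholders):

cells = list_gqa_filtered_cells(index, gw,
                                pix_th=1,
                                cell_index=(15, -40),
                                product='ls8_nbar_albers',
                                time=('2015-01-01', '2016-01-01'))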
Example #13
def check_open_with_api(index, time_slices):
    with rasterio.Env():
        from datacube import Datacube
        dc = Datacube(index=index)

        input_type_name = 'ls5_nbar_albers'
        input_type = dc.index.products.get_by_name(input_type_name)
        geobox = geometry.GeoBox(200, 200, Affine(25, 0.0, 638000, 0.0, -25, 6276000), geometry.CRS('EPSG:28355'))
        observations = dc.find_datasets(product='ls5_nbar_albers', geopolygon=geobox.extent)
        group_by = query_group_by('time')
        sources = dc.group_datasets(observations, group_by)
        data = dc.load_data(sources, geobox, input_type.measurements.values())
        assert data.blue.shape == (time_slices, 200, 200)

        chunk_profile = {'time': 1, 'x': 100, 'y': 100}
        lazy_data = dc.load_data(sources, geobox, input_type.measurements.values(), dask_chunks=chunk_profile)
        assert lazy_data.blue.shape == (time_slices, 200, 200)
        assert (lazy_data.blue.load() == data.blue).all()
Example #14
def check_data_with_api(index, time_slices):
    """Chek retrieved data for specific values.

    We scale down by 100 and check for predefined values in the
    corners.
    """
    from datacube import Datacube
    dc = Datacube(index=index)

    # TODO: this test needs to change, it tests that results are exactly the
    #       same as some time before, but with the current zoom out factor it's
    #       hard to verify that results are as expected even with human
    #       judgement. What it should test is that reading native from the
    #       ingested product gives exactly the same results as reading into the
    #       same GeoBox from the original product. Separate to that there
    #       should be a read test that confirms that what you read from native
    #       product while changing projection is of expected value

    # Make the retrieved data lower res
    ss = 100
    shape_x = int(GEOTIFF['shape']['x'] / ss)
    shape_y = int(GEOTIFF['shape']['y'] / ss)
    pixel_x = int(GEOTIFF['pixel_size']['x'] * ss)
    pixel_y = int(GEOTIFF['pixel_size']['y'] * ss)

    input_type_name = 'ls5_nbar_albers'
    input_type = dc.index.products.get_by_name(input_type_name)
    geobox = geometry.GeoBox(
        shape_x + 2, shape_y + 2,
        Affine(pixel_x, 0.0, GEOTIFF['ul']['x'], 0.0, pixel_y,
               GEOTIFF['ul']['y']), geometry.CRS(GEOTIFF['crs']))
    observations = dc.find_datasets(product='ls5_nbar_albers',
                                    geopolygon=geobox.extent)
    group_by = query_group_by('time')
    sources = dc.group_datasets(observations, group_by)
    data = dc.load_data(sources, geobox, input_type.measurements.values())
    assert hashlib.md5(
        data.green.data).hexdigest() == '0f64647bad54db4389fb065b2128025e'
    assert hashlib.md5(
        data.blue.data).hexdigest() == '41a7b50dfe5c4c1a1befbc378225beeb'
    for time_slice in range(time_slices):
        assert data.blue.values[time_slice][-1, -1] == -999
Example #15
    def group(self, datasets: VirtualDatasetBag,
              **search_terms: Dict[str, Any]) -> VirtualDatasetBox:
        geopolygon = datasets.geopolygon
        selected = list(datasets.pile)

        # geobox
        merged = merge_search_terms(self, search_terms)

        geobox = output_geobox(
            datasets=selected,
            grid_spec=datasets.product_definitions[self._product].grid_spec,
            geopolygon=geopolygon,
            **select_keys(merged, self._GEOBOX_KEYS))

        # group by time
        group_query = query_group_by(
            **select_keys(merged, self._GROUPING_KEYS))

        # information needed for Datacube.load_data
        return VirtualDatasetBox(
            Datacube.group_datasets(selected, group_query), geobox,
            datasets.product_definitions)
Example #16
def multi_product_list_cells(products,
                             gw,
                             cell_index=None,
                             product_query=None,
                             **query):
    """This is similar to GridWorkflow.list_cells but generalised to multiple
    products. Only datasets that are available in all of the products are
    reported.

    Datasets that do not have a full set across all products are returned in a
    separate group.


    products      -- list of product names
    gw            -- Preconfigured GridWorkflow object
    cell_index    -- Limit search area to a single cell
    product_query -- Product specific query, dict product_name => product specific query
    **query       -- Common query parameters across all products

    Returns:

    co_common     -- Cell observations that have a full set across products
    co_unmatched  -- Cell observations where at least one product is missing

    Both `co_common` and `co_unmatched` are lists of dictionaries of tiles:

    `type(co_common[product_idx:Int][cell_idx:(Int,Int)]) == datacube.api.Tile`

    """
    if product_query is None:
        product_query = {}

    empty_cell = dict(datasets=[], geobox=None)
    co_common = [dict() for _ in products]
    co_unmatched = [dict() for _ in products]

    group_by = query_group_by(**query)

    obs = [
        gw.cell_observations(product=product,
                             cell_index=cell_index,
                             **product_query.get(product, {}),
                             **query) for product in products
    ]

    # set of all cell indexes found across all products
    all_cell_idx = set(reduce(list.__add__, [list(o.keys()) for o in obs]))

    def cell_is_empty(c):
        return len(c['datasets']) == 0

    for cidx in all_cell_idx:
        common, unmatched = common_obs_per_cell(
            *[o.get(cidx, empty_cell) for o in obs])

        for i in range(len(products)):
            if cidx in obs[i]:
                if not cell_is_empty(common[i]):
                    co_common[i][cidx] = common[i]

                if not cell_is_empty(unmatched[i]):
                    co_unmatched[i][cidx] = unmatched[i]

    co_common = [
        GridWorkflow.group_into_cells(c, group_by=group_by) for c in co_common
    ]
    co_unmatched = [
        GridWorkflow.group_into_cells(c, group_by=group_by)
        for c in co_unmatched
    ]

    return co_common, co_unmatched
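
A hypothetical call, assuming a preconfigured GridWorkflow gw and two ingested products (product names and the cell index are placeholders):

co_common, co_unmatched = multi_product_list_cells(
    ['ls5_nbar_albers', 'ls5_pq_albers'],
    gw,
    cell_index=(15, -40),
    time=('2008-01-01', '2009-01-01'))

nbar_tile = co_common[0][(15, -40)]   # datacube.api.Tile for the first product in that cell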
Example #17
def load_with_meta(dc, *args, **kwargs):
    vals = dc.load(*args, **kwargs)
    datasets = dc.find_datasets(*args, **kwargs)
    sources = dc.group_datasets(datasets, query_group_by())

    return vals.assign(sources=sources)
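
A hypothetical call; the returned dataset carries the grouped source datasets alongside the loaded pixels (the product name and time range are placeholders, and only search keywords accepted by both load and find_datasets should be passed):

ds = load_with_meta(dc, product='ls5_nbar_albers', time=('1992-01-01', '1992-12-31'))
print(ds.sources)   # per-timestep tuples of the datacube.model.Dataset objects that were loaded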
Example #18
    def group(self, datasets, **search_terms):
        # type: (QueryResult, Dict[str, Any]) -> DatasetPile
        """
        Datasets grouped by their timestamps.
        :param datasets: the `QueryResult` to fetch data from
        :param query: to specify a spatial sub-region
        """
        grid_spec = datasets.grid_spec

        if 'product' in self:
            # select only those inside the ROI
            # ROI could be smaller than the query for `query`
            spatial_query = reject_keys(search_terms, self._NON_SPATIAL_KEYS)
            selected = list(
                select_datasets_inside_polygon(
                    datasets.pile, query_geopolygon(**spatial_query)))

            # geobox
            merged = merge_search_terms(
                select_keys(self, self._NON_SPATIAL_KEYS),
                select_keys(spatial_query, self._NON_SPATIAL_KEYS))

            geobox = output_geobox(datasets=selected,
                                   grid_spec=grid_spec,
                                   **select_keys(merged, self._GEOBOX_KEYS),
                                   **spatial_query)

            # group by time
            group_query = query_group_by(
                **select_keys(merged, self._GROUPING_KEYS))

            def wrap(_, value):
                return QueryResult(value, grid_spec)

            # information needed for Datacube.load_data
            return DatasetPile(Datacube.group_datasets(selected, group_query),
                               geobox).map(wrap)

        elif 'transform' in self:
            return self._input.group(datasets, **search_terms)

        elif 'collate' in self:
            self._assert(
                len(datasets.pile) == len(self._children),
                "invalid dataset pile")

            def build(source_index, product, dataset_pile):
                grouped = product.group(dataset_pile, **search_terms)

                def tag(_, value):
                    in_position = [
                        value if i == source_index else None
                        for i, _ in enumerate(datasets.pile)
                    ]
                    return QueryResult(in_position, grid_spec)

                return grouped.map(tag)

            groups = [
                build(source_index, product, dataset_pile)
                for source_index, (product, dataset_pile) in enumerate(
                    zip(self._children, datasets.pile))
            ]

            return DatasetPile(
                xarray.concat([grouped.pile for grouped in groups],
                              dim='time'),
                select_unique([grouped.geobox for grouped in groups]))

        elif 'juxtapose' in self:
            self._assert(
                len(datasets.pile) == len(self._children),
                "invalid dataset pile")

            groups = [
                product.group(datasets, **search_terms)
                for product, datasets in zip(self._children, datasets.pile)
            ]

            aligned_piles = xarray.align(*[grouped.pile for grouped in groups])
            child_groups = [
                DatasetPile(aligned_piles[i], grouped.geobox)
                for i, grouped in enumerate(groups)
            ]

            def tuplify(indexes, _):
                return QueryResult([
                    grouped.pile.sel(**indexes).item()
                    for grouped in child_groups
                ], grid_spec)

            return DatasetPile(
                child_groups[0].map(tuplify).pile,
                select_unique([grouped.geobox for grouped in groups]))

        else:
            raise VirtualProductException("virtual product was not validated")
Example #19
def interval_uncertainty(polygon_id, item_polygon_path,
                         products=('ls5_pq_albers', 'ls7_pq_albers', 'ls8_pq_albers'),
                         time_period=('1986-01-01', '2017-01-01')):

    """
    This function uses the Digital Earth Australia archive to compute the standard deviation of tide heights for all
    Landsat observations that were used to generate the ITEM 2.0 composite layers and resulting tidal intervals. These
    standard deviations (one for each ITEM 2.0 interval) quantify the 'uncertainty' of each NIDEM elevation estimate:
    larger values indicate the ITEM interval was produced from a composite of images with a larger range of tide
    heights.

    Last modified: September 2018
    Author: Robbi Bishop-Taylor

    :param polygon_id:
        An integer giving the polygon ID of the desired ITEM v2.0 polygon to analyse.

    :param item_polygon_path:
        A string giving the path to the ITEM v2.0 polygon shapefile.

    :param products:
        An optional tuple of DEA Landsat product names used to calculate tide heights of all observations used
        to generate ITEM v2.0 tidal intervals. Defaults to ('ls5_pq_albers', 'ls7_pq_albers', 'ls8_pq_albers'),
        which loads Landsat 5, Landsat 7 and Landsat 8.

    :param time_period:
        An optional tuple giving the start and end date to analyse. Defaults to ('1986-01-01', '2017-01-01'), which
        analyses all Landsat observations from the start of 1986 to the end of 2016.

    :return:
        An array of shape (9,) giving the standard deviation of tidal heights for all Landsat observations used to
        produce each ITEM interval.

    """

    # Import tidal model data and extract geom and tide post
    item_gpd = gpd.read_file(item_polygon_path)
    lat, lon, poly = item_gpd[item_gpd.ID == int(polygon_id)][['lat', 'lon', 'geometry']].values[0]
    geom = geometry.Geometry(mapping(poly), crs=geometry.CRS(item_gpd.crs['init']))

    all_times_obs = list()

    # For each product:
    for source in products:

        # Use the entire time range, except restrict Landsat 7 to pre-May 2003 observations
        time_range = ('1986-01-01', '2003-05-01') if source == 'ls7_pq_albers' else time_period

        # Determine matching datasets for geom area and group into solar day
        ds = dc.find_datasets(product=source, time=time_range, geopolygon=geom)
        group_by = query_group_by(group_by='solar_day')
        sources = dc.group_datasets(ds, group_by)

        # If data is found, add the observation times to the list (sorted below)
        if len(ds) > 0:
            all_times_obs.extend(sources.time.data.astype('M8[s]').astype('O').tolist())

    # Calculate tide data from X-Y-time location
    all_times_obs = sorted(all_times_obs)
    tp_obs = [TimePoint(float(lon), float(lat), dt) for dt in all_times_obs]
    tides_obs = [tide.tide_m for tide in predict_tide(tp_obs)]

    # Convert to a dataframe of observed dates and tidal heights
    df1_obs = pd.DataFrame({'Tide_height': tides_obs}, index=pd.DatetimeIndex(all_times_obs))


    ##################
    # ITEM intervals #
    ##################

    # Compute the observed range of tide heights
    min_height = df1_obs.Tide_height.min()
    max_height = df1_obs.Tide_height.max()
    observed_range = max_height - min_height

    # Create dict of percentile values
    per10_dict = {perc + 1: min_height + observed_range * perc * 0.1 for perc in range(0, 10, 1)}

    # Bin each observation into an interval
    df1_obs['interval'] = pd.cut(df1_obs.Tide_height,
                                 bins=list(per10_dict.values()),
                                 labels=list(per10_dict.keys())[:-1])

    return df1_obs.groupby('interval').std().values.flatten()
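
A hypothetical call (the polygon ID and shapefile path are placeholders):

interval_std = interval_uncertainty(polygon_id=33,
                                    item_polygon_path='ITEM_v2_polygons.shp')
print(interval_std.shape)   # (9,) - one tide-height standard deviation per ITEM interval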
Example #20
    def group(self, datasets: VirtualDatasetBag,
              **search_terms: Dict[str, Any]) -> VirtualDatasetBox:
        """
        Datasets grouped by their timestamps.
        :param datasets: the `VirtualDatasetBag` to fetch data from
        :param query: to specify a spatial sub-region
        """
        grid_spec = datasets.grid_spec
        geopolygon = datasets.geopolygon

        if 'product' in self:
            # select only those inside the ROI
            # ROI could be smaller than the query for the `query` method
            if query_geopolygon(**search_terms) is not None:
                geopolygon = query_geopolygon(**search_terms)
                selected = list(
                    select_datasets_inside_polygon(datasets.pile, geopolygon))
            else:
                selected = list(datasets.pile)

            # geobox
            merged = merge_search_terms(
                select_keys(self, self._NON_SPATIAL_KEYS),
                select_keys(search_terms, self._NON_SPATIAL_KEYS))

            geobox = output_geobox(datasets=selected,
                                   grid_spec=grid_spec,
                                   geopolygon=geopolygon,
                                   **select_keys(merged, self._GEOBOX_KEYS))

            # group by time
            group_query = query_group_by(
                **select_keys(merged, self._GROUPING_KEYS))

            # information needed for Datacube.load_data
            return VirtualDatasetBox(
                Datacube.group_datasets(selected, group_query), geobox,
                datasets.product_definitions)

        elif 'transform' in self:
            return self._input.group(datasets, **search_terms)

        elif 'collate' in self:
            self._assert(
                'collate' in datasets.pile
                and len(datasets.pile['collate']) == len(self._children),
                "invalid dataset pile")

            def build(source_index, product, dataset_pile):
                grouped = product.group(
                    VirtualDatasetBag(dataset_pile, datasets.grid_spec,
                                      datasets.geopolygon,
                                      datasets.product_definitions),
                    **search_terms)

                def tag(_, value):
                    return {'collate': (source_index, value)}

                return grouped.map(tag)

            groups = [
                build(source_index, product, dataset_pile)
                for source_index, (product, dataset_pile) in enumerate(
                    zip(self._children, datasets.pile['collate']))
            ]

            return VirtualDatasetBox(
                xarray.concat([grouped.pile for grouped in groups],
                              dim='time'),
                select_unique([grouped.geobox for grouped in groups]),
                merge_dicts(
                    [grouped.product_definitions for grouped in groups]))

        elif 'juxtapose' in self:
            self._assert(
                'juxtapose' in datasets.pile
                and len(datasets.pile['juxtapose']) == len(self._children),
                "invalid dataset pile")

            groups = [
                product.group(
                    VirtualDatasetBag(dataset_pile, datasets.grid_spec,
                                      datasets.geopolygon,
                                      datasets.product_definitions),
                    **search_terms) for product, dataset_pile in zip(
                        self._children, datasets.pile['juxtapose'])
            ]

            aligned_piles = xarray.align(*[grouped.pile for grouped in groups])

            def tuplify(indexes, _):
                return {
                    'juxtapose':
                    [pile.sel(**indexes).item() for pile in aligned_piles]
                }

            return VirtualDatasetBox(
                xr_apply(aligned_piles[0], tuplify),
                select_unique([grouped.geobox for grouped in groups]),
                merge_dicts(
                    [grouped.product_definitions for grouped in groups]))

        else:
            raise VirtualProductException("virtual product was not validated")