Example #1
def main():
    # Command-line entry point: loads a GeoJSON file, compares the geometry of
    # its first feature with the extent of every tile listed in the task CSV,
    # writes a task table to ``args.outfile`` and, when ``args.publish`` is
    # set, slices and publishes the tasks.
    args = gen_args()
    # Both the GeoJSON input and the output path are mandatory.
    if not args.geojson:
        raise ValueError("No geojson file specified")
    if not args.outfile:
        raise ValueError("No output file specified")
    print('Using tiling grid ' + args.grid)
    with fsspec.open(args.geojson) as fhin:
        data = json.load(fhin)

    # Only the first feature of the collection is used.
    # NOTE(review): the Geometry(...) call below is not closed in this excerpt;
    # its remaining argument(s) (presumably the CRS) are missing -- TODO restore.
    geom = Geometry(data["features"][0]["geometry"],

    africa = GRIDS[args.grid]
    task_df = pd.read_csv(args.task_csv)
    aez_tasks = []
    for row in task_df.itertuples():
        # Extent of the grid tile addressed by the row's X and Y columns.
        tmp_geom = africa.tile_geobox((row.X, row.Y)).extent
        # NOTE(review): the body of this ``if`` is missing in this excerpt;
        # nothing visible ever adds to ``aez_tasks`` -- presumably the matching
        # row was appended here; confirm against the original.
        if geom.contains(tmp_geom) or geom.overlaps(tmp_geom):
    output_df = pd.DataFrame(aez_tasks)
    output_df.to_csv(args.outfile, index=False)
    print("Generated " + str(len(output_df)) + " tasks from geojson")

    if args.publish:
        tasks_slices = gen_slices(output_df)
        publish_task(tasks_slices, args.db, args.sqs)
Example #2
# NOTE(review): the three lines right after the signature read like docstring
# text whose triple-quote delimiters are missing from this excerpt.
def geom_from_file(filename, feature_id):
    The geometry of a feature
    :param filename: name of shape file
    :param feature_id: the id of the wanted feature
    import fiona

    with fiona.open(filename) as input_region:
        geom_list = []
        geopolygon_list = []
        feature_list = []
        # CRS of the whole file, built from its WKT description.
        crs = CRS(input_region.crs_wkt)
        for feature in input_region:
            # When feature_id is neither None nor {}, the first feature whose
            # properties['ID'] is contained in feature_id is returned at once
            # as (properties, raw geometry, crs_wkt, Geometry).
            if feature_id is not None and feature_id != {}:
                if feature['properties']['ID'] in feature_id:
                    geom = feature['geometry']
                    return feature['properties'], geom, input_region.crs_wkt, Geometry(geom, crs)
                # NOTE(review): as indented here these two lines run only for
                # non-matching features of a filtered query, and feature_list /
                # geom_list are never appended to -- an ``else`` branch with
                # further appends looks to be missing; confirm.
                geom = feature['geometry']
                geopolygon_list.append(Geometry(geom, crs))

        return feature_list, geom_list, input_region.crs_wkt, geopolygon_list

    # NOTE(review): unreachable as shown, because the ``with`` block above
    # always returns.
    _LOG.info("No geometry found")
Example #3
# NOTE(review): the three lines right after the signature read like docstring
# text whose triple-quote delimiters are missing from this excerpt.
def geom_from_file(filename, feature_id):
    The geometry of a feature
    :param filename: name of shape file
    :param feature_id: the id of the wanted feature
    import fiona

    # Accumulators returned to the caller together with the file's CRS WKT.
    geometry_list = []
    geopolygon_list = []
    feature_list = []
    find_feature = False

    with fiona.open(filename) as input_region:
        crs = CRS(input_region.crs_wkt)
        for feature in input_region:
            # The flag is reset for every feature.
            find_feature = False
            properties = feature['properties']
            # A feature is selected when no filter is given, or when its
            # 'ID' / 'id' property is contained in feature_id.
            if feature_id is None or properties.get(
                    'ID') in feature_id or properties.get('id') in feature_id:
                find_feature = True
            # Also accept a match on the feature-level 'id', converted to int.
            # NOTE(review): this test is evaluated even when feature_id is
            # None, where ``in`` would raise TypeError -- a guard appears to
            # be missing in this excerpt; confirm.
            if int(feature.get('id')) in feature_id:
                find_feature = True
            if find_feature:
                geometry = feature['geometry']
                geopolygon = Geometry(geometry, crs)
                # NOTE(review): nothing visible appends to feature_list,
                # geometry_list or geopolygon_list, so they stay empty as
                # shown -- the append statements look to be missing.

        if not geometry_list:
            _LOG.info("No geometry found")

        return feature_list, geometry_list, input_region.crs_wkt, geopolygon_list
Example #4
    def tiles_from_geopolygon(
        self,
        geopolygon: geometry.Geometry,
        tile_buffer: Optional[Tuple[float, float]] = None,
        geobox_cache: Optional[dict] = None
    ) -> Iterator[Tuple[Tuple[int, int], geometry.GeoBox]]:
        """
        Returns an iterator of tile_index, :py:class:`GeoBox` tuples across
        the grid and overlapping with the specified `geopolygon`.

        .. note::

           Grid cells are referenced by coordinates `(x, y)`, which is the opposite to the usual CRS
           dimension order.

        :param geometry.Geometry geopolygon: Polygon to tile
        :param tile_buffer: Optional <float,float> tuple, (extra padding for the query
                            in native units of this GridSpec)
        :param dict geobox_cache: Optional cache to re-use geoboxes instead of creating new one each time
        :return: iterator of grid cells with :py:class:`GeoBox` tiles
        """
        # Work in the grid's own CRS so the bounding box and the per-tile
        # intersection test are expressed in the same coordinates.
        geopolygon = geopolygon.to_crs(self.crs)
        bbox = geopolygon.boundingbox
        bbox = bbox.buffered(*tile_buffer) if tile_buffer else bbox

        for tile_index, tile_geobox in self.tiles(bbox, geobox_cache):
            # Apply the same padding to each candidate tile before testing it.
            tile_geobox = tile_geobox.buffered(
                *tile_buffer) if tile_buffer else tile_geobox

            # The bounding-box query may return tiles that miss the polygon
            # itself; keep only those whose extent really intersects it.
            if geometry.intersects(tile_geobox.extent, geopolygon):
                yield (tile_index, tile_geobox)
Example #5
def boundary_geo_polygon(geometry, crs):
    """Return the convex hull of the union of ``geometry`` as a ``Geometry``.

    :param geometry: iterable of objects accepted by ``shapely.geometry.shape``
    :param crs: passed unchanged to ``Geometry`` as its second argument
    :return: ``Geometry`` built from the mapping of the convex hull
    """
    import shapely.ops
    from shapely.geometry import shape, mapping

    # Convert every input to a shapely shape, merge them, then take the hull.
    shapes = [shape(part) for part in geometry]
    hull = shapely.ops.unary_union(shapes).convex_hull
    return Geometry(mapping(hull), crs)
Example #6
    def __call__(self, product, time, group_by) -> Tile:
        # Builds a single Tile for ``product``: datasets are found and grouped,
        # and the tile's GeoBox is derived from a polygon reprojected to the
        # CRS configured in ``self.storage['crs']``.
        # NOTE(review): several bracketed expressions below (the two list
        # literals, the dict comprehension, the Geometry(...) and
        # find_datasets(...) calls) are not closed in this excerpt, and an
        # ``else:`` seems to be missing between the two ``geopoly``
        # assignments -- TODO restore from the original.
        # Do for a specific poly whose boundary is known
        output_crs = CRS(self.storage['crs'])
        # Keys treated as spatial selectors when filtering the input region.
        filtered_item = [
            'geopolygon', 'lon', 'lat', 'longitude', 'latitude', 'x', 'y'
        filtered_dict = {
            k: v
            for k, v in filter(lambda t: t[0] in filtered_item,
        if 'feature_id' in self.input_region:
            filtered_dict['geopolygon'] = Geometry(
            geopoly = filtered_dict['geopolygon']
            geopoly = query_geopolygon(**self.input_region)
        datasets = self.dc.find_datasets(product=product,
        group_by = query_group_by(group_by=group_by)
        sources = self.dc.group_datasets(datasets, group_by)
        # One resolution value per dimension of the output CRS, read from the
        # storage configuration.
        output_resolution = [
            self.storage['resolution'][dim] for dim in output_crs.dimensions
        geopoly = geopoly.to_crs(output_crs)
        geobox = GeoBox.from_geopolygon(geopoly, resolution=output_resolution)

        return Tile(sources, geobox)
Example #7
    def footprint_wgs84(self) -> Optional[MultiPolygon]:
        """Return the footprint reprojected to EPSG:4326.

        :return: ``.geom`` of the reprojected ``Geometry``, or ``None`` when
                 ``self.footprint_geometry`` or ``self.footprint_crs`` is
                 falsy (a warning is issued in the missing-CRS case).
        """
        if not self.footprint_geometry:
            return None
        if not self.footprint_crs:
            warnings.warn(f"Geometry without a crs for {self}")
            return None

        # Tag the footprint with its own CRS first and only then reproject.
        # Passing "EPSG:4326" straight to the constructor would merely relabel
        # the coordinates, and ``wrapdateline`` is an option of ``to_crs``.
        return (
            Geometry(self.footprint_geometry, crs=self.footprint_crs)
            .to_crs("EPSG:4326", wrapdateline=True)
            .geom
        )
Example #8
def eo3_lonlat_bbox(doc, tol=None):
    """Compute the bounding box in EPSG:4326 for a document with crs and grids.

    :param doc: mapping with ``'crs'`` and ``'grids'`` entries and an optional
                ``'geometry'`` entry
    :param tol: forwarded as the second argument of ``to_crs``
    :return: bounding box of the reprojected geometry when the document has
             one, otherwise the union of the reprojected grid bounding boxes
    :raises KeyError: if ``'crs'`` or ``'grids'`` is absent from ``doc``
    """
    epsg4326 = CRS('epsg:4326')
    crs = CRS(doc['crs'])
    grids = doc['grids']
    geometry = doc.get('geometry')
    if geometry is None:
        # No explicit footprint: fall back to the union of all grid extents.
        return bbox_union(
            grid2polygon(grid, crs).to_crs(epsg4326, tol).boundingbox
            for grid in grids.values())
    # This return used to sit inside the ``if`` after its own ``return`` and
    # was unreachable, so documents with a geometry produced ``None``.
    return Geometry(geometry, crs).to_crs(epsg4326, tol).boundingbox
def transform_geojson_wgs_to_epsg(geojson, EPSG):
    """
    Takes a geojson dictionary and converts it from WGS84 (EPSG:4326) to desired EPSG

    Parameters
    ----------
    geojson: dict
        a geojson dictionary containing a 'geometry' key, in WGS84 coordinates
    EPSG: int
        numeric code for the EPSG coordinate reference system to transform into

    Returns
    -------
    transformed_geojson: dict
        a geojson dictionary containing a 'coordinates' key, in the desired CRS
    """
    # Interpret the input coordinates as EPSG:4326, reproject, and hand back
    # the reprojected geometry through the __geo_interface__ protocol.
    gg = Geometry(geojson['geometry'], CRS('epsg:4326'))
    gg = gg.to_crs(CRS(f'epsg:{EPSG}'))
    return gg.__geo_interface__
Example #10
def boundary_polygon_from_file(filename: str) -> Geometry:
    """Return the convex hull of all features in ``filename`` as a ``Geometry``.

    The file is opened with ``fiona``; the result carries the CRS built from
    the file's ``crs_wkt``.
    """
    # TODO: This should be refactored and moved into datacube.utils.geometry
    import shapely.ops
    from shapely.geometry import shape, mapping

    with fiona.open(filename) as source:
        # Each feature geometry is passed through ``buffer(0)`` before merging.
        cleaned = [shape(record['geometry']).buffer(0) for record in source]
        hull = shapely.ops.unary_union(cleaned).convex_hull
        source_crs = CRS(source.crs_wkt)
        result = Geometry(mapping(hull), source_crs)
    return result
def _get_shape(geometry: WKBElement, crs) -> Optional[Geometry]:
    """
    Our shapes are valid in the db, but can become invalid on
    reprojection. We buffer if needed.

    Eg invalid. 32baf68c-7d91-4e13-8860-206ac69147b0

    (the tests reproduce this error.... but it may be machine/environment dependent?)

    :param geometry: database geometry element, or ``None``
    :param crs: CRS the stored geometry is expressed in
    :return: the shape reprojected to EPSG:4326 (repaired with ``buffer(0)``
             when the reprojection made it invalid), or ``None`` for no input
    """
    if geometry is None:
        return None

    shape = Geometry(to_shape(geometry), crs).to_crs("EPSG:4326", wrapdateline=True)

    if not shape.is_valid:
        newshape = shape.buffer(0)
        # The repair must not change the area beyond a tiny tolerance;
        # anything larger indicates the shape was altered, not just cleaned.
        assert math.isclose(
            shape.area, newshape.area, abs_tol=0.0001
        ), f"{shape.area} != {newshape.area}"
        shape = newshape
    return shape
Example #12
def geom_from_bbox(bbox, crs="EPSG:4326"):
    # Wraps a GeoJSON-style dict in a Geometry using ``crs``
    # (default "EPSG:4326").
    # NOTE(review): the dict literal below is cut short in this excerpt (its
    # other keys, the coordinate values and the closing brackets are missing),
    # so how ``bbox`` is turned into coordinates cannot be seen here --
    # TODO restore from the original.
    geojson = {
        "coordinates": [[
    return Geometry(geojson, crs=crs)
Example #13
def test_wofs():
    # Runs the WOfSDrill process from the catalog for a point and the time
    # "2000", and checks that the result contains "data" and "chart".
    catalog = read_process_catalog("datacube-wps-config.yaml")
    # First catalog entry that is a WOfSDrill (IndexError if there is none).
    wofs = [entry for entry in catalog if isinstance(entry, WOfSDrill)][0]
    # NOTE(review): the Geometry(...) argument is cut short in this excerpt --
    # the braces of the GeoJSON dict, the CRS argument and the closing
    # parenthesis are missing; TODO restore from the original.
    point = Geometry(
            "type": "Point",
            "coordinates": [137.01475095074406, -28.752777955850917, 0]
    results = wofs.query_handler(time="2000", feature=point)
    assert "data" in results
    assert "chart" in results
Example #14
def select_task_generator(input_region, storage, filter_product):
    # Chooses a task generator from the keys present in ``input_region``:
    # the visible branches return either a GriddedTaskGenerator or a
    # NonGriddedTaskGenerator.
    # NOTE(review): most calls in this function are cut short in this excerpt
    # (missing arguments and closing parentheses, missing ``_LOG.info(`` heads
    # before the bare message strings, and at least one missing ``else:``), so
    # the exact arguments passed to the generators cannot be seen here --
    # TODO restore from the original.
    if input_region is None or input_region == {}:
            'No input_region specified. Generating full available spatial region, gridded files.'
        return GriddedTaskGenerator(storage)

    elif 'geometry' in input_region:  # Larger spatial region
        # A large, multi-tile input region, specified as geojson. Output will be individual tiles.
        geometry = Geometry(input_region['geometry'],
                            CRS('EPSG:4326'))  # GeoJSON is always 4326
        return GriddedTaskGenerator(storage,

    elif 'tile' in input_region:  # For one tile
        return GriddedTaskGenerator(storage,

    elif 'tiles' in input_region:  # List of tiles
        return GriddedTaskGenerator(storage,

    elif 'from_file' in input_region:
        _LOG.info('Input spatial region specified by file: %s',

        # Per-feature tasks are used when a feature_id is given or when
        # 'gridded' is explicitly False (a missing key yields None, which
        # does not satisfy ``is False``).
        if 'feature_id' in input_region or input_region.get(
                'gridded') is False:
            _LOG.info('Generating tasks based on feature polygons.')
            features = features_from_file(input_region['from_file'],

            return NonGriddedTaskGenerator(input_region=input_region,

            # NOTE(review): the three lines below look like the body of a
            # missing ``else:`` branch.
            _LOG.info('Generating tasks based on grid.')
            geometry = boundary_polygon_from_file(input_region['from_file'])
            return GriddedTaskGenerator(storage, geopolygon=geometry)
            'Generating statistics for an ungridded `input region`. Output as a single file.'
        return NonGriddedTaskGenerator(input_region=input_region,
Example #15
def eo3_lonlat_bbox(doc, tol=None):
    """Compute the bounding box in EPSG:4326 for a document with crs and grids.

    :param doc: mapping with ``'crs'`` and ``'grids'`` entries and an optional
                ``'geometry'`` entry
    :param tol: forwarded as the second argument of ``to_crs``
    :return: bounding box of the reprojected geometry when the document has
             one, otherwise the union of the reprojected grid bounding boxes
    :raises ValueError: if ``'crs'`` or ``'grids'`` is missing or ``None``
    """
    crs = doc.get('crs')
    grids = doc.get('grids')

    # Validate before constructing any CRS object so bad input fails fast.
    if crs is None or grids is None:
        raise ValueError("Input must have crs and grids")

    epsg4326 = CRS('epsg:4326')
    crs = CRS(crs)
    geometry = doc.get('geometry')
    if geometry is None:
        # No explicit footprint: fall back to the union of all grid extents.
        return bbox_union(
            grid2polygon(grid, crs).to_crs(epsg4326, tol).boundingbox
            for grid in grids.values())
    # This return used to sit inside the ``if`` after its own ``return`` and
    # was unreachable, so documents with a geometry produced ``None``.
    return Geometry(geometry, crs).to_crs(epsg4326, tol).boundingbox
Example #16
def simplify_geom(geom_in, crs="EPSG:4326"):
    # Reduces ``geom_in`` to a simpler shape: the largest polygon is
    # triangulated, a subset of the triangles is merged again, and the
    # largest resulting polygon is returned as a Geometry in ``crs``.
    geom = geom_in
    # Pick biggest polygon from multipolygon
    if geom.type == "MultiPolygon":
        geom = max(geom, key=lambda x: x.area)
    # Triangulate
    rawtriangles = list(triangulate(geom.geom))
    # NOTE(review): the expression below is cut short in this excerpt -- the
    # call that applies the lambda (presumably ``filter(`` over
    # ``rawtriangles``) and the closing parentheses are missing. The visible
    # predicate keeps triangles whose representative point lies inside the
    # input geometry and whose area exceeds 10% of ``geom.area``.
    triangles = list(
            lambda x: geom_in.geom.contains(x.representative_point()) and x.
            area / geom.area > 0.1,
    geom = unary_union(triangles)
    # The union may again be a multipolygon; keep only its largest part.
    if geom.type == "MultiPolygon":
        geom = max(geom, key=lambda x: x.area)
    return Geometry(geom, crs=crs)
Example #17
    def footprint_wgs84(self) -> Optional[MultiPolygon]:
        # Returns ``.geom`` of the footprint reprojected to EPSG:4326, or None
        # when the footprint geometry or its CRS is falsy (the missing-CRS
        # case also emits a warning).
        if not self.footprint_geometry:
            return None
        if not self.footprint_crs:
            warnings.warn(f"Geometry without a crs for {self}")
            return None

        _crs = self.footprint_crs
        # NOTE(review): the ``try:`` line, the arguments of the
        # CustomCRSConfigHandlerSingleton(...) call and the body of the
        # ``except`` clause are missing in this excerpt -- TODO restore.
        # NOTE(review): the name ``re`` bound by the except clause would
        # shadow a module-level ``re`` import, if the file has one.
            # If defined, EPSG is customized
            _crs = CustomCRSConfigHandlerSingleton(
        except RuntimeError as re:

        # The (possibly customised) CRS is attached first; ``wrapdateline`` is
        # passed to ``to_crs`` for the reprojection.
        return (Geometry(self.footprint_geometry,
                         _crs)  # crs=self.footprint_crs)
                .to_crs("EPSG:4326", wrapdateline=True).geom)
Example #18
def test_fc():
    # Runs the FCDrill process from the catalog for a polygon and the time
    # range 2019-03-05 .. 2019-07-10, and checks that the result contains
    # "data" and "chart".
    catalog = read_process_catalog("datacube-wps-config.yaml")
    # First catalog entry that is an FCDrill (IndexError if there is none).
    fc = [entry for entry in catalog if isinstance(entry, FCDrill)][0]
    # NOTE(review): the Geometry(...) argument is cut short in this excerpt --
    # the opening of the GeoJSON dict, its other keys, the closing brackets
    # and the CRS argument are missing; TODO restore from the original.
    # The visible ring repeats its first coordinate as its last one.
    poly = Geometry(
            "coordinates": [[
                (147.28271484375003, -35.89238773935897),
                (147.03277587890628, -35.663990911348115),
                (146.65237426757815, -35.90684930677119),
                (147.09182739257815, -36.15894422111004),
                (147.28271484375003, -35.89238773935897),
    results = fc.query_handler(time=("2019-03-05", "2019-07-10"), feature=poly)
    assert "data" in results
    assert "chart" in results
Example #19
def test_mangrove():
    # Runs the MangroveDrill process from the catalog for a polygon and the
    # time range 2000 .. 2005, and checks that the result contains "data" and
    # "chart".
    catalog = read_process_catalog("datacube-wps-config.yaml")
    # First catalog entry that is a MangroveDrill (IndexError if there is
    # none); note the local is still called ``fc``.
    fc = [entry for entry in catalog if isinstance(entry, MangroveDrill)][0]
    # NOTE(review): the Geometry(...) argument is cut short in this excerpt --
    # the opening of the GeoJSON dict, its other keys, the closing brackets
    # and the CRS argument are missing; TODO restore from the original.
    # The visible ring repeats its first coordinate as its last one.
    poly = Geometry(
            "coordinates": [[
                (143.98956298828125, -14.689881366618762),
                (144.26422119140625, -14.689881366618762),
                (144.26422119140625, -14.394778454856146),
                (143.98956298828125, -14.394778454856146),
                (143.98956298828125, -14.689881366618762),
    results = fc.query_handler(time=("2000", "2005"), feature=poly)
    assert "data" in results
    assert "chart" in results
Example #20
def _parse_geom(request_json):
    """Build a ``Geometry`` from the single feature of a feature collection.

    :param request_json: object with a ``"features"`` entry holding exactly
                         one feature
    :return: ``Geometry`` of that feature, in the CRS named by the collection,
             else by the feature, else CRS84
    :raises ProcessError: if there is no feature or more than one feature
    """
    features = request_json["features"]
    if len(features) < 1:
        # can't drill if there is no geometry
        raise ProcessError("no features specified")

    if len(features) > 1:
        # do we need multipolygon support here?
        raise ProcessError("multiple features specified")

    feature = features[0]

    # NOTE(review): ``hasattr`` only detects "crs" on objects exposing it as
    # an attribute; on a plain dict both checks are False and the default
    # below is always used -- confirm the type callers pass in.
    if hasattr(request_json, "crs"):
        crs = CRS(request_json["crs"]["properties"]["name"])
    elif hasattr(feature, "crs"):
        crs = CRS(feature["crs"]["properties"]["name"])
    else:
        # Default only when no CRS was supplied; without this ``else`` the
        # feature-level CRS was overwritten and ``crs`` could be unbound.
        # http://geojson.org/geojson-spec.html#coordinate-reference-system-objects
        crs = CRS("urn:ogc:def:crs:OGC:1.3:CRS84")

    return Geometry(feature["geometry"], crs)
Example #21
def cal_mean_std(query_poly):
    """Collect per-time-slice results of ``load_cal`` for the queried polygon.

    The product recipe is read from ``nbart_ld.yaml``, datasets between
    1987-01-01 and 2000-01-01 are grouped, and every time slice is submitted
    to an ``MPIPoolExecutor`` together with a raster mask of ``query_poly``
    combined with the coastline mask.

    :param query_poly: polygon used to build the raster mask
    :return: ``(time_mark, darkest_mean)`` -- two parallel lists holding
             element 0 and element 1 of each ``load_cal`` result whose
             element 1 is not ``None``; two empty lists when the mask
             selects no pixel
    """
    landsat_yaml = 'nbart_ld.yaml'
    with open(landsat_yaml, 'r') as f:
        recipe = yaml.safe_load(f)
    landsat_product = construct(**recipe)
    query = {'time': ('1987-01-01', '2000-01-01')}
    # NOTE(review): ``location`` is built but never merged into ``query`` in
    # the visible code -- confirm whether the dataset search is meant to be
    # restricted to the polygon.
    location = {'geopolygon': Geometry(query_poly, CRS("EPSG:3577"))}

    dc = Datacube()
    datasets = landsat_product.query(dc, **query)
    grouped = landsat_product.group(datasets, **query)
    _LOG.debug("datasets %s", grouped)

    mask = generate_raster([(query_poly, 1)], grouped.geobox)
    coastline_mask = clip_coastline(grouped.geobox)
    mask[coastline_mask == 0] = 0
    _LOG.debug("mask size %s none zero %s", mask.size, np.count_nonzero(mask))
    if np.count_nonzero(mask) == 0:
        return [], []

    darkest_mean = []
    time_mark = []
    future_list = []

    with MPIPoolExecutor() as executor:
        for i in range(grouped.box.time.shape[0]):
            # A length-one slice keeps the time dimension on the selection.
            time_slice = VirtualDatasetBox(grouped.box.sel(time=grouped.box.time.data[i:i+1]), grouped.geobox,
                            grouped.load_natively, grouped.product_definitions, grouped.geopolygon)
            future = executor.submit(load_cal, landsat_product, time_slice, mask)
            # Keep the future; previously it was dropped, so the result loop
            # below never ran.
            future_list.append(future)

    for future in future_list:
        r = future.result()
        if r[1] is not None:
            _LOG.debug("darkest time %s", r[0])
            _LOG.debug("darkest mean %s", r[1])
            # Accumulate the values that are returned to the caller.
            time_mark.append(r[0])
            darkest_mean.append(r[1])
    return time_mark, darkest_mean
Example #22
def eo3_lonlat_bbox(doc: Dict[str, Any],
                    resolution: Optional[float] = None) -> BoundingBox:
    """ Compute bounding box in Lon/Lat for a given EO3 document.

    :param doc: document with ``'crs'`` and ``'grids'`` entries and an
                optional ``'geometry'`` entry
    :param resolution: forwarded to ``lonlat_bounds``
    :return: bounds of the document geometry when present, otherwise the
             union of the bounds of every grid polygon
    :raises ValueError: if ``'crs'`` or ``'grids'`` is missing or ``None``
    """
    crs = doc.get('crs')
    grids = doc.get('grids')

    if crs is None or grids is None:
        raise ValueError("Input must have crs and grids")

    crs = CRS(crs)
    geom = doc.get('geometry', None)
    if geom is not None:
        geom = Geometry(geom, crs)
        return lonlat_bounds(geom, resolution=resolution)

    # No explicit footprint: combine the extents of all grids instead.
    bounds = [
        lonlat_bounds(grid2polygon(grid, crs), resolution=resolution)
        for grid in grids.values()
    ]

    return bbox_union(bounds)
Example #23
# NOTE(review): the six lines right after the signature read like docstring
# text whose triple-quote delimiters are missing from this excerpt.
def post_processing(
    data: xr.Dataset,
    predicted: xr.Dataset,
    config: FeaturePathConfig,
    geobox_used: GeoBox,
) -> xr.DataArray:
    filter prediction results with post processing filters.
    :param data: raw data with all features to run prediction
    :param predicted: The prediction results
    :param config:  FeaturePathConfig configureation
    :param geobox_used: Geobox used to generate the prediciton feature
    :return: only predicted binary class label
    # post prediction filtering
    predict = predicted.Predictions
    # Work on a copy so the query stored on ``config`` is not modified.
    query = config.query.copy()
    # Update dc query with geometry
    # geobox_used = self.geobox_dict[(x, y)]
    # NOTE(review): the Geometry(...) call below is not closed in this excerpt;
    # its remaining argument (presumably the CRS) is missing -- TODO restore.
    query["geopolygon"] = Geometry(geobox_used.extent.geom,

    dc = Datacube(app=__name__)
    # mask with WOFS
    # wofs_query = query.pop("measurements")
    wofs = dc.load(product="ga_ls8c_wofs_2_summary", **query)
    wofs = wofs.frequency > 0.2  # threshold
    # ``where(~mask, 0)`` keeps the prediction where the mask is False and
    # writes 0 where it is True.
    predict = predict.where(~wofs, 0)

    # mask steep slopes
    slope = data.slope > 35
    predict = predict.where(~slope, 0)

    # mask where the elevation is above 3600m
    elevation = dc.load(product="srtm", **query)
    elevation = elevation.elevation > 3600
    predict = predict.where(~elevation.squeeze(), 0)
    return predict
Example #24
def load_process_save_chunk(output_files: OutputDriver,
                            chunk: Tuple[slice, slice, slice],
                            task: StatsTask, timer: MultiTimer):
    """Load one chunk of a task, compute every output product and write it.

    :param output_files: output driver receiving the computed chunks
    :param chunk: slices identifying the chunk to process
    :param task: task providing sources, output products and optional
                 masking geometry
    :param timer: timer used to record the loading, computing and writing
                  phases

    An ``EmptyChunkException`` raised while processing is logged at debug
    level and not propagated.
    """
    try:
        with timer.time('loading_data'):
            data = load_data(chunk, task.sources)
            # mask as per geometry now
            if task.geom_feat:
                geom = Geometry(task.geom_feat, CRS(task.crs_txt))
                data = data.where(geometry_mask([geom], data.geobox, invert=True))
            # pylint: disable=protected-access
            if output_files._driver_name == 'None':
                output_files.get_source(chunk, data)

        last_idx = len(task.output_products) - 1
        for idx, (prod_name, stat) in enumerate(task.output_products.items()):
            # Four placeholders: product, tile index, chunk and the timer.
            _LOG.debug("Computing %s in tile %s %s; %s",
                       prod_name, task.tile_index,
                       "({})".format(", ".join(prettier_slice(c) for c in chunk)),
                       timer)

            measurements = stat.data_measurements

            with timer.time(prod_name):
                result = stat.compute(data)

                if idx == last_idx:  # make sure input data is released early
                    del data

                # restore nodata values back
                result = cast_back(result, measurements)

            # For each of the data variables, shove this chunk into the output results
            with timer.time('writing_data'):
                output_files.write_chunk(prod_name, chunk, result)

    except EmptyChunkException:
        _LOG.debug('Error: No data returned while loading %s for %s. May have all been masked',
                   chunk, task)
Example #25
def load_process_save_chunk(output_files: OutputDriver,
                            chunk: Tuple[slice, slice, slice], task: StatsTask,
                            timer: MultiTimer):
    # Loads one chunk of ``task``, computes every output product on it and
    # writes each resulting data variable through ``output_files``.
    # NOTE(review): this excerpt is missing the ``try:`` line that the
    # ``except`` at the bottom belongs to, the last argument(s) and closing
    # parenthesis of ``write_data(``, and the head of the logging call in the
    # ``except`` body -- TODO restore from the original.
        with timer.time('loading_data'):
            data = load_data(chunk, task.sources)

            # mask as per geometry now
            if task.geom_feat:
                geom = Geometry(task.geom_feat, CRS(task.crs_txt))
                data = data.where(
                    geometry_mask([geom], data.geobox, invert=True))

        # Index of the final product, used to release the input data early.
        last_idx = len(task.output_products) - 1
        for idx, (prod_name, stat) in enumerate(task.output_products.items()):
            _LOG.info("Computing %s in tile %s %s. Current timing: %s",
                      prod_name, task.tile_index, chunk, timer)

            measurements = stat.data_measurements
            with timer.time(prod_name):
                result = stat.compute(data)

                if idx == last_idx:  # make sure input data is released early
                    del data

                # restore nodata values back
                result = cast_back(result, measurements)

            # For each of the data variables, shove this chunk into the output results
            with timer.time('writing_data'):
                for var_name, var in result.data_vars.items(
                ):  # TODO: Move this loop into output_files
                    output_files.write_data(prod_name, var_name, chunk,

    except EmptyChunkException:
            'Error: No data returned while loading %s for %s. May have all been masked',
            chunk, task)
Example #26
# Builds a GridWorkflow query for ``product`` from either a named region or a
# lat/long box, optionally bounded in time, and returns the result of
# ``GridWorkflow.list_cells`` (its ``items()`` view when ``view`` is truthy).
# NOTE(review): this excerpt is damaged -- the signature is not closed (the
# ``view`` parameter documented below is missing), the docstring has no
# closing delimiter and has lost its section headings, and the ``try:`` /
# ``else:`` lines around the region lookup and the final ``raise`` are
# missing. Nothing visible adds ``geom`` to ``query_params`` either.
# TODO restore from the original.
def gwf_query(product, lat=None, long=None, region=None, begin=None, end=None,
    """Run a spatial query on a datacube product using either coordinates or a region name

    Wrapper function to call at the begining of nearly all spatial processing command lines

        product (str): Name of an ingested datacube product. The product to query
        lat (tuple): OPtional. For coordinate based spatial query. Tuple of min and max
            latitudes in decimal degreees.
        long (tuple): OPtional. For coordinate based spatial query. Tuple of min and max
            longitudes in decimal degreees.
        region (str): Optional name of a region or country whose geometry is present in the database
            region  or country table. Overrides lat and long when present (not None).
            Countries must be queried using ISO code (e.g.: 'MEX' for Mexico)
        begin (str): Date string in the form '%Y-%m-%d'. For temporally bounded queries
        end (str): Date string in the form '%Y-%m-%d'. For temporally bounded queries
        view (bool): Returns a view instead of the dictionary returned by ``GridWorkflow.list_cells``.
            Useful when the output is be used directly as an iterable (e.g. in ``distributed.map``)
            Default to True

        dict or view: Dictionary (view) of Tile index, Tile key value pair


        >>> from madmex.wrappers import gwf_query

        >>> # Using region name, time unbounded
        >>> tiles_list = gwf_query(product='ls8_espa_mexico', region='Jalisco')
        >>> # Using region name, time windowed
        >>> tiles_list = gwf_query(product='ls8_espa_mexico', region='Jalisco',
        ...                        begin = '2017-01-01', end='2017-03-31')
        >>> # Using lat long box, time windowed
        >>> tiles_list = gwf_query(product='ls8_espa_mexico', lat=[19, 22], long=[-104, -102],
        ...                        begin = '2017-01-01', end='2017-03-31')
    query_params = {'product': product}
    if region is not None:
       # Query database and build a datacube.utils.Geometry(geopolygon)
           query_set = Country.objects.get(name=region)
       except Country.DoesNotExist:
           query_set = Region.objects.get(name=region)
       region_json = json.loads(query_set.the_geom.geojson)
       crs = CRS('EPSG:%d' % query_set.the_geom.srid)
       geom = Geometry(region_json, crs)
    elif lat is not None and long is not None:
        query_params.update(x=long, y=lat)
        raise ValueError('Either a region name or a lat and long must be provided')

    # The time filter is applied only when both bounds are given.
    if begin is not None and end is not None:
        begin = datetime.strptime(begin, "%Y-%m-%d")
        end = datetime.strptime(end, "%Y-%m-%d")
        query_params.update(time=(begin, end))

    # GridWorkflow object
    dc = datacube.Datacube()
    gwf = GridWorkflow(dc.index, product=product)
    tile_dict = gwf.list_cells(**query_params)
    # Iterable (dictionary view (analog to list of tuples))
    if view:
        tile_dict = tile_dict.items()
    return tile_dict
class FakeDataset:
    """Minimal stand-in exposing ``extent``, ``center_time`` and ``crs``."""

    # Coordinate reference system attribute.
    crs = CRS("EPSG:4326")
    # Opaque placeholder object, created once when the class is defined.
    center_time = object()
    # ``BIG_POLYGON`` wrapped as a Geometry tagged with EPSG:4326.
    extent = Geometry(BIG_POLYGON, crs=CRS("EPSG:4326"))
Example #28
 def geopolygon(self):
     """Return a ``Geometry`` built from ``self.geometry`` and ``self.crs``."""
     shape, crs = self.geometry, self.crs
     return Geometry(shape, crs)
Example #29
# Queries the space-time view for datasets of a layer, optionally filtered by
# time and geometry; the shape of the result depends on ``sel``.
# NOTE(review): this excerpt is damaged -- the signature is not closed (the
# ``layer``, ``sel``, ``times``, ``geom`` and ``mask`` parameters described
# below are missing), the descriptive lines after the signature have lost
# their docstring delimiters, the time filter expression is cut short, and
# ``else:`` lines appear to be missing before the second ``prod_ids``
# assignment and before ``intersect = uniongeom``. TODO restore.
def mv_search_datasets(index,
    Perform a dataset query via the space_time_view

    :param layer: A ows_configuration.OWSNamedLayer object (single or multiproduct)
    :param index: A datacube index (required)

    :param sel: Selection mode - a MVSelectOpts enum. Defaults to IDS.
    :param times: A list of pairs of datetimes (with time zone)
    :param geom: A datacube.utils.geometry.Geometry object
    :param mask: Bool, if true use the flags product of layer

    :return: See MVSelectOpts doc
    engine = get_sqlalc_engine(index)
    stv = st_view
    if layer is None:
        raise Exception("Must filter by product/layer")
    if mask:
        prod_ids = [p.id for p in layer.pq_products]
        prod_ids = [p.id for p in layer.products]

    # Restrict the selection to the products of the layer.
    s = select(sel.sel(stv)).where(stv.c.dataset_type_ref.in_(prod_ids))
    if times is not None:
        s = s.where(
                for t in times
    orig_crs = None
    if geom is not None:
        # Remember the caller's CRS; the spatial filter itself is expressed
        # in EPSG:4326.
        orig_crs = geom.crs
        if str(geom.crs) != "EPSG:4326":
            geom = geom.to_crs("EPSG:4326")
        geom_js = json.dumps(geom.json)
        s = s.where(stv.c.spatial_extent.intersects(geom_js))
    # print(s) # Print SQL Statement
    conn = engine.connect()
    if sel == MVSelectOpts.ALL:
        return conn.execute(s)
    if sel == MVSelectOpts.IDS:
        return [r[0] for r in conn.execute(s)]
    if sel in (MVSelectOpts.COUNT, MVSelectOpts.EXTENT):
        # Both modes return from the first row of the result.
        for r in conn.execute(s):
            if sel == MVSelectOpts.COUNT:
                return r[0]
            if sel == MVSelectOpts.EXTENT:
                geojson = r[0]
                if geojson is None:
                    return None
                uniongeom = ODCGeom(json.loads(geojson), crs="EPSG:4326")
                if geom:
                    intersect = uniongeom.intersection(geom)
                    if orig_crs and orig_crs != "EPSG:4326":
                        intersect = intersect.to_crs(orig_crs)
                    intersect = uniongeom
                return intersect
    if sel == MVSelectOpts.DATASETS:
        return index.datasets.bulk_get([r[0] for r in conn.execute(s)])
    # Reached when none of the selection modes above returned.
    assert False
Example #30
    def extract_geometry(state):
        """Wrap ``state.selection`` in a ``Geometry`` using ``epsg4326``."""
        from datacube.utils.geometry import Geometry
        from datacube.testutils.geom import epsg4326

        selection = state.selection
        return Geometry(selection, epsg4326)