Example #1
    def warped_datacube(self) -> Collection:
        """Retrieve cached datacube defintion."""
        if not self._warped:
            if self.properties.get('reuse_from'):
                reused_datacube: Collection = Collection.query().filter(
                    Collection.name == self.properties['reuse_from']).first()

                if reused_datacube is None:
                    raise RuntimeError(f'Data cube {self.properties["reuse_from"]} not found.')

                if reused_datacube.composite_function.alias != 'IDT':
                    raise RuntimeError(f'Data cube {self.properties["reuse_from"]} must be IDT.')

                if reused_datacube.grid_ref_sys_id != self.datacube.grid_ref_sys_id:
                    raise RuntimeError(
                        f'The grid of data cube {self.datacube.name} and {reused_datacube.name} mismatch.')

                self.reused_datacube = reused_datacube
                # set warped_collection to reused

                if self.params['force']:
                    raise RuntimeError(
                        f'Cannot use flag --force to dispatch data cube derived from {reused_datacube.name}')

                self._warped = reused_datacube
            else:
                datacube_warped = get_cube_id(self.datacube.name)

                self._warped = Collection.query().filter(Collection.name == datacube_warped).first()

        return self._warped
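The property above is a lazy cache: the first access resolves the Collection (either the reused cube or the identity cube) and stores it in self._warped; later accesses skip the database. A standalone sketch of the same pattern, with the database lookup replaced by a stand-in (all names here are illustrative, not the real models):

class CubeBuilder:
    """Minimal stand-in for the class that owns warped_datacube."""

    def __init__(self, name):
        self.name = name
        self._warped = None

    @property
    def warped_datacube(self):
        if not self._warped:
            # Stand-in for Collection.query().filter(...).first()
            self._warped = f'{self.name}_IDT'
        return self._warped

builder = CubeBuilder('MyCube')
first = builder.warped_datacube
assert builder.warped_datacube is first  # resolved once, then cached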
Example #2
def get_cube_or_404(cube_id: Union[int, str] = None,
                    cube_full_name: str = '-'):
    """Try to retrieve a data cube on database and raise 404 when not found."""
    if cube_id:
        return Collection.query().filter(
            Collection.id == cube_id).first_or_404()
    else:
        # Naive split: breaks for cube names that themselves contain '-'.
        cube_name, cube_version = cube_full_name.split('-')
        return Collection.query().filter(
            Collection.name == cube_name,
            Collection.version == cube_version).first_or_404()
Example #3
def get_cube_or_404(cube_id=None, cube_full_name: str = '-'):
    """Try to retrieve a data cube on database and raise 404 when not found."""
    if cube_id:
        return Collection.query().filter(
            Collection.id == cube_id).first_or_404()
    else:
        # Handle cube names that contain '-': the version is the last fragment.
        cube_fragments = cube_full_name.split('-')
        cube_name = '-'.join(cube_fragments[:-1])
        cube_version = cube_fragments[-1]
        return Collection.query().filter(
            Collection.name == cube_name,
            Collection.version == cube_version).first_or_404()
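This second version exists because a cube name may itself contain '-', so only the last fragment can be treated as the version. A standalone illustration (the cube name is hypothetical):

cube_full_name = 'S2-16D-2'                 # <name>-<version>
cube_fragments = cube_full_name.split('-')
cube_name = '-'.join(cube_fragments[:-1])   # 'S2-16D'
cube_version = cube_fragments[-1]           # '2'
assert (cube_name, cube_version) == ('S2-16D', '2')

The first version's naive split('-') would raise ValueError on the same input, since it yields three fragments for two targets.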
Example #4
    def list_cubes(self):
        """Retrieve the list of data cubes from Brazil Data Cube database."""
        cubes = Collection.query().filter(
            Collection.collection_type == 'cube').all()

        serializer = CollectionForm()

        list_cubes = []
        for cube in cubes:
            cube_dict = serializer.dump(cube)
            cube_name = cube.name

            if cube.composite_function.alias == 'IDT':
                cube_name += '_'

            activities = self.services.get_control_activities(cube_name)
            count = int(
                sum([a['tobe_done'] for a in activities if 'tobe_done' in a]))
            done = int(sum([a['mycount'] for a in activities]))
            errors = int(sum([a['erros'] for a in activities]))
            not_done = count - done - errors

            cube_dict[
                'status'] = 'Error' if errors > 0 else 'Pending' if not_done > 0 else 'Finished'

            cube_dict['timeline'] = [
                t['time_inst'] for t in cube_dict['timeline']
            ]
            list_cubes.append(cube_dict)

        return list_cubes, 200
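The status rule above reduces to simple arithmetic over the activity counters; a standalone sketch with made-up counts:

count, done, errors = 10, 7, 1  # sums of tobe_done, mycount, erros
not_done = count - done - errors
status = 'Error' if errors > 0 else 'Pending' if not_done > 0 else 'Finished'
assert status == 'Error'  # any error wins; otherwise pending work wins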
Example #5
    def warped_datacube(self) -> Collection:
        """Retrieve cached datacube defintion."""
        if not self._warped:
            datacube_warped = get_cube_id(self.datacube.name)

            self._warped = Collection.query().filter(
                Collection.name == datacube_warped).first()

        return self._warped
Example #6
    def validate_provider(cls, collection_id):
        """Check if the given collection has any provider set."""
        collection = Collection.query().filter(Collection.id == collection_id).first_or_404()

        collector_extension: CollectorExtension = current_app.extensions['bdc:collector']

        download_order = collector_extension.get_provider_order(collection, lazy=True)

        if len(download_order) == 0:
            abort(400, f'Collection {collection.name} does not have any data provider set.')
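A hedged usage sketch, assuming this classmethod lives on a business class such as RadcorBusiness (as in the other examples) and runs inside a Flask request context; abort() raises a werkzeug HTTPException that Flask turns into the HTTP 400 response:

import logging

from werkzeug.exceptions import HTTPException

try:
    RadcorBusiness.validate_provider(collection_id=5)  # hypothetical id
except HTTPException as exc:
    logging.warning(f'Cannot dispatch download: {exc.description}')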
Example #7
    def correction(self, scene):
        """Apply atmospheric correction on collection.

        Args:
            scene - Serialized Activity
        """
        logging.debug('Starting Correction Sentinel...')

        # Get the resolver for the Sentinel scene at processing level 2
        sentinel_scene = factory.get_from_sceneid(scene['sceneid'], level=2)

        collection = Collection.query().filter(
            Collection.name == sentinel_scene.id).first()

        # Set Collection to the Sentinel Surface Reflectance
        scene['collection_id'] = collection.id
        scene['activity_type'] = 'correctionS2'

        # Create/update activity
        self.create_execution(scene)

        synchronizer = DataSynchronizer(scene['args']['compressed_file'])

        try:
            output_dir = sentinel_scene.path()

            synchronizer.check_data()

            # TODO: Add the sen2cor again as optional processor
            correction_result = correction_laSRC(
                scene['args']['compressed_file'], str(output_dir))

            if DataSynchronizer.is_remote_sync_configured():
                synchronizer.sync_data(correction_result, auto_remove=True)
        except BaseException as e:
            logging.error(
                'An error occurred during task execution - {}'.format(
                    scene.get('sceneid')))
            raise e
        finally:
            if DataSynchronizer.is_remote_sync_configured():
                synchronizer.remove_data(raise_error=False)

                logging.info(
                    f'File {scene["args"].get("compressed_file")} removed.')

        scene['args']['level'] = 2
        scene['args']['file'] = correction_result
        scene['activity_type'] = 'publishS2'

        return scene
Example #8
    def list_cubes(self):
        """Retrieve the list of data cubes from Brazil Data Cube database."""
        cubes = Collection.query().filter(
            Collection.collection_type == 'cube').all()

        serializer = CollectionForm()

        list_cubes = []
        for cube in cubes:
            cube_dict = serializer.dump(cube)
            not_done = 0
            sum_acts = 0
            error = 0
            if cube.composite_function.alias != 'IDT':
                activities = self.services.get_activities_by_datacube(
                    cube.name)
                not_done = len(
                    list(
                        filter(lambda i: i['mystatus'] == 'NOTDONE',
                               activities)))
                error = len(
                    list(filter(lambda i: i['mystatus'] == 'ERROR',
                                activities)))
                sum_acts += len(activities)

            parts = get_cube_parts(cube.name)
            data_cube_identity = '_'.join(parts[:2])
            activities = self.services.get_activities_by_datacube(
                data_cube_identity)
            not_done_identity = len(
                list(filter(lambda i: i['mystatus'] == 'NOTDONE', activities)))
            error_identity = len(
                list(filter(lambda i: i['mystatus'] == 'ERROR', activities)))
            sum_acts += len(activities)

            cube_dict['status'] = 'Pending'
            if sum_acts > 0:
                sum_not_done = not_done + not_done_identity
                sum_errors = error + error_identity
                cube_dict['status'] = 'Error' if sum_errors > 0 else 'Finished' \
                    if (sum_not_done + sum_errors) == 0 else 'Pending'
            list_cubes.append(cube_dict)

        return list_cubes, 200
Example #9
    def list_cubes(cls):
        """Retrieve the list of data cubes from Brazil Data Cube database."""
        cubes = Collection.query().filter(Collection.collection_type == 'cube').all()

        serializer = CollectionForm()

        list_cubes = []

        for cube in cubes:
            cube_dict = serializer.dump(cube)

            # list_tasks = list_pending_tasks() + list_running_tasks()
            # count_tasks = len(list(filter(lambda t: t['collection_id'] == cube.name, list_tasks)))
            count_tasks = 0

            cube_dict['status'] = 'Finished' if count_tasks == 0 else 'Pending'

            list_cubes.append(cube_dict)

        return list_cubes, 200
Example #10
def download(scene_ids):
    """Download the Landsat-8 products using scene id.

    TODO: Support Sentinel 2 and Landsat 5/7.
    """
    from bdc_catalog.models import Collection
    from .collections.business import RadcorBusiness
    from .collections.landsat.utils import LandsatSurfaceReflectance08, factory
    from .collections.utils import get_earth_explorer_api, EARTH_EXPLORER_DOWNLOAD_URI, EARTH_EXPLORER_PRODUCT_ID
    from .utils import initialize_factories

    initialize_factories()

    scenes = scene_ids.split(',')

    api = get_earth_explorer_api()

    dataset = 'LANDSAT_8_C1'

    collection = Collection.query().filter(
        Collection.name == LandsatSurfaceReflectance08.id).first_or_404()

    for scene in scenes:
        landsat_scene_level_1 = factory.get_from_sceneid(scene_id=scene,
                                                         level=1)

        formal = api.lookup(dataset, [scene], inverse=True)

        link = EARTH_EXPLORER_DOWNLOAD_URI.format(
            folder=EARTH_EXPLORER_PRODUCT_ID[dataset], sid=formal[0])

        activity = dict(collection_id=collection.id,
                        activity_type='downloadLC8',
                        tags=[],
                        sceneid=scene,
                        scene_type='SCENE',
                        args=dict(link=link))

        _ = RadcorBusiness.create_activity(activity)

        RadcorBusiness.start(activity)
Example #11
def priority(collection_id, scene_id, output):
    """Download a scene seeking in CollectionProviders.

    Notes:
        You must configure the BDC-Catalog.

    Args:
        collection_id - Collection Identifier
        scene_id - A scene identifier (Landsat Scene Id/Sentinel Scene Id, etc)
        output - Directory to save.
    """
    ext = current_app.extensions['bdc:collector']

    collection = Collection.query().get(collection_id)

    order = ext.get_provider_order(collection)

    for driver in order:
        try:
            file_destination = driver.download(scene_id, output=output)
            # Stop at the first provider that succeeds.
            break
        except Exception as e:
            logging.warning(f'Download error for provider {driver.provider_name} - {str(e)}')
Example #12
    def post_publish(self, scene):
        """Apply post-processing to a published scene (cloud masking via Fmask)."""
        logging.info(f'Applying post-processing for {scene["sceneid"]}')
        collection = Collection.query().filter(
            Collection.id == scene['collection_id']).first()

        assets = scene['args']['assets']

        synchronizer = DataSynchronizer(scene['args']['file'])

        if DataSynchronizer.is_remote_sync_configured():
            synchronizer.check_data()

        for entry in assets.values():
            if entry['file'].endswith('Fmask4.tif'):
                post_processing(entry['file'], collection, assets, 10)

        if DataSynchronizer.is_remote_sync_configured():
            synchronizer.sync_data(bucket=Config.AWS_BUCKET_NAME,
                                   auto_remove=True)
            synchronizer.remove_data(raise_error=False)

        return scene
Example #13
def get_collection_items(
    collection_id=None,
    roles=None,
    item_id=None,
    bbox=None,
    datetime=None,
    ids=None,
    collections=None,
    intersects=None,
    page=1,
    limit=10,
    query=None,
    **kwargs,
) -> Pagination:
    """Retrieve a list of collection items based on filters.

    :param collection_id: Single Collection ID to include in the search for items.
                          Only Items in the provided Collection will be searched, defaults to None
    :type collection_id: str, optional
    :param item_id: item identifier, defaults to None
    :type item_id: str, optional
    :param bbox: bounding box for intersection [west, south, east, north], defaults to None
    :type bbox: list, optional
    :param datetime: Single date+time, or a range ('/' separator), formatted to RFC 3339, section 5.6.
                     Use double dots '..' for open date ranges, defaults to None. If the start or end date of an image
                     generated by a temporal composition intersects the given datetime or range it will be included in the
                     result.
    :type datetime: str, optional
    :param ids: Array of Item ids to return. All other filter parameters that further restrict the
                number of search results are ignored, defaults to None
    :type ids: list, optional
    :param collections: Array of Collection IDs to include in the search for items.
                        Only Items in one of the provided Collections will be searched, defaults to None
    :type collections: list, optional
    :param intersects: Searches items by performing intersection between their geometry and provided GeoJSON geometry.
                       All GeoJSON geometry types must be supported, defaults to None
    :type intersects: dict, optional
    :param page: The page offset of results, defaults to 1
    :type page: int, optional
    :param limit: The maximum number of results to return (page size), defaults to 10
    :type limit: int, optional
    :return: paginated list of collection items
    :rtype: Pagination
    """
    columns = [
        func.concat(Collection.name, "-",
                    Collection.version).label("collection"),
        Collection.collection_type,
        Collection._metadata.label("meta"),
        Item._metadata.label("item_meta"),
        Item.name.label("item"),
        Item.id,
        Item.collection_id,
        Item.start_date.label("start"),
        Item.end_date.label("end"),
        Item.assets,
        Item.created,
        Item.updated,
        cast(Item.cloud_cover, Float).label("cloud_cover"),
        func.ST_AsGeoJSON(Item.geom).label("geom"),
        func.ST_XMin(Item.geom).label("xmin"),
        func.ST_XMax(Item.geom).label("xmax"),
        func.ST_YMin(Item.geom).label("ymin"),
        func.ST_YMax(Item.geom).label("ymax"),
        Tile.name.label("tile"),
    ]

    if roles is None:
        roles = []

    where = [
        Collection.id == Item.collection_id,
        or_(Collection.is_public.is_(True),
            Collection.id.in_([int(r.split(":")[0]) for r in roles])),
    ]

    collections_where = _where_collections(collection_id, collections)
    collections_where.append(
        or_(Collection.is_public.is_(True),
            Collection.id.in_([int(r.split(":")[0]) for r in roles])))
    outer_join = [(Tile, [Item.tile_id == Tile.id])]
    _geom_tables = []
    _collections = Collection.query().filter(*collections_where).all()
    if bbox or intersects:
        grids = GridRefSys.query().filter(
            GridRefSys.id.in_([c.grid_ref_sys_id
                               for c in _collections])).all()
        for grid in grids:
            geom_table = grid.geom_table
            if geom_table is None:
                continue
            _geom_tables.append(geom_table)

    if ids is not None:
        if isinstance(ids, str):
            ids = ids.split(",")
        where += [Item.name.in_(ids)]
    else:
        where += _where_collections(collection_id, collections)

        if item_id is not None:
            where += [Item.name.like(item_id)]

        if query:
            filters = create_query_filter(query)
            if filters:
                where += filters

        if intersects is not None:
            # Intersect with native grid if there is
            geom_expr = func.ST_GeomFromGeoJSON(str(intersects))
            grids_where, joins = intersect_grids(geom_expr,
                                                 geom_tables=_geom_tables)

            where += grids_where
            outer_join += joins
        elif bbox is not None:
            try:
                if isinstance(bbox, str):
                    bbox = bbox.split(",")

                bbox = [float(x) for x in bbox]

                if bbox[0] == bbox[2] or bbox[1] == bbox[3]:
                    raise InvalidBoundingBoxError("")

                geom_expr = func.ST_MakeEnvelope(bbox[0], bbox[1], bbox[2],
                                                 bbox[3],
                                                 func.ST_SRID(Item.geom))
                grid_where, joins = intersect_grids(geom_expr,
                                                    geom_tables=_geom_tables)

                where += grid_where
                outer_join += joins
            except (ValueError, InvalidBoundingBoxError) as e:
                abort(400, f"'{bbox}' is not a valid bbox.")

        if datetime is not None:
            if "/" in datetime:
                matches_open = ("..", "")
                time_start, time_end = datetime.split("/")
                if time_start in matches_open:  # open start
                    date_filter = [
                        or_(Item.start_date <= time_end,
                            Item.end_date <= time_end)
                    ]
                elif time_end in matches_open:  # open end
                    date_filter = [
                        or_(Item.start_date >= time_start,
                            Item.end_date >= time_start)
                    ]
                else:  # closed range
                    date_filter = [
                        or_(
                            and_(Item.start_date >= time_start,
                                 Item.start_date <= time_end),
                            and_(Item.end_date >= time_start,
                                 Item.end_date <= time_end),
                            and_(Item.start_date < time_start,
                                 Item.end_date > time_end),
                        )
                    ]
            else:
                date_filter = [
                    and_(Item.start_date <= datetime,
                         Item.end_date >= datetime)
                ]
            where += date_filter

    query = session.query(*columns)
    for entity, join_conditions in outer_join:
        query = query.outerjoin(entity, *join_conditions)

    try:
        query = query.filter(*where).order_by(Item.start_date.desc(), Item.id)
        result = query.paginate(page=int(page),
                                per_page=int(limit),
                                error_out=False,
                                max_per_page=BDC_STAC_MAX_LIMIT)

        return result
    except Exception as err:
        msg = str(err)
        if hasattr(err, "orig"):
            msg = str(err.orig)
        abort(400, msg.rstrip())
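A hedged usage sketch of this search entry point, run inside an application context; the collection identifier and filters are illustrative. The function returns a Flask-SQLAlchemy Pagination, so results are read from .items:

page = get_collection_items(
    collections=['S2-16D-2'],          # hypothetical <name>-<version> id
    bbox='-54.0,-12.0,-53.0,-11.0',    # west,south,east,north
    datetime='2020-01-01/2020-12-31',  # closed range
    page=1,
    limit=10,
)
for row in page.items:
    print(row.item, row.start, row.end, row.cloud_cover)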
Example #14
def publish(blends, band_map, **kwargs):
    """Execute publish task and catalog datacube result.

    Args:
        blends - List of serialized blend results for the period
        band_map - Mapping of band name to band definition
    """
    period = blends[0]['period']
    logging.info(f'Executing publish {period}')

    version = blends[0]['version']

    cube: Collection = Collection.query().filter(
        Collection.name == blends[0]['datacube'],
        Collection.version == version).first()
    warped_datacube = blends[0]['warped_datacube']
    tile_id = blends[0]['tile_id']

    # Retrieve which bands to generate quick look
    bands = cube.bands
    band_id_map = {band.id: band.name for band in bands}

    quicklook = cube.quicklook[0]

    quick_look_bands = [
        band_id_map[quicklook.red], band_id_map[quicklook.green],
        band_id_map[quicklook.blue]
    ]

    merges = dict()
    blend_files = dict()

    composite_function = DataCubeFragments(cube.name).composite_function

    for blend_result in blends:
        if composite_function != 'IDENTITY':
            blend_files[blend_result['band']] = blend_result['blends']

        if blend_result.get('clear_observation_file'):
            blend_files[CLEAR_OBSERVATION_NAME] = {
                composite_function: blend_result['clear_observation_file']
            }

        if blend_result.get('total_observation'):
            blend_files[TOTAL_OBSERVATION_NAME] = {
                composite_function: blend_result['total_observation']
            }

        if blend_result.get('provenance'):
            blend_files[PROVENANCE_NAME] = {
                composite_function: blend_result['provenance']
            }

        for merge_date, definition in blend_result['scenes'].items():
            merges.setdefault(
                merge_date,
                dict(dataset=definition['dataset'],
                     cloudratio=definition['cloudratio'],
                     ARDfiles=dict()))
            merges[merge_date]['ARDfiles'].update(definition['ARDfiles'])

    if composite_function != 'IDT':
        cloudratio = blends[0]['cloudratio']

        # Generate quick looks for cube scenes
        publish_datacube(cube, quick_look_bands, tile_id, period, blend_files,
                         cloudratio, band_map, **kwargs)

    # Generate quick looks of irregular cube
    wcube = Collection.query().filter(Collection.name == warped_datacube,
                                      Collection.version == version).first()

    for merge_date, definition in merges.items():
        date = merge_date.replace(definition['dataset'], '')

        publish_merge(quick_look_bands, wcube, tile_id, date, definition,
                      band_map)

    try:
        db.session.commit()
    except Exception:
        db.session.rollback()
        raise
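The quick-look colors are stored as band ids on the Quicklook record; band_id_map above turns them back into band names. A standalone reduction of that mapping (ids and names invented):

from types import SimpleNamespace

bands = [SimpleNamespace(id=1, name='B04'),
         SimpleNamespace(id=2, name='B03'),
         SimpleNamespace(id=3, name='B02')]
band_id_map = {band.id: band.name for band in bands}

quicklook = SimpleNamespace(red=1, green=2, blue=3)  # stand-in for cube.quicklook[0]
quick_look_bands = [band_id_map[quicklook.red],
                    band_id_map[quicklook.green],
                    band_id_map[quicklook.blue]]
assert quick_look_bands == ['B04', 'B03', 'B02']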
Example #15
    def orchestrate(self):
        """Orchestrate datacube defintion and prepare temporal resolutions."""
        self.datacube = Collection.query().filter(
            Collection.name == self.params['datacube']).one()

        temporal_schema = self.datacube.temporal_composition_schema

        cube_parameters: CubeParameters = CubeParameters.query().filter(
            CubeParameters.collection_id == self.datacube.id).first()

        if cube_parameters is None:
            raise RuntimeError(
                f'No parameters configured for data cube "{self.datacube.id}"')

        # This step acts like first execution. When no stac_url defined in cube parameters but it was given, save it.
        if self.properties.get(
                'stac_url') and not cube_parameters.metadata_.get('stac_url'):
            logging.debug(
                f'No "stac_url"/"token" configured yet for cube parameters.'
                f'Using {self.properties["stac_url"]}')
            meta = cube_parameters.metadata_.copy()
            meta['stac_url'] = self.properties['stac_url']
            meta['token'] = self.properties.get('token')
            cube_parameters.metadata_ = meta
            cube_parameters.save(commit=True)

        # Validate parameters
        cube_parameters.validate()

        # Pass the cube parameters to the data cube functions arguments
        props = deepcopy(cube_parameters.metadata_)
        props.update(self.properties)
        self.properties = props

        dstart = self.params['start_date']
        dend = self.params['end_date']

        if self.datacube.composite_function.alias == 'IDT':
            timeline = [[dstart, dend]]
        else:
            if self.datacube.composite_function.alias == 'STK':
                warnings.warn(
                    'The composite function STK is deprecated. Use LCF (Least Cloud Cover First) instead.',
                    DeprecationWarning,
                    stacklevel=2)

            timeline = Timeline(**temporal_schema,
                                start_date=dstart,
                                end_date=dend).mount()

        where = [Tile.grid_ref_sys_id == self.datacube.grid_ref_sys_id]

        if self.params.get('tiles'):
            where.append(Tile.name.in_(self.params['tiles']))

        self.tiles = db.session.query(Tile).filter(*where).all()

        self.bands = Band.query().filter(
            Band.collection_id == self.warped_datacube.id).all()

        bands = self.datacube_bands
        self.band_map = {
            b.name: dict(name=b.name,
                         data_type=b.data_type,
                         nodata=b.nodata,
                         min_value=b.min_value,
                         max_value=b.max_value)
            for b in bands
        }

        if self.properties.get('reuse_from'):
            warnings.warn(
                'The parameter `reuse_from` is deprecated and will be removed in next version. '
                'Use `reuse_data_cube` instead.')
            common_bands = _common_bands()
            collection_bands = [
                b.name for b in self.datacube.bands
                if b.name not in common_bands
            ]

            reused_collection_bands = [b.name for b in self.bands]

            # The reused cube must provide all bands of the input cube (STK/MED); otherwise raise an error.
            if not set(collection_bands).issubset(
                    set(reused_collection_bands)):
                raise RuntimeError(
                    f'Reused data cube {self.warped_datacube.name} must have all bands of {self.datacube.name}'
                )

            # Extra filter to only use bands of Input data cube.
            self.bands = [b for b in self.bands if b.name in collection_bands]

        if cube_parameters.reuse_cube:
            self.reused_datacube = cube_parameters.reuse_cube

        for tile in self.tiles:
            tile_name = tile.name

            grs: GridRefSys = tile.grs

            grid_geom: sqlalchemy.Table = grs.geom_table

            srid_column = get_srid_column(grid_geom.c)

            # TODO: Raise exception when using a native grid argument
            #  Use bands resolution and match with SRID context (degree x degree) etc.

            tile_stats = db.session.query(
                (func.ST_XMin(grid_geom.c.geom)).label('min_x'),
                (func.ST_YMax(grid_geom.c.geom)).label('max_y'),
                (func.ST_XMax(grid_geom.c.geom) -
                 func.ST_XMin(grid_geom.c.geom)).label('dist_x'),
                (func.ST_YMax(grid_geom.c.geom) -
                 func.ST_YMin(grid_geom.c.geom)).label('dist_y'),
                (func.ST_Transform(
                    func.ST_SetSRID(grid_geom.c.geom, srid_column),
                    4326)).label('feature')).filter(
                        grid_geom.c.tile == tile_name).first()

            self.mosaics[tile_name] = dict(periods=dict())

            for interval in timeline:
                startdate = interval[0]
                enddate = interval[1]

                if dstart is not None and startdate < dstart:
                    continue
                if dend is not None and enddate > dend:
                    continue

                period = f'{startdate}_{enddate}'

                self.mosaics[tile_name]['periods'][period] = {}
                self.mosaics[tile_name]['periods'][period][
                    'start'] = startdate.strftime('%Y-%m-%d')
                self.mosaics[tile_name]['periods'][period][
                    'end'] = enddate.strftime('%Y-%m-%d')
                self.mosaics[tile_name]['periods'][period][
                    'dist_x'] = tile_stats.dist_x
                self.mosaics[tile_name]['periods'][period][
                    'dist_y'] = tile_stats.dist_y
                self.mosaics[tile_name]['periods'][period][
                    'min_x'] = tile_stats.min_x
                self.mosaics[tile_name]['periods'][period][
                    'max_y'] = tile_stats.max_y
                self.mosaics[tile_name]['periods'][period][
                    'feature'] = tile_stats.feature
                if self.properties.get('shape', None):
                    self.mosaics[tile_name]['periods'][period][
                        'shape'] = self.properties['shape']
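For reference, the shape of the self.mosaics structure the loop above fills, with an invented tile name and numbers (the real values come from the PostGIS tile statistics and the Timeline intervals):

mosaics = {
    '021027': {                             # tile name
        'periods': {
            '2020-01-01_2020-01-16': {
                'start': '2020-01-01',
                'end': '2020-01-16',
                'dist_x': 168060.0,         # tile width, grid CRS units
                'dist_y': 110580.0,         # tile height, grid CRS units
                'min_x': 4700000.0,
                'max_y': 9000000.0,
                'feature': None,            # tile footprint in EPSG:4326
            },
        },
    },
}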
Example #16
    def radcor(cls, args: dict):
        """Search for Landsat/Sentinel Images and dispatch download task."""
        args.setdefault('limit', 299)
        args.setdefault('cloud', CLOUD_DEFAULT)
        args['tileid'] = 'notile'
        args['satsen'] = args['satsen']
        args['start'] = args.get('start')
        args['end'] = args.get('end')

        # Get bbox
        w = float(args['w'])
        e = float(args['e'])
        s = float(args['s'])
        n = float(args['n'])

        # Get the requested period to be processed
        rstart = args['start']
        rend = args['end']

        sat = args['satsen']
        cloud = float(args['cloud'])
        limit = args['limit']
        action = args.get('action', 'preview')
        do_harmonization = (args['harmonize'].lower()
                            == 'true') if 'harmonize' in args else False

        extra_args = args.get('args', dict())

        activities = []

        collections = Collection.query().filter(
            Collection.collection_type == 'collection').all()

        # TODO: Review this code. The collection name is not unique anymore.
        collections_map = {c.name: c.id for c in collections}

        scenes = {}

        def __get_collection(name: str) -> int:
            """Ensure collection name exists on database."""
            collection = collections_map.get(name)

            if collection is None:
                abort(404, f'Collection {name} not found.')

            return collection

        try:
            if 'landsat' in sat.lower():
                result = get_landsat_scenes(w, n, e, s, rstart, rend, cloud,
                                            sat)
                scenes.update(result)
                for id in result:
                    scene = result[id]
                    sceneid = scene['sceneid']

                    landsat_scene_level_2 = landsat_factory.get_from_sceneid(
                        sceneid, level=2)

                    collection_id = __get_collection(landsat_scene_level_2.id)

                    # Set collection_id as L1 by default. Change to L2 when skip L1 tasks (AWS)
                    activity = dict(collection_id=collection_id,
                                    activity_type='downloadLC8',
                                    tags=args.get('tags', []),
                                    sceneid=sceneid,
                                    scene_type='SCENE',
                                    args=dict(link=scene['link'],
                                              cloud=scene.get('cloud'),
                                              harmonize=do_harmonization))

                    created = cls.create_activity(activity)

                    if action == 'start' and not created:
                        logging.warning(
                            'radcor - activity already done {}'.format(
                                activity['sceneid']))
                        continue

                    activities.append(activity)

            if 'S2' in sat:
                result = get_sentinel_scenes(w, n, e, s, rstart, rend, cloud,
                                             limit)
                scenes.update(result)
                for id in result:
                    scene = result[id]
                    sceneid = scene['sceneid']

                    sentinel_scene_level_2 = sentinel_factory.get_from_sceneid(
                        sceneid, level=2)

                    collection_id = __get_collection(sentinel_scene_level_2.id)

                    activity = dict(collection_id=collection_id,
                                    activity_type='downloadS2',
                                    tags=args.get('tags', []),
                                    sceneid=sceneid,
                                    scene_type='SCENE',
                                    args=dict(link=scene['link'],
                                              cloud=scene.get('cloud'),
                                              harmonize=do_harmonization))

                    created = cls.create_activity(activity)

                    if action == 'start' and not created:
                        logging.warning(
                            'radcor - activity already done {}'.format(
                                sceneid))
                        continue

                    scenes[id] = scene

                    activities.append(activity)

            if action == 'start':
                db.session.commit()

                for activity in activities:
                    cls.start(activity, **extra_args)
            else:
                db.session.rollback()

        except BaseException:
            db.session.rollback()
            raise

        return scenes
Example #17
def publish(collection_item: Item,
            scene: RadcorActivity,
            skip_l1=False,
            **kwargs):
    """Publish Landsat collection.

    It works with both Digital Number (DN) and Surface Reflectance (SR).

    Args:
        collection_item - Collection Item
        scene - Current Activity
    """
    identifier = scene.sceneid

    # Get collection level to publish. Default is l1
    collection_level = scene.args.get('level') or 1

    landsat_scene = factory.get_from_sceneid(identifier,
                                             level=collection_level)

    productdir = scene.args.get('file')

    logging.warning('Publish {} - {} (id={})'.format(scene.collection_id,
                                                     productdir, scene.id))

    if productdir and productdir.endswith('.gz'):
        target_dir = landsat_scene.path()
        makedirs(target_dir, exist_ok=True)

        productdir = uncompress(productdir, str(target_dir))

    collection = Collection.query().filter(
        Collection.id == collection_item.collection_id).one()

    quicklook = Quicklook.query().filter(
        Quicklook.collection_id == collection.id).first()

    if quicklook:
        quicklook_bands = Band.query().filter(
            Band.id.in_([quicklook.red, quicklook.green, quicklook.blue])).all()
        quicklook = [
            quicklook_bands[0].name, quicklook_bands[1].name,
            quicklook_bands[2].name
        ]
    else:
        quicklook = DEFAULT_QUICK_LOOK_BANDS

    files = {}
    qlfiles = {}

    bands = landsat_scene.get_band_map()

    for gband, band in bands.items():
        fs = landsat_scene.get_files()

        if not fs:
            continue

        for f in fs:
            if f.stem.endswith(band) and f.suffix.lower().endswith('.tif'):
                files[gband] = f
                if gband in quicklook:
                    qlfiles[gband] = str(f)

    # Generate Vegetation Index files
    generate_vi(productdir, files)

    # Apply valid range and Cog files
    for band, file_path in files.items():
        tif_file = str(file_path)

        if landsat_scene.level == 2:
            _ = apply_valid_range(tif_file, tif_file)

        # Set destination of COG file
        files[band] = generate_cogs(tif_file, tif_file)
        if not is_valid_tif(tif_file):
            raise RuntimeError('Not Valid {}'.format(tif_file))

    # Extract basic scene information and build the quicklook
    pngname = productdir + '/{}.png'.format(identifier)

    dataset = GDALOpen(qlfiles['nir'], GA_ReadOnly)
    numlin = 768
    numcol = int(
        float(dataset.RasterXSize) / float(dataset.RasterYSize) * numlin)
    del dataset

    create_quick_look(pngname,
                      [qlfiles[band] for band in quicklook if band in qlfiles],
                      rows=numlin,
                      cols=numcol)

    productdir = productdir.replace(Config.DATA_DIR, '')

    assets_to_upload = {
        'quicklook':
        dict(file=pngname, asset=productdir.replace('/Repository/Archive', ''))
    }

    for instance in ['local', 'aws']:
        engine_instance = {'local': db, 'aws': db_aws}
        engine = engine_instance[instance]

        # Skip catalog on aws for digital number
        if landsat_scene.level == 1 and instance == 'aws':
            continue

        if instance == 'aws':
            if Config.DISABLE_PUBLISH_SECOND_DB:
                logging.info('Skipping publish in second db.')
                continue

            asset_url = productdir.replace('/Repository/Archive',
                                           Config.AWS_BUCKET_NAME)
        else:
            asset_url = productdir

        pngname_relative = resource_path.join(asset_url, Path(pngname).name)

        assets_to_upload['quicklook']['asset'] = pngname_relative

        with engine.session.begin_nested():
            with engine.session.no_autoflush:
                # Add collection item to the session if not present
                if collection_item not in engine.session:
                    item = engine.session.query(Item).filter(
                        Item.name == collection_item.name, Item.collection_id
                        == collection_item.collection_id).first()

                    if not item:
                        cloned_properties = CollectionItemForm().dump(
                            collection_item)
                        collection_item = Item(**cloned_properties)
                        engine.session.add(collection_item)

                collection_bands = engine.session.query(Band)\
                    .filter(Band.collection_id == collection_item.collection_id)\
                    .all()

                assets = dict(thumbnail=create_asset_definition(
                    str(pngname_relative), 'image/png', ['thumbnail'],
                    str(pngname)))

                geom = min_convex_hull = None

                # Inserting data into Product table
                for band in files:
                    template = resource_path.join(asset_url,
                                                  Path(files[band]).name)

                    band_model = next(
                        filter(lambda b: band == b.common_name,
                               collection_bands), None)

                    if not band_model:
                        logging.warning(
                            'Band {} of collection {} not found in database. Skipping...'
                            .format(band, collection_item.collection_id))
                        continue

                    if geom is None:
                        geom = raster_extent(files[band])
                        min_convex_hull = raster_convexhull(files[band])

                    assets[band_model.name] = create_asset_definition(
                        template,
                        COG_MIME_TYPE, ['data'],
                        files[band],
                        is_raster=True)

                    assets_to_upload[band] = dict(file=files[band],
                                                  asset=template)

                collection_item.assets = assets
                collection_item.geom = from_shape(geom, srid=4326)
                collection_item.min_convex_hull = from_shape(min_convex_hull,
                                                             srid=4326)
                # Add into scope of local and remote database
                add_instance(engine, collection_item)

        # Persist database
        commit(engine)

    return assets_to_upload
Example #18
    def radcor(cls, args: dict):
        """Search for Landsat/Sentinel Images and dispatch download task."""
        args.setdefault('cloud', 100)

        cloud = float(args['cloud'])
        action = args.get('action', 'preview')

        collections = Collection.query().filter(Collection.collection_type.in_(['collection', 'cube'])).all()

        # TODO: Review this code. The collection name is not unique anymore.
        collections_map = {f'{c.name}-{c.version}': c.id for c in collections}

        tasks = args.get('tasks', [])

        force = args.get('force', False)
        options = dict()

        if 'platform' in args:
            options['platform'] = args['platform']

        if 'scenes' not in args and 'tiles' not in args:
            w, e = float(args['w']), float(args['e'])
            s, n = float(args['s']), float(args['n'])
            bbox = [w, s, e, n]
            options['bbox'] = bbox

        try:
            catalog_provider, provider = get_provider(catalog=args['catalog'])

            if 'scenes' in args:
                result = []

                unique_scenes = set(args['scenes'])

                for scene in unique_scenes:
                    query_result = provider.search(
                        query=args['dataset'],
                        filename=f'{scene}*',
                        **options
                    )

                    result.extend(query_result)
            elif 'tiles' in args:
                result = []
                for tile in args['tiles']:
                    query_result = provider.search(
                        query=args['dataset'],
                        tile=tile,
                        start_date=args['start'],
                        end_date=args['end'],
                        cloud_cover=cloud,
                        **options
                    )
                    result.extend(query_result)
            else:
                result = provider.search(
                    query=args['dataset'],
                    start_date=args['start'],
                    end_date=args['end'],
                    cloud_cover=cloud,
                    **options
                )

            def _recursive(scene, task, parent=None, parallel=True, pass_args=True):
                """Recursively build the task-dispatch workflow for a scene."""
                collection_id = collections_map[task['collection']]
                # Create activity definition example
                activity = cls._activity_definition(collection_id, task['type'], scene, **task['args'])
                activity['args'].update(dict(catalog=args['catalog'], dataset=args['dataset']))

                _task = cls._task_definition(task['type'])
                # Try to create activity in database and the parent if there is.
                instance, created = cls.create_activity(activity, parent)

                # When the activity already exists and force is not set, skip it to avoid collecting the scene multiple times
                if not created and not force:
                    return None

                dump = RadcorActivityForm().dump(instance)
                dump['args'].update(activity['args'])

                keywords = dict(collection_id=collection_id, activity_type=task['type'])
                # If no children
                if not task.get('tasks'):
                    if parent is None:
                        return _task.s(dump, force=force)
                    return _task.s(**keywords)

                res = []

                for child in task['tasks']:
                    # When triggering children, use parallel=False to use chain workflow
                    child_task = _recursive(scene, child, parent=instance, parallel=False, pass_args=False)

                    if child_task:
                        res.append(child_task)

                handler = group(*res) if parallel else chain(*res)

                arguments = []

                if pass_args:
                    arguments.append(dump)

                return _task.s(*arguments, **keywords) | handler

            if action == 'start':
                to_dispatch = []

                with db.session.begin_nested():
                    for task in tasks:
                        if task['type'] == 'download':
                            cls.validate_provider(collections_map[task['collection']])

                        for scene_result in result:
                            children_task = _recursive(scene_result, task, parent=None)

                            if children_task:
                                to_dispatch.append(children_task)

                db.session.commit()

                if len(to_dispatch) > 0:
                    group(to_dispatch).apply_async()
        except Exception:
            db.session.rollback()
            raise

        return result
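The shapes _recursive returns are plain Celery canvas primitives: a leaf becomes a single signature, while a node with children becomes parent | group(...) (parallel children) or parent | chain(...) (sequential). A standalone sketch with dummy tasks (app, broker, and task names are all illustrative):

from celery import Celery, chain, group

app = Celery('sketch', broker='memory://')

@app.task
def download(scene, **kwargs):
    return scene

@app.task
def correction(scene, **kwargs):
    return scene

@app.task
def publish(scene, **kwargs):
    return scene

# Parent followed by a sequential child chain, mirroring
# `_task.s(dump, **keywords) | handler` above:
workflow = download.s({'sceneid': 'X'}) | chain(correction.s(), publish.s())
workflow.apply_async()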
Example #19
    def check_scenes(cls, collections: str, start_date: datetime, end_date: datetime,
                     catalog: str = None, dataset: str = None,
                     grid: str = None, tiles: list = None, bbox: list = None, catalog_kwargs=None, only_tiles=False):
        """Check for the scenes in remote provider and compares with the Collection Builder."""
        bbox_list = []
        if grid and tiles:
            grid = GridRefSys.query().filter(GridRefSys.name == grid).first_or_404(f'Grid "{grid}" not found.')
            geom_table = grid.geom_table

            rows = db.session.query(
                geom_table.c.tile,
                func.ST_Xmin(func.ST_Transform(geom_table.c.geom, 4326)).label('xmin'),
                func.ST_Ymin(func.ST_Transform(geom_table.c.geom, 4326)).label('ymin'),
                func.ST_Xmax(func.ST_Transform(geom_table.c.geom, 4326)).label('xmax'),
                func.ST_Ymax(func.ST_Transform(geom_table.c.geom, 4326)).label('ymax'),
            ).filter(geom_table.c.tile.in_(tiles)).all()
            for row in rows:
                bbox_list.append((row.tile, (row.xmin, row.ymin, row.xmax, row.ymax)))
        else:
            bbox_list.append(('', bbox))

        instance, provider = get_provider(catalog)

        collection_map = dict()
        collection_ids = list()

        for _collection in collections:
            collection, version = _collection.rsplit('-', maxsplit=1)

            collection = Collection.query().filter(
                Collection.name == collection,
                Collection.version == version
            ).first_or_404(f'Collection "{collection}-{version}" not found.')

            collection_ids.append(collection.id)
            collection_map[_collection] = collection

        options = dict(start_date=start_date, end_date=end_date)
        if catalog_kwargs:
            options.update(catalog_kwargs)

        redis = current_app.redis
        output = dict(
            collections={cname: dict(total_scenes=0, total_missing=0, missing_external=[]) for cname in collections}
        )

        items = {cid: set() for cid in collection_ids}
        external_scenes = set()

        for tile, _bbox in bbox_list:
            with redis.pipeline() as pipe:
                if only_tiles:
                    entry = tile
                    options['tile'] = tile
                else:
                    options['bbox'] = _bbox
                    entry = _bbox

                periods = _generate_periods(start_date.replace(tzinfo=None), end_date.replace(tzinfo=None))

                for period_start, period_end in periods:
                    _items = db.session.query(Item.name, Item.collection_id).filter(
                        Item.collection_id.in_(collection_ids),
                        func.ST_Intersects(
                            func.ST_MakeEnvelope(
                                *_bbox, func.ST_SRID(Item.geom)
                            ),
                            Item.geom
                        ),
                        or_(
                            and_(Item.start_date >= period_start, Item.start_date <= period_end),
                            and_(Item.end_date >= period_start, Item.end_date <= period_end),
                            and_(Item.start_date < period_start, Item.end_date > period_end),
                        )
                    ).order_by(Item.name).all()

                    for item in _items:
                        items[item.collection_id].add(item.name)

                    options['start_date'] = period_start.strftime('%Y-%m-%d')
                    options['end_date'] = period_end.strftime('%Y-%m-%d')

                    key = f'scenes:{catalog}:{dataset}:{period_start.strftime("%Y%m%d")}_{period_end.strftime("%Y%m%d")}_{entry}'

                    pipe.get(key)
                    provider_scenes = []

                    if not redis.exists(key):
                        provider_scenes = provider.search(dataset, **options)
                        provider_scenes = [s.scene_id for s in provider_scenes]

                        pipe.set(key, json.dumps(provider_scenes))

                    external_scenes = external_scenes.union(set(provider_scenes))

                cached_scenes = pipe.execute()

                for cache in cached_scenes:
                    # pipe.execute() yields one result per queued command: None for
                    # misses, True for set() calls, and a JSON payload for cache hits.
                    if cache is not None and cache is not True:
                        external_scenes = external_scenes.union(set(json.loads(cache)))

        output['total_external'] = len(external_scenes)
        for _collection_name, _collection in collection_map.items():
            _items = set(items[_collection.id])
            diff = list(external_scenes.difference(_items))

            output['collections'][_collection_name]['total_scenes'] = len(_items)
            output['collections'][_collection_name]['total_missing'] = len(diff)
            output['collections'][_collection_name]['missing_external'] = diff

            for cname, _internal_collection in collection_map.items():
                if cname != _collection_name:
                    diff = list(_items.difference(set(items[_internal_collection.id])))
                    output['collections'][_collection_name][f'total_missing_{cname}'] = len(diff)
                    output['collections'][_collection_name][f'missing_{cname}'] = diff

        return output
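The reporting at the end is plain set arithmetic; a minimal illustration with made-up scene ids:

external_scenes = {'LC08_001_2020', 'LC08_002_2020', 'LC08_003_2020'}
local_items = {'LC08_001_2020'}

missing = sorted(external_scenes - local_items)
assert missing == ['LC08_002_2020', 'LC08_003_2020']  # -> 'missing_external'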
Example #20
    def start_process(self, params):
        response = {}
        datacube_identify = f'{params["datacube_name"]}-{params["datacube_version"]}'
        response = self.services.get_process_by_id(datacube_identify)

        if 'Items' not in response or len(response['Items']) == 0:
            raise NotFound('Datacube not found in process table!')

        # Get process info from DynamoDB
        process_info = response['Items'][0]
        process_params = json.loads(process_info['infos'])
        indexes = process_params['indexes']
        quality_band = process_params['quality_band']
        functions = [process_params['composite_function'], 'IDT']
        satellite = process_info['metadata']['platform']['code']
        mask = process_info.get('mask', None)

        tiles = params['tiles']
        start_date = datetime.strptime(params['start_date'],
                                       '%Y-%m-%d').strftime('%Y-%m-%d')
        end_date = datetime.strptime(params['end_date'], '%Y-%m-%d').strftime('%Y-%m-%d') \
            if params.get('end_date') else datetime.now().strftime('%Y-%m-%d')

        # verify cube info
        cube_infos = Collection.query().filter(
            Collection.id == process_info['datacube_id']).first()
        cube_infos_irregular = Collection.query().filter(
            Collection.id == process_info['irregular_datacube_id']).first()
        if not cube_infos or not cube_infos_irregular:
            return 'Cube not found!', 404

        # get bands list
        bands = Band.query().filter(
            Band.collection_id == cube_infos_irregular.id).all()
        bands_list = []
        indexes_list = []
        for band in bands:
            if band.name.upper() not in [
                    i['common_name'].upper() for i in indexes
            ]:
                bands_list.append(band.name)
            else:
                indexes_available = {
                    'NDVI': ['NIR', 'RED'],
                    'EVI': ['NIR', 'RED', 'BLUE']
                }
                if not indexes_available.get(band.name.upper()):
                    return 'Index not available', 400

                index = dict(
                    name=band.name,
                    bands=[
                        dict(
                            name=b.name,
                            common_name=b.common_name
                        ) for b in bands \
                            if b.common_name.upper() in indexes_available[band.name.upper()]
                    ]
                )
                if len(index['bands']) != len(
                        indexes_available[band.name.upper()]):
                    return 'bands: {}, are needed to create the {} index'.format(
                        ','.join(indexes_available[band.name.upper()]),
                        band.name), 400
                indexes_list.append(index)

        # get quicklook bands
        bands_ql = Quicklook.query().filter(
            Quicklook.collection_id == cube_infos_irregular.id).first()
        bands_ql_list = [
            list(filter(lambda b: b.id == bands_ql.red, bands))[0].name,
            list(filter(lambda b: b.id == bands_ql.green, bands))[0].name,
            list(filter(lambda b: b.id == bands_ql.blue, bands))[0].name
        ]

        cub_ref = cube_infos or cube_infos_irregular

        # items => old mosaic
        # orchestrate
        shape = params.get('shape', None)
        self.score['items'] = orchestrate(cub_ref, tiles, start_date, end_date,
                                          functions, shape)

        # prepare merge
        crs = cube_infos.grs.crs
        formatted_version = format_version(cube_infos.version)
        prepare_merge(self, cube_infos.name, params['collections'],
                      satellite, bands_list, indexes_list, bands_ql_list,
                      float(bands[0].resolution_x),
                      float(bands[0].resolution_y), int(bands[0].nodata), crs,
                      quality_band, functions, formatted_version,
                      params.get('force', False), mask)

        return dict(message='Processing started successfully'), 201
Example #21
    def start_process(self, params):
        response = {}
        datacube_identify = f'{params["datacube"]}-{params["datacube_version"]}'
        response = self.services.get_process_by_id(datacube_identify)

        if 'Items' not in response or len(response['Items']) == 0:
            raise NotFound('Datacube not found in process table!')

        # Get process info from DynamoDB
        process_info = response['Items'][0]
        process_params = json.dumps(process_info['infos'], cls=DecimalEncoder)
        process_params = json.loads(process_params)

        indexes = process_params['indexes']
        quality_band = process_params['quality_band']
        functions = [process_params['composite_function'], 'IDT']
        satellite = process_params['metadata']['platform']['code']
        mask = process_params['parameters'].get('mask')
        if not mask:
            raise NotFound(
                'Mask values not found in the processing table item (DynamoDB)'
            )

        stac_list = params.get('stac_list', None)
        if not stac_list and process_params['parameters'].get('stac_list'):
            stac_list = process_params['parameters']['stac_list']
        elif not stac_list:
            raise NotFound('STAC URL and collection are required')

        landsat_harmonization = process_params['parameters'].get(
            'landsat_harmonization', {})
        if not landsat_harmonization.get('apply', False):
            landsat_harmonization = None

        self.services = CubeServices(bucket=self.services.bucket_name,
                                     stac_list=stac_list)

        collections = [stac['collection'] for stac in stac_list]

        tiles = params['tiles']
        start_date = params['start_date'].strftime('%Y-%m-%d')
        end_date = params['end_date'].strftime('%Y-%m-%d') \
            if params.get('end_date') else datetime.now().strftime('%Y-%m-%d')

        # verify cube info
        cube_infos = Collection.query().filter(
            Collection.id == process_info['datacube_id']).first()
        cube_infos_irregular = Collection.query().filter(
            Collection.id == process_info['irregular_datacube_id']).first()
        if not cube_infos or not cube_infos_irregular:
            return 'Cube not found!', 404

        # get bands list
        bands = Band.query().filter(
            Band.collection_id == cube_infos_irregular.id).all()

        bands_expressions = dict()

        bands_list = []
        bands_ids_list = {}
        for band in bands:
            if band.name.upper() not in [
                    i['common_name'].upper() for i in indexes
            ]:
                bands_list.append(band.name)
                bands_ids_list[band.id] = band.name
            elif band._metadata and band._metadata.get(
                    'expression') and band._metadata['expression'].get(
                        'value'):
                meta = deepcopy(band._metadata)
                meta['data_type'] = band.data_type
                bands_expressions[band.name] = meta

        # get quicklook bands
        bands_ql = Quicklook.query().filter(
            Quicklook.collection_id == cube_infos_irregular.id).first()
        bands_ql_list = [
            list(filter(lambda b: b.id == bands_ql.red, bands))[0].name,
            list(filter(lambda b: b.id == bands_ql.green, bands))[0].name,
            list(filter(lambda b: b.id == bands_ql.blue, bands))[0].name
        ]

        # items => { 'tile_id': bbox, xmin, ..., periods: {'start_end': collection, ... } }
        # orchestrate
        shape = params.get('shape', None)
        temporal_schema = cube_infos.temporal_composition_schema
        self.score['items'] = orchestrate(cube_infos_irregular,
                                          temporal_schema,
                                          tiles,
                                          start_date,
                                          end_date,
                                          shape,
                                          item_prefix=ITEM_PREFIX)

        # prepare merge
        crs = cube_infos.grs.crs
        formatted_version = format_version(cube_infos.version)
        not_started = prepare_merge(
            self,
            cube_infos.name,
            cube_infos_irregular.name,
            collections,
            satellite,
            bands_list,
            bands_ids_list,
            bands_ql_list,
            float(bands[0].resolution_x),
            float(bands[0].resolution_y),
            int(bands[0].nodata),
            crs,
            quality_band,
            functions,
            formatted_version,
            params.get('force', False),
            mask,
            bands_expressions=bands_expressions,
            indexes_only_regular_cube=params.get('indexes_only_regular_cube'),
            landsat_harmonization=landsat_harmonization)

        if len(not_started):
            return dict(
                message=
                'Some scenes have not been started! If necessary, use the force parameter.',
                scenes_not_started=not_started), 200

        return dict(message='Processing started successfully'), 200
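
For orientation, a minimal sketch of the parameters this method reads, with every value a placeholder (the surrounding controller instance and the DynamoDB process table are assumed to exist):

from datetime import datetime

# Hypothetical input for start_process(); all values are placeholders.
params = dict(
    datacube='S2-16D',                   # placeholder cube name
    datacube_version=2,
    tiles=['011009'],                    # placeholder tile identifier
    start_date=datetime(2020, 1, 1),
    end_date=datetime(2020, 12, 31),     # optional; defaults to today when omitted
    force=False,
    # optional: stac_list=[dict(url='...', collection='...')]  # keys inferred from usage
)
# message, status = controller.start_process(params)  # `controller` is assumed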
Example No. 22
def warp_merge(activity, band_map, force=False, **kwargs):
    """Execute datacube merge task.

    This task consists of the following steps:

    **1.** Prepare a raster using the dimensions of the datacube GRS schema.
    **2.** Open the collection dataset with rasterio and reproject it to the datacube GRS schema.
    **3.** Fill the respective path/row into the raster.

    Args:
        activity - Datacube Activity Model
        band_map - Map of band names; the 'quality' key identifies the quality band
        force - Flag to build data cube without cache.

    Returns:
        Validated activity
    """
    logging.warning('Executing merge {} - {}'.format(
        activity.get('warped_collection_id'), activity['band']))

    record = create_execution(activity)

    record.warped_collection_id = activity['warped_collection_id']
    merge_date = activity['date']

    tile_id = activity['tile_id']
    version = activity['args']['version']

    merge_file_path = None

    if activity['args'].get('reuse_datacube'):
        collection = Collection.query().filter(
            Collection.id == activity['args']['reuse_datacube']).first()

        if not force:
            # TODO: Should we search in Activity instead?
            merge_file_path = build_cube_path(collection.name,
                                              merge_date,
                                              tile_id,
                                              version=collection.version,
                                              band=record.band)

            if not merge_file_path.exists():
                # TODO: Should we raise exception??
                logging.warning(
                    f'Cube {record.warped_collection_id} requires {collection.name}, but the file {str(merge_file_path)} was not found.'
                )
                raise RuntimeError(
                    f"""Cube {record.warped_collection_id} is derived from {collection.name},
                    but the file {str(merge_file_path)} was not found.""")

        else:
            raise RuntimeError(
                f'Cannot use option "force" for derived data cube - {record.warped_collection_id} of {collection.name}'
            )

    if merge_file_path is None:
        merge_file_path = build_cube_path(record.warped_collection_id,
                                          merge_date,
                                          tile_id,
                                          version=version,
                                          band=record.band)

        if activity['band'] == band_map['quality'] and len(
                activity['args']['datasets']):
            kwargs['build_provenance'] = True

    reused = False

    # Reuse merges already done. Rebuild only with flag ``--force``
    if not force and merge_file_path.exists() and merge_file_path.is_file():
        efficacy = cloudratio = 0

        if activity['band'] == band_map['quality']:
            # When file exists, compute the file statistics
            efficacy, cloudratio = compute_data_set_stats(str(merge_file_path))

        reused = True

        activity['args']['file'] = str(merge_file_path)
        activity['args']['efficacy'] = efficacy
        activity['args']['cloudratio'] = cloudratio
        record.traceback = ''

        args = deepcopy(record.args)
        args.update(activity['args'])

        activity['args'] = args

        record.args = args
        record.save()
    else:
        record.status = 'STARTED'
        record.save()

        record.args = activity['args']

        try:
            args = deepcopy(activity.get('args'))
            args.pop('period', None)
            args['tile_id'] = tile_id
            args['date'] = record.date.strftime('%Y-%m-%d')
            args['cube'] = record.warped_collection_id

            empty = args.get('empty', False)

            # Create base directory
            merge_file_path.parent.mkdir(parents=True, exist_ok=True)

            if empty:
                # create empty raster
                file_path = create_empty_raster(
                    str(merge_file_path),
                    proj4=args['srs'],
                    cog=True,
                    nodata=args['nodata'],
                    dtype='int16',  # TODO: Pass through args
                    dist=[args['dist_x'], args['dist_y']],
                    resolution=[args['resx'], args['resy']],
                    xmin=args['xmin'],
                    ymax=args['ymax'])
                res = dict(file=str(file_path),
                           efficacy=100,
                           cloudratio=0,
                           resolution=args['resx'],
                           nodata=args['nodata'])
            else:
                res = merge_processing(str(merge_file_path),
                                       band_map=band_map,
                                       band=record.band,
                                       **args,
                                       **kwargs)

            merge_args = deepcopy(activity['args'])
            merge_args.update(res)

            record.traceback = ''
            record.status = 'SUCCESS'
            record.args = merge_args

            activity['args'].update(merge_args)
        except BaseException as e:
            record.status = 'FAILURE'
            record.traceback = capture_traceback(e)
            logging.error('Error in merge. Activity {}'.format(record.id),
                          exc_info=True)

            raise e
        finally:
            record.save()

    logging.warning(
        'Merge {} executed successfully. Efficacy={}, cloud_ratio={}'.format(
            str(merge_file_path), activity['args']['efficacy'],
            activity['args']['cloudratio']))

    activity['args']['reused'] = reused

    return activity
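
A hedged sketch of the activity payload warp_merge() consumes, assembled from the key accesses above; all names and values are illustrative:

# Illustrative activity for warp_merge(); every value is a placeholder.
activity = dict(
    warped_collection_id='S2-16D_IDT',   # placeholder warped cube name
    band='B04',
    date='2020-01-01',
    tile_id='011009',
    args=dict(
        version=2,
        datasets=['S2_L2A'],             # non-empty list enables build_provenance for the quality band
        # merge parameters forwarded to merge_processing()/create_empty_raster():
        # srs, nodata, dist_x, dist_y, resx, resy, xmin, ymax, ...
    ),
)
# warp_merge(activity, band_map={'quality': 'SCL'})  # band_map contents assumed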
Example No. 23
    def _check_reuse_cube(self):
        """Set the reused data cube reference when the metadata requests one."""
        self._reuse_cube = None

        if self.metadata_.get('reuse_data_cube'):
            self._reuse_cube = Collection.query().get(self.metadata_['reuse_data_cube'])
Example No. 24
    def orchestrate(self):
        """Orchestrate datacube defintion and prepare temporal resolutions."""
        self.datacube = Collection.query().filter(
            Collection.name == self.params['datacube']).one()

        temporal_schema = self.datacube.temporal_composition_schema

        dstart = self.params['start_date']
        dend = self.params['end_date']

        timeline = Timeline(**temporal_schema,
                            start_date=dstart,
                            end_date=dend).mount()

        where = [Tile.grid_ref_sys_id == self.datacube.grid_ref_sys_id]

        if self.params.get('tiles'):
            where.append(Tile.name.in_(self.params['tiles']))

        self.tiles = db.session.query(Tile).filter(*where).all()

        self.bands = Band.query().filter(
            Band.collection_id == self.warped_datacube.id).all()

        if self.properties.get('reuse_from'):
            common_bands = _common_bands()
            collection_bands = [
                b.name for b in self.datacube.bands
                if b.name not in common_bands
            ]

            reused_collection_bands = [b.name for b in self.bands]

            # The reused data cube must contain all bands of the input cube (STK/MED); otherwise, raise an error.
            if not set(collection_bands).issubset(
                    set(reused_collection_bands)):
                raise RuntimeError(
                    f'Reused data cube {self.warped_datacube.name} must have all bands of {self.datacube.name}'
                )

            # Extra filter to only use bands of Input data cube.
            self.bands = [b for b in self.bands if b.name in collection_bands]

        for tile in self.tiles:
            tile_name = tile.name

            grs: GridRefSys = tile.grs

            grid_geom = grs.geom_table

            tile_stats = db.session.query(
                (func.ST_XMin(grid_geom.c.geom)).label('min_x'),
                (func.ST_YMax(grid_geom.c.geom)).label('max_y'),
                (func.ST_XMax(grid_geom.c.geom) -
                 func.ST_XMin(grid_geom.c.geom)).label('dist_x'),
                (func.ST_YMax(grid_geom.c.geom) -
                 func.ST_YMin(grid_geom.c.geom)).label('dist_y')).filter(
                     grid_geom.c.tile == tile_name).first()

            self.mosaics[tile_name] = dict(periods=dict())

            for interval in timeline:
                startdate = interval[0]
                enddate = interval[1]

                if dstart is not None and startdate < dstart:
                    continue
                if dend is not None and enddate > dend:
                    continue

                period = f'{startdate}_{enddate}'
                cube_relative_path = f'{self.datacube.name}/v{self.datacube.version:03d}/{tile_name}/{period}'

                self.mosaics[tile_name]['periods'][period] = {}
                self.mosaics[tile_name]['periods'][period][
                    'start'] = startdate.strftime('%Y-%m-%d')
                self.mosaics[tile_name]['periods'][period][
                    'end'] = enddate.strftime('%Y-%m-%d')
                self.mosaics[tile_name]['periods'][period][
                    'dist_x'] = tile_stats.dist_x
                self.mosaics[tile_name]['periods'][period][
                    'dist_y'] = tile_stats.dist_y
                self.mosaics[tile_name]['periods'][period][
                    'min_x'] = tile_stats.min_x
                self.mosaics[tile_name]['periods'][period][
                    'max_y'] = tile_stats.max_y
                self.mosaics[tile_name]['periods'][period][
                    'dirname'] = cube_relative_path
                if self.properties.get('shape', None):
                    self.mosaics[tile_name]['periods'][period][
                        'shape'] = self.properties['shape']
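
The loop above fills self.mosaics with one entry per tile; an illustrative shape, with all values placeholders, is:

# Illustrative shape of self.mosaics after orchestrate().
mosaics_example = {
    '011009': {
        'periods': {
            '2020-01-01_2020-01-16': {
                'start': '2020-01-01',
                'end': '2020-01-16',
                'dist_x': 105600.0,      # tile extent in CRS units
                'dist_y': 105600.0,
                'min_x': 4800000.0,
                'max_y': 10000000.0,
                'dirname': 'S2-16D/v002/011009/2020-01-01_2020-01-16',
                # 'shape': [10980, 10980],  # present only when properties['shape'] is set
            },
        },
    },
}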
Example No. 25
    def orchestrate(self):
        """Orchestrate datacube defintion and prepare temporal resolutions."""
        self.datacube = Collection.query().filter(
            Collection.name == self.params['datacube']).one()

        temporal_schema = self.datacube.temporal_composition_schema

        dstart = self.params['start_date']
        dend = self.params['end_date']

        timeline = Timeline(**temporal_schema,
                            start_date=dstart,
                            end_date=dend).mount()

        where = [Tile.grid_ref_sys_id == self.datacube.grid_ref_sys_id]

        if self.params.get('tiles'):
            where.append(Tile.name.in_(self.params['tiles']))

        self.tiles = db.session.query(Tile).filter(*where).all()

        self.bands = Band.query().filter(
            Band.collection_id == self.warped_datacube.id).all()

        for tile in self.tiles:
            tile_name = tile.name

            grs: GridRefSys = tile.grs

            grid_geom = grs.geom_table

            tile_stats = db.session.query(
                (func.ST_XMin(grid_geom.c.geom)).label('min_x'),
                (func.ST_YMax(grid_geom.c.geom)).label('max_y'),
                (func.ST_XMax(grid_geom.c.geom) -
                 func.ST_XMin(grid_geom.c.geom)).label('dist_x'),
                (func.ST_YMax(grid_geom.c.geom) -
                 func.ST_YMin(grid_geom.c.geom)).label('dist_y')).filter(
                     grid_geom.c.tile == tile_name).first()

            self.mosaics[tile_name] = dict(periods=dict())

            for interval in timeline:
                startdate = interval[0]
                enddate = interval[1]

                if dstart is not None and startdate < dstart:
                    continue
                if dend is not None and enddate > dend:
                    continue

                period = f'{startdate}_{enddate}'
                cube_relative_path = f'{self.datacube.name}/v{self.datacube.version:03d}/{tile_name}/{period}'

                self.mosaics[tile_name]['periods'][period] = {}
                self.mosaics[tile_name]['periods'][period][
                    'start'] = startdate.strftime('%Y-%m-%d')
                self.mosaics[tile_name]['periods'][period][
                    'end'] = enddate.strftime('%Y-%m-%d')
                self.mosaics[tile_name]['periods'][period][
                    'dist_x'] = tile_stats.dist_x
                self.mosaics[tile_name]['periods'][period][
                    'dist_y'] = tile_stats.dist_y
                self.mosaics[tile_name]['periods'][period][
                    'min_x'] = tile_stats.min_x
                self.mosaics[tile_name]['periods'][period][
                    'max_y'] = tile_stats.max_y
                self.mosaics[tile_name]['periods'][period][
                    'dirname'] = cube_relative_path
Example No. 26
    def _create_cube_definition(cls, cube_id: str, params: dict) -> dict:
        """Create a data cube definition.

        Basically, the definition consists of `Collection` and `Band` attributes.

        Note:
            It does not attempt to create the data cube when it already exists.

        Args:
            cube_id - Data cube identifier
            params - Dict of required values to create the data cube. See @validators.py

        Returns:
            A serialized data cube information.
        """
        cube_parts = get_cube_parts(cube_id)

        function = cube_parts.composite_function

        cube_id = cube_parts.datacube

        cube = Collection.query().filter(
            Collection.name == cube_id,
            Collection.version == params['version']).first()

        grs = GridRefSys.query().filter(
            GridRefSys.name == params['grs']).first()

        if grs is None:
            abort(404, f'Grid {params["grs"]} not found.')

        cube_function = CompositeFunction.query().filter(
            CompositeFunction.alias == function).first()

        if cube_function is None:
            abort(404, f'Function {function} not found.')

        data = dict(name='Meter', symbol='m')
        resolution_meter, _ = get_or_create_model(ResolutionUnit,
                                                  defaults=data,
                                                  symbol='m')

        mime_type, _ = get_or_create_model(MimeType,
                                           defaults=dict(name=COG_MIME_TYPE),
                                           name=COG_MIME_TYPE)

        if cube is None:
            cube = Collection(
                name=cube_id,
                title=params['title'],
                temporal_composition_schema=params['temporal_composition']
                if function != 'IDT' else None,
                composite_function_id=cube_function.id,
                grs=grs,
                _metadata=params['metadata'],
                description=params['description'],
                collection_type='cube',
                is_public=params.get('public', True),
                version=params['version'])

            cube.save(commit=False)

            bands = []

            default_bands = (CLEAR_OBSERVATION_NAME.lower(),
                             TOTAL_OBSERVATION_NAME.lower(),
                             PROVENANCE_NAME.lower())

            band_map = dict()

            for band in params['bands']:
                name = band['name'].strip()

                if name in default_bands:
                    continue

                is_not_cloud = params['quality_band'] != band['name']

                if band['name'] == params['quality_band']:
                    data_type = 'uint8'
                else:
                    data_type = band['data_type']

                band_model = Band(name=name,
                                  common_name=band['common_name'],
                                  collection=cube,
                                  min_value=0,
                                  max_value=10000 if is_not_cloud else 4,
                                  nodata=-9999 if is_not_cloud else 255,
                                  scale=0.0001 if is_not_cloud else 1,
                                  data_type=data_type,
                                  resolution_x=params['resolution'],
                                  resolution_y=params['resolution'],
                                  resolution_unit_id=resolution_meter.id,
                                  description='',
                                  mime_type_id=mime_type.id)

                if band.get('metadata'):
                    band_model._metadata = cls._validate_band_metadata(
                        deepcopy(band['metadata']), band_map)

                band_model.save(commit=False)
                bands.append(band_model)

                band_map[name] = band_model

                if band_model._metadata:
                    for _band_origin_id in band_model._metadata['expression'][
                            'bands']:
                        band_provenance = BandSRC(band_src_id=_band_origin_id,
                                                  band_id=band_model.id)
                        band_provenance.save(commit=False)

            quicklook = Quicklook(
                red=band_map[params['bands_quicklook'][0]].id,
                green=band_map[params['bands_quicklook'][1]].id,
                blue=band_map[params['bands_quicklook'][2]].id,
                collection=cube)

            quicklook.save(commit=False)

        # Create default Cube Bands
        if function != 'IDT':
            _ = cls.get_or_create_band(cube.id,
                                       **CLEAR_OBSERVATION_ATTRIBUTES,
                                       resolution_unit_id=resolution_meter.id,
                                       resolution_x=params['resolution'],
                                       resolution_y=params['resolution'])
            _ = cls.get_or_create_band(cube.id,
                                       **TOTAL_OBSERVATION_ATTRIBUTES,
                                       resolution_unit_id=resolution_meter.id,
                                       resolution_x=params['resolution'],
                                       resolution_y=params['resolution'])

            if function == 'STK':
                _ = cls.get_or_create_band(
                    cube.id,
                    **PROVENANCE_ATTRIBUTES,
                    resolution_unit_id=resolution_meter.id,
                    resolution_x=params['resolution'],
                    resolution_y=params['resolution'])

        if params.get('is_combined') and function != 'MED':
            _ = cls.get_or_create_band(cube.id,
                                       **DATASOURCE_ATTRIBUTES,
                                       resolution_unit_id=resolution_meter.id,
                                       resolution_x=params['resolution'],
                                       resolution_y=params['resolution'])

        return CollectionForm().dump(cube)
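
A hedged sketch of the params this helper expects, inferred from the keys it reads; the controller class name and every value are placeholders, and the cube identifier suffix names the composite function (as parsed by get_cube_parts):

# Illustrative params for _create_cube_definition(); all values are placeholders.
params = dict(
    version=2,
    title='Sentinel-2 16-day composite',
    description='',
    metadata={},
    public=True,
    grs='BDC_SM',                        # must match an existing GridRefSys name
    resolution=10,
    temporal_composition=dict(schema='Cyclic', step=16, unit='day'),  # shape assumed
    quality_band='SCL',
    bands=[
        dict(name='B02', common_name='blue', data_type='int16'),
        dict(name='B03', common_name='green', data_type='int16'),
        dict(name='B04', common_name='red', data_type='int16'),
        dict(name='SCL', common_name='quality', data_type='uint8'),
    ],
    bands_quicklook=['B04', 'B03', 'B02'],
)
# cube = SomeController._create_cube_definition('S2-16D_STK', params)  # class name hypothetical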
Example No. 27
def publish(blends,
            band_map,
            quality_band: str,
            reuse_data_cube=None,
            **kwargs):
    """Execute publish task and catalog datacube result.

    Args:
        activity - Datacube Activity Model
    """
    if isinstance(blends, Iterable):
        blend_reference = blends[0]
    else:
        blend_reference = blends

    period = blend_reference['period']
    logging.info(f'Executing publish {period}')

    version = blend_reference['version']

    cube: Collection = Collection.query().filter(
        Collection.name == blend_reference['datacube'],
        Collection.version == version).first()
    warped_datacube = blend_reference['warped_datacube']
    tile_id = blend_reference['tile_id']
    reused_cube = blend_reference.get('reuse_datacube')

    # Retrieve which bands to generate quick look
    bands = cube.bands
    band_id_map = {band.id: band.name for band in bands}

    quicklook = cube.quicklook[0]

    quick_look_bands = [
        band_id_map[quicklook.red], band_id_map[quicklook.green],
        band_id_map[quicklook.blue]
    ]

    merges = dict()
    blend_files = dict()

    composite_function = DataCubeFragments(cube.name).composite_function

    quality_blend = dict(efficacy=100, cloudratio=0)

    for blend_result in blends:
        if composite_function != 'IDT':
            blend_files[blend_result['band']] = blend_result['blends']

        if blend_result.get('clear_observation_file'):
            blend_files[CLEAR_OBSERVATION_NAME] = {
                composite_function: blend_result['clear_observation_file']
            }

        if blend_result.get('total_observation'):
            blend_files[TOTAL_OBSERVATION_NAME] = {
                composite_function: blend_result['total_observation']
            }

        if blend_result.get('provenance'):
            blend_files[PROVENANCE_NAME] = {
                composite_function: blend_result['provenance']
            }

        if blend_result.get('datasource'):
            blend_files[DATASOURCE_NAME] = {
                composite_function: blend_result['datasource']
            }

        for merge_date, definition in blend_result['scenes'].items():
            merges.setdefault(
                merge_date,
                dict(datasets=definition.get('datasets',
                                             definition.get('dataset')),
                     cloudratio=definition['cloudratio'],
                     ARDfiles=dict()))
            merges[merge_date]['ARDfiles'].update(definition['ARDfiles'])
            merges[merge_date]['empty'] = definition.get('empty', False)

        if blend_result['band'] == quality_band:
            quality_blend = blend_result

    _blend_result = []
    cube_geom_table = cube.grs.geom_table
    srid_column = get_srid_column(cube_geom_table.c)
    srid = None
    result = db.session.query(srid_column.label('srid')).first()
    if result is not None:
        srid = result.srid

    if composite_function != 'IDT':
        cloudratio = quality_blend['cloudratio']

        # Generate quick looks for cube scenes
        _blend_result = publish_datacube(cube,
                                         quick_look_bands,
                                         tile_id,
                                         period,
                                         blend_files,
                                         cloudratio,
                                         band_map,
                                         reuse_data_cube=reuse_data_cube,
                                         srid=srid,
                                         **kwargs)

    # Generate quick looks of irregular cube
    wcube = Collection.query().filter(Collection.name == warped_datacube,
                                      Collection.version == version).first()

    _merge_result = dict()

    if not reused_cube:
        for merge_date, definition in merges.items():
            if definition.get('empty'):
                # Empty data cube period: keep only the composite item
                clear_merge(merge_date, definition)
                continue

            _merge_result[merge_date] = publish_merge(
                quick_look_bands,
                wcube,
                tile_id,
                merge_date,
                definition,
                band_map,
                reuse_data_cube=reuse_data_cube,
                srid=srid)

        try:
            db.session.commit()
        except Exception:
            db.session.rollback()

    return _blend_result, _merge_result
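
For reference, a hedged sketch of one element of blends, reconstructed from the keys publish() reads; all paths and names are placeholders:

# Illustrative blend result consumed by publish(); every value is a placeholder.
blend_result = dict(
    period='2020-01-01_2020-01-16',
    version=2,
    datacube='S2-16D',
    warped_datacube='S2-16D_IDT',
    tile_id='011009',
    band='B04',
    blends={'STK': '/placeholder/S2-16D_STK_B04.tif'},
    scenes={
        '2020-01-03': dict(
            datasets=['S2_L2A'],
            cloudratio=12.5,
            ARDfiles={'B04': '/placeholder/warped_B04.tif'},
            empty=False,
        ),
    },
    # optional keys: clear_observation_file, total_observation, provenance,
    # datasource, reuse_datacube
)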