Example #1
def publish(blends):
    """Execute publish task and catalog datacube result.

    Args:
        activity - Datacube Activity Model
    """
    logging.warning('Executing publish')

    cube = Collection.query().filter(
        Collection.id == blends[0]['datacube']).first()
    warped_datacube = blends[0]['warped_datacube']
    tile_id = blends[0]['tile_id']
    period = blends[0]['period']
    cloudratio = blends[0]['cloudratio']

    # Retrieve which bands to generate quick look
    quick_look_bands = cube.bands_quicklook.split(',')

    merges = dict()
    blend_files = dict()

    for blend_result in blends:
        blend_files[blend_result['band']] = blend_result['blends']

        if blend_result.get('cloud_count_file'):
            blend_files['cnc'] = dict(MED=blend_result['cloud_count_file'],
                                      STK=blend_result['cloud_count_file'])

        for merge_date, definition in blend_result['scenes'].items():
            merges.setdefault(
                merge_date,
                dict(dataset=definition['dataset'],
                     cloudratio=definition['cloudratio'],
                     ARDfiles=dict()))
            merges[merge_date]['ARDfiles'].update(definition['ARDfiles'])

    # Generate quick looks for cube scenes
    publish_datacube(cube, quick_look_bands, cube.id, tile_id, period,
                     blend_files, cloudratio)

    # Generate quick looks of irregular cube
    wcube = Collection.query().filter(Collection.id == warped_datacube).first()

    for merge_date, definition in merges.items():
        date = merge_date.replace(definition['dataset'], '')

        publish_merge(quick_look_bands, wcube, definition['dataset'], tile_id,
                      period, date, definition)

    try:
        refresh_materialized_view(db.session, AssetMV.__table__)
        db.session.commit()
        logging.info('View refreshed.')
    except Exception:
        db.session.rollback()
        logging.exception('Could not refresh the materialized view.')
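A minimal sketch of the blends payload this function expects, inferred from the key lookups above; all values are illustrative, not taken from the project:

# Hypothetical input for publish(); the key names come from the code
# above, the values are made up for illustration.
blends = [
    {
        'datacube': 'LC8_30_MED',
        'warped_datacube': 'LC8_30',
        'tile_id': '089098',
        'period': '2019-01-01_2019-01-16',
        'cloudratio': 12.5,
        'band': 'ndvi',
        'blends': {'MED': 'ndvi_med.tif', 'STK': 'ndvi_stk.tif'},
        'scenes': {
            '2019-01-01LC8': {
                'dataset': 'LC8',
                'cloudratio': 10.0,
                'ARDfiles': {'ndvi': 'warped_ndvi_2019-01-01.tif'},
            },
        },
    },
]
publish(blends)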
Example #2
    def start_process(self, params):
        cube_id = get_cube_id(params['datacube'], 'MED')
        tiles = params['tiles'].split(',')
        start_date = datetime.strptime(params['start_date'],
                                       '%Y-%m-%d').strftime('%Y-%m-%d')
        end_date = datetime.strptime(params['end_date'], '%Y-%m-%d').strftime('%Y-%m-%d') \
            if params.get('end_date') else datetime.now().strftime('%Y-%m-%d')

        # verify cube info
        cube_infos = Collection.query().filter(
            Collection.id == cube_id).first()
        if not cube_infos:
            return 'Cube not found!', 404

        # get bands list
        bands = Band.query().filter(
            Band.collection_id == get_cube_id(params['datacube'])).all()
        bands_list = [band.name for band in bands]

        # items => old mosaic
        # orchestrate
        self.score['items'] = orchestrate(params['datacube'], cube_infos,
                                          tiles, start_date, end_date)

        # prepare merge
        prepare_merge(self, params['datacube'],
                      params['collections'].split(','), bands_list,
                      cube_infos.bands_quicklook, bands[0].resolution_x,
                      bands[0].resolution_y, bands[0].fill,
                      cube_infos.raster_size_schemas.raster_size_x,
                      cube_infos.raster_size_schemas.raster_size_y,
                      cube_infos.raster_size_schemas.chunk_size_x,
                      cube_infos.grs_schema.crs)

        return 'Successfully', 201
Example #3
    def check_for_invalid_merges(cls, datacube: str, tile: str,
                                 start_date: str, last_date: str) -> tuple:
        """List merge files used in data cube and check for invalid scenes.

        Args:
            datacube: Data cube name
            tile: Brazil Data Cube tile identifier
            start_date: Activity start date (period)
            last_date: Activity end date (period)

        Returns:
            The validated merge result and the HTTP status code
        """
        cube = Collection.query().filter(Collection.id == datacube).first()

        if cube is None or not cube.is_cube:
            raise NotFound('Cube {} not found'.format(datacube))

        # TODO: validate schema to avoid a start/end range that is too broad

        res = Activity.list_merge_files(datacube, tile, start_date, last_date)

        result = validate_merges(res)

        return result, 200
Example #4
    def get_cube(cls, cube_name: str):
        collection = Collection.query().filter(
            Collection.id == cube_name).first()

        if collection is None or not collection.is_cube:
            return 'Cube "{}" not found.'.format(cube_name), 404

        return Serializer.serialize(collection), 200
Example #5
    def warped_datacube(self) -> Collection:
        """Retrieve cached datacube defintion."""
        if not self._warped:
            datacube_warped = get_cube_id(self.datacube.id)

            self._warped = Collection.query().filter(
                Collection.id == datacube_warped).first()

        return self._warped
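The property above implements a manual lazy cache through self._warped. A behavior-equivalent sketch using functools.cached_property (Python 3.8+; the surrounding class name is hypothetical):

from functools import cached_property

class CubeBusiness:
    """Sketch only: caches the warped data cube lookup per instance."""

    @cached_property
    def warped_datacube(self) -> Collection:
        datacube_warped = get_cube_id(self.datacube.id)
        return Collection.query().filter(
            Collection.id == datacube_warped).first()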
Example #6
def publish(blends):
    logging.warning('Executing publish')

    cube = Collection.query().filter(
        Collection.id == blends[0]['datacube']).first()
    warped_datacube = blends[0]['warped_datacube']
    tile_id = blends[0]['tile_id']
    period = blends[0]['period']
    cloudratio = blends[0]['cloudratio']

    # Retrieve which bands to generate quick look
    quick_look_bands = cube.bands_quicklook.split(',')

    merges = dict()
    blend_files = dict()

    for blend_result in blends:
        blend_files[blend_result['band']] = blend_result['blends']

        for merge_date, definition in blend_result['scenes'].items():
            merges.setdefault(
                merge_date,
                dict(dataset=definition['dataset'],
                     cloudratio=definition['cloudratio'],
                     ARDfiles=dict()))
            merges[merge_date]['ARDfiles'].update(definition['ARDfiles'])

    # Generate quick looks for cube scenes
    publish_datacube(cube, quick_look_bands, cube.id, tile_id, period,
                     blend_files, cloudratio)

    # Generate quick looks of irregular cube
    wcube = Collection.query().filter(
        Collection.id == warped_datacube).first()

    for merge_date, definition in merges.items():
        date = merge_date.replace(definition['dataset'], '')

        publish_merge(quick_look_bands, wcube, definition['dataset'], tile_id,
                      period, date, definition)
Example #7
def load_collections(fixture_path: str):
    """Load default collections to database.

    Args:
        fixture_path - Path relative to fixtures, e.g. 'data/tiles.json'
    """
    collections = json_parser(resource_string(__name__, fixture_path))

    with db.session.begin_nested():
        for collection in collections:
            bands = collection.pop('bands')

            c = Collection(**collection)
            c.save(commit=False)

            for band in bands:
                b = Band(**band)
                b.collection = c

                b.save(commit=False)
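A minimal fixture matching what the loop expects: a JSON array of collection records, each carrying a bands array that is popped off before the Collection is constructed. Every field name other than bands is illustrative:

[
    {
        "id": "LC8_30",
        "grs_schema_id": "aea_250k",
        "is_cube": false,
        "bands": [
            {"name": "ndvi", "common_name": "ndvi", "data_type": "int16"}
        ]
    }
]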
Example #8
def dispatch(activity: dict):
    """Dispatch the activity to the respective Celery task handler.

    Args:
        activity (dict) - A serialized RadcorActivity that has not been processed yet
    """
    from .sentinel import tasks as sentinel_tasks
    from .landsat import tasks as landsat_tasks
    # TODO: Implement it as a factory (TaskDispatcher) and pass the responsibility to the task type handler (see the sketch after this function)

    app = activity.get('activity_type')

    if app == 'downloadS2':
        # We assume the downloaded product is TOA and that the surface reflectance collection exists
        collection_sr = Collection.query().filter(
            Collection.id == 'S2SR_SEN28').first()

        if collection_sr is None:
            raise RuntimeError('The collection "S2SR_SEN28" not found')

        # Raw chain represents TOA publish chain
        publish_raw_data_chain = sentinel_tasks.publish_sentinel.s()
        # Atm Correction chain
        atm_corr_publish_chain = sentinel_tasks.atm_correction.s(
        ) | sentinel_tasks.publish_sentinel.s()
        # Upload chain
        upload_chain = sentinel_tasks.upload_sentinel.s()

        inner_group = upload_chain

        if activity['args'].get('harmonize'):
            # Harmonization chain
            harmonize_chain = sentinel_tasks.harmonization_sentinel.s() | sentinel_tasks.publish_sentinel.s() | \
                        sentinel_tasks.upload_sentinel.s()
            inner_group = group(upload_chain, harmonize_chain)

        inner_group = atm_corr_publish_chain | inner_group
        outer_group = group(publish_raw_data_chain, inner_group)
        task_chain = sentinel_tasks.download_sentinel.s(activity) | outer_group
        return chain(task_chain).apply_async()
    elif app == 'correctionS2':
        task_chain = sentinel_tasks.atm_correction.s(activity) | \
                        sentinel_tasks.publish_sentinel.s() | \
                        sentinel_tasks.upload_sentinel.s()
        return chain(task_chain).apply_async()
    elif app == 'publishS2':
        tasks = [sentinel_tasks.publish_sentinel.s(activity)]

        if 'S2SR' in activity['collection_id']:
            tasks.append(sentinel_tasks.upload_sentinel.s())

        return chain(*tasks).apply_async()
    elif app == 'harmonizeS2':
        task_chain = sentinel_tasks.harmonization_sentinel.s(activity) | sentinel_tasks.publish_sentinel.s() | \
                    sentinel_tasks.upload_sentinel.s()
        return chain(task_chain).apply_async()
    elif app == 'uploadS2':
        return sentinel_tasks.upload_sentinel.s(activity).apply_async()

    elif app == 'downloadLC8':
        # We assume the downloaded product is DN and that the surface reflectance collection exists
        collection_lc8 = Collection.query().filter(
            Collection.id == 'LC8SR').first()

        if collection_lc8 is None:
            raise RuntimeError('The collection "LC8SR" not found')

        # Raw chain represents DN publish chain
        raw_data_chain = landsat_tasks.publish_landsat.s()
        # Atm Correction chain
        atm_corr_chain = landsat_tasks.atm_correction_landsat.s()
        # Publish ATM Correction
        publish_atm_chain = landsat_tasks.publish_landsat.s(
        ) | landsat_tasks.upload_landsat.s()

        inner_group = publish_atm_chain

        # Check whether to add the harmonization chain to the group
        if activity['args'].get('harmonize'):
            # Harmonization chain
            harmonize_chain = landsat_tasks.harmonization_landsat.s() | landsat_tasks.publish_landsat.s() | \
                        landsat_tasks.upload_landsat.s()
            inner_group = group(publish_atm_chain, harmonize_chain)

        atm_chain = atm_corr_chain | inner_group
        outer_group = group(raw_data_chain, atm_chain)
        task_chain = landsat_tasks.download_landsat.s(activity) | outer_group

        return chain(task_chain).apply_async()
    elif app == 'correctionLC8':
        # Atm Correction chain
        atm_corr_chain = landsat_tasks.atm_correction_landsat.s(activity)
        # Publish ATM Correction
        publish_atm_chain = landsat_tasks.publish_landsat.s(
        ) | landsat_tasks.upload_landsat.s()

        inner_group = publish_atm_chain

        # Check whether to add the harmonization chain to the group
        if activity['args'].get('harmonize'):
            # Harmonization chain
            harmonize_chain = landsat_tasks.harmonization_landsat.s() | landsat_tasks.publish_landsat.s() | \
                        landsat_tasks.upload_landsat.s()
            inner_group = group(publish_atm_chain, harmonize_chain)

        task_chain = atm_corr_chain | inner_group
        return chain(task_chain).apply_async()
    elif app == 'publishLC8':
        task_chain = landsat_tasks.publish_landsat.s(
            activity) | landsat_tasks.upload_landsat.s()
        return chain(task_chain).apply_async()
    elif app == 'harmonizeLC8':
        task_chain = landsat_tasks.harmonization_landsat.s(activity) | landsat_tasks.publish_landsat.s() | \
                    landsat_tasks.upload_landsat.s()
        return chain(task_chain).apply_async()
    elif app == 'uploadLC8':
        return landsat_tasks.upload_landsat.s(activity).apply_async()
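A sketch of the factory suggested by the TODO above. Everything here is hypothetical: each activity type registers its own chain builder, and dispatch() reduces to a table lookup (sentinel_tasks is assumed to be imported as in the function above):

class TaskDispatcher:
    """Hypothetical sketch: maps an activity_type to a chain builder."""

    def __init__(self):
        self._handlers = {}

    def register(self, activity_type):
        """Decorator that registers a handler for the given activity type."""
        def wrapper(handler):
            self._handlers[activity_type] = handler
            return handler
        return wrapper

    def dispatch(self, activity: dict):
        handler = self._handlers.get(activity.get('activity_type'))
        if handler is None:
            raise RuntimeError(
                'No handler for {}'.format(activity.get('activity_type')))
        return handler(activity)


dispatcher = TaskDispatcher()


@dispatcher.register('uploadS2')
def upload_s2(activity):
    return sentinel_tasks.upload_sentinel.s(activity).apply_async()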
Example #9
def publish(self, activity):
    print('==> start PUBLISH')
    services = self.services

    activity['mystart'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    warped_cube = '_'.join(activity['datacube'].split('_')[0:2])

    # Generate quicklooks for CUBES (MEDIAN, STACK ...) 
    qlbands = activity['quicklook'].split(',')
    for function in ['MED', 'STK']:
        cube_id = get_cube_id(activity['datacube'], function)
        general_scene_id = '{}_{}_{}_{}'.format(
            cube_id, activity['tileid'], activity['start'], activity['end'])

        qlfiles = []
        for band in qlbands:
            qlfiles.append(services.prefix + activity['blended'][band][function + 'file'])

        pngname = generateQLook(general_scene_id, qlfiles)
        dirname_ql = activity['dirname'].replace(
            '{}/'.format(warped_cube), '{}/'.format(cube_id))
        if pngname is None:
            print('publish - generateQLook failed for {}'.format(general_scene_id))
            return False
        s3pngname = os.path.join(dirname_ql, '{}_{}'.format(activity['start'], activity['end']), os.path.basename(pngname))
        services.upload_file_S3(pngname, s3pngname, {'ACL': 'public-read'})
        os.remove(pngname)

    # Generate quicklooks for all ARD scenes (WARPED)
    for datedataset in activity['scenes']:
        scene = activity['scenes'][datedataset]

        cube_id = get_cube_id(activity['datacube'])
        general_scene_id = '{}_{}_{}'.format(
            cube_id, activity['tileid'], str(scene['date'])[0:10])
        qlfiles = []
        for band in qlbands:
            filename = os.path.join(services.prefix + activity['dirname'], str(scene['date'])[0:10], scene['ARDfiles'][band])
            qlfiles.append(filename)

        pngname = generateQLook(general_scene_id, qlfiles)
        if pngname is None:
            print('publish - generateQLook failed for {}'.format(general_scene_id))
            return False
        s3pngname = os.path.join(activity['dirname'], str(scene['date'])[0:10], os.path.basename(pngname))
        services.upload_file_S3(pngname, s3pngname, {'ACL': 'public-read'})
        os.remove(pngname)

    # register collection_items and assets in DB (MEDIAN, STACK ...)
    for function in ['MED', 'STK']:
        cube_id = '{}_{}'.format(activity['datacube'], function)
        cube = Collection.query().filter(
            Collection.id == cube_id
        ).first()
        if not cube:
            print('cube {} not found!'.format(cube_id))
            continue

        general_scene_id = '{}_{}_{}_{}'.format(
            cube_id, activity['tileid'], activity['start'], activity['end'])

        # delete existing collection_items and assets, if any
        assets = Asset.query().filter(
            Asset.collection_item_id == general_scene_id
        ).all()
        for asset in assets:
            db.session().delete(asset)
            db.session().commit()

        coll_item = CollectionItem.query().filter(
            CollectionItem.id == general_scene_id
        ).first()
        if coll_item:
            db.session().delete(coll_item)
            db.session().commit()

        # insert 'collection_item'
        range_date = '{}_{}'.format(activity['start'], activity['end'])
        png_name = '{}.png'.format(general_scene_id)
        dirname_ql = activity['dirname'].replace(
            '{}/'.format(warped_cube), '{}/'.format(cube_id))
        s3_pngname = os.path.join(dirname_ql, range_date, png_name)
        CollectionItem(
            id=general_scene_id,
            collection_id=cube_id,
            grs_schema_id=cube.grs_schema_id,
            tile_id=activity['tileid'],
            item_date=activity['start'],
            composite_start=activity['start'],
            composite_end=activity['end'],
            quicklook='{}/{}'.format(BUCKET_NAME, s3_pngname),
            cloud_cover=activity['cloudratio'],
            scene_type=function,
            compressed_file=None
        ).save()

        # insert 'assets'
        bands_by_cube = Band.query().filter(
            Band.collection_id == cube_id
        ).all()
        for band in activity['bands']:
            if band == 'quality': 
                continue
            band_id = list(filter(lambda b: str(b.common_name) == band, bands_by_cube))
            if not band_id:
                print('band {} not found!'.format(band))
                continue

            Asset(
                collection_id=cube_id,
                band_id=band_id[0].id,
                grs_schema_id=cube.grs_schema_id,
                tile_id=activity['tileid'],
                collection_item_id=general_scene_id,
                url='{}/{}'.format(BUCKET_NAME, activity['blended'][band][function + 'file']),
                source=None,
                raster_size_x=activity['raster_size_x'],
                raster_size_y=activity['raster_size_y'],
                raster_size_t=1,
                chunk_size_x=activity['chunk_size_x'],
                chunk_size_y=activity['chunk_size_y'],
                chunk_size_t=1
            ).save()

    # Register all ARD scenes - WARPED Collection
    for datedataset in activity['scenes']:
        scene = activity['scenes'][datedataset]

        cube_id = get_cube_id(activity['datacube'])
        cube = Collection.query().filter(
            Collection.id == cube_id
        ).first()
        if not cube:
            print('cube {} not found!'.format(cube_id))
            continue

        general_scene_id = '{}_{}_{}'.format(
            cube_id, activity['tileid'], str(scene['date'])[0:10])

        # delete existing 'assets' and 'collection_items', if any
        assets = Asset.query().filter(
            Asset.collection_item_id == general_scene_id
        ).all()
        for asset in assets:
            db.session().delete(asset)
            db.session().commit()

        coll_item = CollectionItem.query().filter(
            CollectionItem.id == general_scene_id
        ).first()
        if coll_item:
            db.session().delete(coll_item)
            db.session().commit()

        # insert 'collection_item'
        pngname = '{}.png'.format(general_scene_id)
        s3pngname = os.path.join(activity['dirname'], str(scene['date'])[0:10], pngname)
        CollectionItem(
            id=general_scene_id,
            collection_id=cube_id,
            grs_schema_id=cube.grs_schema_id,
            tile_id=activity['tileid'],
            item_date=scene['date'],
            composite_start=scene['date'],
            composite_end=scene['date'],
            quicklook='{}/{}'.format(BUCKET_NAME, s3pngname),
            cloud_cover=int(scene['cloudratio']),
            scene_type='WARPED',
            compressed_file=None
        ).save()

        # insert 'assets'
        bands_by_cube = Band.query().filter(
            Band.collection_id == cube_id
        ).all()
        for band in activity['bands']:
            if band not in scene['ARDfiles']:
                print('publish - band {} not found in scene ARDfiles'.format(band))
                continue
            band_id = list(filter(lambda b: str(b.common_name) == band, bands_by_cube))
            if not band_id:
                print('band {} not found!'.format(band))
                continue
            
            raster_size_x = scene.get('raster_size_x') or activity.get('raster_size_x')
            raster_size_y = scene.get('raster_size_y') or activity.get('raster_size_y')
            block_size = scene.get('block_size') or activity.get('block_size')
            Asset(
                collection_id=cube_id,
                band_id=band_id[0].id,
                grs_schema_id=cube.grs_schema_id,
                tile_id=activity['tileid'],
                collection_item_id=general_scene_id,
                url='{}/{}'.format(BUCKET_NAME, os.path.join(activity['dirname'], str(scene['date'])[0:10], scene['ARDfiles'][band])),
                source=None,
                raster_size_x=raster_size_x,
                raster_size_y=raster_size_y,
                raster_size_t=1,
                chunk_size_x=block_size,
                chunk_size_y=block_size,
                chunk_size_t=1
            ).save()

    # Update status and end time in DynamoDB
    activity['myend'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    activity['mystatus'] = 'DONE'
    services.put_item_kinesis(activity)

    refresh_materialized_view(db.session, AssetMV.__table__)
    return True
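The cleanup loops above issue one commit per deleted asset. Assuming Asset.query() and CollectionItem.query() return standard SQLAlchemy queries, the same cleanup can be batched into a single commit, for example:

# Sketch: delete existing assets and the collection item in one commit.
Asset.query().filter(
    Asset.collection_item_id == general_scene_id).delete()
CollectionItem.query().filter(
    CollectionItem.id == general_scene_id).delete()
db.session().commit()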
Example #10
def publish(collection_item: CollectionItem, scene: RadcorActivity):
    """Publish Landsat collection.

    It works with both Digital Number (DN) and Surface Reflectance (SR).

    Args:
        collection_item - Collection Item
        scene - Current Activity
    """
    identifier = scene.sceneid
    cc = identifier.split('_')
    pathrow = cc[2]
    date = cc[3]
    yyyymm = '{}-{}'.format(date[0:4], date[4:6])

    productdir = scene.args.get('file')

    logging.warning('Publish {} - {} (id={})'.format(scene.collection_id,
                                                     productdir, scene.id))

    if productdir and productdir.endswith('.gz'):
        target_dir = Path(
            Config.DATA_DIR) / 'Repository/Archive/{}/{}/{}'.format(
                collection_item.collection_id, yyyymm, pathrow)
        makedirs(target_dir, exist_ok=True)

        productdir = uncompress(productdir, str(target_dir))

    collection = Collection.query().filter(
        Collection.id == collection_item.collection_id).one()
    quicklook = collection.bands_quicklook.split(
        ',') if collection.bands_quicklook else DEFAULT_QUICK_LOOK_BANDS

    files = {}
    qlfiles = {}

    if collection.id == 'LC8DN':
        bands = BAND_MAP_DN
    elif collection.id == 'LC8NBAR':
        bands = BAND_MAP_NBAR
    else:
        bands = BAND_MAP_SR

    for gband, band in bands.items():
        template = productdir + '/LC08_*_{}_{}_*_{}.*'.format(
            pathrow, date, band)
        fs = glob.glob(template)

        if not fs:
            continue

        for f in fs:
            if f.lower().endswith('.tif'):
                files[gband] = f
                if gband in quicklook:
                    qlfiles[gband] = f

    # Skip EVI/NDVI generation for Surface Reflectance,
    # since espa-science has already produced those bands
    if collection.id == 'LC8DN' or collection.id == 'LC8NBAR':
        generate_vi(productdir, files)

    # Apply the valid range and generate COG files
    for band, file_path in files.items():
        if collection.id == 'LC8SR':
            _ = apply_valid_range(file_path, file_path)
        # Set destination of COG file
        files[band] = generate_cogs(file_path, file_path)
        if not is_valid_tif(file_path):
            raise RuntimeError('Invalid TIFF file {}'.format(file_path))

    # Extract basic scene information and build the quicklook
    pngname = productdir + '/{}.png'.format(identifier)

    dataset = GDALOpen(qlfiles['nir'], GA_ReadOnly)
    numlin = 768
    numcol = int(
        float(dataset.RasterXSize) / float(dataset.RasterYSize) * numlin)
    image = numpy.zeros((
        numlin,
        numcol,
        len(qlfiles),
    ), dtype=numpy.uint8)

    del dataset

    nb = 0
    for band in quicklook:
        template = qlfiles[band]
        dataset = GDALOpen(template, GA_ReadOnly)
        raster = dataset.GetRasterBand(1).ReadAsArray(0, 0,
                                                      dataset.RasterXSize,
                                                      dataset.RasterYSize)

        del dataset

        raster = resize(raster, (numlin, numcol), order=1, preserve_range=True)
        nodata = raster == -9999
        # Evaluate minimum and maximum values
        a = numpy.array(raster.flatten())
        p1, p99 = numpy.percentile(a[a > 0], (1, 99))
        # Rescale to the range 1-255; 0 is reserved for nodata
        raster = exposure.rescale_intensity(raster,
                                            in_range=(p1, p99),
                                            out_range=(1, 255)).astype(
                                                numpy.uint8)
        image[:, :, nb] = raster.astype(numpy.uint8) * numpy.invert(nodata)
        nb += 1

    write_png(pngname, image, transparent=(0, 0, 0))

    productdir = productdir.replace(Config.DATA_DIR, '')

    assets_to_upload = {
        'quicklook':
        dict(file=pngname, asset=productdir.replace('/Repository/Archive', ''))
    }

    for instance in ['local', 'aws']:
        engine_instance = {'local': db, 'aws': db_aws}
        engine = engine_instance[instance]

        # Skip catalog on aws for digital number
        if collection_item.collection_id == 'LC8DN' and instance == 'aws':
            continue

        if instance == 'aws':
            asset_url = productdir.replace('/Repository/Archive',
                                           Config.AWS_BUCKET_NAME)
        else:
            asset_url = productdir

        pngname = resource_path.join(asset_url, Path(pngname).name)

        assets_to_upload['quicklook']['asset'] = pngname

        with engine.session.begin_nested():
            with engine.session.no_autoflush:
                # Add collection item to the session if not present
                if collection_item not in engine.session:
                    item = engine.session.query(CollectionItem).filter(
                        CollectionItem.id == collection_item.id).first()

                    if not item:
                        cloned_properties = CollectionItemForm().dump(
                            collection_item)
                        collection_item = CollectionItem(**cloned_properties)
                        engine.session.add(collection_item)

                collection_item.quicklook = pngname

                collection_bands = engine.session.query(Band).filter(
                    Band.collection_id == collection_item.collection_id).all()

                # Insert the asset records for each band
                for band in files:
                    template = resource_path.join(asset_url,
                                                  Path(files[band]).name)

                    dataset = GDALOpen(files[band], GA_ReadOnly)
                    asset_band = dataset.GetRasterBand(1)

                    chunk_x, chunk_y = asset_band.GetBlockSize()

                    band_model = next(
                        filter(lambda b: band == b.common_name,
                               collection_bands), None)

                    if not band_model:
                        logging.warning(
                            'Band {} of collection {} not found in database. Skipping...'
                            .format(band, collection_item.collection_id))
                        continue

                    defaults = dict(url=template,
                                    source=cc[0],
                                    raster_size_x=dataset.RasterXSize,
                                    raster_size_y=dataset.RasterYSize,
                                    raster_size_t=1,
                                    chunk_size_t=1,
                                    chunk_size_x=chunk_x,
                                    chunk_size_y=chunk_y)

                    asset, _ = get_or_create_model(
                        Asset,
                        engine=engine,
                        defaults=defaults,
                        collection_id=scene.collection_id,
                        band_id=band_model.id,
                        grs_schema_id=scene.collection.grs_schema_id,
                        tile_id=collection_item.tile_id,
                        collection_item_id=collection_item.id,
                    )
                    asset.url = defaults['url']

                    assets_to_upload[band] = dict(file=files[band],
                                                  asset=asset.url)

                    # Add the asset to the current engine session (local or remote)
                    add_instance(engine, asset)

        # Persist database
        commit(engine)

    return assets_to_upload
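The quick look loop above stretches each band between its 1st and 99th percentiles, maps the result to 1-255, and reserves 0 for nodata. The same stretch as a self-contained helper, using the libraries already used by this snippet:

import numpy
from skimage import exposure

def stretch_band(raster: numpy.ndarray, nodata: float = -9999) -> numpy.ndarray:
    """Rescale a raster band to uint8 in 1..255, keeping 0 for nodata."""
    nodata_mask = raster == nodata
    p1, p99 = numpy.percentile(raster[raster > 0], (1, 99))
    scaled = exposure.rescale_intensity(
        raster, in_range=(p1, p99), out_range=(1, 255)).astype(numpy.uint8)
    return scaled * numpy.invert(nodata_mask)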
Example #11
def publish(collection_item: CollectionItem, scene: RadcorActivity):
    """Publish Landsat collection.

    It works with both Digital Number (DN) and Surface Reflectance (SR).

    Args:
        collection_item - Collection Item
        scene - Current Activity
    """
    identifier = scene.sceneid

    # Get the collection level to publish. Default is level 1
    collection_level = scene.args.get('level') or 1

    landsat_scene = factory.get_from_sceneid(identifier,
                                             level=collection_level)

    productdir = scene.args.get('file')

    logging.warning('Publish {} - {} (id={})'.format(scene.collection_id,
                                                     productdir, scene.id))

    if productdir and productdir.endswith('.gz'):
        target_dir = landsat_scene.path()
        makedirs(target_dir, exist_ok=True)

        productdir = uncompress(productdir, str(target_dir))

    collection = Collection.query().filter(
        Collection.id == collection_item.collection_id).one()
    quicklook = collection.bands_quicklook.split(
        ',') if collection.bands_quicklook else DEFAULT_QUICK_LOOK_BANDS

    files = {}
    qlfiles = {}

    bands = landsat_scene.get_band_map()

    for gband, band in bands.items():
        fs = landsat_scene.get_files()

        if not fs:
            continue

        for f in fs:
            if f.stem.endswith(band) and f.suffix.lower().endswith('.tif'):
                files[gband] = f
                if gband in quicklook:
                    qlfiles[gband] = str(f)

    # Generate Vegetation Index files
    generate_vi(productdir, files)

    # Apply the valid range and generate COG files
    for band, file_path in files.items():
        tif_file = str(file_path)

        if landsat_scene.level == 2:
            _ = apply_valid_range(tif_file, tif_file)

        # Set destination of COG file
        files[band] = generate_cogs(tif_file, tif_file)
        if not is_valid_tif(tif_file):
            raise RuntimeError('Invalid TIFF file {}'.format(tif_file))

    # Extract basic scene information and build the quicklook
    pngname = productdir + '/{}.png'.format(identifier)

    dataset = GDALOpen(qlfiles['nir'], GA_ReadOnly)
    numlin = 768
    numcol = int(
        float(dataset.RasterXSize) / float(dataset.RasterYSize) * numlin)
    del dataset

    create_quick_look(pngname,
                      [qlfiles[band] for band in quicklook if band in qlfiles],
                      rows=numlin,
                      cols=numcol)

    productdir = productdir.replace(Config.DATA_DIR, '')

    assets_to_upload = {
        'quicklook':
        dict(file=pngname, asset=productdir.replace('/Repository/Archive', ''))
    }

    for instance in ['local', 'aws']:
        engine_instance = {'local': db, 'aws': db_aws}
        engine = engine_instance[instance]

        # Skip catalog on aws for digital number
        if landsat_scene.level == 1 and instance == 'aws':
            continue

        if instance == 'aws':
            asset_url = productdir.replace('/Repository/Archive',
                                           Config.AWS_BUCKET_NAME)
        else:
            asset_url = productdir

        pngname = resource_path.join(asset_url, Path(pngname).name)

        assets_to_upload['quicklook']['asset'] = pngname

        with engine.session.begin_nested():
            with engine.session.no_autoflush:
                # Add collection item to the session if not present
                if collection_item not in engine.session:
                    item = engine.session.query(CollectionItem).filter(
                        CollectionItem.id == collection_item.id).first()

                    if not item:
                        cloned_properties = CollectionItemForm().dump(
                            collection_item)
                        collection_item = CollectionItem(**cloned_properties)
                        engine.session.add(collection_item)

                collection_item.quicklook = pngname

                collection_bands = engine.session.query(Band)\
                    .filter(Band.collection_id == collection_item.collection_id)\
                    .all()

                # Insert the asset records for each band
                for band in files:
                    template = resource_path.join(asset_url,
                                                  Path(files[band]).name)

                    dataset = GDALOpen(files[band], GA_ReadOnly)
                    asset_band = dataset.GetRasterBand(1)

                    chunk_x, chunk_y = asset_band.GetBlockSize()

                    band_model = next(
                        filter(lambda b: band == b.common_name,
                               collection_bands), None)

                    if not band_model:
                        logging.warning(
                            'Band {} of collection {} not found in database. Skipping...'
                            .format(band, collection_item.collection_id))
                        continue

                    defaults = dict(url=template,
                                    source=landsat_scene.source(),
                                    raster_size_x=dataset.RasterXSize,
                                    raster_size_y=dataset.RasterYSize,
                                    raster_size_t=1,
                                    chunk_size_t=1,
                                    chunk_size_x=chunk_x,
                                    chunk_size_y=chunk_y)

                    asset, _ = get_or_create_model(
                        Asset,
                        engine=engine,
                        defaults=defaults,
                        collection_id=scene.collection_id,
                        band_id=band_model.id,
                        grs_schema_id=scene.collection.grs_schema_id,
                        tile_id=collection_item.tile_id,
                        collection_item_id=collection_item.id,
                    )
                    asset.url = defaults['url']

                    assets_to_upload[band] = dict(file=files[band],
                                                  asset=asset.url)

                    # Add the asset to the current engine session (local or remote)
                    add_instance(engine, asset)

        # Persist database
        commit(engine)

    return assets_to_upload
Example #12
    def list_cubes(cls):
        """Retrieve the list of data cubes from Brazil Data Cube database."""
        cubes = Collection.query().filter(Collection.is_cube.is_(True)).all()

        return [Serializer.serialize(cube) for cube in cubes], 200
Example #13
    def create_cube(self, params):
        params['composite_function_list'] = ['IDENTITY', 'STK', 'MED']

        # generate cubes metadata
        cubes_db = Collection.query().filter().all()
        cubes = []
        cubes_serialized = []
        for composite_function in params['composite_function_list']:
            c_function_id = composite_function.upper()
            raster_size_id = '{}-{}'.format(params['grs'],
                                            int(params['resolution']))
            cube_id = get_cube_id(params['datacube'], c_function_id)

            # add cube
            if not list(filter(lambda x: x.id == cube_id, cubes)) and not list(
                    filter(lambda x: x.id == cube_id, cubes_db)):
                cube = Collection(
                    id=cube_id,
                    temporal_composition_schema_id=(
                        params['temporal_schema']
                        if c_function_id != 'IDENTITY' else 'Anull'),
                    raster_size_schema_id=raster_size_id,
                    composite_function_schema_id=c_function_id,
                    grs_schema_id=params['grs'],
                    description=params['description'],
                    radiometric_processing=None,
                    geometry_processing=None,
                    sensor=None,
                    is_cube=True,
                    oauth_scope=params.get('oauth_scope', None),
                    license=params['license'],
                    bands_quicklook=','.join(params['bands_quicklook']),
                    metadata=params['metadata'])
                cubes.append(cube)
                cubes_serialized.append(Serializer.serialize(cube))
        BaseModel.save_all(cubes)

        bands = []
        for cube in cubes:
            # save bands
            for band in params['bands']:
                band = band.strip()

                if (band == 'cnc' and cube.composite_function_schema_id == 'IDENTITY') or \
                        (band == 'quality' and cube.composite_function_schema_id != 'IDENTITY'):
                    continue

                is_not_cloud = band != 'quality' and band != 'cnc'
                bands.append(
                    Band(name=band,
                         collection_id=cube.id,
                         min=0 if is_not_cloud else 0,
                         max=10000 if is_not_cloud else 255,
                         fill=-9999 if is_not_cloud else 0,
                         scale=0.0001 if is_not_cloud else 1,
                         data_type='int16' if is_not_cloud else 'Uint16',
                         common_name=band,
                         resolution_x=params['resolution'],
                         resolution_y=params['resolution'],
                         resolution_unit='m',
                         description='',
                         mime_type='image/tiff'))
        BaseModel.save_all(bands)

        return cubes_serialized, 201
Example #14
    def create(cls, params: dict):
        """Create and persist a data cube in the database."""
        params['composite_function_list'] = ['IDENTITY', 'STK', 'MED']

        # generate cubes metadata
        cubes_db = Collection.query().filter().all()
        cubes = []
        cubes_serialized = []

        for composite_function in params['composite_function_list']:
            c_function_id = composite_function.upper()

            cube_id = get_cube_id(params['datacube'], c_function_id)

            raster_size_id = '{}-{}'.format(params['grs'],
                                            int(params['resolution']))

            temporal_composition = (params['temporal_schema']
                                    if c_function_id != 'IDENTITY'
                                    else 'Anull')

            # add cube
            if not list(filter(lambda x: x.id == cube_id, cubes)) and not list(
                    filter(lambda x: x.id == cube_id, cubes_db)):
                cube = Collection(
                    id=cube_id,
                    temporal_composition_schema_id=temporal_composition,
                    raster_size_schema_id=raster_size_id,
                    composite_function_schema_id=c_function_id,
                    grs_schema_id=params['grs'],
                    description=params['description'],
                    radiometric_processing=None,
                    geometry_processing=None,
                    sensor=None,
                    is_cube=True,
                    oauth_scope=params.get('oauth_scope', None),
                    bands_quicklook=','.join(params['bands_quicklook']),
                    license=params.get('license'))

                cubes.append(cube)
                cubes_serialized.append(CollectionForm().dump(cube))

        BaseModel.save_all(cubes)

        bands = []

        for cube in cubes:
            fragments = get_cube_parts(cube.id)

            # An IDENTITY data cube id is composed of the collection name and resolution (e.g. LC8_30, S2_10)
            is_identity = len(fragments) == 2

            # save bands
            for band in params['bands']:
                # Skip creation of band CNC for IDENTITY data cube
                # or band quality for composite data cube
                if (band == 'cnc' and is_identity) or (band == 'quality'
                                                       and not is_identity):
                    continue

                is_not_cloud = band != 'quality' and band != 'cnc'

                band = band.strip()
                bands.append(
                    Band(name=band,
                         collection_id=cube.id,
                         min=0 if is_not_cloud else 0,
                         max=10000 if is_not_cloud else 255,
                         fill=-9999 if is_not_cloud else 0,
                         scale=0.0001 if is_not_cloud else 1,
                         data_type='int16' if is_not_cloud else 'Uint16',
                         common_name=band,
                         resolution_x=params['resolution'],
                         resolution_y=params['resolution'],
                         resolution_unit='m',
                         description='',
                         mime_type='image/tiff'))

        BaseModel.save_all(bands)

        return cubes_serialized, 201
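The examples never show get_cube_id itself. Judging from its call sites (get_cube_id(datacube, 'MED') for composite cubes, get_cube_id(datacube) for the identity cube) and the two-fragment rule above, a plausible reconstruction:

def get_cube_id(datacube: str, function: str = None) -> str:
    """Hypothetical sketch, inferred from the call sites in these examples."""
    # Identity cubes keep only '<collection>_<resolution>', e.g. 'LC8_30'.
    base = '_'.join(datacube.split('_')[:2])
    # Composite cubes append the function name, e.g. 'LC8_30_MED'.
    return '{}_{}'.format(base, function) if function else base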
Example #15
    def orchestrate(self):
        """Orchestrate datacube defintion and prepare temporal resolutions."""
        self.datacube = Collection.query().filter(
            Collection.id == self.params['datacube']).one()

        temporal_schema = self.datacube.temporal_composition_schema.temporal_schema
        temporal_step = self.datacube.temporal_composition_schema.temporal_composite_t

        # Create tiles
        self.create_tiles(self.params['tiles'], self.datacube)

        cube_start_date = self.params['start_date']

        dstart = self.params['start_date']
        dend = self.params['end_date']

        if cube_start_date is None:
            cube_start_date = dstart.strftime('%Y-%m-%d')

        cube_end_date = dend.strftime('%Y-%m-%d')

        periodlist = decode_periods(temporal_schema, cube_start_date,
                                    cube_end_date, int(temporal_step))

        where = [Tile.grs_schema_id == self.datacube.grs_schema_id]

        if self.params.get('tiles'):
            where.append(Tile.id.in_(self.params['tiles']))

        self.tiles = Tile.query().filter(*where).all()

        self.bands = Band.query().filter(
            Band.collection_id == self.warped_datacube.id).all()

        number_cols = int(self.datacube.raster_size_schemas.raster_size_x)
        number_rows = int(self.datacube.raster_size_schemas.raster_size_y)

        for tile in self.tiles:
            self.mosaics[tile.id] = dict(periods=dict())

            for datekey in sorted(periodlist):
                requested_period = periodlist[datekey]
                for periodkey in requested_period:
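                    # periodkey has the form '<key>_<startdate>_<enddate>'; the leading key is discarded below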
                    _, startdate, enddate = periodkey.split('_')

                    if dstart is not None and startdate < dstart.strftime(
                            '%Y-%m-%d'):
                        continue
                    if dend is not None and enddate > dend.strftime(
                            '%Y-%m-%d'):
                        continue

                    self.mosaics[tile.id]['periods'][periodkey] = dict(
                        start=startdate,
                        end=enddate,
                        cols=number_cols,
                        rows=number_rows,
                        dirname='{}/{}/{}-{}/'.format(
                            self.datacube.id, tile.id, startdate, enddate))
Example #16
    def get_collection(self, activity) -> Collection:
        """Retrieve the collection associated with the Builder Activity."""
        return Collection.query().filter(
            Collection.id == activity.collection_id).one()
Example #17
    def warped_datacube(self):
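        # Assumes a 3-character composite-function suffix, e.g.
        # 'LC8_30_MED'[:-3] -> 'LC8_30_', so the result is 'LC8_30_WARPED'.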
        datacube_warped = '{}WARPED'.format(self.datacube.id[:-3])

        return Collection.query().filter(
            Collection.id == datacube_warped).first()
Example #18
    def orchestrate(self):
        self.datacube = Collection.query().filter(
            Collection.id == self.params['datacube']).one()

        temporal_schema = self.datacube.temporal_composition_schema.temporal_schema
        temporal_step = self.datacube.temporal_composition_schema.temporal_composite_t

        # Create tiles
        self.create_tiles(self.params['tiles'], self.datacube)

        # TODO: Check in STAC for cube item
        # datacube_stac = stac_cli.collection(self.datacube.id)

        collections_items = CollectionItem.query().filter(
            CollectionItem.collection_id == self.datacube.id,
            CollectionItem.grs_schema_id ==
            self.datacube.grs_schema_id).order_by(
                CollectionItem.composite_start).all()
        cube_start_date = self.params['start_date']
        if list(
                filter(lambda c_i: c_i.tile_id == self.params['tiles'][0],
                       collections_items)):
            cube_start_date = collections_items[0].composite_start

        dstart = self.params['start_date']
        dend = self.params['end_date']

        if cube_start_date is None:
            cube_start_date = dstart.strftime('%Y-%m-%d')

        cube_end_date = dend.strftime('%Y-%m-%d')

        periodlist = decode_periods(temporal_schema, cube_start_date,
                                    cube_end_date, int(temporal_step))

        where = [Tile.grs_schema_id == self.datacube.grs_schema_id]

        if self.params.get('tiles'):
            where.append(Tile.id.in_(self.params['tiles']))

        self.tiles = Tile.query().filter(*where).all()

        self.bands = Band.query().filter(
            Band.collection_id == self.datacube.id).all()

        number_cols = self.datacube.raster_size_schemas.raster_size_x
        number_rows = self.datacube.raster_size_schemas.raster_size_y

        for tile in self.tiles:
            self.mosaics[tile.id] = dict(periods=dict())

            for datekey in sorted(periodlist):
                requested_period = periodlist[datekey]
                for periodkey in requested_period:
                    _, startdate, enddate = periodkey.split('_')

                    if dstart is not None and startdate < dstart.strftime(
                            '%Y-%m-%d'):
                        continue
                    if dend is not None and enddate > dend.strftime(
                            '%Y-%m-%d'):
                        continue

                    self.mosaics[tile.id]['periods'][periodkey] = dict(
                        start=startdate,
                        end=enddate,
                        cols=number_cols,
                        rows=number_rows,
                        dirname='{}/{}/{}-{}/'.format(
                            self.datacube.id, tile.id, startdate, enddate))