def publish(collection_item: Item, scene: RadcorActivity, skip_l1=False, **kwargs):
    """Publish Landsat collection.

    It works with both Digital Number (DN) and Surface Reflectance (SR).

    The scene band files are converted in place to COG, the vegetation index
    files and a PNG quicklook are generated, and the item assets, extent and
    convex hull are stored through the ``local`` engine and, unless skipped,
    the ``aws`` engine.

    Args:
        collection_item - Collection Item
        scene - Current Activity
        skip_l1 - Not read by this function.
        **kwargs - Not read by this function.

    Returns:
        dict keyed by ``'quicklook'`` and by each published band, where every
        value is a dict with the local ``file`` and the ``asset`` path.

    Raises:
        RuntimeError - When a quicklook band is not found in the database,
            when no quicklook band file exists, when a generated file is not
            a valid tif or when no scene band is registered for the collection.
    """
    identifier = scene.sceneid

    # Get collection level to publish. Default is l1
    collection_level = scene.args.get('level') or 1

    landsat_scene = factory.get_from_sceneid(identifier, level=collection_level)

    productdir = scene.args.get('file')

    logging.warning('Publish {} - {} (id={})'.format(scene.collection_id, productdir, scene.id))

    if productdir and productdir.endswith('.gz'):
        target_dir = landsat_scene.path()
        makedirs(target_dir, exist_ok=True)

        productdir = uncompress(productdir, str(target_dir))

    collection = Collection.query().filter(
        Collection.id == collection_item.collection_id).one()

    # Fetch a single row: the red/green/blue attributes are read from it below.
    quicklook = Quicklook.query().filter(
        Quicklook.collection_id == collection.id).first()

    if quicklook:
        channel_ids = [quicklook.red, quicklook.green, quicklook.blue]
        # ``in_`` expects one iterable holding all candidate values.
        quicklook_bands = Band.query().filter(Band.id.in_(channel_ids)).all()
        # The query gives no ordering guarantee, so names are looked up by id
        # to keep the red, green, blue sequence.
        names_by_id = {entry.id: entry.name for entry in quicklook_bands}
        missing = [band_id for band_id in channel_ids if band_id not in names_by_id]
        if missing:
            raise RuntimeError('Quicklook bands {} of collection {} not found in database'.format(
                missing, collection.id))
        quicklook = [names_by_id[band_id] for band_id in channel_ids]
    else:
        quicklook = DEFAULT_QUICK_LOOK_BANDS

    files = {}
    qlfiles = {}

    bands = landsat_scene.get_band_map()

    # The file listing does not depend on the band, so it is read only once.
    scene_files = landsat_scene.get_files() or []

    for gband, band in bands.items():
        for f in scene_files:
            if f.stem.endswith(band) and f.suffix.lower().endswith('.tif'):
                files[gband] = f
                if gband in quicklook:
                    qlfiles[gband] = str(f)

    # Generate Vegetation Index files
    generate_vi(productdir, files)

    # Apply valid range and Cog files.
    # Iterate over a snapshot since the values of ``files`` are replaced.
    for band, file_path in list(files.items()):
        tif_file = str(file_path)

        if landsat_scene.level == 2:
            _ = apply_valid_range(tif_file, tif_file)

        # Set destination of COG file (the conversion overwrites the source)
        files[band] = generate_cogs(tif_file, tif_file)
        if not is_valid_tif(tif_file):
            raise RuntimeError('Not Valid {}'.format(tif_file))

    # Extract basic scene information and build the quicklook
    pngname = productdir + '/{}.png'.format(identifier)

    quicklook_files = [qlfiles[band] for band in quicklook if band in qlfiles]
    if not quicklook_files:
        raise RuntimeError('No quicklook band files found for {}'.format(identifier))

    # The nir file is the preferred size reference; any other quicklook band
    # is used when nir is not among the quicklook bands.
    reference_file = qlfiles.get('nir', quicklook_files[0])

    dataset = GDALOpen(reference_file, GA_ReadOnly)
    numlin = 768
    # Keep the raster aspect ratio for a fixed number of rows.
    numcol = int(float(dataset.RasterXSize) / float(dataset.RasterYSize) * numlin)
    del dataset

    create_quick_look(pngname, quicklook_files, rows=numlin, cols=numcol)

    productdir = productdir.replace(Config.DATA_DIR, '')

    assets_to_upload = {
        'quicklook': dict(
            file=pngname,
            asset=productdir.replace('/Repository/Archive', '')
        )
    }

    engine_instance = {
        'local': db,
        'aws': db_aws
    }

    for instance in ['local', 'aws']:
        engine = engine_instance[instance]

        # Skip catalog on aws for digital number
        if landsat_scene.level == 1 and instance == 'aws':
            continue

        if instance == 'aws':
            if Config.DISABLE_PUBLISH_SECOND_DB:
                logging.info('Skipping publish in second db.')
                continue

            asset_url = productdir.replace('/Repository/Archive', Config.AWS_BUCKET_NAME)
        else:
            asset_url = productdir

        pngname_relative = resource_path.join(asset_url, Path(pngname).name)

        assets_to_upload['quicklook']['asset'] = pngname_relative

        with engine.session.begin_nested():
            with engine.session.no_autoflush:
                # Add collection item to the session if not present
                if collection_item not in engine.session:
                    item = engine.session.query(Item).filter(
                        Item.name == collection_item.name,
                        Item.collection_id == collection_item.collection_id
                    ).first()

                    if not item:
                        cloned_properties = CollectionItemForm().dump(collection_item)
                        collection_item = Item(**cloned_properties)
                        engine.session.add(collection_item)

                collection_bands = engine.session.query(Band)\
                    .filter(Band.collection_id == collection_item.collection_id)\
                    .all()

                assets = dict(
                    thumbnail=create_asset_definition(
                        str(pngname_relative), 'image/png', ['thumbnail'], str(pngname))
                )

                geom = min_convex_hull = None

                # Inserting data into Product table
                for band in files:
                    template = resource_path.join(asset_url, Path(files[band]).name)

                    band_model = next(
                        filter(lambda b: band == b.common_name, collection_bands), None)

                    if not band_model:
                        logging.warning(
                            'Band {} of collection {} not found in database. Skipping...'
                            .format(band, collection_item.collection_id))
                        continue

                    # Extent and convex hull come from the first registered band.
                    if geom is None:
                        geom = raster_extent(files[band])
                        min_convex_hull = raster_convexhull(files[band])

                    assets[band_model.name] = create_asset_definition(
                        template, COG_MIME_TYPE,
                        ['data'], files[band], is_raster=True
                    )

                    assets_to_upload[band] = dict(file=files[band], asset=template)

                if geom is None:
                    # Without a registered band there is no geometry to store.
                    raise RuntimeError('No registered band found for {} in collection {}'.format(
                        identifier, collection_item.collection_id))

                collection_item.assets = assets
                collection_item.geom = from_shape(geom, srid=4326)
                collection_item.min_convex_hull = from_shape(min_convex_hull, srid=4326)
                # Add into scope of local and remote database
                add_instance(engine, collection_item)

        # Persist database
        commit(engine)

    return assets_to_upload
def publish(collection_item: Item, scene: RadcorActivity, skip_l1=False, **kwargs):
    """Publish Sentinel collection.

    It works with both L1C and L2A.

    The scene band files are selected through the scene band map, the
    vegetation index files are generated, every band is converted to COG inside
    the scene path, a PNG quicklook is created when it does not exist yet, and
    the item assets, extent and convex hull are stored through the ``local``
    engine and, unless skipped, the ``aws`` engine.

    Args:
        collection_item - Collection Item
        scene - Current Activity
        skip_l1 - When true and the level to publish is 1, nothing is published.
        **kwargs - Not read by this function.

    Returns:
        dict keyed by ``'quicklook'`` and by each published band, where every
        value is a dict with the local ``file`` and the ``asset`` path. An
        empty dict is returned when the level 1 publish is skipped.

    Raises:
        RuntimeError - When no scene file matches the band map, when a
            generated COG is not a valid tif or when no scene band is
            registered for the collection.
    """
    qlband = 'TCI'

    # Get collection level to publish. Default is l1
    # TODO: Check in database the scenes level 2 already published. We must set to level 2
    collection_level = scene.args.get('level') or 1

    if collection_level == 1 and skip_l1:
        logging.info(
            f'Skipping publish skip_l1={skip_l1} L1 - {collection_item.collection_id}'
        )
        return dict()

    sentinel_scene = factory.get_from_sceneid(scene.sceneid, level=collection_level)
    harmonized_scene = factory.get_from_sceneid(scene.sceneid, level=3)

    product_uri = sentinel_scene.path()
    product_uri.mkdir(parents=True, exist_ok=True)

    # Work on a copy: the NDVI and EVI entries added below must not leak into
    # the mapping object handed out by the scene.
    band_map = dict(sentinel_scene.get_band_map())

    no_files_message = 'No files found for {} - {}'.format(scene.sceneid, str(product_uri))

    if scene.collection_id == harmonized_scene.id:
        # Retrieves all tif files from scene
        tiffiles = get_tif_files(scene)

        # Find the desired files to be published and put then in files
        bands = []
        files = {}

        for tiffile in sorted(tiffiles):
            filename = os.path.basename(tiffile)
            parts = filename.split('_')
            band = parts[2][:-4]  # Select removing .tif extension

            if band not in bands and band in band_map:
                bands.append(band)
                files[band_map[band]] = tiffile

        logging.warning('Publish {} - {} (id={}, tiffiles={})'.format(
            scene.collection_id, scene.args.get('file'), scene.id, len(tiffiles)))

        # Fail with a clear message before ``tiffiles[0]`` is read below.
        if not files:
            raise RuntimeError(no_files_message)

        # Define filenames for products
        parts = os.path.basename(tiffiles[0]).split('_')
        file_basename = '_'.join(parts[:-1])
        pngname = os.path.join(scene.args.get('file'), file_basename + '.png')
        copy(pngname, str(product_uri))
    else:
        # Retrieves all jp2 files from scene
        if sentinel_scene.level == 1:
            files_list = get_jp2_files(scene)
        else:
            files_list = sentinel_scene.get_files()

        # Find the desired files to be published and put then in files
        bands = []
        files = {}

        for file in sorted(files_list):
            filename = Path(file).stem
            parts = filename.split('_')

            # The band is the last name token, or the last two tokens joined,
            # depending on how many tokens the file name has.
            if len(parts) in (3, 8):
                band = parts[-1]
            else:
                band = '_'.join(parts[-2:])

            if band not in bands and band in band_map:
                bands.append(band)
                files[band_map[band]] = str(file)
            elif band == qlband:
                files['qlfile'] = str(file)

        logging.warning('Publish {} - {} (id={}, files={})'.format(
            scene.collection_id, scene.args.get('file'), scene.id, len(files)))

        if not files:
            raise RuntimeError(no_files_message)

        # Retrieve a file name and use as reference for the Vegetation Index files
        file_name = Path(files.get('quality', list(files.values())[0])).name
        file_basename = '_'.join(file_name.split('_')[:-1])

    # Create vegetation index
    generate_vi(file_basename, str(product_uri), files)

    bands.extend(['NDVI', 'EVI'])
    band_map.update(NDVI='ndvi', EVI='evi')

    for sband in bands:
        band = band_map[sband]
        file = files[band]

        # Set destination of COG file
        cog_file_name = '{}_{}.tif'.format(file_basename, sband)
        cog_file_path = product_uri / cog_file_name

        files[band] = generate_cogs(str(file), str(cog_file_path))
        if not is_valid_tif(cog_file_path):
            raise RuntimeError('Not Valid {}'.format(cog_file_path))

    assets_to_upload = {}

    engine_instance = {'local': db, 'aws': db_aws}
    base_file_prefix = 'Repository/Archive'

    for instance in ['local', 'aws']:
        engine = engine_instance[instance]

        # Skip catalog on aws for digital number
        if sentinel_scene.level == 1 and instance == 'aws':
            continue

        if instance == 'aws':
            if Config.DISABLE_PUBLISH_SECOND_DB:
                logging.info('Skipping publish in second db.')
                continue

            asset_url = Config.AWS_BUCKET_NAME / (product_uri.relative_to(
                Path(Config.DATA_DIR) / base_file_prefix))
        else:
            asset_url = Path(Config.ITEM_ASSET_PREFIX) / product_uri.relative_to(
                Path(Config.DATA_DIR) / base_file_prefix)

        collection_bands = engine.session.query(Band).filter(
            Band.collection_id == scene.collection_id).all()

        with engine.session.begin_nested():
            with engine.session.no_autoflush:
                # Add collection item to the session if not present
                if collection_item not in engine.session:
                    item = engine.session.query(Item).filter(
                        Item.name == collection_item.name,
                        Item.collection_id == collection_item.collection_id
                    ).first()

                    if not item:
                        # NOTE(review): the clone is added to this session, but
                        # the geometry and assets below are assigned to
                        # ``collection_item`` - confirm this is intended.
                        cloned_properties = CollectionItemForm().dump(collection_item)
                        cloned_item = Item(**cloned_properties)
                        engine.session.add(cloned_item)

                assets = {}

                # Create Qlook file
                pngname = product_uri / '{}.png'.format(file_basename)
                if not pngname.exists():
                    # When TCI band found, use it to generate quicklook
                    if files.get('qlfile'):
                        create_quick_look_from_tci(str(pngname), files['qlfile'])
                    else:
                        create_quick_look(
                            str(pngname),
                            [files['red'], files['green'], files['blue']])

                normalized_quicklook_path = os.path.normpath(
                    '{}/{}'.format(str(asset_url), pngname.name))
                assets_to_upload['quicklook'] = dict(
                    asset=str(normalized_quicklook_path), file=str(pngname))

                assets['thumbnail'] = create_asset_definition(
                    str(normalized_quicklook_path), 'image/png',
                    ['thumbnail'], str(pngname))

                geom = min_convex_hull = None

                # Register every COG generated above as an item asset
                for sband in bands:
                    # Same destination used when the COG file was generated
                    cog_file_name = '{}_{}.tif'.format(file_basename, sband)
                    cog_file_path = product_uri / cog_file_name

                    band_model = next(
                        filter(lambda b: b.name == sband, collection_bands), None)

                    if band_model is None:
                        logging.warning(
                            'Band {} not registered on database. Skipping'.format(sband))
                        continue

                    # Extent and convex hull come from the first registered band.
                    if geom is None:
                        geom = raster_extent(cog_file_path)
                        min_convex_hull = raster_convexhull(cog_file_path)

                    assets[band_model.name] = create_asset_definition(
                        f'{str(asset_url)}/{cog_file_name}', COG_MIME_TYPE,
                        ['data'], cog_file_path, is_raster=True)

                    assets_to_upload[sband] = dict(
                        file=str(cog_file_path),
                        asset=assets[band_model.name]['href'])

                if geom is None:
                    # Without a registered band there is no geometry to store.
                    raise RuntimeError('No registered band found for {} in collection {}'.format(
                        scene.sceneid, scene.collection_id))

                collection_item.geom = from_shape(geom, srid=4326)
                collection_item.min_convex_hull = from_shape(min_convex_hull, srid=4326)
                collection_item.assets = assets

        commit(engine)

    return assets_to_upload