def create_collections(collections):
    dg_collection = Collection.open(
        os.path.join(root_url, 'DGOpenData', 'catalog.json'))
    # Create collections only if they do not already exist
    current_cat_names = [
        x.split('/')[-2] for x in dg_collection.links(rel='child')
    ]
    out_d = {}
    for coll in collections:
        if coll['id'] not in current_cat_names:
            print("Creating new collection: {}".format(coll['id']))
            new_coll = Collection(coll)
            dg_collection.add_catalog(new_coll)
            out_d.update({coll['id']: new_coll})
            dg_collection.save()
        else:
            print("Opening existing collection: {}".format(coll['id']))
            out_d.update({
                coll['id']:
                Collection.open(
                    os.path.join(root_url, 'DGOpenData', coll['id'],
                                 'catalog.json'))
            })
    return out_d
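# Hedged usage sketch for create_collections() above: the collection dict is a
# minimal hypothetical STAC collection definition, and 'root_url' is assumed to
# be defined at module level, as in the function itself.
example_collections = [{
    'id': 'example-event',
    'description': 'Hypothetical collection for illustration only',
    'extent': {'spatial': [-180, -90, 180, 90], 'temporal': [None, None]},
    'license': 'proprietary'
}]
collections_by_id = create_collections(example_collections)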
def populateDatabase():
    global i  # counter defined at module level
    for item in col.items():
        print(item)
        #item2 = Item.open(item)
        #print(item2)
        row = item.properties['eo:row']
        column = item.properties['eo:column']
        data = item.properties['datetime']
        data = data[0:10]
        id = str(item)
        url = f'https://landsat-stac.s3.amazonaws.com/landsat-8-l1/{column}/{row}/{data}/{id}.json'
        # Save the remote item locally, rewrite it for MongoDB (use the STAC
        # id as the Mongo _id and drop the assets), then insert the document.
        test = Collection.open(url)
        filename = 'mycat/catalog' + str(i) + '.json'
        test.save(filename)
        with open(filename) as json_file:
            file_data = json.load(json_file)
        file_data['_id'] = file_data.pop('id')
        del file_data['assets']
        with open(filename, 'w') as outfile:
            json.dump(file_data, outfile)
        collection_currency.insert(file_data)
        print(url)
        i = i + 1
    client.close()
def find_items(collection_name, sensor_name=None):
    col = Collection.open(
        os.path.join(root_url, 'DGOpenData', collection_name, 'catalog.json'))
    for item in col.items():
        if sensor_name:
            if 'eo:platform' in item.properties:
                if item.properties['eo:platform'] == sensor_name:
                    yield item
        else:
            yield item
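# Hedged usage sketch for the find_items() generator above; 'MyCollection' and
# the 'WORLDVIEW-3' platform value are hypothetical examples.
for found in find_items('MyCollection', sensor_name='WORLDVIEW-3'):
    print(found.id, found.properties.get('datetime'))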
def test_add_item(self):
    cat = Catalog.create(root='http://my.cat').save(
        os.path.join(self.path, 'catalog.json'))
    col = Collection.open(
        os.path.join(testpath, 'catalog/eo/landsat-8-l1/catalog.json'))
    cat.add_catalog(col)
    item = Item.open(
        os.path.join(testpath, 'catalog/eo/landsat-8-l1/item.json'))
    col.add_item(item)
    assert item.parent().id == 'landsat-8-l1'
def create_project_collections(self, projects, ds_name):
    with open(projects, 'r') as geoj:
        data = json.load(geoj)
    for feat in data['features']:
        if feat['extent']['temporal'][0]:
            year = feat['extent']['temporal'][0].split('-')[0]
            year_cat = Catalog.open(
                os.path.join(self.root, ds_name, year, 'catalog.json'))
            coll = Collection(feat)
            year_cat.add_catalog(coll)
            year_cat.save()
def test_add_item_with_subcatalogs(self):
    cat = Catalog.create(root='http://my.cat').save(
        os.path.join(self.path, 'test_subcatalogs.json'))
    col = Collection.open(
        os.path.join(testpath, 'catalog/eo/landsat-8-l1/catalog.json'))
    cat.add_catalog(col)
    item = Item.open(
        os.path.join(testpath, 'catalog/eo/landsat-8-l1/item.json'))
    col.add_item(item, path='${landsat:path}/${landsat:row}/${date}')
    assert item.root().id == cat.id
    assert item.collection().id == col.id
    # test code using existing catalogs
    col.add_item(item, '${landsat:path}/${landsat:row}/${date}')
    assert item.root().id == cat.id
def toSTAC(self, **kwargs) -> Collection:
    root_collection = Collection.create(
        id=self.collectionId,
        # f-string so the collection id is interpolated into the root URL
        root=f'https://hpda.{self.collectionId}',
        **kwargs)
    collection_root_path = os.path.join(self.collectionDir, "root.json")
    root_collection.save(collection_root_path)
    for agg in self.aggs.values():
        sub_collection = agg.toSTAC(self.collectionDir)
        root_collection.add_collection(sub_collection)
        for fileRec in agg.fileRecs:
            item: Item = fileRec.toSTAC(self.collectionDir)
            sub_collection.add_item(item)
            print(f"Saving item to {item.filename}")
            item.save(item.filename)
    return root_collection
def lambda_handler(event, context):
    logger.info('Event: %s' % json.dumps(event))
    collection = Collection.open(
        'https://sentinel-stac.s3.amazonaws.com/sentinel-2-l1c/catalog.json')
    msg = json.loads(event['Records'][0]['Sns']['Message'])
    logger.debug('Message: %s' % json.dumps(msg))
    for m in msg['tiles']:
        url = op.join(SETTINGS['roda_url'], m['path'], 'tileInfo.json')
        metadata = read_remote(url)
        logger.debug('Metadata: %s' % json.dumps(metadata))
        # transform to STAC
        item = transform(metadata)
        logger.info('Item: %s' % json.dumps(item.data))
        #collection.add_item(item, path=SETTINGS['path_pattern'], filename=SETTINGS['fname_pattern'])
        #logger.info('Added %s as %s' % (item, item.filename))
        client.publish(TopicArn=sns_arn, Message=json.dumps(item.data))
        logger.info('Published to %s' % sns_arn)
def toSTAC(self, collectionDir: str) -> Collection:
    dataset = Dataset(self.filePath())
    dims = {name: dim.size for name, dim in dataset.dimensions.items()}
    aid = os.path.dirname(self.base)
    stacFile = os.path.join(collectionDir, f"{aid}.json")
    print(f"Generating STAC spec for agg {aid} at '{stacFile}'")
    metadata = dict(
        id=aid,
        description="",
        location=self.base,
        extent="",
        properties=dict(nFiles=self.nFiles,
                        nTs=self.nTs,
                        paths=self.paths,
                        vars=self.vars,
                        dims=dims),
    )
    collection = Collection(data=metadata, filename=stacFile)
    return collection
def main(items=None, fetch=None, save=None, **kwargs):
    """ Main function for performing a search """
    _save = save if items is None else None
    items = satsearch(items, save=_save, **kwargs)

    # if not downloading there is nothing more to do
    if fetch is None:
        return

    # check that there is a valid geometry for clipping
    feature = items._search.get('parameters', {}).get('intersects', None)
    if feature is None:
        raise Exception('No geometry provided')

    derived_items = []
    # for each date, combine scenes
    for date in items.dates():
        print('Processing files for %s' % date)
        _items = [s for s in items if s.date == date]
        # TODO - split out by user specified metadata (e.g., platform, collection)
        item = satfetch(_items, feature['geometry'], fetch)
        derived_items.append(item)

    # this needs update to sat-stac to support adding metadata to Items
    # see https://github.com/sat-utils/sat-stac/issues/39
    #props = {
    #    'software': 'sat-fetch v%s' % __version__
    #}

    col = Collection.create()
    col._data['id'] = 'sat-fetch'
    col._data['description'] = 'Fetch items created by sat-fetch'
    col._data['links'].append({
        'rel': 'about',
        'href': 'https://github.com/sat-utils/sat-fetch'
    })
    derived_items = ItemCollection(derived_items, collections=[col])
    if save is not None:
        derived_items.save(save)
    return derived_items
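# Hedged usage sketch for main() above. The keyword arguments follow
# sat-search style parameters, but the exact kwargs, the asset keys in
# 'fetch', and the AOI are assumptions for illustration only.
aoi = {
    'type': 'Feature',
    'geometry': {
        'type': 'Polygon',
        'coordinates': [[[-105.3, 39.9], [-105.3, 40.1], [-105.1, 40.1],
                         [-105.1, 39.9], [-105.3, 39.9]]]
    }
}
derived = main(intersects=aoi, datetime='2019-06-01/2019-06-30',
               fetch=['red', 'green', 'blue'], save='derived-items.json')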
def update_collection(event, context):
    collection_root = os.getenv('COLLECTION_ROOT')
    path = os.getenv('PATH')
    filename = os.getenv('FILENAME')

    item_count = len(event['Records'])
    stac_links = []
    for record in event['Records']:
        stac_item = json.loads(record['body'])
        print(stac_item)

        col = Collection.open(collection_root)
        collection_name = col.id

        kwargs = {'item': Item(stac_item)}
        if path:
            kwargs.update({'path': '$' + '/$'.join(path.split('/'))})
        if filename:
            kwargs.update({'filename': '$' + '/$'.join(filename.split('/'))})
        print(kwargs)
        col.add_item(**kwargs)
        col.save()

        stac_links.append(kwargs['item'].links('self')[0])

        # Send message to SNS Topic if enabled
        if NOTIFICATION_TOPIC:
            kwargs = utils.stac_to_sns(kwargs['item'].data)
            kwargs.update({
                'TopicArn':
                f"arn:aws:sns:{REGION}:{ACCOUNT_ID}:{NOTIFICATION_TOPIC}"
            })
            sns_client.publish(**kwargs)

    print(
        f"LOGS CollectionName: {collection_name}\tItemCount: {item_count}\tItemLinks: {stac_links}"
    )
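# Small sketch of the '$'-template construction used in update_collection()
# above; the field names ('landsat:path', 'landsat:row') are hypothetical
# examples of a PATH environment variable value.
example_path = 'landsat:path/landsat:row'
print('$' + '/$'.join(example_path.split('/')))  # -> $landsat:path/$landsat:row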
def update_collection(root, long_poll, concurrency, path, filename):
    # Create a SQS queue for the collection
    # Subscribe SQS queue to SNS topic with filter policy on collection name
    # Configure lambda function and attach to SQS queue (use ENV variables to pass state)
    name = Collection.open(root).id
    filter_rule = {'collection': [name]}
    pattern = re.compile(r'[\W_]+')
    name = pattern.sub('', name)

    with open(sls_config_path, 'r') as f:
        # Using unsafe load to preserve type.
        sls_config = yaml.unsafe_load(f)

    aws_resources = resources.update_collection(name, root, filter_rule,
                                                long_poll, concurrency, path,
                                                filename)
    sls_config['resources']['Resources'].update(aws_resources['resources'])
    sls_config['functions'].update(aws_resources['functions'])

    with open(sls_config_path, 'w') as outf:
        yaml.dump(sls_config, outf, indent=1)
def collection(cls, cid):
    """ Get a Collection record """
    url = op.join(config.API_URL, 'collections', cid)
    return Collection(cls.query(url=url))
def ingest_items(self, url):
    root = Collection.open(url)
    for item in root.items():
        AssetLoader(item, self.config.API_ENDPOINT).ingest()
def ingest_collections(self, url):
    root = Collection.open(url)
    for coll in root.collections():
        AssetLoader(coll, self.config.API_ENDPOINT).ingest()
def test_add_item_without_saving(self):
    col = Collection.create()
    item = Item.open(
        os.path.join(testpath, 'catalog/eo/landsat-8-l1/item.json'))
    with self.assertRaises(STACError):
        col.add_item(item)
def build_stac_catalog(id_list=None, verbose=False):
    prefix = '/data/'
    tempdir = tempfile.mkdtemp(prefix=prefix)
    tempthumbs = tempfile.mkdtemp(prefix=prefix)
    print("Catalog tempdir: {}".format(tempdir))
    print("Thumbnails tempdir: {}".format(tempthumbs))

    NoaaStormCatalog.verbose = verbose

    print("Running web scraper.")
    with ScrapyRunner(NoaaStormCatalog) as runner:
        scraped_items = list(runner.execute(ids=id_list))
        collections = scraped_items.pop(0)
        item_count = scraped_items.pop(0)
        collections = create_collections(collections, scraped_items, id_list)

    # Build stac catalog locally
    root_catalog = Catalog.open(os.path.join(ROOT_URL, 'catalog.json'))
    root_catalog.save_as(filename=os.path.join(tempdir, 'catalog.json'))

    # NOAA Storm catalog
    os.mkdir(os.path.join(tempdir, 'NOAAStorm'))
    noaa_storm_cat = Catalog.open(
        os.path.join(ROOT_URL, 'NOAAStorm', 'catalog.json'))
    noaa_storm_cat.save_as(
        filename=os.path.join(tempdir, 'NOAAStorm', 'catalog.json'))

    print("Creating collections.")
    d = {}
    for collection in collections:
        coll = Collection(collection)
        noaa_storm_cat.add_catalog(coll)
        d.update({collection['id']: coll})

    # Setup directories for thumbnails
    thumbdir = os.path.join(tempthumbs, 'thumbnails')
    os.mkdir(thumbdir)
    for coll in d:
        coll_dir = os.path.join(thumbdir, d[coll].id)
        if not os.path.exists(coll_dir):
            os.mkdir(coll_dir)

    # Sort assets by archive type
    archive_assets = []
    for item in scraped_items:
        if 'archive' in item:
            if item['archive'].endswith('_RGB.tar'):
                archive_assets.append(
                    RGBArchive(item,
                               os.path.join(thumbdir,
                                            d[item['event_name']].id)))
            elif item['archive'].endswith(('GCS_NAD83.tar', 'GCS_NAD83.zip')):
                archive_assets.append(
                    JpegTilesArchive(item,
                                     os.path.join(thumbdir,
                                                  d[item['event_name']].id)))
            elif item['archive'].endswith(('Oblique.tar', 'Oblique.zip')):
                archive_assets.append(
                    ObliqueArchive(item,
                                   os.path.join(thumbdir,
                                                d[item['event_name']].id)))
            else:
                print("Found a JPG with disconnected world file")

    # Download archives
    download_archives(archive_assets, prefix)

    print("Creating items and thumbnails.")
    # Add items
    for item in build_stac_items(archive_assets):
        d[item['collection']].add_item(Item(item),
                                       path='${date}',
                                       filename='${id}')

        # Update spatial extent of collection (expand lower bounds with min
        # comparisons and upper bounds with max comparisons)
        try:
            if item['bbox'][0] < d[item['collection']].extent['spatial'][0]:
                d[item['collection']].extent['spatial'][0] = item['bbox'][0]
            if item['bbox'][1] < d[item['collection']].extent['spatial'][1]:
                d[item['collection']].extent['spatial'][1] = item['bbox'][1]
            if item['bbox'][2] > d[item['collection']].extent['spatial'][2]:
                d[item['collection']].extent['spatial'][2] = item['bbox'][2]
            if item['bbox'][3] > d[item['collection']].extent['spatial'][3]:
                d[item['collection']].extent['spatial'][3] = item['bbox'][3]
        except:
            d[item['collection']].extent['spatial'] = item['bbox']

        # Update temporal extent of collection
        try:
            item_dt = load_datetime(item['properties']['datetime'])
            min_dt = load_datetime(
                d[item['collection']].extent['temporal'][0])
            max_dt = load_datetime(
                d[item['collection']].extent['temporal'][1])
            if item_dt < min_dt:
                d[item['collection']].extent['temporal'][0] = item[
                    'properties']['datetime']
            if item_dt > max_dt:
                d[item['collection']].extent['temporal'][1] = item[
                    'properties']['datetime']
        except:
            d[item['collection']].extent['temporal'] = [
                item['properties']['datetime'],
                item['properties']['datetime']
            ]

    # Upload catalog to S3
    print("Uploading catalog to S3.")
    subprocess.call(f"aws s3 sync {tempdir} s3://cognition-disaster-data/",
                    shell=True)

    # Upload thumbnails to S3
    print("Uploading thumbnails to S3.")
    subprocess.call(
        f"aws s3 sync {thumbdir} s3://cognition-disaster-data/thumbnails/",
        shell=True)

    cleanup(prefix)
def collection(self, cid, headers=None):
    """ Get a Collection record """
    url = urljoin(self.url, 'collections/%s' % cid)
    return Collection(self.query(url=url, headers=headers))
def open_collection(self):
    filename = os.path.join(testpath,
                            'catalog/eo/landsat-8-l1/catalog.json')
    return Collection.open(filename)
import logging
import os.path as op
from datetime import datetime, timedelta

from dateutil.parser import parse
from pyproj import Proj, transform as reproj
from shapely import geometry
from shapely.geometry import MultiPoint, Point

from satstac import Collection, Item, utils

from .utils import get_matching_s3_keys, read_from_s3
from .version import __version__

logger = logging.getLogger(__name__)

_collection = Collection.open(
    op.join(op.dirname(__file__), 'sentinel-2-l1c.json'))

SETTINGS = {
    'roda_url': 'https://roda.sentinel-hub.com/sentinel-s2-l1c',
    's3_url': 'https://sentinel-s2-l1c.s3.amazonaws.com',
    'inv_bucket': 'sentinel-inventory',
    'inv_key': 'sentinel-s2-l1c/sentinel-s2-l1c-inventory',
    'path_pattern': '${sentinel:utm_zone}/${sentinel:latitude_band}/${sentinel:grid_square}',
    'fname_pattern': '${date}/${id}'
}


def add_items(catalog, records, start_date=None,
def ingest_collection(self, url):
    AssetLoader(Collection.open(url), self.config.API_ENDPOINT).ingest()
import json

from pymongo import MongoClient
from satstac import Catalog, Collection

client = MongoClient(
    'mongodb+srv://piAdmin:[email protected]/test?retryWrites=true&w=majority',
    27017)
db = client['metadata']
collection_currency = db['landsat']

cat = Catalog.open(
    'https://landsat-stac.s3.amazonaws.com/landsat-8-l1/catalog.json')
print(cat)
#print(data)
#test
# create a Catalog object with JSON
#mycat = Catalog(data)
#https://landsat-stac.s3.amazonaws.com/landsat-8-l1/ LC80101172015002LGN00

col = Collection.open(
    'https://landsat-stac.s3.amazonaws.com/landsat-8-l1/catalog.json')
print(col, col.extent)
#print(col.items())

i = 1


def populateDatabase():
    for item in col.items():
        print(item)
        #item2 = Item.open(item)
        #print(item2)
        row = item.properties['eo:row']
        column = item.properties['eo:column']
        data = item.properties['datetime']
        data = data[0:10]
        id = str(item)