def test_init_without_collection(self):
    with open(self.filename) as f:
        data = json.loads(f.read())
    with open(self.filename.replace('item.json', 'catalog.json')) as f:
        coldata = json.loads(f.read())
    item = Item(data)
    assert (item.id == data['id'])
    assert (item.collection() is None)
def load_item(item):
    """ Coerce an Item, an HTTP URL string, or a GeoJSON dict into an Item """
    if isinstance(item, Item):
        return item
    elif isinstance(item, str) and item.startswith('http'):
        return Item.open(item)
    elif isinstance(item, dict):
        return Item(item)
    else:
        # anything else, including non-http strings, is rejected
        raise TypeError("Invalid input encountered.")
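# A hedged companion example for load_item (new here, not from the source repo):
# it exercises the pass-through, dict, and error branches in the same test style
# as test_load_item further below.
def test_load_item_inputs(self):
    item = Item.open(self.filename)
    assert StacItem.load_item(item) is item                  # Item passed through
    assert isinstance(StacItem.load_item(item.data), Item)   # dict wrapped in Item
    with self.assertRaises(TypeError):
        StacItem.load_item(42)                               # unsupported input type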
def test_init_without_collection(self):
    with open(self.filename) as f:
        data = json.loads(f.read())
    with open(self.filename.replace('item.json', 'catalog.json')) as f:
        coldata = json.loads(f.read())
    item = Item(data)
    assert(item.id == data['id'])
    assert(item.collection() is None)
    assert(item.eobands == [])
    # now put collection properties here
    data['properties'].update(coldata['properties'])
    item = Item(data)
    assert(len(item.eobands) == 11)
def create_derived_item(items, geometry):
    """ Create metadata for derived scene from multiple input scenes """
    # data provenance, iterate through links
    links = []
    for i in items:
        selflink = [l['href'] for l in i._data['links'] if l['rel'] == 'self']
        if len(selflink) > 0:
            links.append({'rel': 'derived_from', 'href': selflink[0]})
    lons = [c[0] for c in geometry['coordinates'][0]]
    lats = [c[1] for c in geometry['coordinates'][0]]
    bbox = [min(lons), min(lats), max(lons), max(lats)]
    # properties
    props = {'datetime': items[0]['datetime']}
    collections = [
        item['collection'] for item in items if item['collection'] is not None
    ]
    if len(collections) == 1:
        props['collection'] = collections[0]
    item = {
        'type': 'Feature',
        'id': '%s_%s' % (items[0]['eo:platform'], items[0].date),
        'bbox': bbox,
        'geometry': geometry,
        'properties': props,
        'links': links,
        'assets': {}
    }
    return Item(item)
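# A minimal usage sketch for create_derived_item; the scene filenames are
# hypothetical, and the source Items must carry 'datetime', 'eo:platform',
# and 'collection' fields as the function assumes.
def example_create_derived_item():
    src_items = [Item.open('scene1.json'), Item.open('scene2.json')]
    geom = {
        'type': 'Polygon',
        'coordinates': [[[0.0, 0.0], [1.0, 0.0], [1.0, 1.0],
                         [0.0, 1.0], [0.0, 0.0]]]
    }
    derived = create_derived_item(src_items, geom)
    # the bbox is computed from the polygon's exterior ring
    assert derived.bbox == [0.0, 0.0, 1.0, 1.0]
    return derived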
def test_load_item(self):
    input1 = self.infile
    input2 = Item.open(self.infile)
    input3 = input2.data
    self.assertTrue(
        type(StacItem.load_item(input1)) == type(StacItem.load_item(input2))
        == type(StacItem.load_item(input3)) == Item)
def test_download_assets(self):
    """ Retrieve multiple data files """
    item = Item.open(self.filename)
    fnames = item.download_assets(keys=['MTL', 'ANG'],
                                  filename_template=self.filename_template)
    for f in fnames:
        assert (os.path.exists(f))
def test_get_path(self):
    """ Test string templating with item fields """
    item = Item.open(self.filename)
    st = item.get_path('${collection}/${date}')
    assert (st == 'landsat-8-l1/2020-06-11')
    st = item.get_path('nosub')
    assert (st == 'nosub')
def test_open_with_collection(self):
    item = Item.open(self.filename)
    assert (item.collection().id == 'landsat-8-l1')
    summaries = item.collection().summaries
    assert (len(summaries) == 4)
    assert (len(summaries['platform']) == 1)
    assert ('landsat-8' in summaries['platform'])
def test_assets(self):
    """ Get assets for download """
    item = Item.open(self.filename)
    href = item.data['assets']['B1']['href']
    assert(item.assets['B1']['href'] == href)
    assert(item.asset('B1')['href'] == href)
    assert(item.asset('coastal')['href'] == href)
def items(self, limit=10000, headers=None):
    """ Return all of the Items and Collections for this search """
    _limit = 500
    items = []
    found = self.found(headers=headers)
    if found > limit:
        logger.warning(
            'There are more items found (%s) than the limit (%s) provided.'
            % (found, limit))
    maxitems = min(found, limit)
    kwargs = {'page': 1, 'limit': min(_limit, maxitems)}
    kwargs.update(self.kwargs)
    url = urljoin(self.url, 'search')
    while len(items) < maxitems:
        items += [
            Item(i)
            for i in self.query(url=url, headers=headers, **kwargs)['features']
        ]
        kwargs['page'] += 1
    # retrieve collections
    collections = []
    try:
        for c in set([
                item._data['collection'] for item in items
                if 'collection' in item._data
        ]):
            collections.append(self.collection(c, headers=headers))
        #del collections[c]['links']
    except Exception:
        pass
    return ItemCollection(items, collections=collections)
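# Hedged usage sketch for the page-based items() above; the Search constructor
# arguments shown are assumptions, not the library's confirmed signature.
#   search = Search(url='https://earth-search.example.com', collection='landsat-8-l1')
#   item_collection = search.items(limit=1000)   # pages 500 at a time until done
#   for item in item_collection:
#       print(item)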
def test_substitute(self):
    """ Test string substitution with item fields """
    item = Item.open(self.filename)
    st = item.substitute('${collection}/${date}')
    assert(st == 'landsat-8-l1/2018-10-12')
    st = item.substitute('nosub')
    assert(st == 'nosub')
def test_download(self):
    """ Retrieve a data file """
    item = Item.open(self.filename)
    fname = item.download(key='MTL', path=self.path)
    assert(os.path.exists(fname))
    # second download should succeed with the file already on disk
    fname = item.download(key='MTL', path=self.path)
    assert(os.path.exists(fname))
def items_by_id(cls, ids, collection): """ Return Items from collection with matching ids """ col = cls.collection(collection) items = [] base_url = op.join(config.API_URL, 'collections', collection, 'items') for id in ids: items.append(Item(cls.query(op.join(base_url, id)))) return Items(items, collections=[col])
def test_publish(self):
    path = os.path.join(self.path, 'test_publish')
    shutil.copytree(os.path.join(testpath, 'catalog'), path)
    cat = Catalog.open(os.path.join(path, 'catalog.json'))
    cat.publish('https://my.cat')
    item = Item.open(os.path.join(path, 'eo/landsat-8-l1/item.json'))
    assert (item.links('self')[0] ==
            'https://my.cat/eo/landsat-8-l1/item.json')
def test_open(self):
    """ Initialize an item """
    item = Item.open(self.filename)
    dt, tm = item.properties['datetime'].split('T')
    assert(str(item.date) == dt)
    assert(item.id == item.data['id'])
    assert(item.geometry == item.data['geometry'])
    assert(str(item) == item.data['id'])
    assert(len(item.bbox) == 4)
def test_add_item(self):
    cat = Catalog.create(root='http://my.cat').save(
        os.path.join(self.path, 'catalog.json'))
    col = Collection.open(
        os.path.join(testpath, 'catalog/eo/landsat-8-l1/catalog.json'))
    cat.add_catalog(col)
    item = Item.open(
        os.path.join(testpath, 'catalog/eo/landsat-8-l1/item.json'))
    col.add_item(item)
    assert (item.parent().id == 'landsat-8-l1')
def items_by_id(cls, ids, collection): """ Return Items from collection with matching ids """ col = cls.collection(collection) items = [] base_url = urljoin(config.API_URL, 'collections/%s/items' % collection) for id in ids: try: items.append(Item(cls.query(urljoin(base_url, id)))) except SatSearchError as err: pass return Items(items, collections=[col])
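# Hedged sketch of fetching specific ids (the class name Search and the id
# values are assumptions): unknown ids raise SatSearchError inside the loop
# and are skipped.
#   items = Search.items_by_id(['LC80340332018034LGN00', 'no-such-id'],
#                              collection='landsat-8-l1')
#   assert len(items) <= 2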
def items(self, limit=10000, page_limit=500, headers=None):
    """ Return all of the Items and Collections for this search """
    found = self.found(headers=headers)
    limit = self.limit or limit
    if found > limit:
        logger.warning(
            'There are more items found (%s) than the limit (%s) provided.'
            % (found, limit))
    nextlink = {
        'method': 'POST',
        'href': urljoin(self.url, 'search'),
        'headers': headers,
        'body': self.kwargs,
        'merge': False
    }
    items = []
    while nextlink and len(items) < limit:
        if nextlink.get('method', 'GET') == 'GET':
            resp = self.query(url=nextlink['href'], headers=headers,
                              **self.kwargs)
        else:
            _headers = nextlink.get('headers', {})
            _body = nextlink.get('body', {})
            _body.update({'limit': page_limit})
            if nextlink.get('merge', False):
                _headers.update(headers)
                _body.update(self.kwargs)
            resp = self.query(url=nextlink['href'], headers=_headers, **_body)
        items += [Item(i) for i in resp['features']]
        links = [l for l in resp['links'] if l['rel'] == 'next']
        nextlink = links[0] if len(links) == 1 else None
    # retrieve collections
    collections = []
    try:
        for c in set([
                item._data['collection'] for item in items
                if 'collection' in item._data
        ]):
            collections.append(self.collection(c, headers=headers))
        #del collections[c]['links']
    except Exception:
        pass
    logger.debug(f"Found: {len(items)}")
    return ItemCollection(items, collections=collections)
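# The loop above implements STAC API link-based paging: each response may carry
# a 'next' link that the following request uses. A sketch of the expected shape
# (illustrative, not a verbatim API payload):
#   {
#     "features": [ ... GeoJSON items ... ],
#     "links": [
#       {"rel": "next", "method": "POST", "href": ".../search",
#        "body": { ... search params ... }, "merge": false}
#     ]
#   }
# Paging stops when no link with rel == "next" is returned or `limit` is reached.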
def test_add_item_with_subcatalogs(self):
    cat = Catalog.create(root='http://my.cat').save(
        os.path.join(self.path, 'test_subcatalogs.json'))
    col = Collection.open(
        os.path.join(testpath, 'catalog/eo/landsat-8-l1/catalog.json'))
    cat.add_catalog(col)
    item = Item.open(
        os.path.join(testpath, 'catalog/eo/landsat-8-l1/item.json'))
    col.add_item(item, path='${landsat:path}/${landsat:row}/${date}')
    assert (item.root().id == cat.id)
    assert (item.collection().id == col.id)
    # test code using existing catalogs
    col.add_item(item, '${landsat:path}/${landsat:row}/${date}')
    assert (item.root().id == cat.id)
def _test_download_paths(self):
    """ Testing of download paths and filenames """
    item = Item.open(self.filename)
    datadir = config.DATADIR
    filename = config.FILENAME
    config.DATADIR = os.path.join(testpath, '${date}')
    config.FILENAME = '${date}_${id}'
    fname = item.download('MTL')
    _fname = os.path.join(testpath, '2017-01-01/2017-01-01_testscene_MTL.txt')
    assert(fname == _fname)
    assert(os.path.exists(fname))
    config.DATADIR = datadir
    config.FILENAME = filename
    shutil.rmtree(os.path.join(testpath, '2017-01-01'))
    assert(os.path.exists(fname) == False)
def toSTAC(self, collectionDir: str) -> Item:
    cid = os.path.basename(self.path)
    itemsDir = os.path.join(collectionDir, "items")
    stacFile = os.path.join(itemsDir, f"{cid}.json")
    os.makedirs(itemsDir, exist_ok=True)
    metadata = dict(id=cid,
                    location=self.path,
                    time_range=[str(self.start_date), str(self.end_date)],
                    date_range=[self.start_time_value, self.end_time_value],
                    n_time_steps=self.size,
                    time_units=self.units,
                    variables=self.vars_list,
                    calendar=str(self.calendar),
                    base_date=str(self.base_date))
    item = Item(data=metadata, filename=stacFile)
    return item
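# Hedged usage sketch for toSTAC; the dataset object and directory below are
# hypothetical, and save() on the returned Item is an assumption about the
# Item class used here.
#   item = dataset.toSTAC('/data/collections/merra2')
#   item.save()   # would write /data/collections/merra2/items/<id>.json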
def items(self, limit=10000):
    """ Return all of the Items and Collections for this search """
    _limit = 500
    if 'ids' in self.kwargs:
        col = self.kwargs.get('query', {}).get('collection',
                                               {}).get('eq', None)
        if col is None:
            raise SatSearchError('Collection required when searching by id')
        return self.items_by_id(self.kwargs['ids'], col)
    items = []
    found = self.found()
    if found > limit:
        logger.warning(
            'There are more items found (%s) than the limit (%s) provided.'
            % (found, limit))
    maxitems = min(found, limit)
    kwargs = {'page': 1, 'limit': min(_limit, maxitems)}
    kwargs.update(self.kwargs)
    while len(items) < maxitems:
        items += [Item(i) for i in self.query(**kwargs)['features']]
        kwargs['page'] += 1
    # retrieve collections
    collections = []
    for c in set([
            item.properties['collection'] for item in items
            if 'collection' in item.properties
    ]):
        collections.append(self.collection(c))
        #del collections[c]['links']
    search = {'endpoint': config.API_URL, 'parameters': self.kwargs}
    return ItemCollection(items, collections=collections, search=search)
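# items() above special-cases id searches: 'ids' requires the collection to be
# pinned in the query. A sketch grounded in that branch (values hypothetical):
#   Search(ids=['scene-1'], query={'collection': {'eq': 'landsat-8-l1'}}).items()
#   Search(ids=['scene-1']).items()
#   # -> SatSearchError: Collection required when searching by id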
def update_collection(event, context):
    collection_root = os.getenv('COLLECTION_ROOT')
    path = os.getenv('PATH')
    filename = os.getenv('FILENAME')

    item_count = len(event['Records'])
    stac_links = []
    for record in event['Records']:
        stac_item = json.loads(record['body'])
        print(stac_item)
        col = Collection.open(collection_root)
        collection_name = col.id
        kwargs = {'item': Item(stac_item)}
        if path:
            kwargs.update({'path': '$' + '/$'.join(path.split('/'))})
        if filename:
            kwargs.update({'filename': '$' + '/$'.join(filename.split('/'))})
        print(kwargs)
        col.add_item(**kwargs)
        col.save()

        stac_links.append(kwargs['item'].links('self')[0])

        # Send message to SNS Topic if enabled
        if NOTIFICATION_TOPIC:
            kwargs = utils.stac_to_sns(kwargs['item'].data)
            kwargs.update({
                'TopicArn':
                f"arn:aws:sns:{REGION}:{ACCOUNT_ID}:{NOTIFICATION_TOPIC}"
            })
            sns_client.publish(**kwargs)

    print(
        f"LOGS CollectionName: {collection_name}\tItemCount: {item_count}\tItemLinks: {stac_links}"
    )
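# A sketch of the SQS event shape update_collection consumes: each record body
# is a JSON-serialized STAC item (all values below are hypothetical).
#   event = {
#       'Records': [{
#           'body': json.dumps({
#               'id': 'scene-1', 'type': 'Feature',
#               'properties': {'datetime': '2018-10-12T00:00:00Z'},
#               'geometry': None, 'assets': {}, 'links': []
#           })
#       }]
#   }
#   update_collection(event, context=None)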
def test_download_thumbnail(self):
    """ Get thumbnail for item """
    item = Item.open(self.filename)
    fname = item.download(key='thumbnail', path=self.path)
    assert(os.path.exists(fname))
def test_no_asset(self):
    item = Item.open(self.filename)
    assert(item.asset('no-such-asset') is None)
def test_class_properties(self):
    """ Test the property functions of the Item class """
    item = Item.open(self.filename)
    l = os.path.join(os.path.dirname(item.filename),
                     item.data['links'][0]['href'])
    assert(os.path.abspath(item.links()[0]) == os.path.abspath(l))
def test_open_with_collection(self):
    item = Item.open(self.filename)
    assert(item.collection().id == 'landsat-8-l1')
    assert(len(item['eo:bands']) == 11)
    assert(item['eo:off_nadir'] == 0)
def _test_create_derived(self):
    """ Create single derived scene """
    scenes = [self.get_test_scene(), self.get_test_scene()]
    scene = Item.create_derived(scenes)
    assert(scene.date == scenes[0].date)
    assert(scene['c:id'] == scenes[0]['c:id'])
def test_download_nonexist(self):
    """ Test downloading of non-existent file """
    item = Item.open(self.filename)
    fname = item.download(key='fake_asset', path=self.path)
    assert(fname is None)
def build_stac_catalog(id_list=None, verbose=False):
    prefix = '/data/'
    tempdir = tempfile.mkdtemp(prefix=prefix)
    tempthumbs = tempfile.mkdtemp(prefix=prefix)
    print("Catalog tempdir: {}".format(tempdir))
    print("Thumbnails tempdir: {}".format(tempthumbs))

    NoaaStormCatalog.verbose = verbose
    print("Running web scraper.")
    with ScrapyRunner(NoaaStormCatalog) as runner:
        scraped_items = list(runner.execute(ids=id_list))
        collections = scraped_items.pop(0)
        item_count = scraped_items.pop(0)
        collections = create_collections(collections, scraped_items, id_list)

    # Build stac catalog locally
    root_catalog = Catalog.open(os.path.join(ROOT_URL, 'catalog.json'))
    root_catalog.save_as(filename=os.path.join(tempdir, 'catalog.json'))

    # NOAA Storm catalog
    os.mkdir(os.path.join(tempdir, 'NOAAStorm'))
    noaa_storm_cat = Catalog.open(
        os.path.join(ROOT_URL, 'NOAAStorm', 'catalog.json'))
    noaa_storm_cat.save_as(
        filename=os.path.join(tempdir, 'NOAAStorm', 'catalog.json'))

    print("Creating collections.")
    d = {}
    for collection in collections:
        coll = Collection(collection)
        noaa_storm_cat.add_catalog(coll)
        d.update({collection['id']: coll})

    # Setup directories for thumbnails
    thumbdir = os.path.join(tempthumbs, 'thumbnails')
    os.mkdir(thumbdir)
    for coll in d:
        coll_dir = os.path.join(thumbdir, d[coll].id)
        if not os.path.exists(coll_dir):
            os.mkdir(coll_dir)

    # Sort assets
    archive_assets = []
    for item in scraped_items:
        if 'archive' in item:
            if item['archive'].endswith('_RGB.tar'):
                archive_assets.append(
                    RGBArchive(
                        item, os.path.join(thumbdir, d[item['event_name']].id)))
            elif item['archive'].endswith(('GCS_NAD83.tar', 'GCS_NAD83.zip')):
                archive_assets.append(
                    JpegTilesArchive(
                        item, os.path.join(thumbdir, d[item['event_name']].id)))
            elif item['archive'].endswith(('Oblique.tar', 'Oblique.zip')):
                archive_assets.append(
                    ObliqueArchive(
                        item, os.path.join(thumbdir, d[item['event_name']].id)))
            else:
                print("Found a JPG with disconnected world file")

    # Download archives
    download_archives(archive_assets, prefix)

    print("Creating items and thumbnails.")
    # Add items
    for item in build_stac_items(archive_assets):
        d[item['collection']].add_item(Item(item),
                                       path='${date}',
                                       filename='${id}')

        # Update spatial extent of collection (expand min and max bounds)
        try:
            ext = d[item['collection']].extent['spatial']
            if item['bbox'][0] < ext[0]:
                ext[0] = item['bbox'][0]
            if item['bbox'][1] < ext[1]:
                ext[1] = item['bbox'][1]
            if item['bbox'][2] > ext[2]:
                ext[2] = item['bbox'][2]
            if item['bbox'][3] > ext[3]:
                ext[3] = item['bbox'][3]
        except Exception:
            d[item['collection']].extent['spatial'] = item['bbox']

        # Update temporal extent of collection
        try:
            item_dt = load_datetime(item['properties']['datetime'])
            min_dt = load_datetime(d[item['collection']].extent['temporal'][0])
            max_dt = load_datetime(d[item['collection']].extent['temporal'][1])
            if item_dt < min_dt:
                d[item['collection']].extent['temporal'][0] = \
                    item['properties']['datetime']
            if item_dt > max_dt:
                d[item['collection']].extent['temporal'][1] = \
                    item['properties']['datetime']
        except Exception:
            d[item['collection']].extent['temporal'] = [
                item['properties']['datetime'], item['properties']['datetime']
            ]

    # Upload catalog to S3
    print("Uploading catalog to S3.")
    subprocess.call(f"aws s3 sync {tempdir} s3://cognition-disaster-data/",
                    shell=True)

    # Upload thumbnails to S3
    print("Uploading thumbnails to S3.")
    subprocess.call(
        f"aws s3 sync {thumbdir} s3://cognition-disaster-data/thumbnails/",
        shell=True)

    cleanup(prefix)
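# Hedged usage sketch for build_stac_catalog (the id value is hypothetical):
#   build_stac_catalog(id_list=['4096'], verbose=True)
# scrapes the NOAA Storm imagery index, assembles collections and items under a
# tempdir, and syncs both catalog and thumbnails to the cognition-disaster-data
# S3 bucket via `aws s3 sync`.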