Example #1
0
 def test_init_without_collection(self):
     """ An Item built straight from its JSON dict reports no collection """
     with open(self.filename) as item_file:
         data = json.load(item_file)
     # the sibling catalog file is still read, although its content is not used here
     catalog_filename = self.filename.replace('item.json', 'catalog.json')
     with open(catalog_filename) as catalog_file:
         coldata = json.load(catalog_file)
     item = Item(data)
     assert item.id == data['id']
     assert item.collection() is None
Example #2
0
 def load_item(item):
     """ Coerce the supported input kinds into an Item

     Args:
         item: an Item (returned unchanged), a string (passed to Item.open),
             or a dict (passed to the Item constructor).

     Returns:
         Item

     Raises:
         TypeError: if item is none of the supported kinds.
     """
     if isinstance(item, Item):
         return item
     if isinstance(item, str):
         # Previously only strings starting with 'http' were opened; any other
         # string fell through every branch and returned None silently.
         # NOTE(review): assumes Item.open accepts local filenames as well as
         # URLs -- confirm against the Item implementation.
         return Item.open(item)
     if isinstance(item, dict):
         return Item(item)
     raise TypeError("Invalid input encountered.")
Example #3
0
 def test_init_without_collection(self):
     """ Item from raw JSON has no collection or eobands until collection properties are merged in """
     with open(self.filename) as item_file:
         data = json.load(item_file)
     catalog_filename = self.filename.replace('item.json', 'catalog.json')
     with open(catalog_filename) as catalog_file:
         coldata = json.load(catalog_file)

     bare_item = Item(data)
     assert bare_item.id == data['id']
     assert bare_item.collection() is None
     assert bare_item.eobands == []

     # merge the collection-level properties into the item dict and rebuild
     data['properties'].update(coldata['properties'])
     merged_item = Item(data)
     assert len(merged_item.eobands) == 11
Example #4
0
def create_derived_item(items, geometry):
    """ Create metadata for derived scene from multiple input scenes

    Args:
        items: non-empty sequence of Items; the first one supplies the
            datetime, platform and date used for the new Item.
        geometry: dict whose geometry['coordinates'][0] is a sequence of
            (lon, lat, ...) positions; the bbox is computed from it.

    Returns:
        Item with 'derived_from' links pointing at each input's 'self' link.
    """
    # data provenance: record each input's 'self' link as 'derived_from'
    links = []
    for source in items:
        # the filter must test each link's rel; the original tested the item
        # itself (i['rel']) instead of the link
        selflink = [
            link['href'] for link in source._data['links']
            if link['rel'] == 'self'
        ]
        if selflink:
            links.append({'rel': 'derived_from', 'href': selflink[0]})
    # NOTE(review): looks like leftover debug output; kept to preserve behavior
    print(geometry)
    ring = geometry['coordinates'][0]
    lons = [c[0] for c in ring]
    lats = [c[1] for c in ring]
    bbox = [min(lons), min(lats), max(lons), max(lats)]
    # properties
    props = {'datetime': items[0]['datetime']}
    collections = [
        item['collection'] for item in items if item['collection'] is not None
    ]
    # the collection is only carried over when exactly one non-None value was collected
    if len(collections) == 1:
        props['collection'] = collections[0]
    item = {
        'type': 'Feature',
        'id': '%s_%s' % (items[0]['eo:platform'], items[0].date),
        'bbox': bbox,
        'geometry': geometry,
        'properties': props,
        'links': links,
        'assets': {}
    }
    return Item(item)
 def test_load_item(self):
     """ load_item yields an Item for a filename/URL string, an Item and a dict alike """
     input1 = self.infile
     input2 = Item.open(self.infile)
     input3 = input2.data
     loaded = [StacItem.load_item(src) for src in (input1, input2, input3)]
     self.assertTrue(all(type(obj) == Item for obj in loaded))
Example #6
0
 def test_download_assets(self):
     """ Download several assets at once and check each returned file exists """
     item = Item.open(self.filename)
     downloaded = item.download_assets(
         keys=['MTL', 'ANG'], filename_template=self.filename_template)
     for path in downloaded:
         assert os.path.exists(path)
Example #7
0
 def test_get_path(self):
     """ get_path fills ${...} fields from the item and leaves plain strings alone """
     item = Item.open(self.filename)
     expectations = (
         ('${collection}/${date}', 'landsat-8-l1/2020-06-11'),
         ('nosub', 'nosub'),
     )
     for template, expected in expectations:
         st = item.get_path(template)
         assert st == expected
Example #8
0
 def test_open_with_collection(self):
     """ An opened item exposes its collection and the collection's summaries """
     item = Item.open(self.filename)
     assert item.collection().id == 'landsat-8-l1'
     summaries = item.collection().summaries
     assert len(summaries) == 4
     platforms = summaries['platform']
     assert len(platforms) == 1
     assert 'landsat-8' in platforms
Example #9
0
 def test_assets(self):
     """ Asset B1 is reachable through .assets and through asset() by 'B1' or 'coastal' """
     item = Item.open(self.filename)
     expected_href = item.data['assets']['B1']['href']
     assert item.assets['B1']['href'] == expected_href
     for key in ('B1', 'coastal'):
         assert item.asset(key)['href'] == expected_href
Example #10
0
    def items(self, limit=10000, headers=None):
        """ Return all of the Items and Collections for this search

        Pages through the 'search' endpoint until min(found, limit) items
        have been collected or a page comes back empty.

        Args:
            limit: upper bound on the number of items requested.
            headers: passed through to found(), query() and collection().

        Returns:
            ItemCollection of the retrieved items plus the collections they
            reference (best effort: a failed collection lookup is logged and
            the collections gathered so far are returned).
        """
        _limit = 500

        items = []
        found = self.found(headers=headers)
        if found > limit:
            logger.warning(
                'There are more items found (%s) than the limit (%s) provided.',
                found, limit)
        maxitems = min(found, limit)
        kwargs = {'page': 1, 'limit': min(_limit, maxitems)}
        kwargs.update(self.kwargs)
        url = urljoin(self.url, 'search')
        while len(items) < maxitems:
            features = self.query(url=url, headers=headers,
                                  **kwargs)['features']
            if not features:
                # The API returned fewer items than it reported as found;
                # without this guard the loop would request pages forever.
                break
            items += [Item(feature) for feature in features]
            kwargs['page'] += 1

        # retrieve collections
        collections = []
        try:
            collection_ids = {
                item._data['collection'] for item in items
                if 'collection' in item._data
            }
            for c in collection_ids:
                collections.append(self.collection(c, headers=headers))
        except Exception as err:
            # still best effort, but no longer silent (and no longer traps
            # KeyboardInterrupt/SystemExit like the bare except did)
            logger.warning('Unable to retrieve collections: %s', err)

        return ItemCollection(items, collections=collections)
Example #11
0
 def test_substitute(self):
     """ substitute fills ${...} fields from the item and leaves plain strings alone """
     item = Item.open(self.filename)
     expectations = (
         ('${collection}/${date}', 'landsat-8-l1/2018-10-12'),
         ('nosub', 'nosub'),
     )
     for template, expected in expectations:
         st = item.substitute(template)
         assert st == expected
Example #12
0
 def test_download(self):
     """ Download the MTL asset twice in a row; the file must exist after each call """
     item = Item.open(self.filename)
     for _ in range(2):
         fname = item.download(key='MTL', path=self.path)
         assert os.path.exists(fname)
Example #13
0
 def items_by_id(cls, ids, collection):
     """ Return Items from collection with matching ids

     Args:
         ids: iterable of item id strings, each queried individually.
         collection: collection id used both to fetch the collection and to
             build the items URL under config.API_URL.

     Returns:
         Items holding one Item per id, with the collection attached.
     """
     # URLs always use '/', so join with posixpath: os.path.join would insert
     # the OS separator (a backslash on Windows) and build an invalid URL.
     # On POSIX the result is identical to the previous op.join call.
     import posixpath

     col = cls.collection(collection)
     base_url = posixpath.join(config.API_URL, 'collections', collection,
                               'items')
     items = [
         Item(cls.query(posixpath.join(base_url, item_id)))
         for item_id in ids
     ]
     return Items(items, collections=[col])
Example #14
0
 def test_publish(self):
     """ Publishing a copied catalog rewrites an item's self link onto the published root """
     publish_dir = os.path.join(self.path, 'test_publish')
     shutil.copytree(os.path.join(testpath, 'catalog'), publish_dir)
     catalog_file = os.path.join(publish_dir, 'catalog.json')
     cat = Catalog.open(catalog_file)
     cat.publish('https://my.cat')
     item_file = os.path.join(publish_dir, 'eo/landsat-8-l1/item.json')
     item = Item.open(item_file)
     self_link = item.links('self')[0]
     assert self_link == 'https://my.cat/eo/landsat-8-l1/item.json'
Example #15
0
 def test_open(self):
     """ Open an item and check its basic accessors against the raw data """
     item = Item.open(self.filename)
     raw = item.data
     # the datetime must split into exactly a date part and a time part
     date_part, _ = item.properties['datetime'].split('T')
     assert str(item.date) == date_part
     assert item.id == raw['id']
     assert item.geometry == item.data['geometry']
     assert str(item) == item.data['id']
     assert len(item.bbox) == 4
Example #16
0
 def test_add_item(self):
     """ An item added to a collection reports that collection as its parent """
     catalog_file = os.path.join(self.path, 'catalog.json')
     cat = Catalog.create(root='http://my.cat').save(catalog_file)
     landsat_dir = os.path.join(testpath, 'catalog/eo/landsat-8-l1')
     col = Collection.open(os.path.join(landsat_dir, 'catalog.json'))
     cat.add_catalog(col)
     item = Item.open(os.path.join(landsat_dir, 'item.json'))
     col.add_item(item)
     assert item.parent().id == 'landsat-8-l1'
Example #17
0
 def items_by_id(cls, ids, collection):
     """ Fetch the Items of a collection by id, skipping ids whose lookup raises SatSearchError """
     col = cls.collection(collection)
     base_url = urljoin(config.API_URL, 'collections/%s/items' % collection)
     fetched = []
     for item_id in ids:
         try:
             response = cls.query(urljoin(base_url, item_id))
             fetched.append(Item(response))
         except SatSearchError:
             # an id that cannot be retrieved is left out of the result
             continue
     return Items(fetched, collections=[col])
Example #18
0
    def items(self, limit=10000, page_limit=500, headers=None):
        """ Return all of the Items and Collections for this search

        Starts with a POST to the 'search' endpoint and then follows the
        'next' link of each response until no single next link is present
        or `limit` items have been collected.

        Args:
            limit: maximum number of items, used when self.limit is falsy.
            page_limit: 'limit' value sent with every request.
            headers: request headers; may be None.

        Returns:
            ItemCollection of the retrieved items plus the collections they
            reference (best effort: a failed collection lookup is logged and
            the collections gathered so far are returned).
        """
        found = self.found(headers=headers)
        limit = self.limit or limit
        if found > limit:
            logger.warning(
                'There are more items found (%s) than the limit (%s) provided.',
                found, limit)

        # Work on a copy: the original used self.kwargs itself as the first
        # request body and wrote 'limit' into it, permanently altering the
        # search parameters. Requests sent are the same as before.
        search_kwargs = dict(self.kwargs)
        search_kwargs['limit'] = page_limit

        nextlink = {
            'method': 'POST',
            'href': urljoin(self.url, 'search'),
            'headers': headers,
            'body': search_kwargs,
            'merge': False
        }

        items = []
        while nextlink and len(items) < limit:
            if nextlink.get('method', 'GET') == 'GET':
                resp = self.query(url=nextlink['href'],
                                  headers=headers,
                                  **search_kwargs)
            else:
                # copy so that dicts owned by the response or the caller are
                # never mutated
                _headers = nextlink.get('headers', {})
                if _headers is not None:
                    _headers = dict(_headers)
                _body = dict(nextlink.get('body') or {})
                _body['limit'] = page_limit

                if nextlink.get('merge', False):
                    # headers may be None (the default); update(None) raised
                    _headers = dict(_headers or {})
                    _headers.update(headers or {})
                    _body.update(search_kwargs)
                resp = self.query(url=nextlink['href'],
                                  headers=_headers,
                                  **_body)
            items += [Item(i) for i in resp['features']]
            links = [l for l in resp['links'] if l['rel'] == 'next']
            nextlink = links[0] if len(links) == 1 else None

        # retrieve collections
        collections = []
        try:
            collection_ids = {
                item._data['collection'] for item in items
                if 'collection' in item._data
            }
            for c in collection_ids:
                collections.append(self.collection(c, headers=headers))
        except Exception as err:
            # still best effort, but no longer silent (and no longer traps
            # KeyboardInterrupt/SystemExit like the bare except did)
            logger.warning('Unable to retrieve collections: %s', err)
        logger.debug(f"Found: {len(items)}")
        return ItemCollection(items, collections=collections)
Example #19
0
 def test_add_item_with_subcatalogs(self):
     """ Adding an item under a templated sub-path keeps its root and collection resolvable """
     subpath = '${landsat:path}/${landsat:row}/${date}'
     catalog_file = os.path.join(self.path, 'test_subcatalogs.json')
     cat = Catalog.create(root='http://my.cat').save(catalog_file)
     landsat_dir = os.path.join(testpath, 'catalog/eo/landsat-8-l1')
     col = Collection.open(os.path.join(landsat_dir, 'catalog.json'))
     cat.add_catalog(col)
     item = Item.open(os.path.join(landsat_dir, 'item.json'))

     col.add_item(item, path=subpath)
     assert item.root().id == cat.id
     assert item.collection().id == col.id

     # add again, this time going through the catalogs that now exist
     col.add_item(item, subpath)
     assert item.root().id == cat.id
Example #20
0
 def _test_download_paths(self):
     """ Testing of download paths and filenames

     Temporarily points config.DATADIR / config.FILENAME at templated
     values, downloads the MTL asset and checks the resulting path, then
     removes the downloaded directory.
     """
     item = Item.open(self.filename)
     datadir = config.DATADIR
     filename = config.FILENAME
     config.DATADIR = os.path.join(testpath, '${date}')
     config.FILENAME = '${date}_${id}'
     try:
         # the original called `scene.download`, but no `scene` exists in
         # this method; the opened object is `item`
         fname = item.download('MTL')
         _fname = os.path.join(testpath,
                               '2017-01-01/2017-01-01_testscene_MTL.txt')
         assert fname == _fname
         assert os.path.exists(fname)
     finally:
         # restore the module-level config even when an assertion fails, so
         # other tests do not inherit the templated values
         config.DATADIR = datadir
         config.FILENAME = filename
     shutil.rmtree(os.path.join(testpath, '2017-01-01'))
     assert not os.path.exists(fname)
Example #21
0
 def toSTAC(self, collectionDir: str) -> Item:
     """ Build an Item describing this object, filed as <collectionDir>/items/<id>.json

     The id is the basename of self.path; the items directory is created
     if it does not exist yet.
     """
     cid = os.path.basename(self.path)
     itemsDir = os.path.join(collectionDir, "items")
     stacFile = os.path.join(itemsDir, f"{cid}.json")
     os.makedirs(itemsDir, exist_ok=True)
     metadata = {
         'id': cid,
         'location': self.path,
         'time_range': [str(self.start_date), str(self.end_date)],
         'date_range': [self.start_time_value, self.end_time_value],
         'n_time_steps': self.size,
         'time_units': self.units,
         'variables': self.vars_list,
         'calendar': str(self.calendar),
         'base_date': str(self.base_date),
     }
     return Item(data=metadata, filename=stacFile)
Example #22
0
    def items(self, limit=10000):
        """ Return all of the Items and Collections for this search

        When the search parameters contain 'ids', the lookup is delegated to
        items_by_id and requires a collection 'eq' query. Otherwise pages are
        requested until min(found, limit) items have been collected or a
        page comes back empty.

        Args:
            limit: upper bound on the number of items requested.

        Returns:
            ItemCollection of items, their collections and the search used.

        Raises:
            SatSearchError: when searching by id without a collection.
        """
        _limit = 500
        if 'ids' in self.kwargs:
            col = self.kwargs.get('query', {}).get('collection',
                                                   {}).get('eq', None)
            if col is None:
                raise SatSearchError(
                    'Collection required when searching by id')
            return self.items_by_id(self.kwargs['ids'], col)

        items = []
        found = self.found()
        if found > limit:
            logger.warning(
                'There are more items found (%s) than the limit (%s) provided.',
                found, limit)
        maxitems = min(found, limit)
        kwargs = {'page': 1, 'limit': min(_limit, maxitems)}
        kwargs.update(self.kwargs)
        while len(items) < maxitems:
            features = self.query(**kwargs)['features']
            if not features:
                # The API returned fewer items than it reported as found;
                # without this guard the loop would request pages forever.
                break
            items += [Item(feature) for feature in features]
            kwargs['page'] += 1

        # retrieve collections
        collection_ids = {
            item.properties['collection'] for item in items
            if 'collection' in item.properties
        }
        collections = [self.collection(c) for c in collection_ids]

        search = {'endpoint': config.API_URL, 'parameters': self.kwargs}
        return ItemCollection(items, collections=collections, search=search)
Example #23
0
def update_collection(event, context):
    """ Add every STAC item carried in the event's records to the collection

    For each record the JSON body is parsed into an Item, added to the
    collection opened from COLLECTION_ROOT and saved. When
    NOTIFICATION_TOPIC is set, the item is also published to that SNS topic.
    A summary line is printed at the end.

    Args:
        event: dict with a 'Records' list; each record has a JSON 'body'.
        context: unused.
    """
    collection_root = os.getenv('COLLECTION_ROOT')
    # NOTE(review): 'PATH' is normally the executable search path, so `path`
    # is probably always set here -- confirm this variable name is intended.
    path = os.getenv('PATH')
    filename = os.getenv('FILENAME')

    records = event['Records']
    item_count = len(records)
    stac_links = []
    # Bound up front: with an empty 'Records' list the summary print below
    # used to fail on an unbound collection_name.
    collection_name = None

    for record in records:
        stac_item = json.loads(record['body'])

        print(stac_item)

        col = Collection.open(collection_root)
        collection_name = col.id
        item = Item(stac_item)
        kwargs = {'item': item}
        # 'a/b' becomes '$a/$b', i.e. every path component becomes a $-field
        if path:
            kwargs['path'] = '$' + '/$'.join(path.split('/'))
        if filename:
            kwargs['filename'] = '$' + '/$'.join(filename.split('/'))
        print(kwargs)
        col.add_item(**kwargs)
        col.save()

        stac_links.append(item.links('self')[0])

        # Send message to SNS Topic if enabled
        if NOTIFICATION_TOPIC:
            # separate name so the add_item kwargs are not clobbered
            sns_kwargs = utils.stac_to_sns(item.data)
            sns_kwargs.update({
                'TopicArn':
                f"arn:aws:sns:{REGION}:{ACCOUNT_ID}:{NOTIFICATION_TOPIC}"
            })
            sns_client.publish(**sns_kwargs)

    print(
        f"LOGS CollectionName: {collection_name}\tItemCount: {item_count}\tItemLinks: {stac_links}"
    )
Example #24
0
 def test_download_thumbnail(self):
     """ Download the item's thumbnail asset and check the file exists """
     item = Item.open(self.filename)
     thumbnail_file = item.download(key='thumbnail', path=self.path)
     assert os.path.exists(thumbnail_file)
Example #25
0
 def test_no_asset(self):
     """ Looking up an asset key that does not exist yields None """
     item = Item.open(self.filename)
     # identity check: `== None` could be satisfied by an object with a
     # permissive __eq__
     assert item.asset('no-such-asset') is None
Example #26
0
 def test_class_properties(self):
     """ links() resolves the first link href relative to the item's own file """
     item = Item.open(self.filename)
     first_href = item.data['links'][0]['href']
     expected = os.path.join(os.path.dirname(item.filename), first_href)
     assert os.path.abspath(item.links()[0]) == os.path.abspath(expected)
Example #27
0
 def test_open_with_collection(self):
     """ An opened item exposes its collection id and eo fields via item[...] """
     item = Item.open(self.filename)
     assert item.collection().id == 'landsat-8-l1'
     bands = item['eo:bands']
     assert len(bands) == 11
     assert item['eo:off_nadir'] == 0
Example #28
0
 def _test_create_derived(self):
     """ Derive one scene from two test scenes; date and c:id must match the first input """
     sources = [self.get_test_scene() for _ in range(2)]
     derived = Item.create_derived(sources)
     first = sources[0]
     assert derived.date == first.date
     assert derived['c:id'] == first['c:id']
Example #29
0
 def test_download_nonexist(self):
     """ Downloading an asset key the item does not have returns None """
     item = Item.open(self.filename)
     result = item.download(key='fake_asset', path=self.path)
     assert result is None
Example #30
0
def _grow_spatial_extent(collection, bbox):
    """ Expand collection.extent['spatial'] in place so it also covers bbox """
    try:
        # assumes [min_x, min_y, max_x, max_y] ordering (STAC bbox convention):
        # the first two bounds only shrink, the last two only grow. The
        # original compared all four with '<', so upper bounds never expanded.
        for idx, pick in enumerate((min, min, max, max)):
            collection.extent['spatial'][idx] = pick(
                collection.extent['spatial'][idx], bbox[idx])
    except Exception:
        # No usable spatial extent yet: start from this bbox. Copy it so later
        # in-place growth does not modify the item's own bbox list.
        collection.extent['spatial'] = list(bbox)


def _grow_temporal_extent(collection, item_datetime):
    """ Expand collection.extent['temporal'] in place so it also covers item_datetime """
    try:
        item_dt = load_datetime(item_datetime)
        min_dt = load_datetime(collection.extent['temporal'][0])
        max_dt = load_datetime(collection.extent['temporal'][1])
        if item_dt < min_dt:
            collection.extent['temporal'][0] = item_datetime
        if item_dt > max_dt:
            # the original looked up the misspelled key 'properites' here; the
            # resulting KeyError was swallowed and reset the whole extent
            collection.extent['temporal'][1] = item_datetime
    except Exception:
        # no usable temporal extent yet: start from this item's datetime
        collection.extent['temporal'] = [item_datetime, item_datetime]


def build_stac_catalog(id_list=None, verbose=False):
    """ Scrape the NOAA storm imagery, build a STAC catalog locally and sync it to S3

    Args:
        id_list: passed to the scraper as `ids` and to create_collections.
        verbose: sets NoaaStormCatalog.verbose.

    Side effects: creates temp directories under /data/, downloads archives,
    runs `aws s3 sync` for the catalog and the thumbnails, then calls
    cleanup on the /data/ prefix.
    """
    prefix = '/data/'
    tempdir = tempfile.mkdtemp(prefix=prefix)
    tempthumbs = tempfile.mkdtemp(prefix=prefix)

    print("Catalog tempdir: {}".format(tempdir))
    print("Thumbnails tempdir: {}".format(tempthumbs))

    NoaaStormCatalog.verbose = verbose

    print("Running web scraper.")
    with ScrapyRunner(NoaaStormCatalog) as runner:
        scraped_items = list(runner.execute(ids=id_list))
        # the first two scraped entries are the collections and the item
        # count; the count is not used but must be removed from the list
        collections = scraped_items.pop(0)
        scraped_items.pop(0)

        collections = create_collections(collections, scraped_items, id_list)

        # Build stac catalog locally
        root_catalog = Catalog.open(os.path.join(ROOT_URL, 'catalog.json'))
        root_catalog.save_as(filename=os.path.join(tempdir, 'catalog.json'))

        # NOAA Storm catalog
        os.mkdir(os.path.join(tempdir, 'NOAAStorm'))
        noaa_storm_cat = Catalog.open(
            os.path.join(ROOT_URL, 'NOAAStorm', 'catalog.json'))
        noaa_storm_cat.save_as(
            filename=os.path.join(tempdir, 'NOAAStorm', 'catalog.json'))

        print("Creating collections.")
        d = {}
        for collection in collections:
            coll = Collection(collection)
            noaa_storm_cat.add_catalog(coll)
            d[collection['id']] = coll

        # Setup directories for thumbnails
        thumbdir = os.path.join(tempthumbs, 'thumbnails')
        os.mkdir(thumbdir)
        for coll in d.values():
            coll_dir = os.path.join(thumbdir, coll.id)
            if not os.path.exists(coll_dir):
                os.mkdir(coll_dir)

        # Sort assets: the first matching suffix group decides the archive type
        archive_types = (
            (('_RGB.tar', ), RGBArchive),
            (('GCS_NAD83.tar', 'GCS_NAD83.zip'), JpegTilesArchive),
            (('Oblique.tar', 'Oblique.zip'), ObliqueArchive),
        )
        archive_assets = []
        for item in scraped_items:
            if 'archive' not in item:
                print("Found a JPG with disconnected world file")
                continue
            for suffixes, archive_cls in archive_types:
                if item['archive'].endswith(suffixes):
                    archive_assets.append(
                        archive_cls(
                            item,
                            os.path.join(thumbdir, d[item['event_name']].id)))
                    break

        # Download archives
        download_archives(archive_assets, prefix)

        print("Creating items and thumbnails.")
        # Add items
        for item in build_stac_items(archive_assets):
            collection = d[item['collection']]
            collection.add_item(Item(item), path='${date}', filename='${id}')

            # Update spatial and temporal extent of collection
            _grow_spatial_extent(collection, item['bbox'])
            _grow_temporal_extent(collection, item['properties']['datetime'])

    # Upload catalog to S3
    print("Uploading catalog to S3.")
    subprocess.call(f"aws s3 sync {tempdir} s3://cognition-disaster-data/",
                    shell=True)

    print("Uploading thumbnails to S3.")
    # Upload thumbnails to S3
    subprocess.call(
        f"aws s3 sync {thumbdir} s3://cognition-disaster-data/thumbnails/",
        shell=True)

    cleanup(prefix)