def __init__(self, url_stac=None, bucket=None):
    """Wire up the AWS clients, DynamoDB tables, SQS queues and STAC client.

    Args:
        url_stac: Optional STAC server URL; when given, ``self.stac`` is created.
        bucket: Default S3 bucket name stored on ``self.bucket_name``.
    """
    session = boto3.Session(aws_access_key_id=AWS_KEY_ID,
                            aws_secret_access_key=AWS_SECRET_KEY)

    # AWS infrastructure: one client per service used by the builder.
    for attr, service in (('S3client', 's3'),
                          ('SQSclient', 'sqs'),
                          ('LAMBDAclient', 'lambda'),
                          ('Kinesisclient', 'kinesis')):
        setattr(self, attr, session.client(service))
    self.dynamoDBResource = session.resource('dynamodb')
    self.QueueUrl = {}
    self.bucket_name = bucket

    # Make sure the DynamoDB tables and SQS queues exist up-front.
    self.get_dynamo_tables()
    self.get_queue_url()

    # The STAC client is only instantiated when a server URL was provided;
    # otherwise ``self.stac`` is left unset, exactly as before.
    self.url_stac = url_stac
    if url_stac:
        self.stac = STAC(url_stac)
def __init__(self, bucket=None, stac_list=None):
    """Wire up AWS clients, DynamoDB tables, SQS queues and STAC clients.

    Args:
        bucket: Default S3 bucket name stored on ``self.bucket_name``.
        stac_list: Optional list of dicts describing STAC servers; each entry
            has at least ``url`` and optionally ``token``. For every entry an
            instantiated client is appended to ``self.stac_list`` under the
            ``instance`` key.
    """
    # FIX: the default used to be the mutable literal ``stac_list=[]``,
    # which is shared across all calls; ``None`` + a fallback is safe.
    self.session = session = boto3.Session(
        aws_access_key_id=AWS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_KEY)
    # ---------------------------
    # AWS infrastructure
    self.S3client = session.client('s3')
    self.SQSclient = session.client('sqs')
    self.LAMBDAclient = session.client('lambda')
    self.Kinesisclient = session.client('kinesis')
    self.dynamoDBResource = session.resource('dynamodb')
    self.bucket_name = bucket
    # ---------------------------
    # create / get DynamoDB tables
    self.tables = {}
    self.get_dynamo_tables()
    # ---------------------------
    # create / get the SQS
    self.queues = {}
    self.get_queues_url()
    # ---------------------------
    # init STAC instances, one client per configured catalog entry
    self.stac_list = []
    for stac in (stac_list or []):
        token = stac.get("token")
        # a falsy token ('' / None) means "no authentication", as before
        stac_instance = STAC(stac["url"], access_token=token) if token \
            else STAC(stac["url"])
        self.stac_list.append(dict(**stac, instance=stac_instance))
def __init__(self):
    """Wire up the AWS clients, DynamoDB tables, the SQS queue and STAC."""
    session = boto3.Session(
        aws_access_key_id=AWS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_KEY)

    # AWS infrastructure: one client per service used by the builder.
    for attr, service in (('S3client', 's3'),
                          ('SQSclient', 'sqs'),
                          ('LAMBDAclient', 'lambda'),
                          ('Kinesisclient', 'kinesis')):
        setattr(self, attr, session.client(service))
    self.dynamoDBResource = session.resource('dynamodb')

    # Resolved lazily by get_queue_url(); None means "not looked up yet".
    self.QueueUrl = None
    # HTTPS prefix used to build public object URLs for the bucket.
    self.prefix = 'https://s3.amazonaws.com/{}/'.format(BUCKET_NAME)

    # Make sure the DynamoDB tables and the SQS queue exist up-front.
    self.get_dynamo_tables()
    self.get_queue_url()

    # STAC client against the globally configured server.
    self.stac = STAC(URL_STAC)
def __init__(self, **kwargs):
    """Build STAC provider for Element84.

    ``access_token`` is consumed here for authentication; every remaining
    keyword argument is kept on ``self.kwargs`` (``progress`` is also
    mirrored on ``self.progress``).
    """
    access_token = kwargs.pop('access_token', None)
    self.kwargs = kwargs
    self.progress = kwargs.get('progress')
    self.api = STAC('https://earth-search.aws.element84.com/v0',
                    access_token=access_token)
def test_catalog():
    """Root catalog document exposes the mandatory STAC fields."""
    catalog_doc = STAC(url).catalog()
    for field in ('stac_version', 'id', 'description', 'links'):
        assert field in set(catalog_doc.keys())
def search_STAC(self, activity, extra_catalogs=None):
    """Search for activity in remote STAC server.

    Notes:
        By default, uses entire activity to search for data catalog.
        When the parameter ``extra_catalogs`` is set, this function will seek
        into the given catalogs and then merge the result as a single query.
        It may be useful if you have a collection in a different server
        provider.

    Args:
        activity (dict): Current activity scope with default STAC server and
            stac collection. **Make sure that ``bbox`` property is a GeoJSON
            Feature.
        extra_catalogs (List[dict]): Extra catalogs to seek for collection.
            Default is None.

    Returns:
        dict: Scenes grouped as ``{band: {dataset: {date: [scene, ...]}}}``.
    """
    # Touch the catalog attribute to validate the connection up-front.
    _ = self.stac.catalog

    bands = activity['bands']
    datasets = activity['datasets']
    bbox_feature = activity['bbox']
    time = '{}/{}'.format(activity['start'], activity['end'])

    scenes = {}
    filter_opts = dict(datetime=time, intersects=bbox_feature, limit=10000)

    for dataset in datasets:
        filter_opts['collections'] = [dataset]
        items = self.stac.search(filter=filter_opts)
        scenes.update(**self._parse_stac_result(items, dataset, bands,
                                                activity['quality_band']))

    if extra_catalogs:
        for catalog in extra_catalogs:
            stac_url = catalog['stac_url']
            stac_token = catalog.get('token')
            stac_dataset = catalog['dataset']
            filter_opts['collections'] = [stac_dataset]

            stac = STAC(stac_url, access_token=stac_token)
            items = stac.search(filter=filter_opts)
            res = self._parse_stac_result(items, stac_dataset, bands,
                                          activity['quality_band'])

            # FIX: this loop used to rebind the outer name ``datasets`` and
            # merged into ``scenes[band][dataset]`` using the stale loop
            # variable from the primary search — a KeyError (or NameError for
            # empty ``datasets``) when the band/dataset was absent, and the
            # wrong bucket whenever more than one primary dataset was given.
            # Merge under the extra catalog's own collection name instead.
            # NOTE(review): if an extra catalog mirrors a primary dataset
            # under a different remote name, callers may expect the scenes
            # merged under the primary name — confirm with call sites.
            for band, band_datasets in res.items():
                internal_dataset = next(iter(band_datasets))
                scenes.setdefault(band, {}).setdefault(stac_dataset, {}) \
                    .update(band_datasets[internal_dataset])

    return scenes
def _stac(self, collection: str, url: str, **kwargs) -> STAC:
    """Check if collection is provided by given STAC url.

    The provided STAC must follow the `SpatioTemporal Asset Catalogs spec
    <https://stacspec.org/>`_. Clients are cached per URL on
    ``self.cached_stacs``.

    Exceptions:
        RuntimeError for any exception during STAC connection.

    Args:
        collection: Collection name
        url - STAC URL
        **kwargs: Optional ``token`` used as the STAC access token.

    Returns:
        STAC client
    """
    try:
        token = kwargs.get('token')
        options = {'access_token': token} if token else {}

        client = self.cached_stacs.get(url)
        if not client:
            client = STAC(url, **options)

        # Touch the catalog and the requested collection to validate the
        # server before caching/returning the client.
        _ = client.catalog
        _ = client.collection(collection)

        self.cached_stacs.setdefault(url, client)
        return client
    except Exception as e:  # STAC Error
        raise RuntimeError('An error occurred in STAC {}'.format(str(e)))
def _stac(cls, collection: str, url: str) -> STAC:
    """Check if collection is provided by given STAC url.

    The provided STAC must follow the `SpatioTemporal Asset Catalogs spec
    <https://stacspec.org/>`_. Clients are cached per URL on
    ``cls.cached_stacs``.

    Exceptions:
        RuntimeError for any exception during STAC connection.

    Args:
        collection: Collection name
        url - STAC URL

    Returns:
        STAC client
    """
    try:
        client = cls.cached_stacs.get(url)
        if not client:
            client = STAC(url)

        # Touch the catalog and the requested collection to validate the
        # server before caching/returning the client.
        _ = client.catalog
        _ = client.collection(collection)

        cls.cached_stacs.setdefault(url, client)
        return client
    except Exception as e:  # STAC Error
        raise RuntimeError('An error occurred in STAC {}'.format(str(e)))
class CubeServices:
    """Facade over the AWS services used by the cube builder.

    Bundles the boto3 clients (S3, SQS, Lambda, Kinesis, DynamoDB) and an
    optional STAC client, and ensures the DynamoDB tables and per-action SQS
    queues the builder relies on exist.
    """

    def __init__(self, url_stac=None, bucket=None):
        # session = boto3.Session(profile_name='default')
        session = boto3.Session(aws_access_key_id=AWS_KEY_ID,
                                aws_secret_access_key=AWS_SECRET_KEY)
        # ---------------------------
        # AWS infrastructure
        self.S3client = session.client('s3')
        self.SQSclient = session.client('sqs')
        self.LAMBDAclient = session.client('lambda')
        self.Kinesisclient = session.client('kinesis')
        self.dynamoDBResource = session.resource('dynamodb')
        # one SQS queue URL per action name ('merge', 'blend', ...)
        self.QueueUrl = {}
        self.bucket_name = bucket
        # ---------------------------
        # create / get DynamoDB tables
        self.get_dynamo_tables()
        # ---------------------------
        # create / get the SQS
        self.get_queue_url()
        # ---------------------------
        # init STAC instance (only when a server URL was given; otherwise
        # ``self.stac`` stays unset)
        self.url_stac = url_stac
        if url_stac:
            self.stac = STAC(url_stac)

    def get_s3_prefix(self, bucket):
        """Return the ``s3://`` URI prefix for *bucket*."""
        # prefix = 'https://s3.amazonaws.com/{}/'.format(bucket)
        prefix = 's3://{}/'.format(bucket)
        return prefix

    ## ----------------------
    # DYNAMO DB
    def get_dynamo_tables(self):
        """Bind the DynamoDB tables, creating any table that is missing.

        Existence is probed by touching ``creation_date_time`` (EAFP): the
        attribute access raises when the table does not exist.
        """
        # Create the cubeBuilderActivities table in DynamoDB to store all activities
        self.activitiesTable = self.dynamoDBResource.Table(DYNAMO_TB_ACTIVITY)
        table_exists = False
        try:
            self.activitiesTable.creation_date_time
            table_exists = True
        except:  # NOTE(review): bare except — narrow to botocore ClientError
            table_exists = False
        if not table_exists:
            self.activitiesTable = self.dynamoDBResource.create_table(
                TableName=DYNAMO_TB_ACTIVITY,
                KeySchema=[{
                    'AttributeName': 'id',
                    'KeyType': 'HASH'
                }, {
                    'AttributeName': 'sk',
                    'KeyType': 'RANGE'
                }],
                AttributeDefinitions=[
                    {
                        'AttributeName': 'id',
                        'AttributeType': 'S'
                    },
                    {
                        'AttributeName': 'sk',
                        'AttributeType': 'S'
                    },
                ],
                BillingMode='PAY_PER_REQUEST',
            )
            # Wait until the table exists.
            self.dynamoDBResource.meta.client.get_waiter('table_exists').wait(
                TableName=DYNAMO_TB_ACTIVITY)

        # Create the cubeBuilderActivitiesControl table in DynamoDB to manage activities completion
        self.activitiesControlTable = self.dynamoDBResource.Table(
            DBNAME_TB_CONTROL)
        table_exists = False
        try:
            self.activitiesControlTable.creation_date_time
            table_exists = True
        except:  # NOTE(review): bare except — narrow to botocore ClientError
            table_exists = False
        if not table_exists:
            self.activitiesControlTable = self.dynamoDBResource.create_table(
                TableName=DBNAME_TB_CONTROL,
                KeySchema=[
                    {
                        'AttributeName': 'id',
                        'KeyType': 'HASH'
                    },
                ],
                AttributeDefinitions=[
                    {
                        'AttributeName': 'id',
                        'AttributeType': 'S'
                    },
                ],
                ProvisionedThroughput={
                    'ReadCapacityUnits': 2,
                    'WriteCapacityUnits': 2
                })
            # Wait until the table exists.
            self.dynamoDBResource.meta.client.get_waiter('table_exists').wait(
                TableName=DBNAME_TB_CONTROL)

        # Create the cubeBuilderActivitiesControl table in DynamoDB to manage activities completion
        self.processTable = self.dynamoDBResource.Table(DBNAME_TB_PROCESS)
        table_exists = False
        try:
            self.processTable.creation_date_time
            table_exists = True
        except:  # NOTE(review): bare except — narrow to botocore ClientError
            table_exists = False
        if not table_exists:
            self.processTable = self.dynamoDBResource.create_table(
                TableName=DBNAME_TB_PROCESS,
                KeySchema=[
                    {
                        'AttributeName': 'id',
                        'KeyType': 'HASH'
                    },
                ],
                AttributeDefinitions=[
                    {
                        'AttributeName': 'id',
                        'AttributeType': 'S'
                    },
                ],
                ProvisionedThroughput={
                    'ReadCapacityUnits': 2,
                    'WriteCapacityUnits': 2
                })
            # Wait until the table exists.
            self.dynamoDBResource.meta.client.get_waiter('table_exists').wait(
                TableName=DBNAME_TB_PROCESS)

    def get_activities(self):
        """Scan and return every row of the activities table."""
        # self.activitiesTable.meta.client.delete_table(TableName=DYNAMO_TB_ACTIVITY)
        return self.activitiesTable.scan()

    def get_activities_ctrl(self):
        """Scan and return every row of the activities-control table."""
        return self.activitiesControlTable.scan()

    def get_activities_by_key(self, dinamo_key):
        """Return activity rows whose partition key equals *dinamo_key*."""
        return self.activitiesTable.query(
            KeyConditionExpression=Key('id').eq(dinamo_key))

    def get_activity_item(self, query):
        """Fetch a single activity item by its full primary key dict."""
        return self.activitiesTable.get_item(Key=query)

    def get_process_by_id(self, process_id):
        """Return process rows whose partition key equals *process_id*."""
        return self.processTable.query(
            KeyConditionExpression=Key('id').eq(process_id))

    def get_process_by_datacube(self, datacube):
        """Scan the process table filtering rows by ``datacube`` attribute."""
        return self.processTable.scan(
            FilterExpression=Key('datacube').eq(datacube))

    def get_cube_meta(
            self,
            cube,
    ):
        """Scan merge activities (ids prefixed 'merge') for *cube*."""
        filters = Key('data_cube').eq(cube) & Key('id').begins_with('merge')

        return self.activitiesTable.scan(FilterExpression=filters, )

    def get_all_items(self, filters):
        """Scan the activities table with *filters*, following pagination.

        DynamoDB scans return at most 1 MB per page; this loops over
        ``LastEvaluatedKey`` until exhausted. ``Limit`` is effectively
        "no limit".
        """
        response = self.activitiesTable.scan(FilterExpression=filters,
                                             Limit=100000000)

        items = response['Items']

        while 'LastEvaluatedKey' in response:
            response = self.activitiesTable.scan(
                FilterExpression=filters,
                ExclusiveStartKey=response['LastEvaluatedKey'])
            items.extend(response['Items'])

        return items

    def get_merges(self, data_cube: str, tile_id: str, start: str, end: str):
        """List all merges activities used to build a data cube.

        Args:
            data_cube - Data cube name
            tile_id - Tile identifier to filter on
            start - Filter start date (inclusive, 'YYYY-MM-DD')
            end - Filter end date (inclusive, 'YYYY-MM-DD')
        """
        expression = Key('tile_id').eq(tile_id) & Key('period_start').between(start, end) & \
            Key('period_end').between(start, end) & Key('data_cube').eq(data_cube)

        return self.get_all_items(expression)

    def get_activities_by_datacube(self, data_cube: str):
        """List all activities used to build a data cube.

        Args:
            data_cube - Data cube name
        """
        expression = Key('data_cube').eq(data_cube)

        return self.get_all_items(expression)

    def put_activity(self, activity):
        """Persist an activity row; the full dict is also stored as JSON."""
        self.activitiesTable.put_item(
            Item={
                'id': activity['dynamoKey'],
                'sk': activity['sk'],
                'tile_id': activity['tileid'],
                'period_start': activity['start'],
                'period_end': activity['end'],
                'data_cube': activity['datacube'],
                'mystatus': activity['mystatus'],
                'mylaunch': activity['mylaunch'],
                'mystart': activity['mystart'],
                'myend': activity['myend'],
                'efficacy': activity['efficacy'],
                'cloudratio': activity['cloudratio'],
                'instancesToBeDone': activity['instancesToBeDone'],
                'totalInstancesToBeDone': activity['totalInstancesToBeDone'],
                'activity': json.dumps(activity),
            })
        return True

    def put_process_table(self, key, datacube_id, i_datacube_id, infos):
        """Persist a process row linking the regular/irregular cube ids."""
        self.processTable.put_item(
            Item={
                'id': key,
                'datacube_id': datacube_id,
                'irregular_datacube_id': i_datacube_id,
                'infos': infos
            })
        return True

    def put_control_table(self, key, value):
        """Store the completion counter *value* under *key*."""
        self.activitiesControlTable.put_item(Item={
            'id': key,
            'mycount': value,
        })
        return True

    def remove_control_by_key(self, key: str):
        """Best-effort delete of a control row; returns False on failure."""
        try:
            self.activitiesControlTable.delete_item(Key=dict(id=key))
            return True
        except:  # NOTE(review): bare except — narrow to botocore ClientError
            return False

    def remove_process_by_key(self, key: str):
        """Best-effort delete of a process row; returns False on failure."""
        try:
            self.processTable.delete_item(Key=dict(id=key))
            return True
        except:  # NOTE(review): bare except — narrow to botocore ClientError
            return False

    def update_control_table(self, Key, UpdateExpression,
                             ExpressionAttributeNames,
                             ExpressionAttributeValues, ReturnValues):
        """Thin pass-through to DynamoDB ``update_item`` on the control table.

        NOTE(review): the parameter ``Key`` shadows
        ``boto3.dynamodb.conditions.Key`` inside this method.
        """
        return self.activitiesControlTable.update_item(
            Key=Key,
            UpdateExpression=UpdateExpression,
            ExpressionAttributeNames=ExpressionAttributeNames,
            ExpressionAttributeValues=ExpressionAttributeValues,
            ReturnValues=ReturnValues)

    ## ----------------------
    # SQS
    def get_queue_url(self):
        """Resolve (or create) one SQS queue per builder action.

        Fills ``self.QueueUrl[action]``; queues already resolved are skipped.
        Always returns True.
        """
        for action in ['merge', 'blend', 'posblend', 'publish']:
            queue = '{}-{}'.format(QUEUE_NAME, action)
            if self.QueueUrl.get(action, None) is not None:
                continue
            response = self.SQSclient.list_queues()
            q_exists = False
            if 'QueueUrls' in response:
                for qurl in response['QueueUrls']:
                    # substring match against the listed queue URLs
                    if qurl.find(queue) != -1:
                        q_exists = True
                        self.QueueUrl[action] = qurl
            if not q_exists:
                self.create_queue(True, action)
        return True

    def create_queue(self, create_mapping=False, action=''):
        """Create the SQS queue for *action* and optionally map it to Lambda.

        As the influx of messages to a queue increases, AWS Lambda
        automatically scales up polling activity until the number of
        concurrent function executions reaches 1000, the account concurrency
        limit, or the (optional) function concurrency limit, whichever is
        lower. Amazon Simple Queue Service supports an initial burst of 5
        concurrent function invocations and increases concurrency by 60
        concurrent invocations per minute. So, for example, if 1000 messages
        arrive at the queue at once, only 5 will be processed in the first
        minute, 65 lambdas will run concurrently in the second minute... and
        so on.
        """
        # Create a SQS for this experiment
        queue = '{}-{}'.format(QUEUE_NAME, action)
        response = self.SQSclient.create_queue(
            QueueName=queue, Attributes={'VisibilityTimeout': '500'})
        self.QueueUrl[action] = response['QueueUrl']
        # Get attributes
        attributes = self.SQSclient.get_queue_attributes(
            QueueUrl=self.QueueUrl[action], AttributeNames=[
                'All',
            ])
        QueueArn = attributes['Attributes']['QueueArn']

        # Create Source Mapping to Maestro from queue
        if create_mapping:
            response = self.LAMBDAclient.create_event_source_mapping(
                EventSourceArn=QueueArn,
                FunctionName=LAMBDA_FUNCTION_NAME,
                Enabled=True,
                BatchSize=1)

    def send_to_sqs(self, activity):
        """Send *activity* as JSON to the queue matching its 'action'."""
        if self.get_queue_url():
            action = activity['action']
            self.SQSclient.send_message(QueueUrl=self.QueueUrl[action],
                                        MessageBody=json.dumps(activity))

    ## ----------------------
    # Kinesis
    def put_item_kinesis(self, activity):
        """Route *activity* through Kinesis, tagging its channel/db first."""
        activity['channel'] = 'kinesis'
        activity['db'] = 'dynamodb'
        status = self.sendToKinesis(activity)
        # restore the caller's dict — the tags are transport-only
        del activity['channel']
        del activity['db']
        return status

    def sendToKinesis(self, activity):
        """Put *activity* as a JSON record on the Kinesis stream."""
        self.Kinesisclient.put_record(StreamName=KINESIS_NAME,
                                      Data=json.dumps(activity),
                                      PartitionKey='dsKinesis')
        return True

    ## ----------------------
    # STAC
    def get_collection_stac(self, collection_id):
        """Return the STAC collection document for *collection_id*."""
        _ = self.stac.catalog
        return self.stac.collection(collection_id)

    def _parse_stac_result(self, items, dataset, bands, quality_band):
        """Group STAC *items* as ``{band: {dataset: {date: [scene, ...]}}}``.

        Only features whose assets contain one of *bands* are kept. For
        MOD13Q1 the quality band link is rewritten to the 'reliability'
        asset.
        """
        scenes = dict()

        for f in items['features']:
            if f['type'] == 'Feature':
                id = f['id']
                date = f['properties']['datetime']

                # Get file link and name
                assets = f['assets']
                for band in bands:
                    band_obj = assets.get(band, None)
                    if not band_obj:
                        continue

                    scenes[band] = scenes.get(band, {})
                    scenes[band][dataset] = scenes[band].get(dataset, {})

                    scene = {}
                    scene['sceneid'] = id
                    scene['date'] = date
                    scene['band'] = band
                    scene['link'] = band_obj['href']

                    if dataset == 'MOD13Q1' and band == quality_band:
                        scene['link'] = scene['link'].replace(
                            quality_band, 'reliability')

                    # TODO: verify if scene['link'] exist

                    if date not in scenes[band][dataset]:
                        scenes[band][dataset][date] = []
                    scenes[band][dataset][date].append(scene)

        return scenes

    def search_STAC(self, activity, extra_catalogs=None):
        """Search for activity in remote STAC server.

        Notes:
            By default, uses entire activity to search for data catalog.

            When the parameter ``extra_catalog`` is set, this function will seek
            into given catalogs and then merge the result as a single query server.
            It may be useful if you have a collection in different server provider.

        Args:
            activity (dict): Current activity scope with default STAC server and stac collection
                **Make sure that ``bbox`` property is a GeoJSON Feature.
            extra_catalogs (List[dict]): Extra catalogs to seek for collection. Default is None.
        """
        # Get DATACUBE params
        _ = self.stac.catalog
        bands = activity['bands']
        datasets = activity['datasets']
        bbox_feature = activity['bbox']
        time = '{}/{}'.format(activity['start'], activity['end'])

        scenes = {}
        filter_opts = dict(datetime=time, intersects=bbox_feature,
                           limit=10000)

        for dataset in datasets:
            filter_opts['collections'] = [dataset]
            items = self.stac.search(filter=filter_opts)

            scenes.update(**self._parse_stac_result(items, dataset, bands,
                                                    activity['quality_band']))

        if extra_catalogs:
            for catalog in extra_catalogs:
                stac_url = catalog['stac_url']
                stac_token = catalog.get('token')
                stac_dataset = catalog['dataset']
                filter_opts['collections'] = [stac_dataset]

                stac = STAC(stac_url, access_token=stac_token)
                items = stac.search(filter=filter_opts)
                res = self._parse_stac_result(items, stac_dataset, bands,
                                              activity['quality_band'])

                # NOTE(review): this loop rebinds the outer ``datasets`` and
                # uses the stale loop variable ``dataset`` from the primary
                # search as merge key — looks wrong for multi-dataset
                # activities; confirm intended merge target.
                for band, datasets in res.items():
                    internal_dataset = list(datasets.keys())[0]
                    scenes[band][dataset].update(datasets[internal_dataset])

        return scenes

    ## ----------------------
    # S3
    def create_bucket(self, name, requester_pay=True):
        """Create a public-read bucket, optionally requester-pays.

        Returns True on success, False on any ClientError.
        """
        try:
            # Create a bucket with public access
            response = self.S3client.create_bucket(ACL='public-read',
                                                   Bucket=name)
            if requester_pay:
                response = self.S3client.put_bucket_request_payment(
                    Bucket=name,
                    RequestPaymentConfiguration={'Payer': 'Requester'})
            assert response['ResponseMetadata']['HTTPStatusCode'] == 200
            return True
        except ClientError:
            return False
        # NOTE(review): unreachable — both branches above return
        return True

    def s3_file_exists(self, bucket_name=None, key=''):
        """HEAD the object; returns the response dict or False if missing."""
        try:
            if not bucket_name:
                bucket_name = self.bucket_name
            return self.S3client.head_object(Bucket=bucket_name, Key=key)
        except ClientError:
            return False

    def get_object(self, key, bucket_name=None):
        """Fetch an object; unlike the other helpers, no bucket fallback."""
        return self.S3client.get_object(Bucket=bucket_name, Key=key)

    def delete_file_S3(self, bucket_name=None, key=''):
        """Delete an object; returns False on ClientError, True otherwise."""
        try:
            if not bucket_name:
                bucket_name = self.bucket_name
            self.S3client.delete_object(Bucket=bucket_name, Key=key)
        except ClientError:
            return False
        return True

    def save_file_S3(self, bucket_name=None, key='', activity={}):
        """Serialize *activity* as UTF-8 JSON and put it under *key*.

        NOTE(review): ``activity={}`` is a mutable default — harmless here
        since it is only read, but worth fixing.
        """
        if not bucket_name:
            bucket_name = self.bucket_name
        return self.S3client.put_object(
            Bucket=bucket_name,
            Key=key,
            Body=(bytes(json.dumps(activity).encode('UTF-8'))))

    def upload_file_S3(self, memfile, key, args, bucket_name=None):
        """Upload a file from local path *memfile* with ExtraArgs *args*."""
        if not bucket_name:
            bucket_name = self.bucket_name
        return self.S3client.upload_file(memfile,
                                         Bucket=bucket_name,
                                         Key=key,
                                         ExtraArgs=args)

    def upload_fileobj_S3(self, memfile, key, args, bucket_name=None):
        """Upload a file-like object *memfile* with ExtraArgs *args*."""
        if not bucket_name:
            bucket_name = self.bucket_name
        return self.S3client.upload_fileobj(memfile,
                                            Bucket=bucket_name,
                                            Key=key,
                                            ExtraArgs=args)

    def list_repositories(self):
        """Return the names of every bucket visible to this account."""
        return [
            bucket['Name']
            for bucket in self.S3client.list_buckets()['Buckets']
        ]
class CubeServices:
    """Facade over the AWS services used by the cube builder (single-queue
    variant).

    Bundles the boto3 clients (S3, SQS, Lambda, Kinesis, DynamoDB) and a STAC
    client against the globally configured server; ensures the two DynamoDB
    tables and the single SQS queue (plus its DLQ) exist.
    """

    def __init__(self):
        # session = boto3.Session(profile_name='africa')
        session = boto3.Session(
            aws_access_key_id=AWS_KEY_ID,
            aws_secret_access_key=AWS_SECRET_KEY)

        # ---------------------------
        # AWS infrastructure
        self.S3client = session.client('s3')
        self.SQSclient = session.client('sqs')
        self.LAMBDAclient = session.client('lambda')
        self.Kinesisclient = session.client('kinesis')
        self.dynamoDBResource = session.resource('dynamodb')
        # single queue URL, resolved lazily (None = not looked up yet)
        self.QueueUrl = None
        # HTTPS prefix used to build public object URLs
        self.prefix = 'https://s3.amazonaws.com/{}/'.format(BUCKET_NAME)
        #self.prefix = 's3//{}/'.format(BUCKET_NAME)

        # ---------------------------
        # create / get DynamoDB tables
        self.get_dynamo_tables()

        # ---------------------------
        # create / get the SQS
        self.get_queue_url()

        # ---------------------------
        # init STAC instance
        self.stac = STAC(URL_STAC)

    ## ----------------------
    # DYNAMO DB
    def get_dynamo_tables(self):
        """Bind the DynamoDB tables, creating any that is missing.

        Existence is probed by touching ``creation_date_time`` (EAFP): the
        attribute access raises when the table does not exist.
        """
        # Create the cubeBuilderActivities table in DynamoDB to store all activities
        self.activitiesTable = self.dynamoDBResource.Table(DYNAMO_TB_ACTIVITY)
        table_exists = False
        try:
            self.activitiesTable.creation_date_time
            table_exists = True
        except:  # NOTE(review): bare except — narrow to botocore ClientError
            table_exists = False
        if not table_exists:
            self.activitiesTable = self.dynamoDBResource.create_table(
                TableName=DYNAMO_TB_ACTIVITY,
                KeySchema=[
                    {'AttributeName': 'id', 'KeyType': 'HASH' },
                    {'AttributeName': 'sk', 'KeyType': 'RANGE'}
                ],
                AttributeDefinitions=[
                    {'AttributeName': 'id','AttributeType': 'S'},
                    {'AttributeName': 'sk','AttributeType': 'S'},
                ],
                BillingMode='PAY_PER_REQUEST',
            )
            # Wait until the table exists.
            self.dynamoDBResource.meta.client.get_waiter('table_exists').wait(TableName=DYNAMO_TB_ACTIVITY)

        # Create the cubeBuilderActivitiesControl table in DynamoDB to manage activities completion
        self.activitiesControlTable = self.dynamoDBResource.Table(DBNAME_TB_CONTROL)
        table_exists = False
        try:
            self.activitiesControlTable.creation_date_time
            table_exists = True
        except:  # NOTE(review): bare except — narrow to botocore ClientError
            table_exists = False
        if not table_exists:
            self.activitiesControlTable = self.dynamoDBResource.create_table(
                TableName=DBNAME_TB_CONTROL,
                KeySchema=[
                    {'AttributeName': 'id', 'KeyType': 'HASH' },
                ],
                AttributeDefinitions=[
                    {'AttributeName': 'id','AttributeType': 'S'},
                ],
                ProvisionedThroughput={
                    'ReadCapacityUnits': 2,
                    'WriteCapacityUnits': 2
                }
            )
            # Wait until the table exists.
            self.dynamoDBResource.meta.client.get_waiter('table_exists').wait(TableName=DBNAME_TB_CONTROL)

    def get_activities(self):
        """Scan and return every row of the activities table."""
        return self.activitiesTable.scan()

    def get_activities_ctrl(self):
        """Scan and return every row of the activities-control table."""
        return self.activitiesControlTable.scan()

    def get_activities_by_key(self, dinamo_key):
        """Return activity rows whose partition key equals *dinamo_key*."""
        return self.activitiesTable.query(
            KeyConditionExpression=Key('id').eq(dinamo_key)
        )

    def get_activity_item(self, query):
        """Fetch a single activity item by its full primary key dict."""
        return self.activitiesTable.get_item(
            Key=query
        )

    def put_activity(self, activity):
        """Persist an activity row; the full dict is also stored as JSON."""
        self.activitiesTable.put_item(
            Item = {
                'id': activity['dynamoKey'],
                'sk': activity['sk'],
                'mystatus': activity['mystatus'],
                'mylaunch': activity['mylaunch'],
                'mystart': activity['mystart'],
                'myend': activity['myend'],
                'efficacy': activity['efficacy'],
                'cloudratio': activity['cloudratio'],
                'instancesToBeDone': activity['instancesToBeDone'],
                'totalInstancesToBeDone': activity['totalInstancesToBeDone'],
                'activity': json.dumps(activity),
            }
        )
        return True

    def put_control_table(self, key, value):
        """Store the completion counter *value* under *key*."""
        self.activitiesControlTable.put_item(
            Item = {
                'id': key,
                'mycount': value,
            }
        )
        return True

    def update_control_table(self, Key, UpdateExpression, ExpressionAttributeNames, ExpressionAttributeValues, ReturnValues):
        """Thin pass-through to DynamoDB ``update_item`` on the control table.

        NOTE(review): the parameter ``Key`` shadows
        ``boto3.dynamodb.conditions.Key`` inside this method.
        """
        return self.activitiesControlTable.update_item(
            Key=Key,
            UpdateExpression=UpdateExpression,
            ExpressionAttributeNames=ExpressionAttributeNames,
            ExpressionAttributeValues=ExpressionAttributeValues,
            ReturnValues=ReturnValues
        )

    ## ----------------------
    # SQS
    def get_queue_url(self):
        """Resolve (or create) the SQS queue URL, skipping the DLQ.

        Caches the URL on ``self.QueueUrl``; always returns True.
        """
        if self.QueueUrl is not None:
            return True
        response = self.SQSclient.list_queues()
        q_exists = False
        if 'QueueUrls' in response:
            for qurl in response['QueueUrls']:
                # substring match; the 'DLQ' check excludes the dead-letter queue
                if qurl.find(QUEUE_NAME) != -1:
                    if qurl.find('DLQ') == -1:
                        q_exists = True
                        self.QueueUrl = qurl
        if not q_exists:
            self.create_queue(True)
        return True

    def create_queue(self, create_mapping = False):
        """Create the queue, its DLQ and (optionally) the Lambda mapping.

        As the influx of messages to a queue increases, AWS Lambda
        automatically scales up polling activity until the number of
        concurrent function executions reaches 1000, the account concurrency
        limit, or the (optional) function concurrency limit, whichever is
        lower. Amazon Simple Queue Service supports an initial burst of 5
        concurrent function invocations and increases concurrency by 60
        concurrent invocations per minute. So, for example, if 1000 messages
        arrive at the queue at once, only 5 will be processed in the first
        minute, 65 lambdas will run concurrently in the second minute... and
        so on.
        """
        # Create a SQS for this experiment
        response = self.SQSclient.create_queue(
            QueueName=QUEUE_NAME,
            Attributes={'VisibilityTimeout': '500'}
        )
        self.QueueUrl = response['QueueUrl']
        # Get attributes
        attributes = self.SQSclient.get_queue_attributes(QueueUrl=self.QueueUrl, AttributeNames=['All',])
        QueueArn = attributes['Attributes']['QueueArn']

        # Create a DLQ for this experiment
        response = self.SQSclient.create_queue(QueueName=QUEUE_NAME+'DLQ',
            Attributes={
                'VisibilityTimeout': '500'
            }
        )
        DLQueueUrl = response['QueueUrl']
        # Get attributes of DLQ
        attributes = self.SQSclient.get_queue_attributes(QueueUrl=DLQueueUrl, AttributeNames=['All',])
        DLQueueArn = attributes['Attributes']['QueueArn']

        # messages that fail once are routed to the DLQ
        redrive_policy = {
            'deadLetterTargetArn': DLQueueArn,
            'maxReceiveCount': '1'
        }
        # Configure queue to send messages to dead letter queue
        self.SQSclient.set_queue_attributes(
            QueueUrl=self.QueueUrl,
            Attributes={
                'RedrivePolicy': json.dumps(redrive_policy)
            }
        )

        # Create Source Mapping to Maestro from queue
        if create_mapping:
            response = self.LAMBDAclient.create_event_source_mapping(
                EventSourceArn=QueueArn,
                FunctionName=LAMBDA_FUNCTION_NAME,
                Enabled=True,
                BatchSize=1
            )

    def send_to_sqs(self, activity):
        """Send *activity* as JSON to the builder queue."""
        if self.get_queue_url():
            self.SQSclient.send_message(QueueUrl=self.QueueUrl, MessageBody=json.dumps(activity))

    ## ----------------------
    # Kinesis
    def put_item_kinesis(self, activity):
        """Route *activity* through Kinesis, tagging its channel/db first."""
        activity['channel'] = 'kinesis'
        activity['db'] = 'dynamodb'
        status = self.sendToKinesis(activity)
        # restore the caller's dict — the tags are transport-only
        del activity['channel']
        del activity['db']
        return status

    def sendToKinesis(self, activity):
        """Put *activity* as a JSON record on the Kinesis stream."""
        self.Kinesisclient.put_record(
            StreamName=KINESIS_NAME,
            Data=json.dumps(activity),
            PartitionKey='dsKinesis'
        )
        return True

    ## ----------------------
    # STAC
    def get_collection_stac(self, collection_id):
        """Return the STAC collection document for *collection_id*."""
        _ = self.stac.catalog
        return self.stac.collection(collection_id)

    def search_STAC(self, activity):
        """Search scenes for *activity*, grouped as band/dataset/date.

        MOD13Q1 quality links are rewritten to the 'reliability' asset and
        verified to exist in the archive bucket before being kept.
        """
        # Get DATACUBE params
        _ = self.stac.catalog
        bands = activity['bands']
        datasets = activity['datasets']
        bbox = activity['bbox']
        time = '{}/{}'.format(activity['start'], activity['end'])

        bucket_archive_name = 'bdc-archive'

        scenes = {}
        for dataset in datasets:
            filter_opts = dict(
                time=time,
                bbox=bbox,
                limit=10000
            )
            items = self.stac.collection(dataset).get_items(filter=filter_opts)

            for f in items['features']:
                if f['type'] == 'Feature':
                    id = f['id']
                    date = f['properties']['datetime']

                    # Get file link and name
                    assets = f['assets']
                    for band in bands:
                        band_obj = assets.get(band, None)
                        if not band_obj:
                            continue

                        scenes[band] = scenes.get(band, {})
                        scenes[band][dataset] = scenes[band].get(dataset, {})

                        scene = {}
                        scene['sceneid'] = id
                        scene['date'] = date
                        scene['band'] = band
                        scene['link'] = band_obj['href']

                        if dataset == 'MOD13Q1' and band == 'quality':
                            scene['link'] = scene['link'].replace('quality','reliability')

                        # verify the rewritten key actually exists in the archive
                        try:
                            key = scene['link'].replace('s3://{}/'.format(bucket_archive_name),'')
                            self.s3fileExists(bucket_name=bucket_archive_name, key=key)
                        except ClientError:
                            print('STAC key not found {} - link {}'.format(key, scene['link']))
                            break

                        if date not in scenes[band][dataset]:
                            scenes[band][dataset][date] = []
                        scenes[band][dataset][date].append(scene)
        return scenes

    ## ----------------------
    # S3
    def s3fileExists(self, bucket_name=BUCKET_NAME, key=''):
        """Return True when the object exists, False on ClientError."""
        try:
            self.S3client.head_object(Bucket=bucket_name, Key=key)
        except ClientError:
            return False
        return True

    def save_file_S3(self, key, activity):
        """Serialize *activity* as UTF-8 JSON and put it under *key*."""
        return self.S3client.put_object(
            Bucket=BUCKET_NAME,
            Key=key,
            Body=(bytes(json.dumps(activity).encode('UTF-8')))
        )

    def upload_file_S3(self, memfile, key, args):
        """Upload a file from local path *memfile* with ExtraArgs *args*."""
        return self.S3client.upload_file(
            memfile,
            Bucket=BUCKET_NAME,
            Key=key,
            ExtraArgs=args
        )

    def upload_fileobj_S3(self, memfile, key, args):
        """Upload a file-like object *memfile* with ExtraArgs *args*."""
        return self.S3client.upload_fileobj(
            memfile,
            Bucket=BUCKET_NAME,
            Key=key,
            ExtraArgs=args
        )
dbasen = datetime.datetime(year + 1, monthf, dayf) while dbase < dbasen: dstart = dbase dend = dbase + td_time_step - datetime.timedelta(days=1) basedate = dbase.strftime('%Y-%m-%d') start_date = dstart.strftime('%Y-%m-%d') end_date = dend.strftime('%Y-%m-%d') periodkey = basedate + '_' + start_date + '_' + end_date requested_period = [] requested_periods[basedate] = requested_period requested_periods[basedate].append(periodkey) dbase += td_time_step return requested_periods stac_cli = STAC(Config.STAC_URL) class Maestro: datacube = None bands = [] tiles = [] mosaics = dict() def __init__(self, datacube: str, collections: List[str], tiles: List[str], start_date: str, end_date: str, **properties): self.params = dict(datacube=datacube, collections=collections, tiles=tiles, start_date=start_date, end_date=end_date)
class EarthSearch(BaseProvider):
    """Define a simple abstraction of Provider for Element84.

    It was designed to download Sentinel-2 COGS from
    `Sentinel-2 Cloud-Optimized GeoTIFFs <https://registry.opendata.aws/sentinel-2-l2a-cogs/>`_
    """

    def __init__(self, **kwargs):
        """Build STAC provider for Element84."""
        # access_token is consumed here; the remaining kwargs are kept as-is
        access_token = kwargs.pop('access_token', None)
        self.kwargs = kwargs
        self.api = STAC('https://earth-search.aws.element84.com/v0',
                        access_token=access_token)
        self.progress = kwargs.get('progress')

    def search(self, query, *args, **kwargs) -> List[SceneResult]:
        """Search for scenes in STAC.

        Args:
            query: Collection name to search in.
            **kwargs: Optional ``start_date``/``end_date`` (combined into a
                time range — assumes ``end_date`` is present whenever
                ``start_date`` is; TODO confirm) and ``bbox``.
        """
        options = dict()
        if 'start_date' in kwargs:
            options[
                'time'] = f'{kwargs.get("start_date")}/{kwargs.get("end_date")}'
        if 'bbox' in kwargs:
            options['intersects'] = mapping(box(*kwargs['bbox']))
        options['collection'] = query

        res = self.api.search(filter=options)
        # TODO: Implement next page as iterator or check stac.py support
        return [
            SceneResult(scene_id=f['properties']['sentinel:product_id'],
                        cloud_cover=f['properties']['sentinel:cloud_cover'],
                        **f) for f in res['features']
        ]

    @staticmethod
    def _guess_parser(scene_id: str):
        """Get the supported parser for Scene."""
        # Sentinel-2 ids start with 'S2'; everything else is treated as Landsat
        if scene_id.startswith('S2'):
            return Sentinel2Scene(scene_id)
        return LandsatScene(scene_id)

    def download(self, scene_id: str, *args, **kwargs) -> str:
        """Download files from STAC Element 84.

        Args:
            scene_id: Scene identifier to download.
            **kwargs: Requires ``output`` (destination directory) and
                ``dataset`` (collection name).

        Raises:
            RuntimeError: when the derived item id is not found in the
                collection.
        """
        output = kwargs['output']
        collection = kwargs['dataset']

        parsed = self._guess_parser(scene_id)

        stac_collection = self.api.collection(collection)

        # item ids follow '<source>_<tile>_<YYYYMMDD>_0_<product>'
        product = parsed.fragments[1][-3:]
        item_id = f'{parsed.source()}_{parsed.tile_id()}_{parsed.sensing_date().strftime("%Y%m%d")}_0_{product}'

        feature = stac_collection.get_items(item_id=item_id)

        # a 'code' entry in the response signals an API error / not-found
        if feature.get('code'):
            raise RuntimeError(
                f'Scene {scene_id} not found for collection {collection}.')

        with TemporaryDirectory() as tmp:
            # NOTE(review): tmp_path is never created as a directory before
            # _download writes into it — confirm download_stream creates
            # parent directories.
            tmp_path = Path(tmp) / item_id
            for asset_name, asset in feature['assets'].items():
                self._download(asset['href'], str(tmp_path))
            shutil.move(str(tmp_path), output)
        return output

    def _download(self, link, output):
        """Download asset from STAC."""
        file_name = Path(link).name
        path = Path(output) / file_name
        response = requests.get(link, stream=True, timeout=90)
        download_stream(str(path), response, progress=self.progress)
def test_conformance():
    """Conformance endpoint declares what the service conforms to."""
    conformance_doc = STAC(url).conformance()
    assert 'conformsTo' in conformance_doc
def test_creation():
    """Client keeps the URL it was created with."""
    client = STAC(url)
    # the stored URL must occur exactly once inside the configured one
    assert url.count(client.url) == 1