def stream_from_sqs(
    self,
    sqs_queue,
    product: Optional[OutputProduct] = None,
    visibility_timeout: int = 300,
    **kw,
) -> Iterator[Task]:
    # Imports are deferred so SQS support stays optional
    from odc.aws.queue import get_messages, get_queue

    from ._sqs import SQSWorkToken

    product = self._resolve_product(product)

    # Accept either a queue name or an already-resolved queue object
    if isinstance(sqs_queue, str):
        sqs_queue = get_queue(sqs_queue)

    for msg in get_messages(sqs_queue, visibility_timeout=visibility_timeout, **kw):
        # TODO: switch to JSON for SQS message body
        token = SQSWorkToken(msg, visibility_timeout)
        tidx = parse_task(msg.body)
        yield self.load_task(tidx, product, source=token)
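# --- Usage sketch (illustrative, not part of the original module) ---------
# One way stream_from_sqs() might be driven. `reader` and `process` are
# hypothetical stand-ins: `reader` is whatever object defines the method
# above, `process` is your own task handler. The done()/cancel() calls
# assume SQSWorkToken exposes methods along those lines for acknowledging
# or releasing the underlying SQS message; check ._sqs for the actual API.
def run_tasks_from_queue(reader, process, queue_name="stats-tasks"):
    for task in reader.stream_from_sqs(queue_name, visibility_timeout=600):
        try:
            process(task)
            task.source.done()  # acknowledge: deletes the SQS message
        except Exception:
            task.source.cancel()  # release: message becomes visible again
            raise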
def cli(queue, to_queue, limit, dryrun):
    """
    Redrives all the messages from the given SQS queue to the destination queue.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s",
        stream=sys.stdout,
    )
    _log = logging.getLogger(__name__)

    dead_queue = get_queue(queue)
    alive_queue = get_queue(to_queue)
    messages = get_messages(dead_queue)

    count = 0
    # SQS attributes come back as strings, so cast before comparing
    count_messages = int(dead_queue.attributes.get("ApproximateNumberOfMessages", 0))
    if count_messages == 0:
        _log.info("No messages to redrive")
        return

    if not dryrun:
        for message in messages:
            response = alive_queue.send_message(MessageBody=message.body)
            if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
                # Only delete from the source queue once the destination
                # has accepted the message
                message.delete()
                count += 1
                if limit and count >= limit:
                    break
            else:
                _log.error(f"Unable to send message {message} to queue")
        _log.info(f"Completed sending {count} messages to the queue")
    else:
        _log.warning(
            f"DRYRUN enabled, would have pushed approx {count_messages} messages to the queue"
        )
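# --- Usage sketch (illustrative, not part of the original module) ---------
# How this redrive command might be wired up with click. The command name,
# option names, and defaults here are assumptions for illustration, not
# necessarily what the real CLI registers.
import click

@click.command("redrive-to-queue")
@click.argument("queue")
@click.argument("to_queue")
@click.option("--limit", type=int, default=None, help="Stop after this many messages")
@click.option("--dryrun", is_flag=True, help="Report what would be sent without sending")
def redrive(queue, to_queue, limit, dryrun):
    cli(queue, to_queue, limit, dryrun)

# Invoked as: redrive-to-queue DEAD_QUEUE ALIVE_QUEUE --limit 100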
def get_tasks_from_queue(self, queue, limit, queue_timeout):
    """Retrieve messages from the named queue, yielding (AlchemistTask, SQS message) tuples"""
    alive_queue = get_queue(queue)
    messages = get_messages(alive_queue, limit, visibility_timeout=queue_timeout)

    for message in messages:
        message_body = json.loads(message.body)
        uuid = message_body.get("id", None)
        if uuid is None:
            # This is probably a message created from an SNS notification,
            # so the payload is double JSON-encoded
            message_body = json.loads(message_body["Message"])
            uuid = message_body.get("id", None)

        transform = message_body.get("transform", None)
        if transform and transform != self.transform_name:
            _LOG.error(
                f"Your transform doesn't match the transform in the message. Ignoring {uuid}"
            )
            continue

        task = self.generate_task_by_uuid(uuid)
        if task:
            yield task, message
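# --- Usage sketch (illustrative, not part of the original module) ---------
# Consuming the (task, message) pairs yielded above. `alchemist` stands in
# for the object that defines get_tasks_from_queue(), and `execute_task`
# for its task runner (both hypothetical names). Deleting the message only
# after the task succeeds is what returns failed work to the queue once the
# visibility timeout lapses.
import logging

def process_queue(alchemist, execute_task, queue_name, limit=10, queue_timeout=600):
    successes, failures = 0, 0
    for task, message in alchemist.get_tasks_from_queue(queue_name, limit, queue_timeout):
        try:
            execute_task(task)
            message.delete()  # acknowledge only after success
            successes += 1
        except Exception as err:
            logging.error(f"Task {task} failed: {err}")
            failures += 1
    return successes, failures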
def queue_to_odc(
    queue,
    dc: Datacube,
    products: list,
    record_path=None,
    transform=None,
    limit=None,
    update=False,
    archive=False,
    allow_unsafe=False,
    odc_metadata_link=False,
    region_code_list_uri=None,
    **kwargs,
) -> Tuple[int, int]:
    ds_success = 0
    ds_failed = 0

    region_codes = None
    if region_code_list_uri:
        try:
            region_codes = set(pd.read_csv(region_code_list_uri).values.ravel())
        except FileNotFoundError as e:
            logging.error(f"Could not find region_code file with error: {e}")
        # Guard against both a missing file (region_codes is still None)
        # and an empty list
        if not region_codes:
            raise SQStoDCException(
                f"Region code list is empty, please check the list at: {region_code_list_uri}"
            )

    doc2ds = Doc2Dataset(dc.index, products=products, **kwargs)

    # This is a generator of messages
    messages = get_messages(queue, limit)

    for message in messages:
        try:
            # Extract metadata from the message
            metadata = extract_metadata_from_message(message)
            if archive:
                # Archive the dataset rather than indexing it
                do_archiving(metadata, dc)
            else:
                if not record_path:
                    # Extract metadata and URI from a STAC or similar
                    # JSON structure for indexing
                    metadata, uri = handle_json_message(
                        metadata, transform, odc_metadata_link
                    )
                else:
                    # Extract metadata from an S3 bucket notification
                    # or similar for indexing
                    metadata, uri = handle_bucket_notification_message(
                        metadata, record_path
                    )

                # If we have a region_code filter, apply it here
                if region_code_list_uri:
                    region_code = dicttoolz.get_in(
                        ["properties", "odc:region_code"], metadata
                    )
                    if region_code not in region_codes:
                        # We don't want to keep this one, so delete the message
                        message.delete()
                        # ...and fail it
                        raise SQStoDCException(
                            f"Region code {region_code} not in list of allowed region codes, ignoring this dataset."
                        )

                # Index the dataset
                do_index_update_dataset(metadata, uri, dc, doc2ds, update, allow_unsafe)

            ds_success += 1
            # Success, so delete the message
            message.delete()
        except SQStoDCException as err:
            logging.error(err)
            ds_failed += 1

    return ds_success, ds_failed
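# --- Usage sketch (illustrative, not part of the original module) ---------
# Driving queue_to_odc() end to end. The queue and product names are
# placeholders; get_queue() is the same odc.aws.queue helper used above,
# and Datacube() connects with your usual datacube configuration.
import logging

from datacube import Datacube
from odc.aws.queue import get_queue

def index_from_queue():
    dc = Datacube()
    queue = get_queue("my-indexing-queue")  # hypothetical queue name
    added, failed = queue_to_odc(
        queue,
        dc,
        products=["my_product"],  # hypothetical product name
        update=False,
        allow_unsafe=False,
        limit=100,
    )
    logging.info(f"Indexed {added} datasets with {failed} failures")
    return added, failed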