def redrive_to_queue(queue, to_queue, limit, dryrun):
    """
    Redrive all messages from the given SQS queue to the destination queue
    """
    dead_queue = get_queue(queue)
    alive_queue = get_queue(to_queue)
    messages = get_messages(dead_queue)

    count = 0
    # ApproximateNumberOfMessages is returned as a string, so convert before comparing
    count_messages = int(dead_queue.attributes.get("ApproximateNumberOfMessages"))
    if count_messages == 0:
        _LOG.info("No messages to redrive")
        return

    if not dryrun:
        for message in messages:
            response = alive_queue.send_message(MessageBody=message.body)
            if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
                message.delete()
                count += 1
                if limit and count >= limit:
                    break
            else:
                _LOG.error(f"Unable to send message {message} to queue")
        _LOG.info(f"Completed sending {count} messages to the queue")
    else:
        _LOG.warning(
            f"DRYRUN enabled, would have pushed approx {count_messages} messages to the queue"
        )
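# Illustrative call sketch (hypothetical queue names, never invoked here): move up
# to 100 messages from a dead-letter queue back onto its source queue.
def _example_redrive_to_queue() -> None:
    redrive_to_queue(
        queue="example-dead-letter-queue",    # hypothetical dead-letter queue
        to_queue="example-processing-queue",  # hypothetical destination queue
        limit=100,
        dryrun=False,
    )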
def test_generate_buckets_diff(s3_report_path: URL):
    sqs_client = boto3.client("sqs", region_name=REGION)
    sqs_client.create_queue(QueueName=SQS_QUEUE_NAME)

    s3_client = boto3.client("s3", region_name=REGION)
    s3_client.create_bucket(
        Bucket=TEST_BUCKET_NAME,
        CreateBucketConfiguration={
            "LocationConstraint": REGION,
        },
    )

    # Upload fake gap report
    s3_client.upload_file(
        str(LANDSAT_GAP_REPORT),
        TEST_BUCKET_NAME,
        str(S3_LANDSAT_GAP_REPORT),
    )

    print(list(boto3.resource("s3").Bucket(TEST_BUCKET_NAME).objects.all()))

    with patch.object(landsat_gap_filler, "S3_BUCKET_PATH", str(s3_report_path)):
        # No differences
        fill_the_gap(landsat="landsat_5", sync_queue_name=SQS_QUEUE_NAME)

        queue = get_queue(queue_name=SQS_QUEUE_NAME)
        number_of_msgs = queue.attributes.get("ApproximateNumberOfMessages")
        assert int(number_of_msgs) == 28
def _datasets_to_queue(self, queue, datasets):
    alive_queue = get_queue(queue)

    def post_messages(messages, count):
        alive_queue.send_messages(Entries=messages)
        sys.stdout.write(f"\rAdded {count} messages...")
        return []

    count = 0
    messages = []
    sys.stdout.write("\rAdding messages...")
    for dataset in datasets:
        message = {
            "Id": str(count),
            "MessageBody": json.dumps(
                {"id": str(dataset.id), "transform": self.transform_name}
            ),
        }
        messages.append(message)
        count += 1
        if count % 10 == 0:
            messages = post_messages(messages, count)

    # Post the last messages if there are any
    if len(messages) > 0:
        post_messages(messages, count)

    sys.stdout.write("\r")
    return count
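# Why the code above flushes every 10 messages: SQS SendMessageBatch accepts at
# most 10 entries per call. A generic chunking helper (illustrative sketch, not
# part of this module) that the same batching loops could be built on:
from itertools import islice
from typing import Iterable, Iterator, List


def _chunks(items: Iterable, size: int = 10) -> Iterator[List]:
    """Yield successive lists of at most `size` items from `items`."""
    it = iter(items)
    while True:
        batch = list(islice(it, size))
        if not batch:
            return
        yield batch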
def cli(queue, to_queue, limit, dryrun):
    """
    Redrive all messages from the given SQS queue to the destination queue
    """
    logging.basicConfig(
        level=logging.INFO,
        format="[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s",
        stream=sys.stdout,
    )
    _log = logging.getLogger(__name__)

    dead_queue = get_queue(queue)
    alive_queue = get_queue(to_queue)
    messages = get_messages(dead_queue)

    count = 0
    # ApproximateNumberOfMessages is returned as a string, so convert before comparing
    count_messages = int(dead_queue.attributes.get("ApproximateNumberOfMessages"))
    if count_messages == 0:
        _log.info("No messages to redrive")
        return

    if not dryrun:
        for message in messages:
            response = alive_queue.send_message(MessageBody=message.body)
            if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
                message.delete()
                count += 1
                if limit and count >= limit:
                    break
            else:
                _log.error(f"Unable to send message {message} to queue")
        _log.info(f"Completed sending {count} messages to the queue")
    else:
        _log.warning(
            f"DRYRUN enabled, would have pushed approx {count_messages} messages to the queue"
        )
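# How a command like `cli` above is typically wired up with click (a sketch under
# assumptions -- the project's real decorators, option names and validation may differ):
import click


@click.command("redrive-to-queue-example")
@click.argument("queue")
@click.argument("to_queue")
@click.option("--limit", type=int, default=None, help="Maximum number of messages to redrive")
@click.option("--dryrun", is_flag=True, default=False, help="Log what would be sent without sending")
def _example_redrive_command(queue, to_queue, limit, dryrun):
    """Redrive messages from QUEUE to TO_QUEUE (illustrative wrapper only)."""
    cli(queue, to_queue, limit, dryrun)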
def publish_to_queue(db, queue, verbose, dryrun, bunch_size, tasks):
    """
    Publish tasks to SQS.

    A task can be one of 3 things

    \b
    1. Comma-separated triplet: period,x,y or x[+-]<int>/y[+-]<int>/period
       2019--P1Y,+003,-004
       2019--P1Y/3/-4 `/` is also accepted
       x+003/y-004/2019--P1Y is accepted as well
    2. A zero based index
    3. A slice following python convention <start>:<stop>[:<step>]
        ::10 -- every tenth task: 0,10,20,..
       1::10 -- every tenth but skip the first one: 1, 11, 21 ..
        :100 -- first 100 tasks

    If no tasks are supplied, all tasks will be published to the queue.
    """
    from odc.aws.queue import get_queue, publish_messages
    from .tasks import TaskReader, render_task
    import toolz

    rdr = TaskReader(db)
    if len(tasks) == 0:
        tasks = rdr.all_tiles
        if verbose:
            print(f"Found {len(tasks):,d} tasks in the file")
    else:
        try:
            tasks = parse_all_tasks(tasks, rdr.all_tiles)
        except ValueError as e:
            print(str(e), file=sys.stderr)
            sys.exit(1)

    if dryrun:
        do_dry_run(tasks)
        sys.exit(0)

    queue = get_queue(queue)

    # TODO: switch to JSON for SQS message body
    messages = (
        dict(Id=str(idx), MessageBody=render_task(tidx))
        for idx, tidx in enumerate(tasks)
    )
    for bunch in toolz.partition_all(bunch_size, messages):
        publish_messages(queue, bunch)
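# Quick illustration of the batching used above: toolz.partition_all splits any
# iterable into tuples of at most `bunch_size` items (the last tuple may be shorter).
import toolz

_batches = list(toolz.partition_all(4, range(10)))
# _batches == [(0, 1, 2, 3), (4, 5, 6, 7), (8, 9)]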
def post_messages(message_list, queue_name: str) -> dict:
    """
    Publish messages

    :param message_list: (list) list of messages
    :param queue_name: (str) queue to be sent to
    :return: (dict) count of failed and sent messages
    """
    count = 0
    messages = []
    error_list = []
    failed = 0
    sent = 0
    queue = get_queue(queue_name=queue_name)

    logging.info("Sending messages")
    for message_dict in message_list:
        try:
            message = {
                "Id": str(count),
                "MessageBody": str(json.dumps(message_dict)),
            }
            messages.append(message)

            count += 1
            # Send messages in batches of 10 (the SQS batch limit)
            if count % 10 == 0:
                publish_messages(queue, messages)
                messages = []
                sent += 10
        except Exception as exc:
            failed += 1
            error_list.append(exc)
            messages = []

    # Post the last messages if there are any
    if len(messages) > 0:
        sent += len(messages)
        publish_messages(queue, messages)

    return {"failed": failed, "sent": sent}
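# Illustrative usage sketch (hypothetical queue name and payloads, never invoked
# here): publish two small JSON documents and inspect the summary returned.
def _example_post_messages() -> None:
    summary = post_messages(
        message_list=[{"id": "scene-1"}, {"id": "scene-2"}],  # hypothetical payloads
        queue_name="example-sync-queue",                      # hypothetical queue
    )
    # With two messages and no send errors the summary is {"failed": 0, "sent": 2}
    print(summary)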
def test_publish_message_s2_gap_filler(s3_report_path: URL):
    sqs_client = boto3.client("sqs", region_name=REGION)
    sqs_client.create_queue(QueueName=SQS_QUEUE_NAME)

    s3_client = boto3.client("s3", region_name=COGS_REGION)
    s3_client.create_bucket(
        Bucket=TEST_BUCKET_NAME,
        CreateBucketConfiguration={
            "LocationConstraint": COGS_REGION,
        },
    )
    s3_client.upload_file(
        str(LOCAL_REPORT_UPDATE_FILE),
        TEST_BUCKET_NAME,
        str(S3_S2_REPORT_FILE),
    )

    missing_dict = json.loads(open(str(LOCAL_REPORT_UPDATE_FILE), "rb").read())

    files = [
        scene_path.strip() for scene_path in missing_dict["missing"] if scene_path
    ]

    for i in range(len(files)):
        s3_client.upload_file(
            str(FAKE_STAC_FILE_PATH), TEST_BUCKET_NAME, f"{i}/{S2_FAKE_STAC_FILE}"
        )

    with patch.object(s2_gap_filler, "S3_BUCKET_PATH", str(s3_report_path)):
        s2_gap_filler.send_messages(
            limit=None,
            max_workers=1,
            idx=0,
            queue_name=SQS_QUEUE_NAME,
            slack_url=None,
        )

        queue = get_queue(queue_name=SQS_QUEUE_NAME)
        number_of_msgs = queue.attributes.get("ApproximateNumberOfMessages")
        assert int(number_of_msgs) == 8
def stream_from_sqs(
    self,
    sqs_queue,
    product: Optional[OutputProduct] = None,
    visibility_timeout: int = 300,
    **kw,
) -> Iterator[Task]:
    from odc.aws.queue import get_messages, get_queue

    from ._sqs import SQSWorkToken

    product = self._resolve_product(product)

    if isinstance(sqs_queue, str):
        sqs_queue = get_queue(sqs_queue)

    for msg in get_messages(sqs_queue, visibility_timeout=visibility_timeout, **kw):
        # TODO: switch to JSON for SQS message body
        token = SQSWorkToken(msg, visibility_timeout)
        tidx = parse_task(msg.body)
        yield self.load_task(tidx, product, source=token)
def test_rdr_sqs(sqs_queue_by_name, test_db_path):
    q = get_queue(sqs_queue_by_name)
    product = OutputProduct.dummy()
    rdr = TaskReader(test_db_path, product)

    for tidx in rdr.all_tiles:
        publish_message(q, render_task(tidx))

    for task in rdr.stream_from_sqs(
        sqs_queue_by_name, visibility_timeout=120, max_wait=0
    ):
        _now = datetime.utcnow()
        assert task.source is not None
        assert task.source.active_seconds < 2
        assert task.source.deadline > _now
        assert task.source.deadline < _now + timedelta(seconds=120 + 10)

        task.source.extend(3600)
        assert task.source.deadline > _now
        assert task.source.deadline < _now + timedelta(seconds=3600 + 10)

        task.source.done()
def get_tasks_from_queue(self, queue, limit, queue_timeout):
    """Retrieve messages from the named queue, returning an iterable of
    (AlchemistTasks, SQS Messages)"""
    alive_queue = get_queue(queue)
    messages = get_messages(alive_queue, limit, visibility_timeout=queue_timeout)

    for message in messages:
        message_body = json.loads(message.body)
        uuid = message_body.get("id", None)
        if uuid is None:
            # This is probably a message created from an SNS, so it's double
            # JSON dumped
            message_body = json.loads(message_body["Message"])

        transform = message_body.get("transform", None)
        if transform and transform != self.transform_name:
            _LOG.error(
                f"Your transform doesn't match the transform in the message. Ignoring {uuid}"
            )
            continue

        task = self.generate_task_by_uuid(message_body["id"])
        if task:
            yield task, message
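# Why the double json.loads above: when SQS is subscribed to an SNS topic (without
# raw message delivery), the SQS body is an SNS envelope whose "Message" field is
# itself a JSON string. Illustrative example with hypothetical values:
def _example_sns_envelope() -> None:
    sns_body = json.dumps(
        {"Type": "Notification", "Message": json.dumps({"id": "example-uuid"})}
    )
    outer = json.loads(sns_body)           # SNS envelope, no top-level "id"
    inner = json.loads(outer["Message"])   # the actual dataset document
    assert inner["id"] == "example-uuid"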
def send_messages(
    idx: int,
    queue_name: str,
    max_workers: int = 2,
    limit: int = None,
    slack_url: str = None,
) -> None:
    """
    Publish a list of missing scenes to a specific queue and, at the end,
    optionally send a Slack notification with the result

    :param limit: (int) optional limit of messages to be read from the report
    :param max_workers: (int) total number of pods used for the task. This number is
        used to split the number of scenes equally among the PODS
    :param idx: (int) sequential index which will be used to define the range of scenes
        that the POD will work with
    :param queue_name: (str) queue to be sent to
    :param slack_url: (str) optional Slack URL in case you want to send a Slack notification
    """
    log = setup_logging()

    latest_report = find_latest_report(
        report_folder_path=S3_BUCKET_PATH, not_contains="orphaned"
    )

    if "update" in latest_report:
        log.info("FORCED UPDATE FLAGGED!")

    log.info(f"Limited: {int(limit) if limit else 'No limit'}")
    log.info(f"Number of workers: {max_workers}")

    files = read_report_missing_scenes(report_path=latest_report, limit=limit)

    log.info(f"Number of scenes found {len(files)}")
    log.info(f"Example scenes: {files[0:10]}")

    # Split scenes equally among the workers
    split_list_scenes = split_list_equally(
        list_to_split=files, num_inter_lists=int(max_workers)
    )

    # In case the index is bigger than the number of positions in the array,
    # the extra POD isn't necessary
    if len(split_list_scenes) <= idx:
        log.warning(f"Worker {idx} Skipped!")
        sys.exit(0)

    log.info(f"Executing worker {idx}")
    messages = prepare_message(scene_paths=split_list_scenes[idx], log=log)

    queue = get_queue(queue_name=queue_name)

    batch = []
    failed = 0
    sent = 0
    error_list = []
    for message in messages:
        try:
            batch.append(message)
            if len(batch) == 10:
                publish_messages(queue=queue, messages=batch)
                batch = []
                sent += 10
        except Exception as exc:
            failed += 1
            error_list.append(exc)
            batch = []

    if len(batch) > 0:
        publish_messages(queue=queue, messages=batch)
        sent += len(batch)

    environment = "DEV" if "dev" in queue_name else "PDS"
    error_flag = ":red_circle:" if failed > 0 else ""

    message = dedent(
        f"{error_flag}*Sentinel 2 GAP Filler - {environment}*\n"
        f"Sent Messages: {sent}\n"
        f"Failed Messages: {failed}\n"
    )
    if slack_url is not None:
        send_slack_notification(slack_url, "S2 Gap Filler", message)

    log.info(message)

    if failed > 0:
        sys.exit(1)
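# Illustrative invocation sketch (hypothetical queue name, never invoked here):
# worker 0 of 4 reads the latest gap report and publishes its share of the
# missing scenes, with no Slack notification.
def _example_send_messages() -> None:
    send_messages(
        idx=0,
        queue_name="example-s2-sync-queue",  # hypothetical queue name
        max_workers=4,
        limit=None,
        slack_url=None,
    )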
def test_s2_gap_filler_cli(s3_report_path: URL):
    """
    Test random numbers of limits (between 1 and 10) for a random number of
    workers (between 1 and 30).
    """
    sqs_client = boto3.client("sqs", region_name=REGION)
    sqs_client.create_queue(QueueName=SQS_QUEUE_NAME)

    s3_client = boto3.client("s3", region_name=COGS_REGION)
    s3_client.create_bucket(
        Bucket=TEST_BUCKET_NAME,
        CreateBucketConfiguration={
            "LocationConstraint": COGS_REGION,
        },
    )
    s3_client.upload_file(
        str(LOCAL_REPORT_UPDATE_FILE),
        TEST_BUCKET_NAME,
        str(S3_S2_REPORT_FILE),
    )

    missing_dict = json.loads(open(str(LOCAL_REPORT_UPDATE_FILE), "rb").read())

    files = [
        scene_path.strip() for scene_path in missing_dict["missing"] if scene_path
    ]

    for i in range(len(files)):
        s3_client.upload_file(
            str(FAKE_STAC_FILE_PATH), TEST_BUCKET_NAME, f"{i}/{S2_FAKE_STAC_FILE}"
        )

    with patch.object(s2_gap_filler, "S3_BUCKET_PATH", str(s3_report_path)):
        runner = CliRunner()
        max_workers = randrange(1, 6)
        max_limit = randrange(1, 10)
        for limit in range(max_limit):
            for idx in range(max_workers):
                runner.invoke(
                    s2_gap_filler.cli,
                    [
                        str(idx),
                        str(max_workers),
                        str(SQS_QUEUE_NAME),
                        "--limit",
                        str(limit),
                    ],
                )

            queue = get_queue(queue_name=SQS_QUEUE_NAME)
            number_of_msgs = queue.attributes.get("ApproximateNumberOfMessages")

            # The total number of messages sent won't be bigger than 8 (len(files) == 8),
            # so even with more workers and higher limits the process must send a
            # maximum of 8 messages

            # if limit is 0 it returns an error
            if limit == 0:
                assert int(number_of_msgs) == 0

            # if limit is bigger than 0 and smaller than the max number of messages
            elif limit < len(files):
                assert int(number_of_msgs) == limit

            # if limit is bigger than 8
            elif limit >= len(files):
                assert int(number_of_msgs) == len(files)

            sqs_client.purge_queue(QueueUrl=queue.url)

    print(f"max_limit {max_limit} - max_workers {max_workers}")
def test_redrive_to_queue_cli(aws_env):
    resource = boto3.resource("sqs")
    dead_queue = resource.create_queue(QueueName=DEAD_QUEUE_NAME)
    alive_queue = resource.create_queue(
        QueueName=ALIVE_QUEUE_NAME,
        Attributes={
            "RedrivePolicy": json.dumps(
                {
                    "deadLetterTargetArn": dead_queue.attributes.get("QueueArn"),
                    "maxReceiveCount": 2,
                }
            ),
        },
    )

    for i in range(35):
        dead_queue.send_message(MessageBody=json.dumps({"content": f"Something {i}"}))

    # Invalid value string
    returned = CliRunner().invoke(
        redrive_to_queue.cli,
        [str(DEAD_QUEUE_NAME), str(ALIVE_QUEUE_NAME), "--limit", "string_test"],
    )
    assert returned.exit_code == 1

    # Invalid value 0
    returned = CliRunner().invoke(
        redrive_to_queue.cli,
        [str(DEAD_QUEUE_NAME), str(ALIVE_QUEUE_NAME), "--limit", 0],
    )
    assert returned.exit_code == 1

    # Valid value 1
    returned = CliRunner().invoke(
        redrive_to_queue.cli,
        [str(DEAD_QUEUE_NAME), str(ALIVE_QUEUE_NAME), "--limit", 1],
    )
    assert returned.exit_code == 0
    assert (
        int(get_queue(ALIVE_QUEUE_NAME).attributes.get("ApproximateNumberOfMessages"))
        == 1
    )

    # Valid value None (all)
    returned = CliRunner().invoke(
        redrive_to_queue.cli,
        [str(DEAD_QUEUE_NAME), str(ALIVE_QUEUE_NAME), "--limit", None],
    )
    assert returned.exit_code == 0
    assert (
        int(get_queue(DEAD_QUEUE_NAME).attributes.get("ApproximateNumberOfMessages"))
        == 0
    )