Example #1
def redrive_to_queue(queue, to_queue, limit, dryrun):
    """
    Redrive all messages from the given SQS queue to the destination queue
    """

    dead_queue = get_queue(queue)
    alive_queue = get_queue(to_queue)

    messages = get_messages(dead_queue)

    count = 0

    # ApproximateNumberOfMessages is returned as a string, so cast it before comparing
    count_messages = int(dead_queue.attributes.get("ApproximateNumberOfMessages"))

    if count_messages == 0:
        _LOG.info("No messages to redrive")
        return

    if not dryrun:
        for message in messages:
            response = alive_queue.send_message(MessageBody=message.body)
            if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
                message.delete()
                count += 1
                if limit and count >= limit:
                    break
            else:
                _LOG.error(f"Unable to send message {message} to queue")
        _LOG.info(f"Completed sending {count} messages to the queue")
    else:
        _LOG.warning(
            f"DRYRUN enabled, would have pushed approx {count_messages} messages to the queue"
        )
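A minimal sketch of how this helper might be invoked; the queue names and limit below are hypothetical, and get_queue/get_messages are assumed to come from odc.aws.queue as in the later examples.

# Hypothetical invocation sketch (queue names and limit are made up)
redrive_to_queue(
    queue="my-dead-letter-queue",    # source queue holding the failed messages
    to_queue="my-processing-queue",  # destination queue to redrive into
    limit=100,                       # stop after 100 messages
    dryrun=False,                    # set True to only report the approximate count
)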
Example #2
def test_generate_buckets_diff(s3_report_path: URL):
    sqs_client = boto3.client("sqs", region_name=REGION)
    sqs_client.create_queue(QueueName=SQS_QUEUE_NAME)

    s3_client = boto3.client("s3", region_name=REGION)
    s3_client.create_bucket(
        Bucket=TEST_BUCKET_NAME,
        CreateBucketConfiguration={
            "LocationConstraint": REGION,
        },
    )

    # Upload fake gap report
    s3_client.upload_file(
        str(LANDSAT_GAP_REPORT),
        TEST_BUCKET_NAME,
        str(S3_LANDSAT_GAP_REPORT),
    )

    print(list(boto3.resource("s3").Bucket(TEST_BUCKET_NAME).objects.all()))
    with patch.object(landsat_gap_filler, "S3_BUCKET_PATH", str(s3_report_path)):
        # No differences
        fill_the_gap(landsat="landsat_5", sync_queue_name=SQS_QUEUE_NAME)
        queue = get_queue(queue_name=SQS_QUEUE_NAME)
        number_of_msgs = queue.attributes.get("ApproximateNumberOfMessages")
        assert int(number_of_msgs) == 28
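The test above creates SQS and S3 clients without real credentials, which suggests the suite runs against a mocked AWS backend. A minimal fixture sketch that would achieve this, assuming moto >= 5 (whose mock_aws context manager routes boto3 calls to an in-memory backend) and a hypothetical REGION constant:

import pytest
from moto import mock_aws  # moto >= 5; older releases exposed mock_s3/mock_sqs instead

REGION = "ap-southeast-2"  # hypothetical; the real test suite defines its own constants

@pytest.fixture(autouse=True)
def mocked_aws():
    # Every boto3 client/resource created inside a test now talks to moto's
    # in-memory AWS backend instead of the real service.
    with mock_aws():
        yield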
Example #3
    def _datasets_to_queue(self, queue, datasets):
        alive_queue = get_queue(queue)

        def post_messages(messages, count):
            alive_queue.send_messages(Entries=messages)
            sys.stdout.write(f"\rAdded {count} messages...")
            return []

        count = 0
        messages = []
        sys.stdout.write("\rAdding messages...")
        for dataset in datasets:
            message = {
                "Id": str(count),
                "MessageBody": json.dumps(
                    {"id": str(dataset.id), "transform": self.transform_name}
                ),
            }
            messages.append(message)

            count += 1
            if count % 10 == 0:
                messages = post_messages(messages, count)

        # Post the last messages if there are any
        if len(messages) > 0:
            post_messages(messages, count)
        sys.stdout.write("\r")

        return count
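The batches of 10 above match SQS's send_message_batch limit of ten entries per call. A small, self-contained sketch of the same chunking pattern (illustrative only, not the class's actual code):

from itertools import islice

def chunked(iterable, size=10):
    # Yield successive lists of at most `size` items from the iterable.
    it = iter(iterable)
    while batch := list(islice(it, size)):
        yield batch

# for batch in chunked(entries, 10):
#     alive_queue.send_messages(Entries=batch)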
Example #4
def cli(queue, to_queue, limit, dryrun):
    """
    Redrive all messages from the given SQS queue to the destination queue
    """

    logging.basicConfig(
        level=logging.INFO,
        format="[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s",
        stream=sys.stdout,
    )

    _log = logging.getLogger(__name__)

    dead_queue = get_queue(queue)
    alive_queue = get_queue(to_queue)

    messages = get_messages(dead_queue)

    count = 0

    # ApproximateNumberOfMessages is returned as a string, so cast it before comparing
    count_messages = int(dead_queue.attributes.get("ApproximateNumberOfMessages"))

    if count_messages == 0:
        _log.info("No messages to redrive")
        return

    if not dryrun:
        for message in messages:
            response = alive_queue.send_message(MessageBody=message.body)
            if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
                message.delete()
                count += 1
                if limit and count >= limit:
                    break
            else:
                _log.error(f"Unable to send message {message} to queue")
        _log.info(f"Completed sending {count} messages to the queue")
    else:
        _log.warning(
            f"DRYRUN enabled, would have pushed approx {count_messages} messages to the queue"
        )
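The bare cli(queue, to_queue, limit, dryrun) signature implies a click command wrapping it; a hypothetical sketch of that wiring, with argument and option names assumed from the parameters and from how the command is invoked in Example #13, not taken from the actual tool:

# Hypothetical click wiring; definitions below are assumptions, not the tool's code.
import click

@click.command("redrive-to-queue")
@click.argument("queue")
@click.argument("to-queue")
@click.option("--limit", default=None, help="Maximum number of messages to redrive")
@click.option("--dryrun", is_flag=True, help="Report the message count without redriving")
def cli(queue, to_queue, limit, dryrun):
    ...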
Example #5
def publish_to_queue(db, queue, verbose, dryrun, bunch_size, tasks):
    """
    Publish tasks to SQS.

    A task can be one of three things:

    \b
    1. A comma-separated triplet: period,x,y or x[+-]<int>/y[+-]<int>/period
       2019--P1Y,+003,-004
       2019--P1Y/3/-4          `/` is also accepted
       x+003/y-004/2019--P1Y   is accepted as well
    2. A zero-based index
    3. A slice following Python convention <start>:<stop>[:<step>]
        ::10 -- every tenth task: 0, 10, 20, ...
       1::10 -- every tenth task, skipping the first: 1, 11, 21, ...
        :100 -- the first 100 tasks

    If no tasks are supplied, all tasks are published to the queue.
    """
    from odc.aws.queue import get_queue, publish_messages
    from .tasks import TaskReader, render_task
    import toolz

    rdr = TaskReader(db)
    if len(tasks) == 0:
        tasks = rdr.all_tiles
        if verbose:
            print(f"Found {len(tasks):,d} tasks in the file")
    else:
        try:
            tasks = parse_all_tasks(tasks, rdr.all_tiles)
        except ValueError as e:
            print(str(e), file=sys.stderr)
            sys.exit(1)

    if dryrun:
        do_dry_run(tasks)
        sys.exit(0)

    queue = get_queue(queue)
    # TODO: switch to JSON for SQS message body
    messages = (dict(Id=str(idx), MessageBody=render_task(tidx))
                for idx, tidx in enumerate(tasks))

    for bunch in toolz.partition_all(bunch_size, messages):
        publish_messages(queue, bunch)
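The slice form in the docstring follows ordinary Python slicing; a small, self-contained illustration of that convention (not the library's parse_all_tasks, just a sketch):

# Illustrative only: interpret '<start>:<stop>[:<step>]' like a Python slice.
def parse_slice_spec(spec, items):
    parts = (spec.split(":") + ["", ""])[:3]
    start, stop, step = (int(p) if p else None for p in parts)
    return items[start:stop:step]

# parse_slice_spec("::10", tasks)  -> every tenth task: 0, 10, 20, ...
# parse_slice_spec("1::10", tasks) -> every tenth, skipping the first: 1, 11, 21, ...
# parse_slice_spec(":100", tasks)  -> the first 100 tasks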
Example #6
def post_messages(message_list, queue_name: str) -> dict:
    """
    Publish messages to the named queue in batches.

    :param message_list: (list) list of messages
    :param queue_name: (str) name of the queue to send to

    :return: (dict) counts of failed and sent messages
    """

    count = 0
    messages = []
    error_list = []
    failed = 0
    sent = 0
    queue = get_queue(queue_name=queue_name)

    logging.info("Sending messages")
    for message_dict in message_list:
        try:
            message = {
                "Id": str(count),
                "MessageBody": str(json.dumps(message_dict)),
            }

            messages.append(message)
            count += 1

            # Send in batches of 10 (the SQS batch-send limit)
            if count % 10 == 0:
                publish_messages(queue, messages)
                messages = []
                sent += 10
        except Exception as exc:
            failed += 1
            error_list.append(exc)
            messages = []

    # Post the last messages if there are any
    if len(messages) > 0:
        sent += len(messages)
        publish_messages(queue, messages)

    return {"failed": failed, "sent": sent}
Example #7
def test_publish_message_s2_gap_filler(s3_report_path: URL):
    sqs_client = boto3.client("sqs", region_name=REGION)
    sqs_client.create_queue(QueueName=SQS_QUEUE_NAME)

    s3_client = boto3.client("s3", region_name=COGS_REGION)
    s3_client.create_bucket(
        Bucket=TEST_BUCKET_NAME,
        CreateBucketConfiguration={
            "LocationConstraint": COGS_REGION,
        },
    )

    s3_client.upload_file(
        str(LOCAL_REPORT_UPDATE_FILE),
        TEST_BUCKET_NAME,
        str(S3_S2_REPORT_FILE),
    )

    with open(str(LOCAL_REPORT_UPDATE_FILE), "rb") as report_file:
        missing_dict = json.loads(report_file.read())

    files = [
        scene_path.strip() for scene_path in missing_dict["missing"] if scene_path
    ]

    # Upload one fake STAC document per missing scene
    for i in range(len(files)):
        s3_client.upload_file(
            str(FAKE_STAC_FILE_PATH),
            TEST_BUCKET_NAME,
            f"{i}/{S2_FAKE_STAC_FILE}",
        )

    with patch.object(s2_gap_filler, "S3_BUCKET_PATH", str(s3_report_path)):
        s2_gap_filler.send_messages(
            limit=None,
            max_workers=1,
            idx=0,
            queue_name=SQS_QUEUE_NAME,
            slack_url=None,
        )

        queue = get_queue(queue_name=SQS_QUEUE_NAME)
        number_of_msgs = queue.attributes.get("ApproximateNumberOfMessages")
        assert int(number_of_msgs) == 8
Example #8
    def stream_from_sqs(
        self,
        sqs_queue,
        product: Optional[OutputProduct] = None,
        visibility_timeout: int = 300,
        **kw,
    ) -> Iterator[Task]:
        from odc.aws.queue import get_messages, get_queue
        from ._sqs import SQSWorkToken

        product = self._resolve_product(product)

        if isinstance(sqs_queue, str):
            sqs_queue = get_queue(sqs_queue)

        for msg in get_messages(sqs_queue, visibility_timeout=visibility_timeout, **kw):
            # TODO: switch to JSON for SQS message body
            token = SQSWorkToken(msg, visibility_timeout)
            tidx = parse_task(msg.body)
            yield self.load_task(tidx, product, source=token)
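A hedged sketch of how the stream might be consumed; `reader` and process() are hypothetical, while the done() call on the work token is the same one exercised in Example #9:

# Hypothetical consumer; 'reader' is a TaskReader and process() is a made-up step.
for task in reader.stream_from_sqs("my-task-queue", visibility_timeout=600):
    try:
        process(task)       # do the actual work (hypothetical)
        task.source.done()  # acknowledge: deletes the SQS message
    except Exception:
        # leave the message alone; it becomes visible again after the timeout
        continue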
Example #9
def test_rdr_sqs(sqs_queue_by_name, test_db_path):
    q = get_queue(sqs_queue_by_name)
    product = OutputProduct.dummy()
    rdr = TaskReader(test_db_path, product)

    for tidx in rdr.all_tiles:
        publish_message(q, render_task(tidx))

    for task in rdr.stream_from_sqs(sqs_queue_by_name,
                                    visibility_timeout=120,
                                    max_wait=0):
        _now = datetime.utcnow()
        assert task.source is not None
        assert task.source.active_seconds < 2
        assert task.source.deadline > _now
        assert task.source.deadline < _now + timedelta(seconds=120 + 10)

        task.source.extend(3600)
        assert task.source.deadline > _now
        assert task.source.deadline < _now + timedelta(seconds=3600 + 10)
        task.source.done()
Example #10
    def get_tasks_from_queue(self, queue, limit, queue_timeout):
        """Retrieve messages from the named queue, returning an iterable of (AlchemistTasks, SQS Messages)"""
        alive_queue = get_queue(queue)
        messages = get_messages(alive_queue, limit, visibility_timeout=queue_timeout)

        for message in messages:
            message_body = json.loads(message.body)
            uuid = message_body.get("id", None)
            if uuid is None:
                # This message probably came via SNS, so the body is
                # double JSON-encoded
                message_body = json.loads(message_body["Message"])
            transform = message_body.get("transform", None)

            if transform and transform != self.transform_name:
                _LOG.error(
                    f"Your transform doesn't match the transform in the message. Ignoring {uuid}"
                )
                continue
            task = self.generate_task_by_uuid(message_body["id"])
            if task:
                yield task, message
Example #11
def send_messages(
    idx: int,
    queue_name: str,
    max_workers: int = 2,
    limit: int = None,
    slack_url: str = None,
) -> None:
    """
    Publish a list of missing scenes to a specific queue and, once done, optionally
    notify Slack of the result.

    :param limit: (int) optional limit on the number of messages read from the report
    :param max_workers: (int) total number of pods used for the task; the scenes are
        split equally among them
    :param idx: (int) sequential index defining which slice of scenes this pod works on
    :param queue_name: (str) name of the queue to send to
    :param slack_url: (str) optional Slack webhook URL for sending a notification
    """
    log = setup_logging()

    latest_report = find_latest_report(
        report_folder_path=S3_BUCKET_PATH, not_contains="orphaned"
    )

    if "update" in latest_report:
        log.info("FORCED UPDATE FLAGGED!")

    log.info(f"Limited: {int(limit) if limit else 'No limit'}")
    log.info(f"Number of workers: {max_workers}")

    files = read_report_missing_scenes(report_path=latest_report, limit=limit)

    log.info(f"Number of scenes found {len(files)}")
    log.info(f"Example scenes: {files[0:10]}")

    # Split scenes equally among the workers
    split_list_scenes = split_list_equally(
        list_to_split=files, num_inter_lists=int(max_workers)
    )

    # If the index is beyond the number of chunks, this extra pod is not needed
    if len(split_list_scenes) <= idx:
        log.warning(f"Worker {idx} Skipped!")
        sys.exit(0)

    log.info(f"Executing worker {idx}")
    messages = prepare_message(scene_paths=split_list_scenes[idx], log=log)

    queue = get_queue(queue_name=queue_name)

    batch = []
    failed = 0
    sent = 0
    error_list = []
    for message in messages:
        try:
            batch.append(message)
            if len(batch) == 10:
                publish_messages(queue=queue, messages=batch)
                batch = []
                sent += 10
        except Exception as exc:
            failed += 1
            error_list.append(exc)
            batch = []

    if len(batch) > 0:
        publish_messages(queue=queue, messages=batch)
        sent += len(batch)

    environment = "DEV" if "dev" in queue_name else "PDS"
    error_flag = ":red_circle:" if failed > 0 else ""

    message = dedent(
        f"{error_flag}*Sentinel 2 GAP Filler - {environment}*\n"
        f"Sent Messages: {sent}\n"
        f"Failed Messages: {failed}\n"
    )
    if slack_url is not None:
        send_slack_notification(slack_url, "S2 Gap Filler", message)

    log.info(message)

    if failed > 0:
        sys.exit(1)
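How the even split across workers can be pictured; this is an illustration of the behaviour described in the docstring, not the library's split_list_equally implementation:

# Illustrative only: divide the scene list into num_workers interleaved,
# roughly equal chunks; worker idx then processes chunks[idx].
def split_evenly(items, num_workers):
    return [items[i::num_workers] for i in range(num_workers)]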
Example #12
def test_s2_gap_filler_cli(s3_report_path: URL):
    """
    Test for random numbers of limits (between 1-10) for a random numbers of workers workers (between 1-30).
    """
    sqs_client = boto3.client("sqs", region_name=REGION)
    sqs_client.create_queue(QueueName=SQS_QUEUE_NAME)

    s3_client = boto3.client("s3", region_name=COGS_REGION)
    s3_client.create_bucket(
        Bucket=TEST_BUCKET_NAME,
        CreateBucketConfiguration={
            "LocationConstraint": COGS_REGION,
        },
    )

    s3_client.upload_file(
        str(LOCAL_REPORT_UPDATE_FILE),
        TEST_BUCKET_NAME,
        str(S3_S2_REPORT_FILE),
    )

    with open(str(LOCAL_REPORT_UPDATE_FILE), "rb") as report_file:
        missing_dict = json.loads(report_file.read())

    files = [
        scene_path.strip() for scene_path in missing_dict["missing"] if scene_path
    ]

    # Upload one fake STAC document per missing scene
    for i in range(len(files)):
        s3_client.upload_file(
            str(FAKE_STAC_FILE_PATH),
            TEST_BUCKET_NAME,
            f"{i}/{S2_FAKE_STAC_FILE}",
        )

    with patch.object(s2_gap_filler, "S3_BUCKET_PATH", str(s3_report_path)):
        runner = CliRunner()
        max_workers = randrange(1, 6)
        max_limit = randrange(1, 10)
        for limit in range(max_limit):
            for idx in range(max_workers):
                runner.invoke(
                    s2_gap_filler.cli,
                    [
                        str(idx),
                        str(max_workers),
                        str(SQS_QUEUE_NAME),
                        "--limit",
                        str(limit),
                    ],
                )

            queue = get_queue(queue_name=SQS_QUEUE_NAME)
            number_of_msgs = queue.attributes.get("ApproximateNumberOfMessages")

            # len(files) == 8, so even with more workers and higher limits the
            # process sends at most 8 messages

            # a limit of 0 is rejected, so nothing is sent
            if limit == 0:
                assert int(number_of_msgs) == 0

            # a limit below the number of missing scenes sends exactly `limit` messages
            elif limit < len(files):
                assert int(number_of_msgs) == limit

            # a limit of len(files) or more sends all the missing scenes
            elif limit >= len(files):
                assert int(number_of_msgs) == len(files)

            sqs_client.purge_queue(QueueUrl=queue.url)

        print(f"max_limit {max_limit} - max_workers {max_workers}")
Example #13
def test_redrive_to_queue_cli(aws_env):
    resource = boto3.resource("sqs")

    dead_queue = resource.create_queue(QueueName=DEAD_QUEUE_NAME)
    alive_queue = resource.create_queue(
        QueueName=ALIVE_QUEUE_NAME,
        Attributes={
            "RedrivePolicy": json.dumps(
                {
                    "deadLetterTargetArn": dead_queue.attributes.get("QueueArn"),
                    "maxReceiveCount": 2,
                }
            ),
        },
    )

    for i in range(35):
        dead_queue.send_message(
            MessageBody=json.dumps({"content": f"Something {i}"}))

    # Invalid value string
    returned = CliRunner().invoke(
        redrive_to_queue.cli,
        [str(DEAD_QUEUE_NAME), str(ALIVE_QUEUE_NAME), "--limit", "string_test"],
    )

    assert returned.exit_code == 1

    # Invalid value 0
    returned = CliRunner().invoke(
        redrive_to_queue.cli,
        [str(DEAD_QUEUE_NAME), str(ALIVE_QUEUE_NAME), "--limit", 0],
    )

    assert returned.exit_code == 1

    # Valid value 1
    returned = CliRunner().invoke(
        redrive_to_queue.cli,
        [str(DEAD_QUEUE_NAME), str(ALIVE_QUEUE_NAME), "--limit", 1],
    )

    assert returned.exit_code == 0
    assert int(
        get_queue(ALIVE_QUEUE_NAME).attributes.get("ApproximateNumberOfMessages")
    ) == 1

    # Valid value None (all)
    returned = CliRunner().invoke(
        redrive_to_queue.cli,
        [str(DEAD_QUEUE_NAME), str(ALIVE_QUEUE_NAME), "--limit", None],
    )

    assert returned.exit_code == 0
    assert int(
        get_queue(DEAD_QUEUE_NAME).attributes.get("ApproximateNumberOfMessages")
    ) == 0