예제 #1
0
 def test_poll_cluster_matching_cluster_and_service_name(self, mock_boto):
     """Poll when the cluster argument and the service name are identical.

     The mocked client answers with GOOD_SERVICE, TASKS and GOOD_TASKS.
     There are no explicit assertions: the test passes as long as
     ``poll_cluster_state`` returns without raising.
     """
     ecs = mock_boto.return_value
     # Canned responses for the three client calls configured by this test.
     ecs.describe_tasks.return_value = GOOD_TASKS
     ecs.list_tasks.return_value = TASKS
     ecs.describe_services.return_value = GOOD_SERVICE

     shared_name = 'service-foo'
     ecs_utils.poll_cluster_state(ecs, shared_name, [shared_name], POLL_S)
예제 #2
0
 def test_poll_cluster_new_arn(self, mock_boto):
     """Poll with the service given in ``cluster/service`` form.

     The mocked client answers with GOOD_SERVICE, TASKS and GOOD_TASKS.
     There are no explicit assertions: the test passes as long as
     ``poll_cluster_state`` returns without raising.
     NOTE(review): the name suggests this mirrors the newer ECS ARN
     layout -- confirm against ``poll_cluster_state``.
     """
     ecs = mock_boto.return_value
     ecs.describe_tasks.return_value = GOOD_TASKS
     ecs.list_tasks.return_value = TASKS
     ecs.describe_services.return_value = GOOD_SERVICE

     services = ['cluster-foo/service-foo']
     ecs_utils.poll_cluster_state(ecs, 'cluster-foo', services, POLL_S)
예제 #3
0
 def test_poll_cluster_bad_tasks(self, mock_boto, mock_print_events):
     """Polling must raise ``TimeoutException`` when tasks are BAD_TASKS.

     ``mock_print_events`` is accepted but not used in the test body.
     """
     ecs = mock_boto.return_value
     ecs.describe_tasks.return_value = BAD_TASKS
     ecs.list_tasks.return_value = TASKS
     ecs.describe_services.return_value = GOOD_SERVICE

     # Callable form of assertRaises: invokes poll_cluster_state with the
     # trailing arguments and checks the exception type.
     self.assertRaises(
         ecs_utils.TimeoutException,
         ecs_utils.poll_cluster_state,
         ecs, 'cluster-foo', ['service-foo'], POLL_S,
     )
예제 #4
0
 def test_poll_cluster_with_inactive_service(self, mock_boto):
     """Poll a cluster whose service is INACTIVE_SERVICE and has no tasks.

     ``describe_tasks`` is rigged to raise, so the test only passes if
     ``poll_cluster_state`` completes without that exception escaping.
     """
     ecs = mock_boto.return_value
     ecs.describe_services.return_value = INACTIVE_SERVICE
     ecs.list_tasks.return_value = EMPTY_TASKS
     # Any call to describe_tasks raises this error.
     empty_tasks_error = Exception('Tasks cannot be empty.')
     ecs.describe_tasks.side_effect = empty_tasks_error

     ecs_utils.poll_cluster_state(ecs, 'cluster-foo', ['service-foo'], POLL_S)
예제 #5
0
def rolling_replace_instances(ecs, ec2, cluster_name, batches, ami_id, force, drain_timeout_s):
    """Replace the EC2 instances backing an ECS cluster, one batch at a time.

    For every batch the container instances that still need replacing are
    set to DRAINING, each one is terminated as soon as it reports no running
    tasks, and the cluster is polled with ``ecs_utils.poll_cluster_state``
    before the next batch starts.

    Args:
        ecs: ECS client; ``describe_container_instances`` and
            ``update_container_instances_state`` are called on it.
        ec2: EC2 client; ``terminate_instances`` is called on it.
        cluster_name: Name of the ECS cluster to operate on.
        batches: Number of batches to split the cluster's instances into.
            Must be greater than zero.
        ami_id: Forwarded to ``get_already_updated_instances`` to find
            instances that are already up to date (the --ami-id flag).
        force: Proceed even when one batch would cover every instance,
            which the function warns will cause downtime.
        drain_timeout_s: Seconds to wait for a batch to drain; also used as
            the polling timeout after each batch.

    Raises:
        RollingException: No services or container instances were found,
            ``batches`` is not positive, a single batch would cover every
            instance and ``force`` is false, a batch exceeds 100 instances,
            or ECS returned no container instance descriptions.
        RollingTimeoutException: A batch did not finish draining within
            ``drain_timeout_s`` seconds.
    """
    replace_start_time = time.time()
    services = get_services(ecs, cluster_name)
    if not services:
        raise RollingException('No services found in cluster. exiting.')
    utils.print_info(
        f'Checking cluster {cluster_name}, services {str(services)} are stable'
    )
    ecs_utils.poll_cluster_state(
        ecs, cluster_name, services, polling_timeout=120
    )
    instances = get_container_instance_arns(ecs, cluster_name)
    if not instances:
        # Without this guard the batch size below would be 0 and the user
        # would get a misleading "Terminating 0 instances" warning.
        raise RollingException(
            'No container instances found in cluster. exiting.'
        )
    if batches <= 0:
        # Avoids ZeroDivisionError / a negative batch size further down.
        raise RollingException(
            f'batches must be greater than 0, got {batches}.'
        )

    # batches determines the number of instances you want to replace at once.
    # Choose conservatively, as this process temporarily reduces your capacity.
    # But note each batch can be time consuming (up to 10m per batch)
    batch_size = math.ceil(len(instances) / batches)
    utils.print_info(f'You have {len(instances)} instances.')
    utils.print_info(f'Terminating in batches of {batch_size}')
    if len(instances) <= batch_size:
        utils.print_warning(
            f'Terminating {batch_size} instances will cause downtime.'
        )
        if not force:
            raise RollingException('Quitting, use --force to over-ride.')

    for to_drain in batch_instances(instances, batch_size):
        if len(to_drain) > 100:
            utils.print_error('Batch size exceeded 100, try using more batches.')
            raise RollingException(
                f'Quitting, batch size exceeded 100: {batch_size}.'
            )
        response = ecs.describe_container_instances(
            cluster=cluster_name, containerInstances=to_drain)

        container_instances = response.get('containerInstances')
        if not container_instances:
            raise RollingException('No containerInstances found.')

        # don't drain or terminate any instances that are already up to date
        # (if the user provided the --ami-id flag)
        done_instances = get_already_updated_instances(response, ami_id)
        if len(done_instances) == len(to_drain):
            # move on if the whole batch is already up to date
            continue

        if done_instances:
            # Mixed batch: only drain the instances that still need
            # replacing. Draining an up-to-date instance would leave it in
            # DRAINING, because it is never terminated below.
            arns_to_drain = [
                container_instance['containerInstanceArn']
                for container_instance in container_instances
                if container_instance.get('ec2InstanceId') not in done_instances
            ]
        else:
            arns_to_drain = to_drain

        # drain instances in this batch
        if arns_to_drain:
            ecs.update_container_instances_state(cluster=cluster_name,
                                                 status='DRAINING',
                                                 containerInstances=arns_to_drain)
        utils.print_info(f'Wait for drain to complete with {drain_timeout_s}s timeout...')
        start_time = time.time()
        while len(done_instances) < len(to_drain):
            if (time.time() - start_time) > drain_timeout_s:
                raise RollingTimeoutException('Waiting for instance to complete draining. Giving up.')
            time.sleep(SLEEP_TIME_S)
            response = ecs.describe_container_instances(
                cluster=cluster_name, containerInstances=to_drain)
            # A response without 'containerInstances' is treated as "nothing
            # drained yet"; the timeout above still bounds the wait.
            for container_instance in response.get('containerInstances', []):
                instance_id = container_instance.get('ec2InstanceId')
                running_tasks = container_instance.get('runningTasksCount')
                if running_tasks > 0:
                    PRINT_PROGRESS()
                    continue
                if instance_id not in done_instances:
                    utils.print_info(f'{instance_id} is drained, terminate!')
                    ec2.terminate_instances(InstanceIds=[instance_id])
                    done_instances.append(instance_id)
        # new instance will take as much as 10m to go into service
        # then we wait for ECS to resume a steady state before moving on
        ecs_utils.poll_cluster_state(ecs, cluster_name,
                                     services, polling_timeout=drain_timeout_s)
    utils.print_success(f'EC2 instance replacement process complete! {int(time.time() - replace_start_time)}s elapsed')