def tasks_are_healthy(ecs_client, cluster_name, service_name):
    """Return True only if every task in the service reports a HEALTHY status."""
    next_token = ''
    healthy = 0
    while True:
        task_response = ecs_client.list_tasks(cluster=cluster_name,
                                              serviceName=service_name,
                                              nextToken=next_token,
                                              maxResults=100)
        tasks = task_response.get('taskArns')
        next_token = task_response.get('nextToken')
        for task in ecs_client.describe_tasks(cluster=cluster_name,
                                              tasks=tasks).get('tasks'):
            task_arn = task.get('taskArn')
            status = task.get('healthStatus')
            if status != 'HEALTHY':
                utils.print_warning(f'task {task_arn} status: {status}')
                return False
            healthy += 1
        if not next_token:
            break
    utils.print_info(f'{service_name} {healthy} tasks are healthy')
    return True

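# Hedged usage sketch (not part of the original module): how tasks_are_healthy
# might be used as a standalone pre-deploy gate. The region, cluster, and
# service names below are hypothetical placeholders.
def example_health_gate():
    import boto3
    ecs_client = boto3.client('ecs', 'us-east-1')
    if not tasks_are_healthy(ecs_client, 'example-cluster', 'example-service'):
        raise RuntimeError('example-service has unhealthy tasks')
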
def delete_param(name, region):
    """Remove SSM parameter."""
    ssm = boto3.client('ssm', region)
    try:
        utils.print_info(json.dumps(ssm.delete_parameter(Name=name)))
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == 'ParameterNotFound':
            utils.print_error(f'Cannot find {name}')
            sys.exit(1)
        raise e

def poll_cluster_state(ecs_client,
                       cluster_name,
                       service_names,
                       polling_timeout,
                       stale_s=None):
    """Poll services in an ECS cluster for service stability."""
    utils.print_info(
        f'Polling cluster services: {service_names} in cluster: {cluster_name} '
        f'with timeout: {polling_timeout}s')
    start_time = time.time()
    services = service_names.copy()
    is_2019_arn_format = services[0].startswith(f'{cluster_name}/')
    last_response = []
    while services:
        time.sleep(SLEEP_TIME_S)
        elapsed = time.time() - start_time
        if elapsed > polling_timeout:
            print_events(last_response)
            raise TimeoutException(
                f'Polling timed out! Check {service_names} status.')
        response = ecs_client.describe_services(cluster=cluster_name,
                                                services=services)
        last_response = response
        if not response.get('services'):
            utils.print_warning(
                'describe_services got an empty services response')
            continue
        for service_response in response.get('services'):
            if stale_s:
                # check that the service has started to change based on events
                if not has_recent_event(service_response, start_time, stale_s):
                    continue
            service_name = service_response.get('serviceName')
            is_active = service_response.get('desiredCount') > 0
            if service_is_stable(service_response):
                # only check services that are active (desiredCount > 0)
                if is_active and not tasks_are_healthy(
                        ecs_client, cluster_name, service_name):
                    utils.print_warning(
                        f'{service_name} tasks are still not healthy')
                    continue
                if is_2019_arn_format:
                    services.remove(f'{cluster_name}/{service_name}')
                else:
                    services.remove(service_name)
                elapsed = int(time.time() - start_time)
                utils.print_success(
                    f'{service_name} tasks are healthy. Elapsed: {elapsed}s')

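# Hedged usage sketch (assumption, not original code): waiting for every
# service in a cluster to stabilize after a change. The region, cluster and
# service names, and the 900s timeout are hypothetical placeholders.
def example_poll_cluster():
    import boto3
    ecs_client = boto3.client('ecs', 'us-east-1')
    poll_cluster_state(ecs_client,
                       'example-cluster',
                       ['example-service-a', 'example-service-b'],
                       polling_timeout=900)
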
def encrypt(data, alias, context, region):
    """Generate a KMS-encrypted data blob, returned base64-encoded."""
    if isinstance(data, str):
        plaintext = str.encode(data, 'ascii')
    else:
        plaintext = data
    client = boto3.client('kms', region)
    key_id = get_kms_key_id(alias, region)
    kms_encryption = client.encrypt(KeyId=key_id,
                                    Plaintext=plaintext,
                                    EncryptionContext=context)
    utils.print_info(
        f'Encryption using keyId {key_id} with context: {context}')
    return base64.b64encode(kms_encryption['CiphertextBlob']).decode('ascii')

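# Hedged usage sketch (assumption, not original code): encrypting a secret
# under a KMS alias with an encryption context. The alias, context values,
# and region are hypothetical placeholders.
def example_encrypt_secret():
    blob = encrypt('s3cr3t-value',
                   'alias/example-app',
                   {'app': 'example-app', 'env': 'staging'},
                   'us-east-1')
    return blob
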
def get_already_updated_instances(ecs_response, ami_id):
    """Return the instance ids in the batch that already run ami_id."""
    instances = []
    for container_instance in ecs_response.get('containerInstances'):
        instance_id = container_instance.get('ec2InstanceId')
        status = container_instance.get('status')
        if status == 'DRAINING':
            # unexpected, but we should proceed with terminating it because we
            # already verified that the services were in a steady state.
            utils.print_warning(f'{instance_id} was already draining')
            continue
        this_ami_id = get_ami_id(container_instance)
        utils.print_info(f'Instance to drain: {instance_id}/{this_ami_id}')
        if this_ami_id == ami_id:
            utils.print_warning(
                f'{instance_id} already uses ami_id {ami_id}. Skipping.')
            instances.append(instance_id)
    return instances

def poll_deployment_state(ecs_client,
                          cluster_name,
                          service_name,
                          polling_timeout,
                          stale_s=None):
    """Poll a service in an ECS cluster for a complete deployment."""
    utils.print_info(
        f'Polling for deploy state service: {service_name} '
        f'in cluster: {cluster_name}')
    start_time = time.time()
    last_response = []
    while True:
        time.sleep(SLEEP_TIME_S)
        if (time.time() - start_time) > polling_timeout:
            if last_response:
                print_events(last_response)
            raise TimeoutException(
                f'Polling timed out! Check {service_name} status.')
        response = ecs_client.describe_services(cluster=cluster_name,
                                                services=[service_name])
        last_response = response
        if not response.get('services'):
            utils.print_warning(
                'describe_services got an empty services response')
            continue
        service_response = response.get('services')[0]
        deployments = service_response.get('deployments')
        if deployment_is_stable(deployments[0], start_time, stale_s):
            # double check that tasks are healthy
            if not tasks_are_healthy(ecs_client, cluster_name, service_name):
                utils.print_warning(
                    f'{service_name} tasks are still not healthy')
                continue
            elapsed = int(time.time() - start_time)
            utils.print_success(
                f'{service_name} deploy is complete. Elapsed: {elapsed}s')
            break

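# Hedged usage sketch (assumption, not original code): blocking until a single
# service finishes its current deployment, with a staleness window on service
# events. Names, region, and timeouts are hypothetical placeholders.
def example_poll_deployment():
    import boto3
    ecs_client = boto3.client('ecs', 'us-east-1')
    poll_deployment_state(ecs_client,
                          'example-cluster',
                          'example-service',
                          polling_timeout=1200,
                          stale_s=300)
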
def put_param(name,
              value,
              region,
              kms_key_alias=None,
              overwrite=False,
              plaintext=True):
    """Store the name/value pair as an SSM parameter."""
    ssm = boto3.client('ssm', region)
    try:
        if kms_key_alias:
            kms_key = kms.get_kms_key_id(kms_key_alias, region)
            if not kms_key:
                raise ParamException(
                    f'No key found for alias {kms_key_alias} {region}')
            result = ssm.put_parameter(Name=name,
                                       Description=name,
                                       Value=value,
                                       Type='SecureString',
                                       KeyId=kms_key,
                                       Overwrite=overwrite)
        else:
            utils.print_warning('Creating without encryption')
            result = ssm.put_parameter(Name=name,
                                       Description=name,
                                       Value=value,
                                       Type='String',
                                       Overwrite=overwrite)
        utils.print_info(json.dumps(result))
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == 'ParameterAlreadyExists':
            utils.print_error(
                f'setting "{name}" already exists, use -f to overwrite.')
            sys.exit(1)
        raise e

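# Hedged usage sketch (assumption, not original code): storing an encrypted
# SecureString and a plain String parameter. The parameter names, values,
# KMS alias, and region are hypothetical placeholders.
def example_put_params():
    put_param('/example/app/db_password', 'hunter2', 'us-east-1',
              kms_key_alias='alias/example-app', overwrite=True)
    put_param('/example/app/log_level', 'INFO', 'us-east-1')
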
def rolling_replace_instances(ecs, ec2, cluster_name, batches, ami_id, force,
                              drain_timeout_s):
    """Drain and terminate the cluster's container instances in batches."""
    replace_start_time = time.time()
    services = get_services(ecs, cluster_name)
    if not services:
        raise RollingException('No services found in cluster. exiting.')
    utils.print_info(
        f'Checking cluster {cluster_name}, services {str(services)} are stable')
    ecs_utils.poll_cluster_state(ecs,
                                 cluster_name,
                                 services,
                                 polling_timeout=120)
    instances = get_container_instance_arns(ecs, cluster_name)
    # `batches` is the number of batches; batch_count is how many instances are
    # replaced at once. Choose conservatively, as this process temporarily
    # reduces your capacity, but note each batch can be time consuming
    # (up to 10m per batch).
    batch_count = math.ceil(len(instances) / batches)
    utils.print_info(f'You have {len(instances)} instances.')
    utils.print_info(f'Terminating in batches of {batch_count}')
    if len(instances) <= batch_count:
        utils.print_warning(
            f'Terminating {batch_count} instances will cause downtime.')
        if not force:
            raise RollingException('Quitting, use --force to override.')
    instance_batches = batch_instances(instances, batch_count)
    for to_drain in instance_batches:
        if len(to_drain) > 100:
            utils.print_error(
                'Batch size exceeded 100, try using more batches.')
            raise RollingException(
                f'Quitting, batch size exceeded 100: {batch_count}.')
        response = ecs.describe_container_instances(
            cluster=cluster_name, containerInstances=to_drain)
        if not response.get('containerInstances'):
            raise RollingException('No containerInstances found.')
        # don't drain or terminate any instances that are already up to date
        # (if the user provided the --ami-id flag)
        done_instances = get_already_updated_instances(response, ami_id)
        if len(done_instances) == len(to_drain):
            # move on if the whole batch is already up to date
            continue
        # drain instances in this batch
        ecs.update_container_instances_state(cluster=cluster_name,
                                             status='DRAINING',
                                             containerInstances=to_drain)
        utils.print_info(
            f'Wait for drain to complete with {drain_timeout_s}s timeout...')
        start_time = time.time()
        while len(done_instances) < len(to_drain):
            if (time.time() - start_time) > drain_timeout_s:
                raise RollingTimeoutException(
                    'Waiting for instance to complete draining. Giving up.')
            time.sleep(SLEEP_TIME_S)
            response = ecs.describe_container_instances(
                cluster=cluster_name, containerInstances=to_drain)
            for container_instance in response.get('containerInstances'):
                instance_id = container_instance.get('ec2InstanceId')
                running_tasks = container_instance.get('runningTasksCount')
                if running_tasks > 0:
                    PRINT_PROGRESS()
                    continue
                if instance_id not in done_instances:
                    utils.print_info(f'{instance_id} is drained, terminate!')
                    ec2.terminate_instances(InstanceIds=[instance_id])
                    done_instances.append(instance_id)
        # a new instance can take as much as 10m to go into service, then we
        # wait for ECS to resume a steady state before moving on
        ecs_utils.poll_cluster_state(ecs,
                                     cluster_name,
                                     services,
                                     polling_timeout=drain_timeout_s)
    utils.print_success(
        f'EC2 instance replacement process complete! '
        f'{int(time.time() - replace_start_time)}s elapsed')

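# Hedged usage sketch (assumption, not original code): replacing every
# container instance in a cluster in two batches, skipping instances already
# on the target AMI. The region, cluster name, AMI id, and one-hour drain
# timeout are hypothetical placeholders.
def example_rolling_replace():
    import boto3
    ecs = boto3.client('ecs', 'us-east-1')
    ec2 = boto3.client('ec2', 'us-east-1')
    rolling_replace_instances(ecs,
                              ec2,
                              'example-cluster',
                              batches=2,
                              ami_id='ami-0123456789abcdef0',
                              force=False,
                              drain_timeout_s=3600)
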
def remove_image_scales(blocks):
    """Strip @@images scale suffixes from image block URLs."""
    for blockuid in blocks:
        block = blocks[blockuid]
        if block["@type"] == "image":
            if "@@images" in block["url"]:
                if block["url"].split("/")[-1] == "large":
                    block["size"] = "l"
                block["url"] = block["url"].split("/@@images")[0]
    return blocks


if __name__ == "__main__":
    pc = api.portal.get_tool("portal_catalog")
    for brain in pc.unrestrictedSearchResults(
            object_provides=IBlocks.__identifier__, path=PATH):
        try:
            obj = brain.getObject()
        except KeyError:
            obj = None
        if obj:
            blocks = obj.blocks
            utils.print_info(f"Processing: {obj.absolute_url()}")
            # Search for any image block and replace its scales
            blocks = remove_image_scales(blocks)
            obj.blocks = blocks
            transaction.commit()