Esempio n. 1
0
def handle_logs(tasks: list, clean_stale_log_groups: bool = False,
                retention_days: int = 7) -> None:
    """
    Ensure a CloudWatch log group exists for every defined task.

    Creates log groups that are defined but missing, applies a retention
    policy to every defined log group, and optionally deletes log groups
    that exist on CloudWatch but are no longer defined.

    :param tasks: Task definition dicts used to derive log group names.
    :param clean_stale_log_groups: When True, delete undefined log groups.
    :param retention_days: Retention period (in days) applied to every
        defined log group; defaults to 7 to preserve previous behavior.
    """
    # Prepare log groups
    defined_log_group_names = _get_defined_log_group_names(tasks)
    existing_log_group_names = _get_existing_log_group_names()

    # Create log groups that are defined but do not exist yet.
    missing_log_group_names = defined_log_group_names.difference(
        existing_log_group_names)
    _create_missing_log_groups(missing_log_group_names)

    # Apply the retention policy to every defined log group.
    for log_group_name in defined_log_group_names:
        logger.info('Setting retention days to %s for log group: %s',
                    retention_days, log_group_name)
        response = logs_client.put_retention_policy(
            logGroupName=log_group_name, retentionInDays=retention_days)
        logger.debug('Set retention days to %s. Response: %s', retention_days,
                     response)

    # Clean log groups that exist but are no longer defined.
    if clean_stale_log_groups:
        stale_log_group_names = existing_log_group_names.difference(
            defined_log_group_names)
        _clean_stale_log_groups(stale_log_group_names)
Esempio n. 2
0
def _register_scalable_target(scale_dict: dict, resource_id: str) -> None:
    """Register the ECS service identified by resource_id as a scalable target."""
    logger.info('Registering service as a scalable target: %s', resource_id)
    params = {
        'ServiceNamespace': 'ecs',
        'ResourceId': resource_id,
        'ScalableDimension': 'ecs:service:DesiredCount',
        'MinCapacity': scale_dict['MinCapacity'],
        'MaxCapacity': scale_dict['MaxCapacity'],
    }
    response = scaling_client.register_scalable_target(**params)
    logger.debug('Registered service as a scalable target details: %s', response)
Esempio n. 3
0
def _deregister_scalable_target(resource_id: str) -> None:
    """
    Deregister an ECS service from Application Auto Scaling.

    A missing target (ObjectNotFoundException) is treated as success;
    any other ClientError is re-raised so real failures stay visible.

    :param resource_id: Scalable target resource id, e.g.
        'service/<cluster>/<serviceName>'.
    """
    try:
        response = scaling_client.deregister_scalable_target(
                ServiceNamespace='ecs',
                ResourceId=resource_id,
                ScalableDimension='ecs:service:DesiredCount',
        )
    except ClientError as e:
        if e.response['Error']['Code'] == 'ObjectNotFoundException':
            logger.debug('No need to deregister..')
            return
        # Bug fix: previously any other ClientError was silently swallowed.
        raise
    logger.info('Deregistered service as a scalable target: %s', resource_id)
    logger.debug('Service deregistration response: %s', response)
Esempio n. 4
0
def _clean_stale_policies(services: list, existing_policies: list) -> None:
    for existing_policy_dict in existing_policies:
        if not _is_stale_policy(existing_policy_dict, services):
            continue

        logger.info('Removing state policy: %s', existing_policy_dict['PolicyName'])
        response = scaling_client.delete_scaling_policy(
                PolicyName=existing_policy_dict['PolicyName'],
                ServiceNamespace='ecs',
                ResourceId=existing_policy_dict['ResourceId'],
                ScalableDimension='ecs:service:DesiredCount'
        )
        logger.debug('Removed stale policy details: %s', response)
Esempio n. 5
0
def _update_services(services: list) -> None:
    for service_dict in services:
        logger.info('Updating service: %s', service_dict['serviceName'])

        # If a task revision is not specified, the latest ACTIVE revision is used.
        response = ecs_client.update_service(
            cluster=service_dict['cluster'],
            service=service_dict['serviceName'],
            desiredCount=service_dict['desiredCount'],
            taskDefinition=service_dict['taskDefinition'],
            deploymentConfiguration=service_dict.get('deploymentConfiguration',
                                                     {}),
            forceNewDeployment=True,
        )
        logger.debug('Updated service details: %s', response)
Esempio n. 6
0
def _create_missing_metrics(missing_metrics: list) -> None:
    for missing_metric in missing_metrics:
        logger.info('Creating metric: %s', missing_metric['MetricStat']['Metric']['MetricName'])
        response = cloudwatch_client.put_metric_data(
            Namespace=missing_metric['MetricStat']['Metric']['Namespace'],
            MetricData=[
                {
                    'MetricName': missing_metric['MetricStat']['Metric']['MetricName'],
                    'Timestamp': datetime.utcnow(),
                    'Value': 0,
                    'Unit': missing_metric['MetricStat']['Unit']
                },
            ]
        )
        logger.debug('Created metric details: %s', response)
Esempio n. 7
0
def _clean_stale_alarms(existing_alarms: list, services: list) -> None:
    # Extract alarm name definitions to a list.
    defined_alarm_names = [sd['alarm']['AlarmName'] for sd in services]

    # If an alarm exist on Cloudwatch but is not defined now, it is an stale alarm, anymore.
    stale_alarm_names = [
        ea['AlarmName'] for ea in existing_alarms
        if ea['AlarmName'] not in defined_alarm_names
    ]

    # Batch delete for stale alarms.
    if stale_alarm_names:
        logger.info('Deleting stale alarms: %s', stale_alarm_names)
        response = cloudwatch_client.delete_alarms(
            AlarmNames=stale_alarm_names)
        logger.debug('Deleted stale alarms details: %s', response)
Esempio n. 8
0
def clean_stale_tasks() -> None:
    """
    Clean stale task definitions. Leave only the active task definitions
    that are used by services.
    """
    service_task_definitions = _retrieve_service_task_definitions()

    # Page through every ACTIVE task definition ARN.
    all_task_definitions = []  # type: List[str]
    next_token = None
    while True:
        if next_token:
            resp = ecs_client.list_task_definitions(status='ACTIVE', maxResults=100, nextToken=next_token)
        else:
            resp = ecs_client.list_task_definitions(status='ACTIVE', maxResults=100)

        all_task_definitions.extend(resp['taskDefinitionArns'])

        next_token = resp.get('nextToken')

        # All task definitions are loaded.
        if not next_token:
            break

    for task_definition in all_task_definitions:
        if task_definition in service_task_definitions:
            continue

        logger.info('Deregistering task definition %s', task_definition)
        slept = 0
        while True:
            try:
                response = ecs_client.deregister_task_definition(taskDefinition=task_definition)
                logger.debug('Deregistered stale task: %s', response)
            except ClientError as e:
                # Bug fix: only throttling is retried. Previously any other
                # ClientError was swallowed without incrementing `slept`,
                # which made this loop spin forever.
                if e.response['Error']['Code'] != 'ThrottlingException':
                    raise
                logger.info('Request is throttled. Waiting...')
                time.sleep(5)
                slept += 5
            else:
                break

            # Give up trying after 20 seconds.
            if slept >= 20:
                break

    logger.info('Cleaned all stale tasks.')
Esempio n. 9
0
def _delete_log_streams(existing_logs: list, failed_container_ids: list,
                        days_ago: int) -> None:
    """
    Delete log streams that are older than the given days_ago and whose
    containers are not running anymore.

    :param existing_logs: Dicts with 'log_group_name' and 'log_streams'.
    :param failed_container_ids: Container ids whose streams may be deleted.
    :param days_ago: Minimum age in days (by last event time) for deletion.
    """
    deleted_count = 0
    # Hoist the loop-invariant cutoff computation.
    cutoff = datetime.utcnow() - timedelta(days=days_ago)
    for existing_log in existing_logs:
        existing_log_streams = existing_log['log_streams']
        existing_log_group_name = existing_log['log_group_name']
        for existing_log_stream in existing_log_streams:
            # Example log stream name: 'ecs/container-foo/XXXXXXX-YYYY-WWWW-ZZZZ-XXXXXXXX'
            container_id = existing_log_stream['logStreamName'].split('/')[-1]
            if container_id not in failed_container_ids:
                continue

            last_event_time = existing_log_stream.get(
                'lastEventTimestamp', existing_log_stream.get('creationTime'))
            if not last_event_time:
                logger.warning(
                    'Neither creation time nor last event time is known! %s',
                    existing_log_stream)
                continue

            # AWS returns the timestamp in milliseconds. Bug fix: convert
            # with utcfromtimestamp() so the comparison against utcnow()
            # is consistent; fromtimestamp() applies the LOCAL timezone
            # and skewed the staleness check by the UTC offset.
            last_event_datetime = datetime.utcfromtimestamp(last_event_time /
                                                            1000)
            if cutoff < last_event_datetime:
                continue

            logger.info(
                'Deleting log stream: %s of log group %s. Last event time: %s',
                existing_log_stream['logStreamName'], existing_log_group_name,
                last_event_datetime)

            response = logs_client.delete_log_stream(
                logGroupName=existing_log_group_name,
                logStreamName=existing_log_stream['logStreamName'],
            )
            deleted_count += 1
            logger.debug('Deleted log stream: %s', response)

    logger.info('Deleted %s stale log streams.', deleted_count)
Esempio n. 10
0
def _create_missing_services(services: list) -> None:
    """Create every defined ECS service that does not exist on the cluster yet."""
    existing_services = _get_existing_services(services)

    # Services that are defined but not present on ECS.
    missing_services = [
        service for service in services
        if service['serviceName'] not in existing_services
    ]

    for service in missing_services:
        logger.info('Creating service: %s', service['serviceName'])
        params = {
            'cluster': service['cluster'],
            'serviceName': service['serviceName'],
            'taskDefinition': service['taskDefinition'],
            'desiredCount': service['desiredCount'],
            'launchType': service['launchType'],
            'schedulingStrategy': service['schedulingStrategy'],
            'deploymentController': service['deploymentController'],
            'loadBalancers': service.get('loadBalancers', []),
            'placementConstraints': service.get('placementConstraints', []),
            'placementStrategy': service.get('placementStrategy', []),
            'deploymentConfiguration': service.get('deploymentConfiguration', {}),
        }
        # The grace period is optional; pass it only when set and truthy.
        grace_period = service.get('healthCheckGracePeriodSeconds')
        if grace_period:
            params['healthCheckGracePeriodSeconds'] = grace_period

        response = ecs_client.create_service(**params)
        logger.debug('Created service details: %s', response)
Esempio n. 11
0
def _register_task_definitions(tasks: list) -> None:
    for task_dict in tasks:
        # Create container definitions.
        container_definitions = []
        for container_definition in task_dict['containerDefinitions']:
            d = {
                'name': container_definition['name'],
                'image': container_definition['image'],
                'logConfiguration': container_definition['logConfiguration'],
                'memoryReservation': container_definition['memoryReservation'],
                'cpu': container_definition.get('cpu', 0),
                'entryPoint': container_definition.get('entryPoint', []),
                'command': container_definition.get('command', []),
                'environment': container_definition.get('environment', []),
                'portMappings': container_definition.get('portMappings', []),
                'ulimits': container_definition.get('ulimits', []),
                'mountPoints': container_definition.get('mountPoints', []),
                'links': container_definition.get('links', []),
            }
            if container_definition.get('memory'):
                if container_definition['memory'] < container_definition['memoryReservation']:
                    logger.error('memory must be equal or bigger than memoryReservation')
                    raise AeropressException()

                d['memory'] = container_definition['memory']

            container_definitions.append(d)

        logger.info('Creating task definition: %s', task_dict['family'])
        response = ecs_client.register_task_definition(
            family=task_dict['family'],
            taskRoleArn=task_dict['taskRoleArn'],
            executionRoleArn=task_dict['executionRoleArn'],
            networkMode=task_dict['networkMode'],
            containerDefinitions=container_definitions,
            requiresCompatibilities=task_dict['requiresCompatibilities'],
            volumes=task_dict.get('volumes', []),
        )
        logger.debug('Created task definition details: %s', response)
Esempio n. 12
0
def _create_or_update_all_policies(services: list, existing_policies: list) -> None:
    for service_dict in services:
        resource_id = 'service/' + service_dict['cluster'] + '/' + service_dict['serviceName']

        if not service_dict.get('scale'):
            continue

        if not service_dict['scale'].get('policies'):
            continue

        # Create or update the policies.
        for policy_dict in service_dict['scale']['policies']:
            logger.info('Crating scaling policy: %s for %s', policy_dict['PolicyName'], resource_id)
            response = scaling_client.put_scaling_policy(
                    PolicyName=policy_dict['PolicyName'],
                    PolicyType=policy_dict['PolicyType'],
                    ServiceNamespace='ecs',
                    ResourceId=resource_id,
                    ScalableDimension=policy_dict['ScalableDimension'],
                    StepScalingPolicyConfiguration=policy_dict['StepScalingPolicyConfiguration'],
            )
            logger.debug('Created scaling policy details: %s', response)
Esempio n. 13
0
def create(cluster_name: str) -> None:
    """Create an ECS cluster with the given name."""
    logger.info('Creating cluster %s', cluster_name)
    creation_response = ecs_client.create_cluster(clusterName=cluster_name)
    logger.debug('Created cluster details: %s', creation_response)
Esempio n. 14
0
def _clean_stale_log_groups(stale_log_group_names: set) -> None:
    for stale_log_group_name in stale_log_group_names:
        logger.info('Cleaning stale log group: %s', stale_log_group_name)
        response = logs_client.delete_log_group(
            logGroupName=stale_log_group_name)
        logger.debug('Clean stale log group details: %s', response)
Esempio n. 15
0
def _create_missing_log_groups(missing_log_group_names: set) -> None:
    for missing_log_group_name in missing_log_group_names:
        logger.info('Creating log group: %s', missing_log_group_name)
        response = logs_client.create_log_group(
            logGroupName=missing_log_group_name)
        logger.debug('Created log group details: %s', response)