def main(config, var, output_path, dry_run):
    """Entry point: deploy the services defined in a YAML config to ECS.

    :param config: open file handle of the YAML configuration; its ``.name``
        attribute is used to resolve paths relative to the config file.
    :param var: iterable of ``(key, value)`` pairs used as template variables.
    :param output_path: optional path where generated task definitions are
        written.
    :param dry_run: when true, generate and validate everything but skip the
        actual deployment.
    """
    base_path = os.path.dirname(config.name)
    # safe_load avoids arbitrary Python object construction from the YAML
    # file (yaml.load without an explicit Loader is deprecated and unsafe);
    # this also matches the behaviour of run() elsewhere in this module.
    config = yaml.safe_load(config)
    template_vars = dict(var)
    connection = Connection()

    cluster_name = config["cluster_name"]
    services = config["services"]
    logger.info(
        "Starting deploy on cluster %s (%s services)", cluster_name, len(services)
    )

    # Generate the task definitions
    task_definitions = generate_task_definitions(
        config, template_vars, base_path, output_path
    )

    # Check if all task definitions required by the services exist. This only
    # logs; a missing definition will surface again during the deployment.
    for service_name, service in services.items():
        if service["task_definition"] not in task_definitions:
            logger.error(
                "Missing task definition %r for service %r",
                service["task_definition"],
                service_name,
            )

    # Run the deployment
    if not dry_run:
        try:
            start_deployment(config, connection, task_definitions)
        except DeploymentFailed:
            sys.exit(1)
    sys.exit(0)
def run(
    filename: str,
    template_vars: typing.Dict[str, str],
    role_arn: typing.Optional[str] = None,
    output_path: typing.Optional[str] = None,
    create_missing_services=False,
    dry_run=False,
):
    """Load the deployment configuration from *filename* and deploy it.

    Reads the YAML config, renders the task definitions using
    *template_vars*, logs an error for every service that references an
    unknown task definition and finally — unless *dry_run* is set — starts
    the deployment.
    """
    config_dir = os.path.dirname(filename)
    with open(filename, "r") as config_file:
        config = yaml.safe_load(config_file.read())

    connection = Connection(role_arn)
    cluster_name = config["cluster_name"]
    services = config["services"]
    logger.info(
        "Starting deploy on cluster %s (%s services)", cluster_name, len(services)
    )

    # Render every task definition referenced by the configuration.
    task_definitions = generate_task_definitions(
        config, template_vars, config_dir, output_path
    )

    # Flag services whose task definition was not generated.
    for service_name, service in services.items():
        definition_name = service["task_definition"]
        if definition_name in task_definitions:
            continue
        logger.error(
            "Missing task definition %r for service %r",
            definition_name,
            service_name,
        )

    if dry_run:
        return
    start_deployment(config, connection, task_definitions, create_missing_services)
def run_tasks(connection, cluster_name, task_definitions, tasks) -> None:
    """Run one-off tasks.

    :parameter connection: The internal connection object.
    :type connection: Connection
    :parameter cluster_name: The cluster name to run the task on
    :type cluster_name: str
    :parameter task_definitions: dict of task definitions.
    :type task_definitions: dict
    :parameter tasks: list of tasks to run.
    :type tasks: list
    """
    for num, task in enumerate(tasks):
        task_def = task_definitions[task["task_definition"]]
        logger.info(
            "Starting one-off task '%s' via %s (%s)",
            task["command"],
            task_def.name,
            task["container"],
        )

        # A task started right after a previous one can fail transiently
        # (e.g. no free capacity while the previous task is still running),
        # so retry for a while before giving up. The very first task gets no
        # retries: its failure cannot be caused by a previous task, so it is
        # treated as fatal immediately (the original code claimed to retry
        # here but never re-issued the run_task call).
        max_attempts = 30 if num > 0 else 1
        for attempt in range(1, max_attempts + 1):
            response = connection.ecs.run_task(
                cluster=cluster_name,
                taskDefinition=task_def.name,
                overrides={
                    "containerOverrides": [{
                        "name": task["container"],
                        "command": task["command"].split(),
                    }]
                },
                startedBy="ecs-deplojo",
                count=1,
            )
            if not response.get("failures"):
                break
            logger.error("Error starting one-off task: %r", response["failures"])
            if attempt >= max_attempts:
                sys.exit(1)
            time.sleep(5)
def run_tasks(connection, cluster_name, task_definitions, tasks):
    """Run one-off tasks.

    :parameter connection: The internal connection object.
    :type connection: Connection
    :parameter cluster_name: The cluster name to run the task on
    :type cluster_name: str
    :parameter task_definitions: dict of task definitions.
    :type task_definitions: dict
    :parameter tasks: list of tasks to run.
    :type tasks: list
    """
    # Number of tasks started so far; used below to decide whether a failure
    # is fatal immediately (first task) or merely worth a pause (later tasks).
    num = 0
    for task in tasks:
        task_def = task_definitions[task['task_definition']]
        logger.info("Starting one-off task '%s' via %s (%s)",
                    task['command'], task_def['name'], task['container'])

        response = connection.ecs.run_task(
            cluster=cluster_name,
            taskDefinition=task_def['name'],
            overrides={
                'containerOverrides': [{
                    'name': task['container'],
                    'command': task['command'].split(),
                }]
            },
            startedBy='ecs-deplojo',
            count=1)

        if response.get('failures'):
            logger.error(
                "Error starting one-off task: %r", response['failures'])

            # If we already started one task then we keep retrying until
            # the previous task is finished.
            # NOTE(review): despite the comment above, nothing here retries
            # the failed run_task call — on failure it sleeps once and then
            # falls through to the next task, so the failed task is silently
            # skipped. Confirm whether a real retry loop was intended.
            if num > 0 and num <= 30:
                time.sleep(5)
            else:
                sys.exit(1)
        num += 1
def wait_for_deployments(connection, cluster_name, service_names):
    """Poll ECS until all deployments are finished (status = PRIMARY)

    Returns True once every service is down to a single (PRIMARY)
    deployment, or False after giving up at the 15 minute mark.
    """
    logger.info("Waiting for deployments")
    deadline = time.time() + 60 * 15

    def describe(service):
        """Return string in format of 'name (0/2)'"""
        name = service['serviceName']
        primary = next(
            (d for d in service['deployments']
             if d.get('status') == 'PRIMARY'),
            None)
        if primary is None:
            return name
        ready = primary['pendingCount'] + primary['runningCount']
        return '%s (%s/%s)' % (name, ready, primary['desiredCount'])

    # Give ECS a moment to register the service updates before polling.
    time.sleep(POLL_TIME)
    while True:
        services = utils.describe_services(
            connection.ecs, cluster=cluster_name, services=service_names)

        # A service with more than one deployment still has an old
        # deployment that is being drained.
        remaining = [s for s in services if len(s['deployments']) > 1]
        if not remaining:
            logger.info("Deployment finished: %s",
                        ', '.join(describe(s) for s in services))
            return True

        logger.info("Waiting for services: %s",
                    ', '.join(describe(s) for s in remaining))
        time.sleep(5)
        if time.time() > deadline:
            logger.error("Giving up after 15 minutes")
            return False
def wait_for_deployments(connection: Connection, cluster_name: str,
                         service_names: typing.List[str]) -> bool:
    """Poll ECS until all deployments are finished (status = PRIMARY)

    While polling, new service events are streamed to the log. Returns True
    when every service is down to a single deployment (plus a short grace
    period), or False after giving up at the 15 minute mark.
    """
    logger.info("Waiting for deployments")
    start_time = time.time()

    def service_description(service):
        """Return string in format of 'name (0/2)'"""
        name = service["serviceName"]
        for deployment in service["deployments"]:
            if deployment.get("status") != "PRIMARY":
                continue
            desired = deployment["desiredCount"]
            pending = deployment["pendingCount"]
            running = deployment["runningCount"]
            return "%s (%s/%s)" % (name, pending + running, desired)
        return name

    # Wait till all service updates are deployed
    time.sleep(5)

    # Only report events newer than (roughly) when we started waiting.
    # NOTE(review): this timestamp is timezone-aware (pytz.utc) — presumably
    # extract_new_event_messages compares it against aware ECS event
    # timestamps; confirm against that helper.
    utc_timestamp = datetime.datetime.utcnow().replace(
        tzinfo=pytz.utc) - datetime.timedelta(seconds=5)
    last_event_timestamps = {name: utc_timestamp for name in service_names}
    # Event ids already written to the log, so each event is logged once.
    logged_message_ids: typing.Set[str] = set()

    # Naive-UTC moment at which all services were last seen as "done";
    # None while at least one deployment is still in progress.
    ready_timestamp = None
    # Local-time moment we last logged something, used for idle feedback.
    last_message = datetime.datetime.now()

    while True:
        services = utils.describe_services(
            connection.ecs, cluster=cluster_name, services=service_names)
        # More than one deployment entry means an old deployment is still
        # being replaced.
        in_progress = [s for s in services if len(s["deployments"]) > 1]

        # Stream any new ECS service events to the log.
        messages = extract_new_event_messages(
            services, last_event_timestamps, logged_message_ids)
        for message in messages:
            logger.info(
                "%s - %s",
                message["createdAt"].strftime("%H:%M:%S"),
                message["message"])
            last_message = datetime.datetime.now()

        # 5 Seconds after the deployment is no longer in progress we mark it
        # as done.
        offset = datetime.datetime.utcnow() - datetime.timedelta(seconds=5)
        if ready_timestamp and offset > ready_timestamp:
            logger.info(
                "Deployment finished: %s",
                ", ".join([service_description(s) for s in services]),
            )
            break

        # Set is_ready after the previous check so that we can wait for x
        # more seconds before ending the operation successfully.
        # NOTE(review): ready_timestamp is re-assigned on every quiet poll,
        # so with the 5s poll interval the grace check above only passes on
        # a later iteration — confirm this ~5-10s delay is intended.
        if not in_progress:
            ready_timestamp = datetime.datetime.utcnow()

        # So we haven't printed something for a while, let's give some feedback
        elif last_message < datetime.datetime.now() - datetime.timedelta(
                seconds=10):
            logger.info(
                "Still waiting for: %s",
                ", ".join([s["serviceName"] for s in in_progress]),
            )

        time.sleep(5)
        if time.time() - start_time > (60 * 15):
            logger.error("Giving up after 15 minutes")
            return False
    return True