def stop(args):
    """Scale the compute fleet down to zero and stop the master server.

    Steps, in order:
      1. Set the Auto Scaling group limits to min/max/desired = 0/0/0.
      2. Ask EC2 to stop the master instance.
      3. Hand off to ``poll_master_server_state`` for status polling.

    Exits with status 0 when EC2 reports that the instance does not exist,
    or when the user interrupts the stop request.
    """
    print('Stopping: %s' % args.cluster_name)
    stack_name = 'cfncluster-' + args.cluster_name
    config = cfnconfig.CfnClusterConfig(args)

    # Compute fleet first, so no new nodes come up while the master stops.
    fleet = get_asg(stack_name=stack_name, config=config)
    set_asg_limits(asg=fleet, min=0, max=0, desired=0)

    # Then the master instance itself.
    instance_id = get_master_server_id(stack_name, config)
    connection = boto.ec2.connect_to_region(
        config.region,
        aws_access_key_id=config.aws_access_key_id,
        aws_secret_access_key=config.aws_secret_access_key)
    try:
        connection.stop_instances(instance_id)
    except boto.exception.BotoServerError as e:
        if not e.message.endswith("does not exist"):
            raise e
        print(e.message)
        sys.stdout.flush()
        sys.exit(0)
    except KeyboardInterrupt:
        print('\nExiting...')
        sys.exit(0)

    poll_master_server_state(stack_name, config)
def start(args):
    """Start the master server and restore the compute fleet limits.

    The master EC2 instance is started first. The Auto Scaling group is then
    given max = ``MaxQueueSize`` (10 when the parameter is absent) and
    min = desired = ``InitialQueueSize`` when ``args.reset_desired`` is set,
    otherwise 0. Finally ``poll_master_server_state`` is called.

    Exits with status 0 when EC2 reports that the instance does not exist,
    or when the user interrupts the start request.
    """
    print('Starting: %s' % args.cluster_name)
    stack_name = 'cfncluster-' + args.cluster_name
    config = cfnconfig.CfnClusterConfig(args)

    instance_id = get_master_server_id(stack_name, config)
    connection = boto.ec2.connect_to_region(
        config.region,
        aws_access_key_id=config.aws_access_key_id,
        aws_secret_access_key=config.aws_secret_access_key)
    try:
        connection.start_instances(instance_id)
    except boto.exception.BotoServerError as e:
        if not e.message.endswith("does not exist"):
            raise e
        print(e.message)
        sys.stdout.flush()
        sys.exit(0)
    except KeyboardInterrupt:
        print('\nExiting...')
        sys.exit(0)

    # First matching parameter wins; fall back to the defaults otherwise.
    upper_limit = next(
        (p[1] for p in config.parameters if p[0] == 'MaxQueueSize'), 10)
    if args.reset_desired:
        lower_limit = next(
            (p[1] for p in config.parameters if p[0] == 'InitialQueueSize'),
            0)
    else:
        lower_limit = 0

    fleet = get_asg(stack_name=stack_name, config=config)
    set_asg_limits(asg=fleet, min=lower_limit, max=upper_limit,
                   desired=lower_limit)

    poll_master_server_state(stack_name, config)
def create(args):
    """Create the CloudFormation stack for a new cluster.

    Before the stack is created, two derived parameters are appended to
    ``config.parameters`` when their source parameter is present:

      * ``ComputeWaitConditionCount`` mirrors ``InitialQueueSize``.
      * ``AvailabilityZone`` is looked up from ``MasterSubnetId``.

    Unless ``args.nowait`` is set, progress is written to stdout every five
    seconds while the stack is ``CREATE_IN_PROGRESS``, then the stack outputs
    are printed. With ``args.nowait`` only the current status is printed.

    Exits with status 1 on a boto server error and 0 on Ctrl-C.
    """
    print('Starting: %s' % (args.cluster_name))

    config = cfnconfig.CfnClusterConfig(args)

    # list.index raises ValueError when the parameter is absent; in that case
    # the derived parameter is simply not added.
    try:
        names = [p[0] for p in config.parameters]
        queue_size = config.parameters[names.index('InitialQueueSize')][1]
        config.parameters.append(('ComputeWaitConditionCount', queue_size))
    except ValueError:
        pass

    try:
        names = [p[0] for p in config.parameters]
        subnet_id = config.parameters[names.index('MasterSubnetId')][1]
        try:
            vpc = boto.vpc.connect_to_region(
                config.region,
                aws_access_key_id=config.aws_access_key_id,
                aws_secret_access_key=config.aws_secret_access_key)
            subnet = vpc.get_all_subnets(subnet_ids=subnet_id)[0]
            zone = str(subnet.availability_zone)
        except boto.exception.BotoServerError as e:
            print(e.message)
            sys.exit(1)
        config.parameters.append(('AvailabilityZone', zone))
    except ValueError:
        pass

    capabilities = ["CAPABILITY_IAM"]
    cfn = boto.cloudformation.connect_to_region(
        config.region,
        aws_access_key_id=config.aws_access_key_id,
        aws_secret_access_key=config.aws_secret_access_key)
    try:
        logger.debug((config.template_url, config.parameters))
        stack = cfn.create_stack(
            'cfncluster-' + args.cluster_name,
            template_url=config.template_url,
            parameters=config.parameters,
            capabilities=capabilities,
            disable_rollback=args.norollback,
            tags=args.tags)
        status = cfn.describe_stacks(stack)[0].stack_status
        if args.nowait:
            status = cfn.describe_stacks(stack)[0].stack_status
            print('Status: %s' % status)
        else:
            while status == 'CREATE_IN_PROGRESS':
                status = cfn.describe_stacks(stack)[0].stack_status
                latest = cfn.describe_stack_events(stack)[0]
                line = 'Status: %s - %s' % (latest.logical_resource_id,
                                            latest.resource_status)
                # Pad to 80 columns so a shorter line overwrites a longer one.
                sys.stdout.write('\r%s' % line.ljust(80))
                sys.stdout.flush()
                time.sleep(5)
            for output in cfn.describe_stacks(stack)[0].outputs:
                print(output)
    except boto.exception.BotoServerError as e:
        print(e.message)
        sys.exit(1)
    except KeyboardInterrupt:
        print('\nExiting...')
        sys.exit(0)
def status(args):
    """Report the CloudFormation status of a cluster stack.

    The current status is always written to stdout. Unless ``args.nowait`` is
    set, the stack is then polled every five seconds until it reaches a
    terminal state; on success the stack outputs are printed, on failure the
    failed stack events are printed.

    ``ROLLBACK_FAILED`` and ``UPDATE_ROLLBACK_FAILED`` are treated as terminal
    failure states so that polling cannot spin forever on them.

    Exits with status 0 when the stack does not exist or on Ctrl-C; any other
    boto server error is re-raised.
    """
    succeeded = ('CREATE_COMPLETE', 'UPDATE_COMPLETE')
    failed = ('ROLLBACK_COMPLETE', 'ROLLBACK_FAILED', 'CREATE_FAILED',
              'DELETE_FAILED', 'UPDATE_ROLLBACK_COMPLETE',
              'UPDATE_ROLLBACK_FAILED')
    failed_events = ('CREATE_FAILED', 'DELETE_FAILED', 'UPDATE_FAILED')

    stack = 'cfncluster-' + args.cluster_name
    config = cfnconfig.CfnClusterConfig(args)
    cfnconn = boto.cloudformation.connect_to_region(
        config.region,
        aws_access_key_id=config.aws_access_key_id,
        aws_secret_access_key=config.aws_secret_access_key)

    try:
        stack_status = cfnconn.describe_stacks(stack)[0].stack_status
        sys.stdout.write('\rStatus: %s' % stack_status)
        sys.stdout.flush()

        if args.nowait:
            sys.stdout.write('\n')
            sys.stdout.flush()
            return

        while stack_status not in succeeded and stack_status not in failed:
            time.sleep(5)
            stack_status = cfnconn.describe_stacks(stack)[0].stack_status
            latest = cfnconn.describe_stack_events(stack)[0]
            line = 'Status: %s - %s' % (latest.logical_resource_id,
                                        latest.resource_status)
            # Pad to 80 columns so a shorter line overwrites a longer one.
            sys.stdout.write('\r%s' % line.ljust(80))
            sys.stdout.flush()
        sys.stdout.write('\rStatus: %s\n' % stack_status)
        sys.stdout.flush()

        if stack_status in succeeded:
            for output in cfnconn.describe_stacks(stack)[0].outputs:
                print(output)
        else:
            for event in cfnconn.describe_stack_events(stack):
                if event.resource_status in failed_events:
                    print('%s %s %s %s %s' % (
                        event.timestamp, event.resource_status,
                        event.resource_type, event.logical_resource_id,
                        event.resource_status_reason))
    except boto.exception.BotoServerError as e:
        if not e.message.endswith("does not exist"):
            # Bare raise keeps the original traceback.
            raise
        sys.stdout.write('\r')
        print(e.message)
        sys.stdout.flush()
        sys.exit(0)
    except KeyboardInterrupt:
        print('\nExiting...')
        sys.exit(0)
def instances(args):
    """Print the cluster's instances, one per line.

    Entries from ``get_ec2_instances`` are listed first, followed by entries
    from ``get_asg_instances``. Each line shows the first two fields of an
    entry separated by a single space.
    """
    stack = 'cfncluster-' + args.cluster_name
    config = cfnconfig.CfnClusterConfig(args)

    # Collect everything before printing, so a failing lookup prints nothing.
    rows = []
    rows += get_ec2_instances(stack, config)
    rows += get_asg_instances(stack, config)

    for row in rows:
        print('%s %s' % (row[0], row[1]))
def configure(args):
    """Interactively create or update the cfncluster configuration file.

    The file is ``args.config_file`` when given, otherwise
    ``~/.cfncluster/config``. Values already present in the file are offered
    as defaults for each prompt. The answers are merged into the existing
    configuration, written back to disk, and the result is validated by
    constructing ``cfnconfig.CfnClusterConfig(args)``.

    Only answers that are not ``None`` are written, so existing options are
    never overwritten with an empty value.
    """
    if args.config_file is not None:
        config_file = args.config_file
    else:
        config_file = os.path.expanduser(
            os.path.join('~', '.cfncluster', 'config'))

    config = ConfigParser.ConfigParser()
    if os.path.isfile(config_file):
        config.read(config_file)

    def saved(section, option, fallback=None):
        # Value already stored in the loaded file, or fallback when absent.
        if config.has_option(section, option):
            return config.get(section, option)
        return fallback

    # Prompt for required values, using existing values as defaults.
    cluster_template = prompt(
        'Cluster Template', saved('global', 'cluster_template', 'default'))
    aws_access_key_id = prompt(
        'AWS Access Key ID', saved('aws', 'aws_access_key_id'), True)
    aws_secret_access_key = prompt(
        'AWS Secret Access Key ID', saved('aws', 'aws_secret_access_key'),
        True)

    # Use the built-in boto regions as the available options.
    aws_region_name = prompt(
        'AWS Region ID', saved('aws', 'aws_region_name'),
        options=get_regions())

    cluster_section = 'cluster ' + cluster_template
    vpcname = prompt(
        'VPC Name', saved(cluster_section, 'vpc_settings', 'public'))

    # Query EC2 for the available keys, VPCs and subnets as options.
    key_name = prompt(
        'Key Name', saved(cluster_section, 'key_name'),
        options=list_keys(aws_access_key_id, aws_secret_access_key,
                          aws_region_name))
    vpc_section = 'vpc ' + vpcname
    vpc_id = prompt(
        'VPC ID', saved(vpc_section, 'vpc_id'),
        options=list_vpcs(aws_access_key_id, aws_secret_access_key,
                          aws_region_name))
    master_subnet_id = prompt(
        'Master Subnet ID', saved(vpc_section, 'master_subnet_id'),
        options=list_subnets(aws_access_key_id, aws_secret_access_key,
                             aws_region_name, vpc_id))

    # Values to store; '__name__' carries the section name, not an option.
    s_global = {
        '__name__': 'global',
        'cluster_template': cluster_template,
        'update_check': 'true',
        'sanity_check': 'true',
    }
    s_aws = {
        '__name__': 'aws',
        'aws_access_key_id': aws_access_key_id,
        'aws_secret_access_key': aws_secret_access_key,
        'aws_region_name': aws_region_name,
    }
    s_cluster = {
        '__name__': cluster_section,
        'key_name': key_name,
        'vpc_settings': vpcname,
    }
    s_vpc = {
        '__name__': vpc_section,
        'vpc_id': vpc_id,
        'master_subnet_id': master_subnet_id,
    }

    for section in [s_aws, s_cluster, s_vpc, s_global]:
        section_name = section['__name__']
        try:
            config.add_section(section_name)
        except ConfigParser.DuplicateSectionError:
            pass
        for key, value in section.items():
            # Compare by value: identity checks on str literals only work by
            # accident of interning.
            if value is not None and key != '__name__':
                config.set(section_name, key, value)

    # Make sure the target directory exists (first run on the default path).
    config_dir = os.path.dirname(config_file)
    if config_dir and not os.path.isdir(config_dir):
        os.makedirs(config_dir)

    with open(config_file, 'w') as cfgfile:
        config.write(cfgfile)

    # Verify the configuration.
    cfnconfig.CfnClusterConfig(args)
def update(args):
    """Update the CloudFormation stack of an existing cluster.

    Unless ``args.reset_desired`` is set, the current desired capacity of the
    ``ComputeFleet`` Auto Scaling group is read and passed as
    ``InitialQueueSize`` so the update does not resize the running fleet.

    Unless ``args.nowait`` is set, progress is written to stdout every five
    seconds while the stack is ``UPDATE_IN_PROGRESS``. With ``args.nowait``
    only the current status is printed.

    Exits with status 1 on a boto server error and 0 on Ctrl-C.

    NOTE(review): ``update`` is defined again later in this file; the later
    definition is the one bound at import time.
    """
    print('Updating: %s' % (args.cluster_name))
    stack_name = 'cfncluster-' + args.cluster_name
    config = cfnconfig.CfnClusterConfig(args)
    capabilities = ["CAPABILITY_IAM"]

    # Pass the configured credentials, as the other commands do.
    cfnconn = boto.cloudformation.connect_to_region(
        config.region,
        aws_access_key_id=config.aws_access_key_id,
        aws_secret_access_key=config.aws_secret_access_key)
    asgconn = boto.ec2.autoscale.connect_to_region(
        config.region,
        aws_access_key_id=config.aws_access_key_id,
        aws_secret_access_key=config.aws_secret_access_key)

    if not args.reset_desired:
        all_resources = []
        page = cfnconn.describe_stack_resources(stack_name)
        while True:
            all_resources.extend(page)
            if not page.next_token:
                break
            # Must use stack_name here: ``stack`` is only assigned further
            # down, by update_stack().
            page = cfnconn.describe_stack_resources(
                stack_name, next_token=page.next_token)
        asg = [r for r in all_resources
               if r.logical_resource_id == 'ComputeFleet'
               ][0].physical_resource_id
        desired_capacity = asgconn.get_all_groups(
            names=[asg])[0].desired_capacity
        config.parameters.append(('InitialQueueSize', desired_capacity))

    try:
        logger.debug((config.template_url, config.parameters))
        stack = cfnconn.update_stack(
            stack_name,
            template_url=config.template_url,
            parameters=config.parameters,
            capabilities=capabilities,
            disable_rollback=args.norollback)
        status = cfnconn.describe_stacks(stack)[0].stack_status
        if not args.nowait:
            while status == 'UPDATE_IN_PROGRESS':
                status = cfnconn.describe_stacks(stack)[0].stack_status
                events = cfnconn.describe_stack_events(stack)[0]
                resource_status = ('Status: %s - %s' % (
                    events.logical_resource_id,
                    events.resource_status)).ljust(80)
                sys.stdout.write('\r%s' % resource_status)
                sys.stdout.flush()
                time.sleep(5)
        else:
            status = cfnconn.describe_stacks(stack)[0].stack_status
            print('Status: %s' % status)
    except boto.exception.BotoServerError as e:
        print(e.message)
        sys.exit(1)
    except KeyboardInterrupt:
        print('\nExiting...')
        sys.exit(0)
def list(args):
    """Print the names of all cfncluster stacks in the configured region.

    Stacks whose name starts with ``cfncluster-`` are listed with that prefix
    removed. A "does not exist" server error is printed; any other boto
    server error is re-raised. Exits with status 0 on Ctrl-C.

    The function name shadows the ``list`` builtin but is kept, since callers
    refer to it by this name.
    """
    prefix = 'cfncluster-'
    config = cfnconfig.CfnClusterConfig(args)
    # Pass the configured credentials, as the other commands do.
    cfnconn = boto.cloudformation.connect_to_region(
        config.region,
        aws_access_key_id=config.aws_access_key_id,
        aws_secret_access_key=config.aws_secret_access_key)
    try:
        for stack in cfnconn.describe_stacks():
            if stack.stack_name.startswith(prefix):
                print('%s' % (stack.stack_name[len(prefix):]))
    except boto.exception.BotoServerError as e:
        if not e.message.endswith("does not exist"):
            # Bare raise keeps the original traceback.
            raise
        print(e.message)
    except KeyboardInterrupt:
        print('Exiting...')
        sys.exit(0)
def delete(args):
    """Delete the CloudFormation stack of a cluster.

    Unless ``args.nowait`` is set, progress is written to stdout every five
    seconds while the stack is ``DELETE_IN_PROGRESS``. If the stack ends up in
    ``DELETE_FAILED``, a hint to re-run the delete is printed.

    Exits with status 0 once the stack no longer exists (reported by boto as
    a "does not exist" error) or on Ctrl-C; any other boto server error is
    re-raised.
    """
    print('Deleting: %s' % args.cluster_name)
    stack = 'cfncluster-' + args.cluster_name
    config = cfnconfig.CfnClusterConfig(args)
    cfnconn = boto.cloudformation.connect_to_region(
        config.region,
        aws_access_key_id=config.aws_access_key_id,
        aws_secret_access_key=config.aws_secret_access_key)
    try:
        cfnconn.delete_stack(stack)
        if not args.nowait:
            # Give CloudFormation a moment to register the delete request.
            time.sleep(5)
        status = cfnconn.describe_stacks(stack)[0].stack_status
        sys.stdout.write('\rStatus: %s' % status)
        sys.stdout.flush()
        if not args.nowait:
            while status == 'DELETE_IN_PROGRESS':
                time.sleep(5)
                status = cfnconn.describe_stacks(stack)[0].stack_status
                events = cfnconn.describe_stack_events(stack)[0]
                resource_status = ('Status: %s - %s' % (
                    events.logical_resource_id,
                    events.resource_status)).ljust(80)
                sys.stdout.write('\r%s' % resource_status)
                sys.stdout.flush()
            sys.stdout.write('\rStatus: %s\n' % status)
            sys.stdout.flush()
        else:
            sys.stdout.write('\n')
            sys.stdout.flush()
        if status == 'DELETE_FAILED':
            # Show the cluster name the user typed: this command adds the
            # 'cfncluster-' prefix itself, so the prefixed stack name would
            # point the retry at the wrong stack.
            print('Cluster did not delete successfully. '
                  'Run \'cluster delete %s\' again' % args.cluster_name)
    except boto.exception.BotoServerError as e:
        if not e.message.endswith("does not exist"):
            # Bare raise keeps the original traceback.
            raise
        print(e.message)
        sys.stdout.flush()
        sys.exit(0)
    except KeyboardInterrupt:
        print('\nExiting...')
        sys.exit(0)
def sshmaster(args):
    """Check that the cluster's master server accepts TCP connections on 22.

    The host is taken from the stack output ``MasterPrivateIP`` when
    ``args.useprivateip`` is set, otherwise ``MasterPublicIP``. On a failed
    connection the error and traceback are printed and the process exits with
    status 1.

    NOTE(review): the function only opens the connection; no SSH session is
    started on it. The socket is closed before returning.
    """
    stack = 'cfncluster-' + args.cluster_name
    config = cfnconfig.CfnClusterConfig(args)
    # Pass the configured credentials, as the other commands do.
    cfnconn = boto.cloudformation.connect_to_region(
        config.region,
        aws_access_key_id=config.aws_access_key_id,
        aws_secret_access_key=config.aws_secret_access_key)
    outputs = cfnconn.describe_stacks(stack)[0].outputs

    output_key = 'MasterPrivateIP' if args.useprivateip else 'MasterPublicIP'
    hostname = [o for o in outputs if o.key == output_key][0].value
    port = 22

    sock = None
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect((hostname, port))
    except Exception as e:
        print('*** Connect failed: ' + str(e))
        traceback.print_exc()
        sys.exit(1)
    finally:
        # Runs on success and on the sys.exit() path, so the socket is never
        # left open.
        if sock is not None:
            sock.close()
def create(args):
    """Create the CloudFormation stack for a new cluster.

    When ``InitialQueueSize`` is present in ``config.parameters``, a matching
    ``ComputeWaitConditionCount`` parameter is appended before the stack is
    created.

    Unless ``args.nowait`` is set, progress is written to stdout every five
    seconds while the stack is ``CREATE_IN_PROGRESS``, then the stack outputs
    are printed. With ``args.nowait`` only the current status is printed.

    Exits with status 1 on a boto server error and 0 on Ctrl-C.

    NOTE(review): this redefines ``create``, which is already defined earlier
    in this file; this later definition is the one bound at import time.
    """
    print('Starting: %s' % (args.cluster_name))

    config = cfnconfig.CfnClusterConfig(args)

    # list.index raises ValueError when the parameter is absent; in that case
    # no wait-condition count is added.
    try:
        i = [p[0] for p in config.parameters].index('InitialQueueSize')
        initial_queue_size = config.parameters[i][1]
        config.parameters.append(
            ('ComputeWaitConditionCount', initial_queue_size))
    except ValueError:
        pass

    capabilities = ["CAPABILITY_IAM"]
    # Pass the configured credentials, as the other commands do.
    cfnconn = boto.cloudformation.connect_to_region(
        config.region,
        aws_access_key_id=config.aws_access_key_id,
        aws_secret_access_key=config.aws_secret_access_key)
    try:
        logger.debug((config.template_url, config.parameters))
        stack = cfnconn.create_stack(
            'cfncluster-' + args.cluster_name,
            template_url=config.template_url,
            parameters=config.parameters,
            capabilities=capabilities,
            disable_rollback=args.norollback)
        status = cfnconn.describe_stacks(stack)[0].stack_status
        if not args.nowait:
            while status == 'CREATE_IN_PROGRESS':
                status = cfnconn.describe_stacks(stack)[0].stack_status
                events = cfnconn.describe_stack_events(stack)[0]
                resource_status = ('Status: %s - %s' % (
                    events.logical_resource_id,
                    events.resource_status)).ljust(80)
                sys.stdout.write('\r%s' % resource_status)
                sys.stdout.flush()
                time.sleep(5)
            for output in cfnconn.describe_stacks(stack)[0].outputs:
                print(output)
        else:
            status = cfnconn.describe_stacks(stack)[0].stack_status
            print('Status: %s' % status)
    except boto.exception.BotoServerError as e:
        print(e.message)
        sys.exit(1)
    except KeyboardInterrupt:
        print('\nExiting...')
        sys.exit(0)
def update(args):
    """Update the CloudFormation stack of an existing cluster.

    Unless ``args.reset_desired`` is set, the current desired capacity of the
    ``ComputeFleet`` Auto Scaling group is read and passed as
    ``InitialQueueSize`` so the update does not resize the running fleet.
    When ``MasterSubnetId`` is configured, the subnet's availability zone is
    looked up and passed as ``AvailabilityZone``.

    Unless ``args.nowait`` is set, progress is written to stdout every five
    seconds while the stack is ``UPDATE_IN_PROGRESS``. With ``args.nowait``
    only the current status is printed.

    Exits with status 1 on a boto server error and 0 on Ctrl-C.
    """
    print('Updating: %s' % (args.cluster_name))
    stack_name = 'cfncluster-' + args.cluster_name
    config = cfnconfig.CfnClusterConfig(args)
    capabilities = ["CAPABILITY_IAM"]
    cfnconn = boto.cloudformation.connect_to_region(
        config.region,
        aws_access_key_id=config.aws_access_key_id,
        aws_secret_access_key=config.aws_secret_access_key)
    asgconn = boto.ec2.autoscale.connect_to_region(
        config.region,
        aws_access_key_id=config.aws_access_key_id,
        aws_secret_access_key=config.aws_secret_access_key)

    if not args.reset_desired:
        all_resources = []
        page = cfnconn.describe_stack_resources(stack_name)
        while True:
            all_resources.extend(page)
            if not page.next_token:
                break
            # Must use stack_name here: ``stack`` is only assigned further
            # down, by update_stack().
            page = cfnconn.describe_stack_resources(
                stack_name, next_token=page.next_token)
        asg = [r for r in all_resources
               if r.logical_resource_id == 'ComputeFleet'
               ][0].physical_resource_id
        desired_capacity = asgconn.get_all_groups(
            names=[asg])[0].desired_capacity
        config.parameters.append(('InitialQueueSize', desired_capacity))

    # Get the MasterSubnetId and use it to determine the AvailabilityZone.
    # list.index raises ValueError when the parameter is absent; in that case
    # no availability zone is added.
    try:
        i = [p[0] for p in config.parameters].index('MasterSubnetId')
        master_subnet_id = config.parameters[i][1]
        try:
            vpcconn = boto.vpc.connect_to_region(
                config.region,
                aws_access_key_id=config.aws_access_key_id,
                aws_secret_access_key=config.aws_secret_access_key)
            availability_zone = str(vpcconn.get_all_subnets(
                subnet_ids=master_subnet_id)[0].availability_zone)
        except boto.exception.BotoServerError as e:
            print(e.message)
            sys.exit(1)
        config.parameters.append(('AvailabilityZone', availability_zone))
    except ValueError:
        pass

    try:
        logger.debug((config.template_url, config.parameters))
        stack = cfnconn.update_stack(
            stack_name,
            template_url=config.template_url,
            parameters=config.parameters,
            capabilities=capabilities,
            disable_rollback=args.norollback)
        status = cfnconn.describe_stacks(stack)[0].stack_status
        if not args.nowait:
            while status == 'UPDATE_IN_PROGRESS':
                status = cfnconn.describe_stacks(stack)[0].stack_status
                events = cfnconn.describe_stack_events(stack)[0]
                resource_status = ('Status: %s - %s' % (
                    events.logical_resource_id,
                    events.resource_status)).ljust(80)
                sys.stdout.write('\r%s' % resource_status)
                sys.stdout.flush()
                time.sleep(5)
        else:
            status = cfnconn.describe_stacks(stack)[0].stack_status
            print('Status: %s' % status)
    except boto.exception.BotoServerError as e:
        print(e.message)
        sys.exit(1)
    except KeyboardInterrupt:
        print('\nExiting...')
        sys.exit(0)
def version(args):
    """Print the ``version`` attribute of the loaded cluster configuration."""
    print(cfnconfig.CfnClusterConfig(args).version)