def load(self):
    """Populate this object from an already-running cluster.

    Queries EC2 for instances in this cluster's security groups, keeps only
    those in the 'running' state, and fills in the master/worker bookkeeping
    attributes (master_instance, master_name, master, worker_instances,
    workers, worker_names).

    Returns:
        self, whether or not a running master was found, so calls can be
        chained (e.g. ``cluster = Cluster(...).load()``).
    """
    masters, workers = ec2utils.get_existing_instances(
        self.ec2, self.name, quiet=True)
    masters = [m for m in masters if m.state['Name'] == 'running']
    workers = [w for w in workers if w.state['Name'] == 'running']
    if not masters:
        # No running master: leave the bookkeeping attributes unpopulated.
        return self
    self.master_instance = masters[0]
    self.worker_instances = workers
    # self.opts = self.retrieve_opts(self.master_instance)
    # Get master instance information. Match on the tag *key* 'Name';
    # the previous `'Name' in d.values()` check also (wrongly) matched any
    # tag whose Value happened to be the string 'Name'.
    self.master_name = [d['Value'] for d in self.master_instance.tags
                        if d.get('Key') == 'Name'][0]
    self.master = {self.master_name: self.master_instance}
    # Get worker instance information, keyed by each worker's Name tag.
    self.workers = {}
    for inst in self.worker_instances:
        worker_name = [d['Value'] for d in inst.tags
                       if d.get('Key') == 'Name'][0]
        self.workers[worker_name] = inst
    self.worker_names = sorted(self.workers.keys())
    # Return self on the success path too, matching the early return above
    # (the original fell off the end and returned None here).
    return self
def launch(self):
    """Launch and configure a new cluster.

    Steps, in order:
      1. Authorize ingress ports on the master/worker security groups,
         but only for groups that currently have no permissions (so an
         existing group's rules are never duplicated).
      2. Abort if instances are already live in either group.
      3. Resolve the AMI (region default when none was given) and verify
         that it exists.
      4. Build the master's BlockDeviceMappings: the requested EBS volumes,
         plus explicit ephemeral drives for m3 instance types.
      5. Launch workers and the master (spot or on-demand), wait for any
         spot requests to be fulfilled, then wait for every instance to be
         running and to pass EC2 status checks.
      6. Tag all instances with names and call ``self.configure()``.

    Exits the process via ``sys.exit(1)`` if the security groups already
    contain instances or the AMI cannot be found.
    """
    print('')
    # Authorize ingress ports for master and worker security groups.
    auth_master = not self.master_group.ip_permissions
    auth_worker = not self.worker_group.ip_permissions
    if auth_master or auth_worker:
        ec2utils.intracluster_auth(self.master_group, self.worker_group)
    if auth_master:
        ec2utils.authorize_ports(
            self.master_group, 'tcp', MASTER_TCP_PORT_RANGES,
            self.opts.authorized_address)
        ec2utils.authorize_ports(
            self.master_group, 'udp', MASTER_UDP_PORT_RANGES,
            self.opts.authorized_address)
    if auth_worker:
        ec2utils.authorize_ports(
            self.worker_group, 'tcp', WORKER_TCP_PORT_RANGES,
            self.opts.authorized_address)
    # Refuse to launch into security groups that already contain live
    # (not shutting-down/terminated) instances.
    print('')
    masters, workers = ec2utils.get_existing_instances(self.ec2, self.name)
    masters = [m for m in masters
               if m.state['Name'] not in ['shutting-down', 'terminated']]
    workers = [w for w in workers
               if w.state['Name'] not in ['shutting-down', 'terminated']]
    if masters or workers:
        print("ERROR: There are already instances running in group {} or {}".format(
            self.master_group.group_name,
            self.worker_group.group_name), file=sys.stderr)
        sys.exit(1)
    else:
        print('No running instances were found.')
    # Resolve the AMI; fall back to the region's default Ubuntu image.
    if self.opts.ami is None:
        # self.opts.ami = ABTOOLS_AMI_MAP[self.opts.abtools_version]
        self.opts.ami = UBUNTU_AMI_MAP[self.opts.region]
    try:
        self.image = list(self.ec2.images.filter(ImageIds=[self.opts.ami]))[0]
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are
    # not swallowed; the filter can fail with a client error or an empty
    # result (IndexError).
    except Exception:
        print("Could not find AMI " + self.opts.ami, file=sys.stderr)
        sys.exit(1)
    # Set up the master's BlockDeviceMappings.
    master_block_device_mappings = []
    for i in range(self.opts.master_ebs_vol_num):
        # EBS volumes are /dev/xvdaa, /dev/xvdab...
        device_name = "/dev/xvda" + string.ascii_lowercase[i]
        ebs = {'VolumeSize': self.opts.master_ebs_vol_size,
               'VolumeType': self.opts.master_ebs_vol_type}
        master_block_device_mappings.append(
            {'DeviceName': device_name, 'Ebs': ebs})
    # Ephemeral drives must be added to the BlockDeviceMappings for m3
    # instances; see:
    # http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/block-device-mapping-concepts.html
    if self.opts.master_instance_type is None:
        self.opts.master_instance_type = self.opts.instance_type
    if self.opts.master_instance_type.split('.')[0] in ['m3', ]:
        for i in range(ec2utils.get_num_disks(self.opts.master_instance_type)):
            # Ephemeral drives start at /dev/xvdb.
            master_block_device_mappings.append(
                {'VirtualName': 'ephemeral{}'.format(i),
                 'DeviceName': '/dev/xvd' + string.ascii_lowercase[i + 1]})
    # Launch workers (spot when a spot price was given, else on-demand).
    if self.opts.workers > 0:
        if self.opts.spot_price:
            print('')
            print('Requesting {0} spot instance{1} for worker node{1}...'.format(
                self.opts.workers, '' if self.opts.workers == 1 else 's'))
            worker_response = ec2utils.request_spot_instance(
                self.ec2c,
                group_name=self.worker_group_name,
                price=self.opts.spot_price,
                ami=self.opts.ami,
                num=self.opts.workers,
                key_pair=self.opts.key_pair,
                instance_type=self.opts.instance_type)
        else:
            worker_response = {'SpotInstanceRequests': []}
            self.worker_instances = self.ec2.create_instances(
                ImageId=self.opts.ami,
                MinCount=self.opts.workers,
                MaxCount=self.opts.workers,
                KeyName=self.opts.key_pair,
                InstanceType=self.opts.instance_type,
                SecurityGroups=[self.worker_group_name])
    else:
        worker_response = {'SpotInstanceRequests': []}
    # Launch the master; spot only when explicitly forced AND a price exists.
    if self.opts.force_spot_master and self.opts.spot_price is not None:
        print('Requesting a spot instance for master node...')
        master_response = ec2utils.request_spot_instance(
            self.ec2c,
            group_name=self.master_group_name,
            price=self.opts.spot_price,
            ami=self.opts.ami,
            num=1,
            key_pair=self.opts.key_pair,
            instance_type=self.opts.master_instance_type,
            block_device_mappings=master_block_device_mappings)
    else:
        master_response = {'SpotInstanceRequests': []}
        master_instances = self.ec2.create_instances(
            ImageId=self.opts.ami,
            MinCount=1,
            MaxCount=1,
            KeyName=self.opts.key_pair,
            InstanceType=self.opts.master_instance_type,
            SecurityGroups=[self.master_group_name],
            BlockDeviceMappings=master_block_device_mappings)
        self.master_instance = master_instances[0]
    # Wait for any spot requests to be fulfilled, then resolve the request
    # IDs to Instance objects.
    master_requests = master_response['SpotInstanceRequests']
    worker_requests = worker_response['SpotInstanceRequests']
    spot_requests = master_requests + worker_requests
    if spot_requests:
        # Give AWS time to populate the list of spot instance requests.
        time.sleep(10)
        print('')
        print('Waiting for spot requests to be fulfilled...')
        spot_request_ids = [r['SpotInstanceRequestId'] for r in spot_requests]
        waiter = self.ec2c.get_waiter('spot_instance_request_fulfilled')
        waiter.wait(SpotInstanceRequestIds=spot_request_ids)
    if master_requests:
        master_requests = self.ec2c.describe_spot_instance_requests(
            SpotInstanceRequestIds=[r['SpotInstanceRequestId']
                                    for r in master_requests])
        master_instance_ids = [r['InstanceId']
                               for r in master_requests['SpotInstanceRequests']]
        self.master_instance = self.ec2.Instance(id=master_instance_ids[0])
    if worker_requests:
        worker_requests = self.ec2c.describe_spot_instance_requests(
            SpotInstanceRequestIds=[r['SpotInstanceRequestId']
                                    for r in worker_requests])
        worker_instance_ids = [r['InstanceId']
                               for r in worker_requests['SpotInstanceRequests']]
        self.worker_instances = [self.ec2.Instance(id=i)
                                 for i in worker_instance_ids]
    # Wait for all instances to reach state == 'running'.
    # NOTE(review): when opts.workers == 0, self.worker_instances is never
    # assigned in this method, so it must already exist on the instance
    # (e.g. set in __init__) -- confirm.
    all_instances = [self.master_instance] + self.worker_instances
    ec2utils.wait_for_instance_state(
        self.ec2c, [i.id for i in all_instances], 'running')
    # Wait for instances to pass EC2 status checks (be reachable).
    print('')
    print('Waiting for instance{} to be reachable...'.format(
        's' if len(all_instances) > 1 else ''))
    instance_ids = [i.id for i in all_instances]
    waiter = self.ec2c.get_waiter('instance_status_ok')
    waiter.wait(InstanceIds=instance_ids)
    # Name (tag) all instances: 'master' plus zero-padded node001, node002,
    # ... for multi-node clusters; a lone master is named after the cluster.
    if self.opts.workers:
        self.workers = {}
        self.master_name = 'master'
        self.worker_names = []
        self.master = {self.master_name: self.master_instance}
        self.master_instance.create_tags(
            Tags=[{'Key': 'Name', 'Value': 'master'}])
        for i, inst in enumerate(self.worker_instances):
            name = 'node{:03d}'.format(i + 1)
            self.workers[name] = inst
            self.worker_names.append(name)
            inst.create_tags(Tags=[{'Key': 'Name', 'Value': name}])
    else:
        self.master_name = self.name
        self.worker_names = []
        self.master_instance.create_tags(
            Tags=[{'Key': 'Name', 'Value': self.master_name}])
    # Configure the cluster instances.
    self.configure()