Example #1
File: cluster.py — Project: briney/abcloud
 def load(self):
     """Populate this cluster object from already-running EC2 instances.

     Looks up existing instances for this cluster (via
     ``ec2utils.get_existing_instances``), keeps only those in the
     'running' state, and caches the master/worker instances plus their
     'Name' tags on ``self``.

     Returns:
         ``self`` when no running master instance is found (early exit).
         NOTE(review): on the success path the method falls off the end and
         returns ``None`` — asymmetric, but left unchanged in case callers
         rely on it; confirm before making the return consistent.
     """
     masters, workers = ec2utils.get_existing_instances(
         self.ec2,
         self.name,
         quiet=True)
     # only instances that are actually running count as cluster members
     masters = [m for m in masters if m.state['Name'] == 'running']
     workers = [w for w in workers if w.state['Name'] == 'running']
     if not masters:
         return self
     self.master_instance = masters[0]
     self.worker_instances = workers
     # self.opts = self.retrieve_opts(self.master_instance)
     # get master instance information
     # BUGFIX: match on the tag *key* being 'Name'. The previous test,
     # ``'Name' in d.values()``, also matched any tag whose value happened
     # to be the string 'Name' (e.g. {'Key': 'Role', 'Value': 'Name'}).
     self.master_name = [d['Value'] for d in self.master_instance.tags if d['Key'] == 'Name'][0]
     self.master = {self.master_name: self.master_instance}
     # get worker instance information
     self.workers = {}
     for inst in self.worker_instances:
         worker_name = [d['Value'] for d in inst.tags if d['Key'] == 'Name'][0]
         self.workers[worker_name] = inst
     self.worker_names = sorted(self.workers.keys())
Example #2
File: cluster.py — Project: briney/abcloud
    def launch(self):
        """Launch the cluster's master and worker EC2 instances.

        Steps, in order:
          1. Authorize intra-cluster and external ingress on the master and
             worker security groups (only if not already authorized).
          2. Abort if instances are already running in either group.
          3. Resolve the AMI (falling back to the per-region Ubuntu AMI).
          4. Build the master's EBS/ephemeral BlockDeviceMappings.
          5. Launch workers and the master, as spot or on-demand instances
             depending on ``self.opts``.
          6. Wait for spot fulfillment, the 'running' state, and
             reachability; then tag every instance with a 'Name'.
          7. Hand off to ``self.configure()``.

        Side effects: creates AWS resources, mutates ``self.opts`` (ami,
        master_instance_type) and instance/name attributes on ``self``;
        exits the process via ``sys.exit(1)`` on fatal errors.
        """
        print('')
        # authorize ingress ports for master and worker security groups
        # (skip any group that already has ingress rules attached)
        auth_master = len(self.master_group.ip_permissions) == 0
        auth_worker = len(self.worker_group.ip_permissions) == 0
        if any([auth_master, auth_worker]):
            ec2utils.intracluster_auth(self.master_group, self.worker_group)
        if auth_master:
            ec2utils.authorize_ports(
                self.master_group,
                'tcp',
                MASTER_TCP_PORT_RANGES,
                self.opts.authorized_address)
            ec2utils.authorize_ports(
                self.master_group,
                'udp',
                MASTER_UDP_PORT_RANGES,
                self.opts.authorized_address)
        if auth_worker:
            ec2utils.authorize_ports(
                self.worker_group,
                'tcp',
                WORKER_TCP_PORT_RANGES,
                self.opts.authorized_address)

        # check whether instances are already running in the cluster security groups
        print('')
        masters, workers = ec2utils.get_existing_instances(self.ec2, self.name)
        masters = [m for m in masters if m.state['Name'] not in ['shutting-down', 'terminated']]
        workers = [w for w in workers if w.state['Name'] not in ['shutting-down', 'terminated']]
        if any([workers, masters]):
            print("ERROR: There are already instances running in group {} or {}".format(
                self.master_group.group_name,
                self.worker_group.group_name),
                file=sys.stderr)
            sys.exit(1)
        else:
            print('No running instances were found.')

        # get AMI
        if self.opts.ami is None:
            # self.opts.ami = ABTOOLS_AMI_MAP[self.opts.abtools_version]
            self.opts.ami = UBUNTU_AMI_MAP[self.opts.region]
        try:
            self.image = [i for i in self.ec2.images.filter(ImageIds=[self.opts.ami])][0]
        except Exception:
            # narrowed from a bare ``except:`` so Ctrl-C / SystemExit are
            # not swallowed; IndexError (no image) and AWS client errors
            # are both treated as "AMI not found"
            print("Could not find AMI " + self.opts.ami, file=sys.stderr)
            sys.exit(1)

        # setup master BlockDeviceMappings
        master_block_device_mappings = []
        for i in range(self.opts.master_ebs_vol_num):
            # EBS volumes are /dev/xvdaa, /dev/xvdab...
            device_name = "/dev/xvda" + string.ascii_lowercase[i]
            ebs = {'VolumeSize': self.opts.master_ebs_vol_size,
                   'VolumeType': self.opts.master_ebs_vol_type}
            device_map = {'DeviceName': device_name,
                          'Ebs': ebs}
            master_block_device_mappings.append(device_map)
        # ephemeral drives must be added to the BlockDeviceMappings for m3 instances
        # see: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/block-device-mapping-concepts.html
        if self.opts.master_instance_type is None:
            self.opts.master_instance_type = self.opts.instance_type
        if self.opts.master_instance_type.split('.')[0] in ['m3', ]:
            for i in range(ec2utils.get_num_disks(self.opts.master_instance_type)):
                virtual_name = 'ephemeral{}'.format(i)
                # ephemeral drives start at /dev/xvdb.
                device_name = '/dev/xvd' + string.ascii_lowercase[i + 1]
                device_map = {'VirtualName': virtual_name,
                              'DeviceName': device_name}
                master_block_device_mappings.append(device_map)

        # launch workers (spot or on-demand, depending on opts.spot_price)
        if self.opts.workers > 0:
            if self.opts.spot_price:
                print('')
                print('Requesting {0} spot instance{1} for worker node{1}...'.format(
                    self.opts.workers, '' if self.opts.workers == 1 else 's'))
                worker_response = ec2utils.request_spot_instance(
                    self.ec2c,
                    group_name=self.worker_group_name,
                    price=self.opts.spot_price,
                    ami=self.opts.ami,
                    num=self.opts.workers,
                    key_pair=self.opts.key_pair,
                    instance_type=self.opts.instance_type)
            else:
                worker_response = {'SpotInstanceRequests': []}
                self.worker_instances = self.ec2.create_instances(
                    ImageId=self.opts.ami,
                    MinCount=self.opts.workers,
                    MaxCount=self.opts.workers,
                    KeyName=self.opts.key_pair,
                    InstanceType=self.opts.instance_type,
                    SecurityGroups=[self.worker_group_name])
        else:
            worker_response = {'SpotInstanceRequests': []}

        # launch master (spot only when explicitly forced AND a price is set)
        if all([self.opts.force_spot_master, self.opts.spot_price is not None]):
            print('Requesting a spot instance for master node...')
            master_response = ec2utils.request_spot_instance(
                self.ec2c,
                group_name=self.master_group_name,
                price=self.opts.spot_price,
                ami=self.opts.ami,
                num=1,
                key_pair=self.opts.key_pair,
                instance_type=self.opts.master_instance_type,
                block_device_mappings=master_block_device_mappings)
        else:
            master_response = {'SpotInstanceRequests': []}
            master_instances = self.ec2.create_instances(
                ImageId=self.opts.ami,
                MinCount=1,
                MaxCount=1,
                KeyName=self.opts.key_pair,
                InstanceType=self.opts.master_instance_type,
                SecurityGroups=[self.master_group_name],
                BlockDeviceMappings=master_block_device_mappings)
            self.master_instance = master_instances[0]

        # wait for spot requests to be fulfilled
        master_requests = master_response['SpotInstanceRequests']
        worker_requests = worker_response['SpotInstanceRequests']
        spot_requests = master_requests + worker_requests
        if spot_requests:
            # wait for AWS to populate the list of spot instance requests
            time.sleep(10)
            print('')
            print('Waiting for spot requests to be fulfilled...')
            spot_request_ids = [r['SpotInstanceRequestId'] for r in spot_requests]
            waiter = self.ec2c.get_waiter('spot_instance_request_fulfilled')
            waiter.wait(SpotInstanceRequestIds=spot_request_ids)
        # re-describe fulfilled requests to learn their instance ids,
        # then wrap the ids in Instance resource objects
        if master_requests:
            master_requests = self.ec2c.describe_spot_instance_requests(
                SpotInstanceRequestIds=[r['SpotInstanceRequestId'] for r in master_requests])
            master_instance_ids = [r['InstanceId'] for r in master_requests['SpotInstanceRequests']]
            self.master_instance = [self.ec2.Instance(id=i) for i in master_instance_ids][0]
        if worker_requests:
            worker_requests = self.ec2c.describe_spot_instance_requests(
                SpotInstanceRequestIds=[r['SpotInstanceRequestId'] for r in worker_requests])
            worker_instance_ids = [r['InstanceId'] for r in worker_requests['SpotInstanceRequests']]
            self.worker_instances = [self.ec2.Instance(id=i) for i in worker_instance_ids]

        # wait for instances to state == 'running'
        all_instances = [self.master_instance] + self.worker_instances
        ec2utils.wait_for_instance_state(self.ec2c, [i.id for i in all_instances], 'running')

        # wait for instances to be reachable
        print('')
        print('Waiting for instance{} to be reachable...'.format(
            's' if len(all_instances) > 1 else ''))
        instance_ids = [i.id for i in all_instances]
        waiter = self.ec2c.get_waiter('instance_status_ok')
        waiter.wait(InstanceIds=instance_ids)

        # name all instances: the master is 'master' and workers are
        # 'node001', 'node002', ... when workers exist; a solitary master
        # is named after the cluster itself
        if self.opts.workers:
            self.workers = {}
            self.master_name = 'master'
            self.worker_names = []
            self.master = {self.master_name: self.master_instance}
            self.master_instance.create_tags(Tags=[{'Key': 'Name',
                                                    'Value': 'master'}])
            for i, inst in enumerate(self.worker_instances):
                # zero-pad to three digits: node001, node002, ...
                name = 'node{:03d}'.format(i + 1)
                self.workers[name] = inst
                self.worker_names.append(name)
                inst.create_tags(Tags=[{'Key': 'Name',
                                        'Value': name}])
        else:
            self.master_name = self.name
            self.worker_names = []
            self.master_instance.create_tags(Tags=[{'Key': 'Name',
                                                    'Value': self.master_name}])

        # configure the cluster instances
        self.configure()