def get_default_subnet(self, availability_zone=None):
    """
    Return the ID of a default subnet of the VPC ``self.vpc_id``.

    Parameters
    ----------
    availability_zone : optional
        When given, only a subnet whose ``availability_zone`` equals this
        value is accepted. When ``None``, the first subnet flagged
        ``default_for_az`` is used.

    Returns
    -------
    The ``id`` of the first matching subnet.

    Raises
    ------
    DaskEc2Exception
        If ``self.vpc_id`` is not set, or if the VPC has no matching
        default subnet.
    """
    logger.debug("Searching for default subnet in VPC %s", self.vpc_id)
    for candidate_vpc in self.ec2.vpcs.all():
        if candidate_vpc.id != self.vpc_id:
            continue
        for net in candidate_vpc.subnets.all():
            zone_ok = (availability_zone is None
                       or net.availability_zone == availability_zone)
            if zone_ok and net.default_for_az:
                logger.debug(
                    "Default subnet found - Using Subnet ID: %s", net.id)
                return net.id

    # Nothing matched: distinguish "no VPC configured" from "VPC has no
    # default subnet" so the user knows which argument to pass.
    if not self.vpc_id:
        raise DaskEc2Exception(
            "There is no VPC, please pass VPC ID or assign a default VPC")
    raise DaskEc2Exception(
        "There is no default subnet on VPC %s, please pass a subnet ID" % self.vpc_id)
def check_sg(self, security_group):
    """Make sure ``security_group`` exists on the EC2 account.

    A missing group named ``DEFAULT_SG_GROUP_NAME`` is created through
    ``self.create_default_sg()``; any other missing group is an error.

    Raises
    ------
    DaskEc2Exception
        If the group is missing and is not the default one, or if a
        ``ClientError`` is raised while looking the group up or while
        creating the default group (both calls sit inside the same ``try``).
    """
    logger.debug("Checking that security group '%s' exists on EC2", security_group)

    def missing_group_error():
        # Single place for the message shared by both failure paths.
        return DaskEc2Exception(
            "Security group '%s' not found, please create or use the default '%s'"
            % (security_group, DEFAULT_SG_GROUP_NAME))

    try:
        if self.get_security_groups(security_group):
            return
        if security_group != DEFAULT_SG_GROUP_NAME:
            raise missing_group_error()
        logger.debug("Default security group '%s' not found, creating it",
                     DEFAULT_SG_GROUP_NAME)
        self.create_default_sg()
    except ClientError:
        raise missing_group_error()
def check_image_is_ebs(self, image_id):
    """Verify that an AMI uses an EBS root device.

    Parameters
    ----------
    image_id
        ID of the AMI, passed to ``describe_images`` as the only entry of
        ``ImageIds``.

    Raises
    ------
    DaskEc2Exception
        If ``describe_images`` returns no image for ``image_id``, or if the
        image's ``RootDeviceType`` is not ``"ebs"``.
    """
    response = self.client.describe_images(ImageIds=[image_id])
    images = response.get("Images") or []
    if not images:
        # Without this guard an empty result surfaces as a bare IndexError,
        # which callers handling DaskEc2Exception would not catch.
        raise DaskEc2Exception("The AMI {} was not found.".format(image_id))

    root_type = images[0]["RootDeviceType"]
    if root_type != "ebs":
        raise DaskEc2Exception("The AMI {} Root Device Type is not EBS. "
                               "Only EBS Root Device AMI are supported.".format(image_id))
def get_default_vpc(self):
    """
    Return the ID of the first VPC flagged as default.

    Returns
    -------
    The ``id`` of the first VPC in ``self.ec2.vpcs.all()`` whose
    ``is_default`` attribute is truthy.

    Raises
    ------
    DaskEc2Exception
        If no VPC is flagged as default.
    """
    logger.debug("Searching for default VPC")
    default_vpc = next(
        (candidate for candidate in self.ec2.vpcs.all() if candidate.is_default),
        None)
    if default_vpc is None:
        raise DaskEc2Exception("There is no default VPC, please pass VPC ID")
    logger.debug("Default VPC found - Using VPC ID: %s", default_vpc.id)
    return default_vpc.id
def destroy(self, ids):
    """Terminate a set of EC2 instances by ID and wait until they are gone.

    Parameters
    ----------
    ids : list of strings
        The EC2 ids of the instances that should be terminated

    Raises
    ------
    DaskEc2Exception
        If ``ids`` is ``None`` or empty.
    """
    # Reject every empty value (None, [], (), ""), not only None and [].
    # NOTE(review): an empty InstanceIds filter presumably does not restrict
    # the collection at all, so an empty sequence must never reach
    # terminate() - confirm against the boto3 collection docs.
    if not ids:
        raise DaskEc2Exception("Instances ids cannot be None or empty list")
    logger.debug("Terminating instances: %s", ids)
    self.ec2.instances.filter(InstanceIds=ids).terminate()
    waiter = self.client.get_waiter("instance_terminated")
    waiter.wait(InstanceIds=ids)
def check_keyname(self, keyname):
    """Check that a key pair named ``keyname`` exists on the EC2 account.

    Parameters
    ----------
    keyname
        Key pair name, passed to ``describe_key_pairs`` as the only entry
        of ``KeyNames``.

    Raises
    ------
    DaskEc2Exception
        If EC2 answers with the error code ``InvalidKeyPair.NotFound``.
    ClientError
        Any other client error is re-raised unchanged.
    """
    logger.debug("Checking that keyname '%s' exists on EC2", keyname)
    try:
        # The call itself is the check: only its failure mode matters, the
        # response content is not used.
        self.client.describe_key_pairs(KeyNames=[keyname])
    except ClientError as e:
        if e.response["Error"]["Code"] == "InvalidKeyPair.NotFound":
            raise DaskEc2Exception("The keyname '%s' does not exist, "
                                   "please create it in the EC2 console" % keyname)
        # Bare raise keeps the original traceback.
        raise
def launch(self, name, image_id, instance_type, count, keyname,
           security_group_name=DEFAULT_SG_GROUP_NAME, security_group_id=None,
           volume_type="gp2", volume_size=500, keypair=None, tags=None,
           check_ami=True):
    """Launch ``count`` EC2 instances, wait until they run, then tag them.

    Parameters
    ----------
    name
        When truthy, each instance gets a ``Name`` tag ``"<name>-<index>"``
        plus the custom tags. When falsy, instances are not tagged.
    image_id, instance_type, count, keyname
        Forwarded to ``create_instances`` as ``ImageId``, ``InstanceType``,
        ``MinCount``/``MaxCount`` and ``KeyName``.
    security_group_name
        Checked with ``self.check_sg`` and, when ``security_group_id`` is
        falsy, resolved to IDs with ``self.get_security_groups_ids``.
    security_group_id
        When truthy, used as the only security group ID.
    volume_type, volume_size
        ``VolumeType`` and ``VolumeSize`` of the ``/dev/sda1`` mapping.
    keypair
        Not used by this method.
    tags : list of "key:value" strings, optional
        Custom tags. Each entry is split on its first colon, so values may
        themselves contain colons.
    check_ami
        When truthy, ``self.check_image_is_ebs(image_id)`` is called first.

    Returns
    -------
    list
        The instances yielded by ``self.ec2.instances.filter`` for the
        launched IDs.

    Raises
    ------
    DaskEc2Exception
        If the ``instance_running`` waiter raises ``WaiterError`` (the
        pre-flight checks may raise it as well).
    ValueError
        If a tag contains no colon.
    """
    tags = tags or []
    self.check_keyname(keyname)
    if check_ami:
        self.check_image_is_ebs(image_id)
    self.check_sg(security_group_name)

    # assumed to be formatted correctly in ec2.py; maxsplit=1 keeps any
    # further colons as part of the value instead of failing to unpack.
    custom_tags = []
    for tag in tags:
        key, value = tag.split(":", 1)
        custom_tags.append({"Key": key, "Value": value})

    device_map = [
        {
            "DeviceName": "/dev/sda1",
            "Ebs": {
                "VolumeSize": volume_size,
                "DeleteOnTermination": True,
                "VolumeType": volume_type,
            },
        },
    ]

    if security_group_id:
        security_groups_ids = [security_group_id]
    else:
        security_groups_ids = self.get_security_groups_ids(security_group_name)

    logger.debug("Creating %i instances on EC2", count)
    kwargs = dict(ImageId=image_id,
                  KeyName=keyname,
                  MinCount=count,
                  MaxCount=count,
                  InstanceType=instance_type,
                  SecurityGroupIds=security_groups_ids,
                  BlockDeviceMappings=device_map)
    # Optional settings are only sent when actually configured.
    if self.subnet_id is not None and self.subnet_id != "":
        kwargs['SubnetId'] = self.subnet_id
    if self.iaminstance_name is not None and self.iaminstance_name != "":
        kwargs['IamInstanceProfile'] = {'Name': self.iaminstance_name}
    instances = self.ec2.create_instances(**kwargs)

    # NOTE(review): presumably gives EC2 time to register the new IDs before
    # the waiter starts polling - confirm.
    time.sleep(5)

    ids = [i.id for i in instances]
    waiter = self.client.get_waiter("instance_running")
    try:
        waiter.wait(InstanceIds=ids)
    except WaiterError:
        raise DaskEc2Exception(
            "An unexpected error occurred when launching the requested instances. "
            "Refer to the AWS Management Console for more information.")

    collection = self.ec2.instances.filter(InstanceIds=ids)
    instances = []
    for i, instance in enumerate(collection):
        instances.append(instance)
        # Skip the per-volume API call when there is nothing to tag.
        if custom_tags:
            for volume in instance.volumes.all():
                volume.create_tags(DryRun=False, Tags=custom_tags)
        if name:
            logger.debug("Tagging instance '%s'", instance.id)
            tags_ = [{"Key": "Name", "Value": "{0}-{1}".format(name, i)}]
            tags_.extend(custom_tags)
            self.ec2.create_tags(Resources=[instance.id], Tags=tags_)
    return instances
def launch(self, name, image_id, instance_type, count, keyname,
           security_group_name=DEFAULT_SG_GROUP_NAME, security_group_id=None,
           volume_type="gp2", volume_size=500, keypair=None, tags=None,
           check_ami=True):
    """Launch ``count`` EC2 instances, wait until they run, then tag them.

    Parameters
    ----------
    name
        When truthy, each instance gets a ``Name`` tag ``"<name>-<index>"``
        plus the custom tags. When falsy, instances are not tagged.
    image_id, instance_type, count, keyname
        Forwarded to ``create_instances`` as ``ImageId``, ``InstanceType``,
        ``MinCount``/``MaxCount`` and ``KeyName``.
    security_group_name
        Checked with ``self.check_sg`` and, when ``security_group_id`` is
        ``None``, resolved to IDs with ``self.get_security_groups_ids``.
    security_group_id
        When not ``None``, used as the only security group ID, whether or
        not a subnet is configured.
    volume_type, volume_size
        ``VolumeType`` and ``VolumeSize`` of the ``/dev/sda1`` mapping.
    keypair
        Not used by this method.
    tags : list of strings, optional
        Custom tags. An entry with exactly one colon becomes key/value; any
        other entry is stored whole under the key ``"Tag"``.
    check_ami
        When truthy, ``self.check_image_is_ebs(image_id)`` is called first.

    Returns
    -------
    list
        The instances yielded by ``self.ec2.instances.filter`` for the
        launched IDs.

    Raises
    ------
    DaskEc2Exception
        If the ``instance_running`` waiter raises ``WaiterError`` (the
        pre-flight checks may raise it as well).
    """
    tags = tags or []
    self.check_keyname(keyname)
    if check_ami:
        self.check_image_is_ebs(image_id)
    self.check_sg(security_group_name)

    device_map = [
        {
            "DeviceName": "/dev/sda1",
            "Ebs": {
                "VolumeSize": volume_size,
                "DeleteOnTermination": True,
                "VolumeType": volume_type,
            },
        },
    ]

    if security_group_id is not None:
        security_groups_ids = [security_group_id]
    else:
        security_groups_ids = self.get_security_groups_ids(security_group_name)

    logger.debug("Creating %i instances on EC2", count)
    # One argument set for both the subnet and the no-subnet case, so an
    # explicit security_group_id is honoured either way.
    kwargs = dict(ImageId=image_id,
                  KeyName=keyname,
                  MinCount=count,
                  MaxCount=count,
                  InstanceType=instance_type,
                  SecurityGroupIds=security_groups_ids,
                  BlockDeviceMappings=device_map)
    if self.subnet_id is not None and self.subnet_id != "":
        kwargs["SubnetId"] = self.subnet_id
    instances = self.ec2.create_instances(**kwargs)

    # NOTE(review): presumably gives EC2 time to register the new IDs before
    # the waiter starts polling - confirm.
    time.sleep(5)

    ids = [i.id for i in instances]
    waiter = self.client.get_waiter("instance_running")
    try:
        waiter.wait(InstanceIds=ids)
    except WaiterError:
        raise DaskEc2Exception(
            "An unexpected error occurred when launching the requested instances. Refer to the AWS Management Console for more information."
        )

    # The custom tags do not depend on the instance, so parse them once.
    # They are only needed (and only parsed) when instances get tagged.
    custom_tags = []
    if name:
        for tag_pair in tags:
            parts = tag_pair.split(":")
            if len(parts) == 2:
                key, value = parts
            else:
                key, value = "Tag", tag_pair
            custom_tags.append({"Key": key, "Value": value})

    collection = self.ec2.instances.filter(InstanceIds=ids)
    instances = []
    for i, instance in enumerate(collection):
        instances.append(instance)
        if name:
            logger.debug("Tagging instance '%s'", instance.id)
            tags_ = [{"Key": "Name", "Value": "{0}-{1}".format(name, i)}]
            tags_.extend(custom_tags)
            self.ec2.create_tags(Resources=[instance.id], Tags=tags_)
    return instances
def _run_on_all_nodes(instances, target, make_args):
    """Run ``target`` once per instance in its own thread and wait for all.

    Every thread is called as ``target(results, instance, *make_args(i))``
    where ``i`` is the instance's position in ``instances``. The shared
    ``results`` dict handed to the threads is returned.
    """
    results, threads = {}, []
    for i, instance in enumerate(instances):
        worker = threading.Thread(target=target,
                                  args=(results, instance) + tuple(make_args(i)))
        worker.start()
        threads.append(worker)
    for worker in threads:
        worker.join()
    return results


def _raise_if_nodes_failed(results, message):
    """Raise ``DaskEc2Exception`` when any value in ``results`` is ``False``.

    ``message`` must contain one ``%s`` placeholder; it receives the list of
    keys whose value is ``False``.
    """
    failed_nodes = [node for node, outcome in results.items() if outcome is False]
    if failed_nodes:
        raise DaskEc2Exception(message % failed_nodes)


def install_salt_minion(cluster):
    """Bootstrap, configure and restart salt-minion on every cluster node.

    Three phases run in order, each fanned out with one thread per instance
    in ``cluster.instances``:

    1. run the Salt bootstrap script, pointing the minion at the IP of
       ``cluster.instances[0]`` and naming it ``node-<index>``;
    2. upload ``templates/mine_functions.conf`` to
       ``/etc/salt/minion.d/mine.conf``;
    3. restart the ``salt-minion`` service.

    Raises
    ------
    DaskEc2Exception
        After a phase, if any entry the worker threads stored in the shared
        results dict is ``False``. The message lists the affected keys
        (presumably node IPs - set by ``async_cmd``/``async_upload``).
    """
    dask_ec2_src = os.path.realpath(os.path.dirname(dask_ec2.__file__))
    templates_src = os.path.join(dask_ec2_src, "templates")

    logger.debug("Installing salt-minion on all the nodes")
    master_ip = cluster.instances[0].ip

    def bootstrap_args(i):
        cmd = "curl -L https://bootstrap.saltstack.com | sh -s -- "
        cmd += "-d -X -P -L -A {master_ip} -i {minion_id} stable".format(
            master_ip=master_ip, minion_id="node-{}".format(i))
        return (cmd,)

    results = _run_on_all_nodes(cluster.instances, async_cmd, bootstrap_args)
    _raise_if_nodes_failed(
        results,
        "Error bootstrapping salt-minion at nodes: %s (maybe try again)")

    logger.debug("Configuring salt-mine on the salt minions")
    local = os.path.join(templates_src, "mine_functions.conf")
    remote = "/etc/salt/minion.d/mine.conf"
    results = _run_on_all_nodes(cluster.instances, async_upload,
                                lambda i: (local, remote))
    _raise_if_nodes_failed(
        results,
        "Error configuring the salt-mine in the salt-minion at nodes: %s (maybe try again)")

    logger.debug("Restarting the salt-minion service")
    results = _run_on_all_nodes(cluster.instances, async_cmd,
                                lambda i: ("service salt-minion restart",))
    _raise_if_nodes_failed(
        results,
        "Error restarting the salt-minion at nodes: %s (maybe try again)")
def install_salt_master(cluster):
    """Install and configure salt-master and salt-api on the first instance.

    All work happens over ``cluster.instances[0].ssh_client``. The steps
    listed in ``steps`` below run strictly in order; each one is wrapped in
    ``retry(retries=3, wait=0)``. Command steps run with ``sudo=True`` and
    fail when the exit code is non-zero; upload steps copy a file from the
    package's ``templates`` directory to the master.

    Raises
    ------
    DaskEc2Exception
        As soon as a step raises ``RetriesExceededException``. The message
        is the last underlying exception followed by a description of the
        failed step.
    """
    master = cluster.instances[0].ssh_client
    dask_ec2_src = os.path.realpath(os.path.dirname(dask_ec2.__file__))
    templates_src = os.path.join(dask_ec2_src, "templates")

    def run(cmd):
        """Return an action that runs ``cmd`` on the master with sudo."""
        def action():
            ret = master.exec_command(cmd, sudo=True)
            if ret["exit_code"] != 0:
                # Decode uniformly so every step reports readable text
                # instead of a bytes repr.
                stderr = ret["stderr"]
                if isinstance(stderr, bytes):
                    stderr = stderr.decode("utf-8")
                raise Exception(stderr)
        return action

    def upload(template, remote):
        """Return an action that uploads a bundled template to ``remote``."""
        def action():
            local = os.path.join(templates_src, template)
            master.put(local, remote, sudo=True)
        return action

    # (action, description used in the error message), in execution order.
    steps = [
        (run("curl -sS -L https://bootstrap.saltstack.com | sh -s -- -d -X -M -N stable"),
         "Couldn't bootstrap salt-master"),
        (run("curl -L https://bootstrap.saltstack.com | sh -s -- -d -X -M -N -P -L -p salt-api stable"),
         "Couldn't bootstrap salt-api"),
        (upload("auto_accept.conf", "/etc/salt/master.d/auto_accept.conf"),
         "Couldn't setup salt-master settings"),
        (run("apt-get install -y python-pip libssl-dev libffi-dev python-dev"),
         "Couldn't install ubuntu dependencies"),
        (run("pip install --upgrade pip packaging appdirs six"),
         "Couldn't upgrade pip"),
        (run("pip install cherrypy"),
         "Couldn't install CherryPy"),
        (run("pip install PyOpenSSL==16.2.0"),
         "Couldn't install PyOpenSSL"),
        (run("salt-call --local tls.create_self_signed_cert"),
         "Couldn't generate SSL certificate"),
        (upload("rest_cherrypy.conf", "/etc/salt/master.d/rest_cherrypy.conf"),
         "Couldn't setup salt-rest server"),
        (upload("external_auth.conf", "/etc/salt/master.d/external_auth.conf"),
         "Couldn't setup salt external auth system"),
        (run("id -u saltdev &>/dev/null || useradd -p $(openssl passwd -1 saltdev) saltdev"),
         "Couldn't create 'saltdev' user"),
        (run("service salt-master restart"),
         "Couldn't restart salt-master service"),
        (run("service salt-api restart"),
         "Couldn't restart salt-api service"),
    ]

    for action, failure in steps:
        try:
            retry(retries=3, wait=0)(action)()
        except RetriesExceededException as e:
            raise DaskEc2Exception(
                "%s\n%s. Error is above (maybe try again)" % (e.last_exception, failure))