コード例 #1
0
ファイル: ec2_boto.py プロジェクト: yarikoptic/elasticluster
    def start_instance(self, key_name, public_key_path, private_key_path,
                       security_group, flavor, image_id, image_userdata,
                       username=None):
        """
        Launch one instance on the configured cloud provider.

        The key pair and the security group are checked first; then the
        connection is asked to run an instance of `image_id`.

        :param key_name: name of the key pair to launch the instance with
        :param public_key_path: passed on to the key pair check
        :param private_key_path: passed on to the key pair check
        :param security_group: name of the security group to apply
        :param flavor: instance type to request
        :param image_id: ID of the image to boot
        :param image_userdata: user data handed to the new instance
        :param username: not used by the code visible here

        :raises ClusterError: if the launch error message mentions
          ``TooManyInstances``
        :raises InstanceError: on any other launch error

        NOTE(review): the code visible here does nothing with the
        reservation after a successful launch and therefore returns
        `None`; confirm against the complete method whether the ID of
        the started instance is meant to be returned.
        """
        connection = self._connect()

        log.debug("Checking keypair `%s`.", key_name)
        self._check_keypair(key_name, public_key_path, private_key_path)
        log.debug("Checking security group `%s`.", security_group)
        self._check_security_group(security_group)

        try:
            reservation = connection.run_instances(
                image_id, key_name=key_name, security_groups=[security_group],
                instance_type=flavor, user_data=image_userdata)
        except Exception as ex:
            log.error("Error starting instance: %s", ex)
            # Look at the error *message*: an exception object is not a
            # container, so `"..." in ex` is not a substring search.
            if "TooManyInstances" in str(ex):
                raise ClusterError(ex)
            else:
                raise InstanceError(ex)
コード例 #2
0
ファイル: conf.py プロジェクト: gmauro/elasticluster
def _validate_and_convert(cfgtree, evict_on_error=True):
    """
    Validate the stanzas of `cfgtree` and return the converted tree.

    For every section listed in `SCHEMA` that also occurs in `cfgtree`,
    each stanza is validated against the section's model.  Stanzas in
    section ``cloud`` are additionally passed through
    `_validate_cloud_section`, and stanzas in section ``cluster``
    through `_validate_node_group_names`.

    :param cfgtree: mapping of section name to a mapping of stanza
      name to stanza properties
    :param bool evict_on_error: when true, a stanza that fails any
      validation step is left out of the result; when false the error
      is only logged (a stanza that failed only the additional
      ``cloud``/``cluster`` check then stays in the result as produced
      by the schema validation)
    :return: dict of dicts with the same section/stanza layout,
      holding the validated values
    """
    objtree = {}
    # `.items()` instead of `.iteritems()`: works on Python 2 and 3 alike
    for section, model in SCHEMA.items():
        if section not in cfgtree:
            continue
        stanzas = cfgtree[section]
        objtree[section] = {}
        for name, properties in stanzas.items():
            log.debug("Checking section `%s/%s` ...", section, name)
            try:
                objtree[section][name] = Schema(model).validate(properties)
                # further checks for cloud providers
                if section == 'cloud':
                    objtree[section][name] = _validate_cloud_section(objtree[section][name])
                # check node name pattern in clusters conforms to RFC952
                if section == 'cluster':
                    _validate_node_group_names(objtree[section][name])
            except (SchemaError, ValueError) as err:
                log.error("In section `%s/%s`: %s", section, name, err)
                if evict_on_error:
                    log.error(
                        "Dropping configuration section `%s/%s`"
                        " because of the above errors", section, name)
                    # `objtree[section][name]` only exists if the error was
                    # raised by one of the section-specific checks that run
                    # after the schema validation has stored its result
                    objtree[section].pop(name, None)
    return objtree
コード例 #3
0
ファイル: openstack.py プロジェクト: gc3-uzh-ch/elasticluster
 def _get_os_config_value(thing, value, varnames, default=_NO_DEFAULT):
     """
     Choose the value of an OpenStack setting.

     The environment variables listed in `varnames` are tried in
     order and the first one that is set wins, even over `value`.
     Otherwise a truthy `value` is used, and finally `default`.

     :param thing: description of the setting, only used in messages
     :param value: value given in the config file, or `None`
     :param varnames: non-empty list of environment variable names;
       the first one is the name advertised in the error message
     :param default: fallback value
     :return: the chosen value
     :raises RuntimeError: if nothing is set and no `default` was given
     """
     assert varnames, "List of env variable names cannot be empty"
     for name in varnames:
         from_env = os.getenv(name, None)
         if from_env is None:
             continue
         overrides_config = (value is not None and from_env != value)
         if overrides_config:
             warn("OpenStack {thing} is present both in the environment"
                  " and the config file. Environment variable {varname}"
                  " takes precedence, but this may change in the future."
                  .format(thing=thing, varname=name),
                  FutureWarning)
         else:
             log.debug('OpenStack %s taken from env variable %s',
                       thing, name)
         return from_env

     # nothing found in the environment: config file value, then default
     if value:
         return value
     if default is not _NO_DEFAULT:
         return default
     # first variable name is preferred; others are for backwards-compatibility only
     raise RuntimeError(
         "There is no default value for OpenStack {0};"
         " please specify one in the config file"
         " or using environment variable {1}."
         .format(thing, varnames[0]))
コード例 #4
0
ファイル: ec2_boto.py プロジェクト: kzielnicki/elasticluster
    def _connect(self):
        """
        Return the connection to the EC2 endpoint, opening it on first use.

        A connection already stored on this object is returned as is.
        Otherwise a new one is created from the credentials and
        endpoint settings stored on this object, and kept for later
        calls.  No request is sent here to verify that the new
        connection actually works.

        :return: :py:class:`boto.ec2.connection.EC2Connection`
        :raises: whatever the connection attempt raised (the error is
          logged and re-raised unchanged)
        """
        # reuse the connection made by an earlier call, if any
        if self._connection:
            return self._connection

        try:
            log.debug("Connecting to ec2 host %s", self._ec2host)
            endpoint_region = ec2.regioninfo.RegionInfo(
                name=self._region_name, endpoint=self._ec2host)

            settings = dict(
                aws_access_key_id=self._access_key,
                aws_secret_access_key=self._secret_key,
                is_secure=self._secure,
                host=self._ec2host,
                port=self._ec2port,
                path=self._ec2path,
                region=endpoint_region,
            )
            self._connection = boto.connect_ec2(**settings)
            log.debug("Connection has been successful.")
        except Exception as err:
            log.error("connection to cloud could not be "
                      "established: message=`%s`", str(err))
            raise

        return self._connection
コード例 #5
0
ファイル: cluster.py プロジェクト: dvischi/TissueMAPS
    def stop(self, force=False, wait=False):
        """
        Stop every node of this cluster, then drop the cluster's saved data.

        :param bool force:
          delete the saved data even if some nodes could not be stopped;
          without it, the cluster is saved again in that case so that
          the stop can be retried.
        :param bool wait:
          handed on unchanged to `_stop_all_nodes`.
        """
        log.debug("Stopping cluster `%s` ...", self.name)

        failures = self._stop_all_nodes(wait)

        if not failures:
            self._delete_saved_data()
            return

        if force:
            self._delete_saved_data()
            log.warning(
                "Not all cluster nodes have been terminated."
                " However, as requested, data about the cluster"
                " has been removed from local storage.")
            return

        # keep the record of what is left, so the stop can be re-run
        self.repository.save_or_update(self)
        log.warning(
            "Not all cluster nodes have been terminated."
            " Fix errors above and re-run `elasticluster stop %s`",
            self.name)
コード例 #6
0
ファイル: cluster.py プロジェクト: eduardoMTB/elasticluster
    def _start_node(node):
        """Static method to start a specific node on a cloud

        :param node: node to start; must provide `name`, `is_alive()`
                     and `start()`
        :return: `True` if the node is already alive or has been
                 started; the caught `KeypairError` instance if
                 starting failed with one; `None` if starting failed
                 with any other exception (which is logged)
        """
        log.debug("_start_node: working on node %s", node.name)
        # TODO: the following check is not optimal yet. When a
        # node is still in a starting state,
        # it will start another node here,
        # since the `is_alive` method will only check for
        # running nodes (see issue #13)
        if node.is_alive():
            log.info("Not starting node %s which is "
                     "already up&running.", node.name)
            return True

        try:
            node.start()
        except KeypairError as e:
            # handed back (not raised) so the caller can inspect it
            return e
        except Exception as e:
            log.error("could not start node `%s` for reason "
                      "`%s`", node.name, e)
            return None

        log.info("_start_node: node has been started")
        return True
コード例 #7
0
ファイル: ec2_boto.py プロジェクト: ngbinh/elasticluster
    def _allocate_address(self, instance):
        """Allocates a free public ip address to the given instance

        An address that is already allocated but not attached to any
        instance is used if there is one; otherwise a new address is
        requested.

        :param instance: instance to assign address to
        :type instance: py:class:`boto.ec2.instance.Reservation`

        :return: public ip address, or `None` if a new address could
                 not be allocated or assigned (the error is logged)
        """
        connection = self._connect()
        for address in connection.get_all_addresses():
            # Find an unused address
            if not address.instance_id:
                # Free address, use it.
                instance.use_ip(address)
                log.debug("Assigning ip address `%s` to instance `%s`",
                          address.public_ip, instance.id)
                return address.public_ip

        # No allocated addresses available.
        try:
            address = connection.allocate_address()
            instance.use_ip(address)
            return address.public_ip
        except Exception as ex:
            # best effort: report the cause, let the caller go on without an IP
            log.error(
                "Unable to allocate a public IP address to instance `%s`: %s",
                instance.id, ex)
            return None
コード例 #8
0
ファイル: cluster.py プロジェクト: briceburg/elasticluster
 def stop(self, force=False):
     """
     Stop every instance of this cluster and delete the cluster storage.

     If some instances are still listed afterwards, the cluster is
     only deleted when `force` is true; otherwise it is saved again
     so that the stop can be re-run.
     """
     for node in self.get_all_nodes():
         try:
             node.stop()
             self.nodes[node.type].remove(node)
         except:
             # Deliberately bare: Boto does not always raise an
             # `Exception` class!
             log.error("could not stop instance `%s`, it might "
                       "already be down.", node.instance_id)

     leftover = self.get_all_nodes()
     if leftover and not force:
         log.warning("Not all instances have been terminated. "
                     "Please rerun the `elasticluster stop %s`", self.name)
         self._storage.dump_cluster(self)
         return

     if leftover:
         log.warning("Not all instances have been terminated. However, "
                     "as requested, the cluster has been force-removed.")
     else:
         log.debug("Removing cluster %s.", self.name)
     self._setup_provider.cleanup()
     self._storage.delete_cluster(self.name)
コード例 #9
0
    def __prepare_key_pair(self, key_name, private_key_path, public_key_path, password):
        """
        Make sure a key pair named `key_name` is known to the driver.

        Nothing is done if `key_name` is empty or the driver already
        lists a key pair of that name.  Otherwise the key is taken
        from, in order of preference: the public key file, the
        ``.pem`` private key file, or a key pair newly created by the
        driver and saved as ``<key_name>.pem`` under `storage_path`.

        :param key_name: name of the key pair
        :param private_key_path: path to a ``.pem`` private key, or empty
        :param public_key_path: path to a public key file, or empty
        :param password: handed on to the ``.pem`` import
        :raises KeypairError: if the public key upload fails, or the
          private key file name does not end in ``.pem``
        """
        if not key_name:
            log.warning('user_key_name has not been defined, assuming password-based authentication')
            return

        if any(k.name == key_name for k in self.driver.list_key_pairs()):
            log.info('Key pair `%s` already exists, skipping import.', key_name)
            return

        if public_key_path:
            log.debug("importing public key from file %s ...", public_key_path)
            if not self.driver.import_key_pair_from_file(
                    name=key_name,
                    key_file_path=os.path.expandvars(os.path.expanduser(public_key_path))):
                raise KeypairError(
                    'Could not upload public key {p}'
                    .format(p=public_key_path))
        elif private_key_path:
            if not private_key_path.endswith('.pem'):
                raise KeypairError(
                    'can only work with .pem private keys,'
                    ' derive public key and set user_key_public')
            log.debug("deriving and importing public key from private key")
            self.__import_pem(key_name, private_key_path, password)
        else:
            pem_file_path = os.path.join(self.storage_path, key_name + '.pem')
            if not os.path.exists(pem_file_path):
                # Request the key *before* creating the file: a failed
                # request must not leave behind an empty `.pem` that
                # later calls would take for an existing key.
                new_key = self.driver.create_key_pair(name=key_name)
                # NOTE(review): the file is created with default
                # permissions; consider restricting it to the owner.
                with open(pem_file_path, 'w') as new_key_file:
                    new_key_file.write(new_key)
            self.__import_pem(key_name, pem_file_path, password)
コード例 #10
0
    def __init__(self, driver_name, storage_path=None, **options):
        """
        Instantiate the libcloud driver called `driver_name`.

        :param str driver_name: provider name (case-insensitive); must
          match an attribute of `Provider` when upper-cased
        :param storage_path: stored on the instance as is
        :param options: driver settings; those listed for the provider
          in `provider_args` are required and passed positionally, the
          rest are passed as keyword arguments
        :raises ValueError: if a required setting is missing, or
          `Provider` has no attribute for this driver name
        """
        self.storage_path = storage_path
        driver_name = driver_name.lower()
        try:
            required = self.provider_args[driver_name]
            all_given = set(required).issubset(options)
            if not all_given:
                raise ValueError(
                    'Cloud provider {0} requires all of {1} to be set'
                    .format(driver_name, ' '.join(required)))
            args = []
            for name in required:
                args.append(options.pop(name))
        except KeyError:
            # no required args?!
            args = []

        # fix for openstack: derive `ex_force_auth_url` from `auth_url`
        # by cutting off the last path component, unless given explicitly
        if 'auth_url' in options:
            if 'ex_force_auth_url' not in options:
                auth_url = options['auth_url']
                options['ex_force_auth_url'] = auth_url.rsplit('/', 1)[0]

        try:
            provider_name = getattr(Provider, driver_name.upper())
        except AttributeError:
            raise ValueError(
                "No libcloud driver for provider {name}"
                .format(name=driver_name))
        driver_class = get_driver(provider_name)
        log.debug(
            "Initializing libcloud driver `%s` ...",
            driver_class.__name__)
        self.driver = driver_class(*args, **options)
コード例 #11
0
ファイル: cluster.py プロジェクト: dvischi/TissueMAPS
    def _stop_all_nodes(self, wait=False):
        """
        Stop every node of the cluster and count the failures.

        Nodes without an instance ID are dropped from the cluster
        without trying to stop them.  A node whose instance cannot be
        found is not counted as a failure.

        :param bool wait: handed on unchanged to each node's `stop`
        :return: number of nodes that could not be stopped
        """
        failures = 0
        for node in self.get_all_nodes():
            if node.instance_id:
                try:
                    node.stop(wait)
                    self.nodes[node.kind].remove(node)
                    log.debug(
                        "Removed node `%s` from cluster `%s`", node.name, self.name)
                except InstanceNotFoundError:
                    log.info(
                        "Node `%s` (instance ID `%s`) was not found;"
                        " assuming it has already been terminated.",
                        node.name, node.instance_id)
                except Exception as err:
                    failures += 1
                    log.error(
                        "Could not stop node `%s` (instance ID `%s`): %s %s",
                        node.name, node.instance_id, err, err.__class__)
            else:
                log.warning(
                    "Node `%s` has no instance ID."
                    " Assuming it did not start correctly,"
                    " so removing it anyway from the cluster.", node.name)
                self.nodes[node.kind].remove(node)
        return failures
コード例 #12
0
    def _write_extra_vars(self, cluster, filename='extra_vars.yml'):
        """
        Dump the cluster's variables to a YAML file and return its name.

        The file holds a single top-level key ``elasticluster``.  Below
        it are the cluster's own variables (with the content of its
        ``extra`` entry merged in), the cloud provider's variables
        under ``cloud``, one entry per node (keyed by node name) under
        ``nodes``, and the current working directory as ``output_dir``.

        :param cluster: cluster whose variables are written
        :param str filename: path of the file to (over)write
        :return: `filename`
        """
        # XXX: we should not repeat here names of attributes that
        # should not be exported... it would be better to use a simple
        # naming convention (e.g., omit whatever starts with `_`)
        extra_vars = cluster.to_vars_dict()
        extra_vars.update(extra_vars.pop('extra', {}))
        extra_vars['cloud'] = cluster.cloud_provider.to_vars_dict()

        # replace the per-kind node lists with one flat name -> vars map
        nodes_by_kind = extra_vars.pop('nodes')
        vars_by_node = {}
        extra_vars['nodes'] = vars_by_node
        for instances in nodes_by_kind.values():
            for node in instances:
                node_vars = node.to_vars_dict()
                node_vars.update(node_vars.pop('extra', {}))
                vars_by_node[node.name] = node_vars
        extra_vars['output_dir'] = os.getcwd()

        log.debug("Writing extra vars %r to file %s", extra_vars, filename)
        with open(filename, 'w') as output:
            # the file may contain passwords:
            # make it unreadable to other users before writing
            os.fchmod(output.fileno(), 0o600)
            # YAML format, for Ansible to read
            yaml.dump({ 'elasticluster': extra_vars }, output)
        return filename
コード例 #13
0
ファイル: ec2_boto.py プロジェクト: gc3-uzh-ch/elasticluster
    def _find_vpc_by_name(self, vpc_name):
        """
        Open a VPC connection and look up a VPC by ID or ``Name`` tag.

        :param str vpc_name: ID of the VPC, or value of its ``Name`` tag
        :return: pair `(vpc_connection, vpc_id)`
        :raises VpcError: if no VPC has that ID or ``Name`` tag
        """
        vpc_connection = boto.vpc.connect_to_region(
            self._region_name,
            aws_access_key_id=self._access_key,
            aws_secret_access_key=self._secret_key,
            is_secure=self._secure,
            host=self._ec2host,
            port=self._ec2port,
            path=self._ec2path,
        )
        log.debug("VPC connection has been successful.")

        for candidate in vpc_connection.get_all_vpcs():
            # a VPC can be referred to by its ID or by its `Name` tag
            known_as = [candidate.id]
            if 'Name' in candidate.tags:
                known_as.append(candidate.tags['Name'])
            if vpc_name not in known_as:
                continue
            vpc_id = candidate.id
            if vpc_name != vpc_id:
                # then `vpc_name` is the VPC name
                log.debug("VPC `%s` has ID `%s`", vpc_name, vpc_id)
            return (vpc_connection, vpc_id)

        raise VpcError('Cannot find VPC `{0}`.'.format(vpc_name))
コード例 #14
0
ファイル: openstack.py プロジェクト: gc3-uzh-ch/elasticluster
 def __init_keystone_session_v2(self, check=False):
     """
     Build a session that authenticates through Keystone API v2.

     :param bool check: when true, try the session out by listing
       flavors through a Nova client before returning it
     :return: the session object; or `None` if `check` is true and
       the trial request failed with `NotFound` or another
       `ClientException`
     """
     from keystoneauth1 import loading as keystone_v2
     password_plugin = keystone_v2.get_plugin_loader('password')
     credentials = password_plugin.load_from_options(
         auth_url=self._os_auth_url,
         username=self._os_username,
         password=self._os_password,
         project_name=self._os_tenant_name,
     )
     session = keystoneauth1.session.Session(
         auth=credentials, verify=self._os_cacert)
     if check:
         log.debug("Checking that Keystone API v2 session works...")
         try:
             # an invalid session makes the following raise some exception
             trial_client = nova_client.Client(
                 self._compute_api_version,
                 session=session, cacert=self._os_cacert)
             trial_client.flavors.list()
         except keystoneauth1.exceptions.NotFound as err:
             log.warning("Creating Keystone v2 session failed: %s", err)
             return None
         except keystoneauth1.exceptions.ClientException as err:
             log.error("OpenStack server rejected request (likely configuration error?): %s", err)
             return None  # FIXME: should we be raising an error instead?
     # reaching this point means the session is usable (or went unchecked)
     log.info("Using Keystone API v2 session to authenticate to OpenStack")
     return session
コード例 #15
0
ファイル: openstack.py プロジェクト: gc3-uzh-ch/elasticluster
    def _allocate_address_nova(self, instance, network_ids):
        """
        Allocates a floating/public ip address to the given instance,
        using the OpenStack Compute ('Nova') API.

        A floating IP that is not bound to a fixed IP is reused if
        there is one; otherwise a new floating IP is requested.  The
        whole operation runs while holding the node start lock.

        :param instance: instance to assign address to

        :param list network_ids: List of IDs (as strings) of networks
          where to request allocation the floating IP.  **Ignored**
          (only used by the corresponding Neutron API function).

        :return: public ip address

        :raises RuntimeError: if no floating IP is available
        """
        self._init_os_api()
        with OpenStackCloudProvider.__node_start_lock:
            # Use the `novaclient` API (works with python-novaclient <8.0.0)
            free_ips = [ip for ip in self.nova_client.floating_ips.list() if not ip.fixed_ip]
            if not free_ips:
                log.debug("Trying to allocate a new floating IP ...")
                free_ips.append(self.nova_client.floating_ips.create())
            if free_ips:
                ip = free_ips.pop()
            else:
                # `instance.id`, not the undefined name `instance_id`
                raise RuntimeError(
                    "Could not allocate floating IP for VM {0}"
                    .format(instance.id))
            instance.add_floating_ip(ip)
        return ip.ip
コード例 #16
0
ファイル: openstack.py プロジェクト: dvischi/TissueMAPS
    def _check_security_groups(self, names):
        """
        Verify that every named security group exists.

        The group list is taken from the Nova client; if that fails
        with `AttributeError`, from the Neutron client instead.

        :param List[str] names: List of security group names
        :return: `True` if all groups exist
        :raises: `SecurityGroupError` if any group does not exist
        """
        log.debug("Checking existence of security group(s) %s ...", names)
        try:
            # python-novaclient < 8.0.0
            nova_groups = self.nova_client.security_groups.list()
            existing = {group.name for group in nova_groups}
        except AttributeError:
            reply = self.neutron_client.list_security_groups()
            existing = {group[u'name'] for group in reply['security_groups']}

        # TODO: We should be able to create the security group if it
        # doesn't exist and at least add a rule to accept ssh access.
        # Also, we should be able to add new rules to a security group
        # if needed.
        missing = set(names).difference(existing)
        if not missing:
            # all sec groups exist
            return True

        raise SecurityGroupError(
            "Security group(s) `{0}` do not exist"
            .format(', '.join(missing)))
コード例 #17
0
    def execute(self):
        """
        Replace the current process with an `ssh` session to a cluster node.

        The target is the node named by the `ssh_to` parameter if
        given, else the cluster's frontend node.  If the node has no
        preferred IP yet, a trial connection is made first and the
        cluster is saved.  Extra `ssh_args` from the command line are
        appended to the `ssh` invocation.

        Logs an error and returns if the cluster cannot be loaded;
        logs an error and exits with status 1 if the node cannot be
        reached or has no known IP address.

        :raises ValueError: if no node is named as `ssh_to` requests
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as ex:
            log.error("Setting up cluster %s: %s\n", cluster_name, ex)
            return

        if self.params.ssh_to:
            try:
                nodes = dict((n.name, n) for n in cluster.get_all_nodes())
                frontend = nodes[self.params.ssh_to]
            except KeyError:
                raise ValueError(
                    "Hostname %s not found in cluster %s" % (self.params.ssh_to, cluster_name))
        else:
            frontend = cluster.get_frontend_node()
        try:
            # ensure we can connect to the host
            if not frontend.preferred_ip:
                # Ensure we can connect to the node, and save the value of `preferred_ip`
                ssh = frontend.connect(keyfile=cluster.known_hosts_file)
                if ssh:
                    ssh.close()
                cluster.repository.save_or_update(cluster)
        except NodeNotFound as ex:
            log.error("Unable to connect to the frontend node: %s", ex)
            sys.exit(1)

        host = frontend.connection_ip()
        if not host:
            # without this check, the address parsing below would fail
            # with an unhelpful `TypeError` on a missing address
            log.error("No IP address known for node %s", frontend.name)
            sys.exit(1)

        # check for nonstandard port, either IPv4 or IPv6
        addr = host
        port = str(SSH_PORT)
        if ':' in host:
            match = IPV6_RE.match(host)
            if match:
                addr = match.groups()[0]
                port = match.groups()[1]
            else:
                addr, _, port = host.partition(':')

        username = frontend.image_user
        knownhostsfile = cluster.known_hosts_file if cluster.known_hosts_file \
                         else '/dev/null'
        ssh_cmdline = ["ssh",
                       "-i", frontend.user_key_private,
                       "-o", "UserKnownHostsFile=%s" % knownhostsfile,
                       "-o", "StrictHostKeyChecking=yes",
                       "-p", port,
                       '%s@%s' % (username, addr)]
        ssh_cmdline.extend(self.params.ssh_args)
        log.debug("Running command `%s`", ' '.join(ssh_cmdline))
        os.execlp("ssh", *ssh_cmdline)
コード例 #18
0
ファイル: ec2_boto.py プロジェクト: HPCNow/elasticluster
    def _connect(self):
        """Connects to the ec2 cloud provider

        A connection already stored on this object is returned as is.
        Otherwise an EC2 connection is opened; if a VPC is configured,
        a VPC connection is opened as well and the VPC is looked up by
        ``Name`` tag or ID to record its ID.  Both connections are
        stored on this object only after everything has succeeded.

        :return: :py:class:`boto.ec2.connection.EC2Connection`
        :raises: Generic exception on error (logged and re-raised);
                 `VpcError` if the configured VPC does not exist
        """
        # check for existing connection
        if self._ec2_connection:
            return self._ec2_connection

        try:
            log.debug("Connecting to ec2 host %s", self._ec2host)
            region = ec2.regioninfo.RegionInfo(name=self._region_name,
                                               endpoint=self._ec2host)

            # connect to webservice
            ec2_connection = boto.connect_ec2(
                aws_access_key_id=self._access_key,
                aws_secret_access_key=self._secret_key,
                is_secure=self._secure,
                host=self._ec2host, port=self._ec2port,
                path=self._ec2path, region=region)
            log.debug("EC2 connection has been successful.")

            # stays `None` when no VPC is configured; it is stored on
            # the object below in either case, so it must be bound
            vpc_connection = None
            if self._vpc:
                vpc_connection = boto.connect_vpc(
                    aws_access_key_id=self._access_key,
                    aws_secret_access_key=self._secret_key,
                    is_secure=self._secure,
                    host=self._ec2host, port=self._ec2port,
                    path=self._ec2path, region=region)
                log.debug("VPC connection has been successful.")

                for vpc in vpc_connection.get_all_vpcs():
                    # a VPC need not carry a `Name` tag
                    vpc_name = vpc.tags['Name'] if 'Name' in vpc.tags else None
                    log.debug("Checking whether %s matches %s/%s",
                              self._vpc, vpc_name, vpc.id)
                    if self._vpc in (vpc_name, vpc.id):
                        self._vpc_id = vpc.id
                        if self._vpc != self._vpc_id:
                            log.debug("VPC %s matches %s",
                                      self._vpc, self._vpc_id)
                        break
                else:
                    raise VpcError('VPC %s does not exist.' % self._vpc)

        except Exception as e:
            log.error("connection to ec2 could not be "
                      "established: message=`%s`", str(e))
            raise

        self._ec2_connection, self._vpc_connection = (
            ec2_connection, vpc_connection)
        return self._ec2_connection
コード例 #19
0
ファイル: cluster.py プロジェクト: briceburg/elasticluster
 def start(self):
     """
     Start an instance for this node through the cloud provider.

     The instance ID returned by the cloud provider is stored in
     `instance_id`.  This method does not wait any further: it
     returns as soon as the cloud provider has returned the ID.
     """
     log.info("Starting node %s.", self.name)
     launch = self._cloud_provider.start_instance
     self.instance_id = launch(
         self.user_key_name,
         self.user_key_public,
         self.security_group,
         self.flavor,
         self.image,
         self.image_userdata)
     log.debug("Node %s has instance_id: `%s`", self.name, self.instance_id)
コード例 #20
0
ファイル: cluster.py プロジェクト: corburn/elasticluster
    def update_config(self, cluster_config, login_config):
        """Bring the current configuration up to date.

        Usually called after a `Cluster` instance has been loaded from
        persistent storage.  Only fields that can be updated safely
        are touched; in the code visible here that is `ssh_to`, taken
        from `cluster_config` (`login_config` is not used).
        """
        previous = self.__update_option(cluster_config, 'ssh_to', 'ssh_to')
        if not previous:
            return
        log.debug("Attribute 'ssh_to' updated: %s -> %s", previous, self.ssh_to)
コード例 #21
0
ファイル: openstack.py プロジェクト: ngbinh/elasticluster
    def start_instance(self, key_name, public_key_path, private_key_path,
                       security_group, flavor, image_id, image_userdata,
                       username=None, node_name=None, **kwargs):
        """Start a new instance on the cloud with the given properties.

        Steps performed, in order:

        * check the ssh keypair; this runs under a lock, since this
          function might be called from multiple threads
        * check the security group
        * check that an image with the given ID is available
        * look up the flavor by name
        * create the server and remember it in the instance cache

        :param str key_name: name of the ssh key to connect
        :param str public_key_path: path to ssh public key
        :param str private_key_path: path to ssh private key
        :param str security_group: firewall rule definition to apply on the
                                   instance
        :param str flavor: machine type to use for the instance
        :param str image_id: image type (os) to use for the instance
        :param str image_userdata: command to execute after startup
        :param str username: username for the given ssh key, default None
                             (not used by this method)
        :param str node_name: name given to the new server
        :param kwargs: accepted but not used by this method

        :return: str - instance id of the started instance
        :raises ImageError: if no image has ID `image_id`
        :raises FlavorError: if no flavor is named `flavor`
        """
        log.debug("Checking keypair `%s`.", key_name)
        with OpenStackCloudProvider.__node_start_lock:
            self._check_keypair(key_name, public_key_path, private_key_path)

        log.debug("Checking security group `%s`.", security_group)
        self._check_security_group(security_group)

        # the requested image must be among the available ones
        available_image_ids = [img.id for img in self._get_images()]
        if image_id not in available_image_ids:
            raise ImageError("No image found with id '%s' on cloud "
                             "%s" % (image_id, self._os_auth_url))

        # turn the flavor name into the flavor object of that name
        same_name = [fl for fl in self._get_flavors() if fl.name == flavor]
        if not same_name:
            raise FlavorError("No flavor found with name %s on cloud "
                              "%s" % (flavor, self._os_auth_url))
        flavor = same_name[0]

        vm = self.client.servers.create(
            node_name, image_id, flavor,
            key_name=key_name,
            security_groups=[security_group],
            userdata=image_userdata)

        self._instances[vm.id] = vm
        return vm.id
コード例 #22
0
    def execute(self):
        """
        Replace the current process with `self.command` aimed at a cluster node.

        The target node is the one `Cluster.get_ssh_to_node` returns
        for the `ssh_to` parameter.  Its IP addresses are refreshed;
        if it has no preferred IP yet, a trial connection is made and
        the cluster is saved.  The extra `cmds` from the command line
        are appended to the invocation.

        Logs an error and returns if the cluster cannot be loaded;
        logs an error and exits with status 1 if the node cannot be
        reached or has no known IP address.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
        except (ClusterNotFound, ConfigurationError) as ex:
            log.error("Setting up cluster %s: %s", cluster_name, ex)
            return

        # XXX: the default value of `self.params.ssh_to` should = the
        # default value for `ssh_to` in `Cluster.get_ssh_to_node()`
        frontend = cluster.get_ssh_to_node(self.params.ssh_to)
        log.debug("Updating the ip addresses of `%s`.", frontend.name)
        frontend.update_ips()

        try:
            if not frontend.preferred_ip:
                # a trial connection sets `preferred_ip`; save it
                ssh = frontend.connect(keyfile=cluster.known_hosts_file)
                if ssh:
                    ssh.close()
                cluster.repository.save_or_update(cluster)
        except NodeNotFound as ex:
            log.error("Unable to connect to the frontend node: %s", ex)
            sys.exit(1)

        # now delegate real connection to `ssh`
        host = frontend.connection_ip()
        if not host:
            log.error("No IP address known for node %s", frontend.name)
            sys.exit(1)
        addr, port = parse_ip_address_and_port(host)
        username = frontend.image_user
        knownhostsfile = cluster.known_hosts_file or '/dev/null'

        ssh_options = [
            "-i", frontend.user_key_private,
            "-o", "UserKnownHostsFile={0}".format(knownhostsfile),
            "-o", "StrictHostKeyChecking=yes",
            "-o", "Port={0:d}".format(port),
        ]
        destination = '%s@%s' % (username, addr)
        if cluster.ssh_proxy_command:
            # the proxy option goes first, right after the command name
            proxy = expand_ssh_proxy_command(
                cluster.ssh_proxy_command, username, addr, port)
            ssh_options = ['-o', 'ProxyCommand=' + proxy] + ssh_options

        cmdline = [self.command] + ssh_options + [destination]
        cmdline.extend(self.params.cmds)
        log.debug("Running command `%s`", ' '.join(cmdline))
        os.execlp(self.command, *cmdline)
コード例 #23
0
ファイル: ec2_boto.py プロジェクト: gc3-uzh-ch/elasticluster
    def _connect(self):
        """
        Return the EC2 connection, opening it on first use.

        A connection already stored on this object is returned as is.
        Otherwise an EC2 connection is opened and, if a VPC is
        configured, the VPC is looked up as well.  Both connections
        are stored on this object only after everything has succeeded.

        :return: :py:class:`boto.ec2.connection.EC2Connection`
        :raises: Generic exception on error (logged and re-raised);
          `CloudProviderError` if the new connection has no host
        """
        # reuse the connection made by an earlier call, if any
        if self._ec2_connection:
            return self._ec2_connection

        try:
            log.debug("Connecting to EC2 endpoint %s", self._ec2host)

            ec2_connection = boto.ec2.connect_to_region(
                self._region_name,
                aws_access_key_id=self._access_key,
                aws_secret_access_key=self._secret_key,
                is_secure=self._secure,
                host=self._ec2host,
                port=self._ec2port,
                path=self._ec2path,
            )
            # With the loose setting `BOTO_USE_ENDPOINT_HEURISTICS`
            # (needed to work around issue #592) Boto accepts *any*
            # string as an AWS region name and *always* returns a
            # connection object.  The only way to tell that we are not
            # heading for trouble is to check that there *is* a valid
            # host name on the other end of the connection.
            if not ec2_connection.host:
                raise CloudProviderError(
                    "Cannot establish connection to EC2 region {0}"
                    .format(self._region_name))
            log.debug("EC2 connection has been successful.")

            if self._vpc:
                vpc_connection, self._vpc_id = self._find_vpc_by_name(self._vpc)
            else:
                vpc_connection = None
                self._vpc_id = None

        except Exception as err:
            log.error("Error connecting to EC2: %s", err)
            raise

        self._ec2_connection = ec2_connection
        self._vpc_connection = vpc_connection
        return self._ec2_connection
コード例 #24
0
ファイル: cluster.py プロジェクト: corburn/elasticluster
 def start(self):
     """Launch this node on the cloud and record its instance id.

     The cloud provider's ``start_instance`` is called with the node's
     key, security group, flavor and image settings; the instance is
     named ``<cluster_name>-<name>`` and any entries in ``self.extra``
     are forwarded as keyword arguments.  The call does not wait for
     the node to come up: use `is_alive` and `update_ips` afterwards
     to learn more about its state.
     """
     log.info("Starting node %s.", self.name)
     full_name = "%s-%s" % (self.cluster_name, self.name)
     common_args = (
         self.user_key_name,
         self.user_key_public,
         self.user_key_private,
         self.security_group,
         self.flavor,
         self.image_id,
         self.image_userdata,
     )
     self.instance_id = self._cloud_provider.start_instance(
         *common_args,
         username=self.image_user,
         node_name=full_name,
         **self.extra)
     log.debug("Node %s has instance_id: `%s`", self.name, self.instance_id)
コード例 #25
0
ファイル: cluster.py プロジェクト: supr/elasticluster
    def connect(self):
        """Connect to the node via ssh using the paramiko library.

        The address comes from ``self.connection_ip()``; authentication
        uses the SSH agent and the private key in
        ``self.user_key_private``, logging in as ``self.image_user``.

        :return: :py:class:`paramiko.SSHClient` - ssh connection or None on
                 failure
        """
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(IgnorePolicy())
        remote_ip = self.connection_ip()
        if not self.ip_public:
            log.debug("Instance id '%s' has no public ip, using private IP "
                      "'%s' for connecting", self.instance_id, self.ip_private)

        try:
            log.debug("Trying to connect to host %s (%s)",
                      self.name, remote_ip)
            ssh.connect(remote_ip,
                        username=self.image_user,
                        allow_agent=True,
                        key_filename=self.user_key_private,
                        timeout=Node.connection_timeout)
            log.debug("Connection to %s succeded!", remote_ip)
            return ssh
        except socket.error as ex:
            # `except X as e` works on Python 2.6+ and Python 3 alike;
            # the old `except X, e` spelling is a syntax error on Python 3.
            log.debug("Host %s (%s) not reachable: %s.",
                      self.name, remote_ip, ex)
            # do not leave a half-initialised client around
            ssh.close()
        return None
コード例 #26
0
ファイル: conf.py プロジェクト: atulmathur/elasticluster
    def create_setup_provider(self, cluster_template, name=None):
        """Creates the setup provider for the given cluster template.

        The ``setup`` section of the template is copied and augmented
        with the general configuration (and the cluster name, if
        given).  Node groups are read from the ``*_groups`` keys, and
        per-node-kind variables from keys prefixed ``<kind>_var_`` or
        ``global_var_`` in either the ``setup`` or the ``cluster``
        section.

        :param str cluster_template: template of the cluster
        :param str name: name of the cluster to read configuration properties
        :return: an instance of the class registered for the configured
                 provider in ``Configurator.setup_providers_map``
        :raises ConfigurationError: if the configured provider is not
                 in ``Configurator.setup_providers_map``
        """
        template_conf = self.cluster_conf[cluster_template]
        # Work on a shallow copy: this method adds and removes keys, and
        # doing that on the stored configuration would make a second
        # call for the same template see a different `setup` section
        # (e.g., `playbook_path` would already have been deleted).
        conf = dict(template_conf['setup'])
        conf['general_conf'] = self.general_conf.copy()
        if name:
            conf['cluster_name'] = name
        conf_login = template_conf['login']

        provider_name = conf.get('provider')
        if provider_name not in Configurator.setup_providers_map:
            raise ConfigurationError(
                "Invalid value `%s` for `setup_provider` in configuration "
                "file." % provider_name)

        storage_path = self.general_conf['storage_path']
        # `playbook_path` is passed explicitly below, so it must not
        # also appear in the `**conf` keyword arguments
        playbook_path = conf.pop('playbook_path', None)

        # `frontend_groups=a,b` -> {'frontend': ['a', 'b']}
        # (7 is the length of the `_groups` suffix)
        groups = dict((k[:-7], v.split(',')) for k, v
                      in conf.items() if k.endswith('_groups'))

        # candidate variable definitions do not depend on the node kind
        candidates = list(conf.items()) + list(template_conf['cluster'].items())

        environment = dict()
        for nodekind in groups:
            # Environment variables parsing
            environment[nodekind] = dict()
            # Set both group and global variables
            prefixes = ("%s_var_" % nodekind, "global_var_")
            for key, value in candidates:
                for prefix in prefixes:
                    if key.startswith(prefix):
                        # strip only the leading prefix; `str.replace`
                        # would also remove later occurrences of it
                        var = key[len(prefix):]
                        environment[nodekind][var] = value
                        log.debug("setting variable %s=%s for node kind %s",
                                  var, value, nodekind)

        provider = Configurator.setup_providers_map[provider_name]
        return provider(groups, playbook_path=playbook_path,
                        environment_vars=environment,
                        storage_path=storage_path,
                        sudo=conf_login['image_sudo'],
                        sudo_user=conf_login['image_user_sudo'],
                        **conf)
コード例 #27
0
    def _init_az_api(self):
        """
        Create the Azure API client objects, unless already present.

        Kept as a separate method so that both ``__init__`` and
        ``__setstate__`` can call it.  The whole check-and-create
        sequence runs while holding the instance lock; an existing
        resource client is taken as the sign that there is nothing
        left to do.
        """
        with self.__lock:
            if self._resource_client is not None:
                return

            # never write the actual secret to the log
            shown_secret = '<redacted>' if self.secret else None
            log.debug("Making Azure `ServicePrincipalcredentials` object"
                      " with tenant=%r, client_id=%r, secret=%r ...",
                      self.tenant_id, self.client_id, shown_secret)
            creds = ServicePrincipalCredentials(
                tenant=self.tenant_id,
                client_id=self.client_id,
                secret=self.secret,
            )

            log.debug("Initializing Azure `ComputeManagementclient` ...")
            self._compute_client = ComputeManagementClient(
                creds, self.subscription_id)

            log.debug("Initializing Azure `NetworkManagementclient` ...")
            self._network_client = NetworkManagementClient(
                creds, self.subscription_id)

            log.debug("Initializing Azure `ResourceManagementclient` ...")
            self._resource_client = ResourceManagementClient(
                creds, self.subscription_id)

            log.info("Azure API clients initialized.")
コード例 #28
0
ファイル: cluster.py プロジェクト: kern3020/elasticluster
    def is_alive(self):
        """Checks if the current node is up and running in the cloud. It
        only checks the status provided by the cloud interface. Therefore a
        node might be running, but not yet ready to ssh into it.

        :return: the cloud provider's answer for this node's instance
                 id; ``False`` if the node has no instance id or the
                 provider raised an error
        """
        running = False
        if not self.instance_id:
            return False

        try:
            log.debug("Getting information for instance %s", self.instance_id)
            running = self._cloud_provider.is_instance_running(self.instance_id)
        except Exception as ex:
            # deliberately best-effort: an error from the provider is
            # logged and reported as "not running"
            log.debug("Ignoring error while looking for vm id %s: %s", self.instance_id, str(ex))

        # hand the result back to the caller; without this the method
        # would return `None` whenever the node has an instance id
        return running
コード例 #29
0
ファイル: cluster.py プロジェクト: corburn/elasticluster
    def is_alive(self):
        """Tell whether the cloud provider reports this node as running.

        Only the state known to the cloud interface is consulted, so a
        node reported as running may still not accept ssh connections.
        When the node is running, its IP addresses are refreshed via
        ``update_ips`` before returning.

        A node without an instance id is never alive; errors raised by
        the provider are logged and count as "not running".
        """
        if not self.instance_id:
            return False

        try:
            log.debug("Getting information for instance %s",
                      self.instance_id)
            running = self._cloud_provider.is_instance_running(
                self.instance_id)
        except Exception as ex:
            log.debug("Ignoring error while looking for vm id %s: %s",
                      self.instance_id, str(ex))
            running = False

        if not running:
            log.debug("node `%s` (instance id `%s`) still building...",
                      self.name, self.instance_id)
            return running

        log.debug("node `%s` (instance id %s) is up and running",
                  self.name, self.instance_id)
        self.update_ips()
        return running
コード例 #30
0
ファイル: ec2_boto.py プロジェクト: kzielnicki/elasticluster
    def start_instance(self, key_name, public_key_path, private_key_path,
                       security_group, flavor, image_id, image_userdata,
                       username=None, node_name=None, **kwargs):
        """Starts a new instance on the cloud using the given properties.
        The following tasks are done to start an instance:

        * establish a connection to the cloud web service
        * check ssh keypair and upload it if it does not yet exist. This is
          a locked process, since this function might be called in multiple
          threads and we only want the key to be stored once.
        * check if the security group exists
        * run the instance with the given properties

        :param str key_name: name of the ssh key to connect
        :param str public_key_path: path to ssh public key
        :param str private_key_path: path to ssh private key
        :param str security_group: firewall rule definition to apply on the
                                   instance
        :param str flavor: machine type to use for the instance
        :param str image_id: image type (os) to use for the instance
        :param str image_userdata: command to execute after startup
        :param str username: username for the given ssh key, default None
        :param str node_name: accepted for interface compatibility; not
                              used by the code shown here
        :param kwargs: accepted for interface compatibility; not used by
                       the code shown here

        :return: str - instance id of the started instance
        :raises ClusterError: if starting the instance fails with an
                              error whose text mentions ``TooManyInstances``
        :raises InstanceError: if starting the instance fails for any
                               other reason
        """
        connection = self._connect()

        log.debug("Checking keypair `%s`.", key_name)
        # the `_check_keypair` method has to be called within a lock,
        # since it will upload the key if it does not exist and if this
        # happens for every node at the same time ec2 will throw an error
        # message (see issue #79)
        with BotoCloudProvider.__node_start_lock:
            self._check_keypair(key_name, public_key_path, private_key_path)

        log.debug("Checking security group `%s`.", security_group)
        self._check_security_group(security_group)

        try:
            reservation = connection.run_instances(
                image_id, key_name=key_name, security_groups=[security_group],
                instance_type=flavor, user_data=image_userdata)
        except Exception as ex:
            log.error("Error starting instance: %s", ex)
            # Search the error *text*: a membership test on the exception
            # object itself is not a substring search (and is a
            # `TypeError` on Python 3).
            if "TooManyInstances" in str(ex):
                raise ClusterError(ex)
            else:
                raise InstanceError(ex)
        # NOTE(review): the docstring promises the instance id, but the
        # code visible here ends without returning anything derived from
        # `reservation` -- confirm against the complete source.
コード例 #31
0
    def _build_inventory(self, cluster):
        """
        Builds the inventory for the given cluster and returns its path

        :param cluster: cluster to build inventory for
        :type cluster: :py:class:`elasticluster.cluster.Cluster`
        """
        inventory_data = defaultdict(list)

        for node in cluster.get_all_nodes():
            if node.preferred_ip is None:
                log.warning(
                    "Ignoring node `{0}`: No IP address."
                    .format(node.name))
                continue
            if node.kind not in self.groups:
                # FIXME: should this raise a `ConfigurationError` instead?
                log.warning(
                    "Ignoring node `{0}`:"
                    " Node kind `{1}` not defined in cluster!"
                    .format(node.name, node.kind))
                continue

            extra_vars = ['ansible_user=%s' % node.image_user]

            ip_addr, port = parse_ip_address_and_port(node.preferred_ip)
            if port != 22:
                extra_vars.append('ansible_port=%s' % port)

            if node.kind in self.environment:
                extra_vars.extend('%s=%s' % (k, v) for k, v in
                                  self.environment[node.kind].items())
            for group in self.groups[node.kind]:
                inventory_data[group].append(
                    (node.name, ip_addr, str.join(' ', extra_vars)))

        if not inventory_data:
            log.info("No inventory file was created.")
            return None

        # create a temporary file to pass to ansible, since the
        # api is not stable yet...
        if self._storage_path_tmp:
            if not self._storage_path:
                self._storage_path = tempfile.mkdtemp()
            elasticluster.log.warning(
                "Writing inventory file to tmp dir `%s`", self._storage_path)

        inventory_path = os.path.join(
            self._storage_path, (cluster.name + '.inventory'))
        log.debug("Writing Ansible inventory to file `%s` ...", inventory_path)
        with open(inventory_path, 'w+') as inventory_file:
            for section, hosts in inventory_data.items():
                # Ansible throws an error "argument of type 'NoneType' is not
                # iterable" if a section is empty, so ensure we have something
                # to write in there
                if hosts:
                    inventory_file.write("\n[" + section + "]\n")
                    for host in hosts:
                        hostline = "{0} ansible_host={1} {2}\n".format(*host)
                        inventory_file.write(hostline)
        return inventory_path
コード例 #32
0
    def start_instance(self,
                       # these are common to any
                       # CloudProvider.start_instance() call
                       key_name, public_key_path, private_key_path,
                       security_group, flavor, image_id, image_userdata,
                       username=None,
                       # these params are specific to the
                       # GoogleCloudProvider
                       node_name=None,
                       boot_disk_type='pd-standard',
                       boot_disk_size=10,
                       tags=None,
                       scheduling=None,
                       accelerator_count=0,
                       accelerator_type='default',
                       allow_project_ssh_keys=True,
                       min_cpu_platform=None,
                       **kwargs):
        """
        Starts a new instance with the given properties and returns
        the instance id.

        The method builds the request body for the GCE ``instances.insert``
        call, submits it, waits for the operation to finish and checks the
        response.

        :param str key_name: name of the ssh key to connect
          (not used in the body of this method)
        :param str public_key_path: path to ssh public key; its content is
          stored in the instance's ``ssh-keys`` metadata item
        :param str private_key_path: path to ssh private key
          (not used in the body of this method)
        :param str security_group: firewall rule definition to apply on the
          instance (not used in the body of this method)
        :param str flavor: machine type to use for the instance
        :param str image_id: image type (os) to use for the instance.
          Either a full ``http://`` / ``https://`` URL, which is used
          as-is, or a name starting with one of the prefixes in
          ``IMAGE_NAME_SHORTCUTS`` followed by a dash.
        :param str image_userdata: command to execute after startup;
          if set, it is passed as the ``startup-script`` metadata item
        :param str username: username for the given ssh key, default None
        :param str node_name: name of the instance; it is lowercased and
          underscores are replaced by dashes.  If not given, a name of the
          form ``elasticluster-<uuid4>`` is generated.
        :param str boot_disk_type: disk type name, used to build the
          ``diskTypes`` URL of the boot disk
        :param boot_disk_size: size of the boot disk (sent as ``diskSizeGb``);
          converted with ``int()``
        :param str|Sequence tags: "Tags" to label the instance.
          Can be either a single string (individual tags are comma-separated),
          or a sequence of strings (each string being a single tag).
        :param str scheduling: scheduling option to use for the instance ("preemptible")
        :param int accelerator_count: Number of accelerators (e.g., GPUs) to make available in instance
        :param str accelerator_type: Type of accelerator to request.  Can be one of:

          * Full URL specifying an accelerator type valid for the zone and project VMs are being created in.  For example, ``https://www.googleapis.com/compute/v1/projects/[PROJECT_ID]/zones/[ZONE]/acceleratorTypes/[ACCELERATOR_TYPE]``
          * An accelerator type name (any string which is not a valid URL).  This is internally prefixed with the string ``https://www.googleapis.com/compute/v1/projects/[PROJECT_ID]/zones/[ZONE]/acceleratorTypes/`` to form a full URL.

          Only used if ``accelerator_count`` is > 0.
        :param bool allow_project_ssh_keys:
          When ``True`` (default), SSH login is allowed to a node
          using any of the project-wide SSH keys (if they are
          defined).  When ``False``, only the SSH key specified by
          ElastiCluster config's ``[login/*]`` section will be allowed
          to log in (instance-level key).
        :param str min_cpu_platform: require CPUs of this type or better (e.g., "Intel Skylake")
        :param kwargs: any further keyword arguments are accepted but
          not used in the body of this method

        :return: str - instance id of the started instance
        :raises InstanceError: if `image_id` matches no known shortcut,
          if `scheduling` has an unknown value, or if creating the
          instance fails with ``HttpError`` or ``CloudProviderError``
        :raises TypeError: if `tags` is neither a string, a sequence,
          nor ``None``
        """
        # construct URLs
        project_url = '%s%s' % (GCE_URL, self._project_id)
        machine_type_url = '%s/zones/%s/machineTypes/%s' \
                           % (project_url, self._zone, flavor)
        boot_disk_type_url = '%s/zones/%s/diskTypes/%s' \
                           % (project_url, self._zone, boot_disk_type)
        # FIXME: `conf.py` should ensure that `boot_disk_size` has the right
        # type, so there would be no need to convert here
        boot_disk_size_gb = int(boot_disk_size)
        network_url = '%s/global/networks/%s' % (project_url, self._network)
        if image_id.startswith('http://') or image_id.startswith('https://'):
            image_url = image_id
        else:
            # allow image shortcuts (see docstring for IMAGE_NAME_SHORTCUTS)
            for prefix, os_cloud in self.IMAGE_NAME_SHORTCUTS.iteritems():
                if image_id.startswith(prefix + '-'):
                    image_url = '%s%s/global/images/%s' % (
                        GCE_URL, os_cloud, image_id)
                    break
            else:
                # the loop ended without `break`: no shortcut prefix matched
                raise InstanceError(
                    "Unknown image name shortcut '{0}',"
                    " please use the full `https://...` self-link URL."
                    .format(image_id))

        # only "preemptible" (or no value at all) is accepted
        scheduling_option = {}
        if scheduling == 'preemptible':
            scheduling_option['preemptible'] = True
        elif scheduling is not None:
            raise InstanceError("Unknown scheduling option: '%s'" % scheduling)

        # normalize `tags`: a comma-separated string becomes a list
        if isinstance(tags, types.StringTypes):
            tags = tags.split(',')
        elif isinstance(tags, collections.Sequence):
            # ok, nothing to do
            pass
        elif tags is not None:
            raise TypeError(
                "The `tags` argument to `gce.start_instance`"
                " should be a string or a list, got {T} instead"
                .format(T=type(tags)))

        with open(public_key_path, 'r') as f:
            public_key_content = f.read()

        # instance metadata: authorized SSH key, and whether
        # project-wide SSH keys are blocked
        compute_metadata = [
            {
                "key": "ssh-keys",
                "value": "%s:%s" % (username, public_key_content),
            },
            {
                "key": "block-project-ssh-keys",
                "value": (not allow_project_ssh_keys),
            },
        ]
        if image_userdata:
            compute_metadata.append({
                "key": "startup-script",
                "value": image_userdata,
            })

        # construct the request body
        if node_name:
            instance_id = node_name.lower().replace('_', '-')  # GCE doesn't allow "_"
        else:
            instance_id = 'elasticluster-%s' % uuid.uuid4()

        instance = {
            'name': instance_id,
            'machineType': machine_type_url,
            'tags': {
              'items': tags,
            },
            'scheduling': scheduling_option,
            'disks': [{
                'autoDelete': 'true',
                'boot': 'true',
                'type': 'PERSISTENT',
                'initializeParams' : {
                    'diskName': "%s-disk" % instance_id,
                    'diskType': boot_disk_type_url,
                    'diskSizeGb': boot_disk_size_gb,
                    'sourceImage': image_url,
                    }
                }],
            'networkInterfaces': [
                {'accessConfigs': [
                    {'type': 'ONE_TO_ONE_NAT',
                     'name': 'External NAT'
                    }],
                 'network': network_url
                }],
            'serviceAccounts': [
                {'email': self._email,
                 'scopes': GCE_DEFAULT_SCOPES
                }],
            "metadata": {
                "kind": "compute#metadata",
                "items": compute_metadata,
            }
        }

        if min_cpu_platform is not None:
            instance['minCpuPlatform'] = min_cpu_platform

        # add accelerators/GPUs if requested
        if accelerator_count > 0:
            if (accelerator_type.startswith('https://')
                or accelerator_type.startswith('http://')):
                # use URL as-is
                accelerator_type_url = accelerator_type
            else:
                # plain type name: expand it to the full resource URL
                accelerator_type_url = (
                    'https://www.googleapis.com/compute/{api_version}/'
                    'projects/{project_id}/zones/{zone}/'
                    'acceleratorTypes/{accelerator_type}'
                    .format(
                        api_version=GCE_API_VERSION,
                        project_id=self._project_id,
                        zone=self._zone,
                        accelerator_type=accelerator_type
                    ))
            log.debug(
                "VM instance `%s`:"
                " Requesting %d accelerator%s of type '%s'",
                instance_id, accelerator_count,
                ('s' if accelerator_count > 1 else ''),
                accelerator_type_url)
            instance['guestAccelerators'] = [
             {
                 'acceleratorCount': accelerator_count,
                 'acceleratorType': accelerator_type_url,
             }
            ]
            # no live migration with GPUs,
            # see: https://cloud.google.com/compute/docs/gpus#restrictions
            instance['scheduling']['onHostMaintenance'] = 'TERMINATE'
            instance['scheduling']['automaticRestart'] = True

        # create the instance
        gce = self._connect()
        request = gce.instances().insert(
            project=self._project_id, body=instance, zone=self._zone)
        try:
            response = self._execute_request(request)
            response = self._wait_until_done(response)
            self._check_response(response)
            return instance_id
        except (HttpError, CloudProviderError) as e:
            # report both API-level and provider-level failures to the
            # caller as `InstanceError`
            log.error("Error creating instance `%s`" % e)
            raise InstanceError("Error creating instance `%s`" % e)
コード例 #33
0
    def start_instance(self,
                       key_name,
                       public_key_path,
                       private_key_path,
                       security_group,
                       flavor,
                       image_id,
                       image_userdata,
                       username='******',
                       node_name=None,
                       **extra):
        """
        Start a new VM using the given properties.

        The resource group (named after the cluster) and the cluster
        network are created on first use; then the VM template is
        deployed and the method waits for the deployment operation to
        complete.

        :param str key_name:
          **unused in Azure**, only present for interface compatibility
        :param str public_key_path:
          path to ssh public key to authorize on the VM (for user `username`, see below)
        :param str private_key_path:
          **unused in Azure**, only present for interface compatibility
        :param str security_group:
          network security group to attach VM to.  A value other than
          ``default`` triggers a "not supported" warning, yet the value
          is still passed on as ``networkSecurityGroupName``; an empty
          value is replaced by ``<cluster name>-secgroup``.
        :param str flavor:
          machine type to use for the instance
        :param str image_id:
          disk image to use for the instance;
          has the form *publisher/offer/sku/version*
          (e.g., ``canonical/ubuntuserver/16.04.0-LTS/latest``)
        :param str image_userdata:
          command to execute after startup, **currently unused**
          (a warning is issued if set)
        :param str username:
          username for the given ssh key.
          NOTE(review): this text used to state that the default is
          ``root``, but the default in the signature reads ``'******'``
          -- confirm which one is intended.
        :param str node_name:
          name of the VM; also used as the deployment name.  The part
          before the leftmost dash is taken as the cluster name, so the
          value must be a string containing at least one dash (the
          ``None`` default cannot be split).
        :param extra:
          additional keyword arguments; accepted but not used in the
          body of this method

        :return: list[str] -- resource group (i.e., cluster name) and node name of the started VM
        """
        self._init_az_api()

        # Warn of unsupported parameters, if set.  We do not warn
        # about `user_key` or `private_key_path` since they come from
        # a `[login/*]` section and those can be shared across
        # different cloud providers.
        if security_group and security_group != 'default':
            warn("Setting `security_group` is currently not supported"
                 " in the Azure cloud; VMs will all be attached to"
                 " a network security group named after the cluster name.")
        if image_userdata:
            warn("Parameter `image_userdata` is currently not supported"
                 " in the Azure cloud and will be ignored.")

        # Use the cluster name to identify the Azure resource group;
        # however, `Node.cluster_name` is not passed down here so
        # extract it from the node name, which always contains it as
        # the substring before the leftmost dash (see `cluster.py`,
        # line 1182)
        cluster_name, _ = node_name.split('-', 1)
        # create the resource group only once per cluster name; the
        # lock guards the check-and-add on `_resource_groups_created`
        with self.__lock:
            if cluster_name not in self._resource_groups_created:
                self._resource_client.resource_groups.create_or_update(
                    cluster_name, {'location': self.location})
                self._resource_groups_created.add(cluster_name)

        # read public SSH key
        with open(public_key_path, 'r') as public_key_file:
            public_key = public_key_file.read()

        image_publisher, image_offer, \
            image_sku, image_version = self._split_image_id(image_id)

        if not security_group:
            security_group = (cluster_name + '-secgroup')

        net_parameters = {
            'networkSecurityGroupName': {
                'value': security_group,
            },
            'subnetName': {
                'value': cluster_name
            },
        }
        net_name = net_parameters['subnetName']['value']
        # deploy the network template only once per network name, and
        # wait for the deployment operation before recording it
        with self.__lock:
            if net_name not in self._networks_created:
                log.debug("Creating network `%s` in Azure ...", net_name)
                oper = self._resource_client.deployments.create_or_update(
                    cluster_name, net_name, {
                        'mode': DeploymentMode.incremental,
                        'template': _NET_TEMPLATE,
                        'parameters': net_parameters,
                    })
                oper.wait()
                self._networks_created.add(net_name)

        # values for the parameters of the VM deployment template
        vm_parameters = {
            'adminUserName': {
                'value': username
            },
            'imagePublisher': {
                'value': image_publisher
            },  # e.g., 'canonical'
            'imageOffer': {
                'value': image_offer
            },  # e.g., ubuntuserver
            'imageSku': {
                'value': image_sku
            },  # e.g., '16.04.0-LTS'
            'imageVersion': {
                'value': image_version
            },  # e.g., 'latest'
            'networkSecurityGroupName': {
                'value': security_group,
            },
            'sshKeyData': {
                'value': public_key
            },
            'storageAccountName': {
                'value':
                self._make_storage_account_name(cluster_name, node_name)
            },
            'subnetName': {
                'value': cluster_name
            },
            'vmName': {
                'value': node_name
            },
            'vmSize': {
                'value': flavor
            },
        }
        log.debug("Deploying `%s` VM template to Azure ...",
                  vm_parameters['vmName']['value'])
        # the deployment is named after the node and lives in the
        # cluster's resource group
        oper = self._resource_client.deployments.create_or_update(
            cluster_name, node_name, {
                'mode': DeploymentMode.incremental,
                'template': _VM_TEMPLATE,
                'parameters': vm_parameters,
            })
        oper.wait()

        # the `instance_id` is a composite type since we need both the
        # resource group name and the vm name to uniquely identify a VM
        return [cluster_name, node_name]
コード例 #34
0
ファイル: ec2_boto.py プロジェクト: mdbishop/elasticluster
    def _connect(self):
        """Connects to the ec2 cloud provider

        :return: :py:class:`boto.ec2.connection.EC2Connection`
        :raises: Generic exception on error
        """
        # check for existing connection
        if self._ec2_connection:
            return self._ec2_connection

        if not self._vpc:
            vpc_connection = None

        try:
            log.debug("Connecting to ec2 host %s", self._ec2host)
            region = ec2.regioninfo.RegionInfo(name=self._region_name,
                                               endpoint=self._ec2host)

            # connect to webservice
            ec2_connection = boto.connect_ec2(
                aws_access_key_id=self._access_key,
                aws_secret_access_key=self._secret_key,
                is_secure=self._secure,
                host=self._ec2host,
                port=self._ec2port,
                path=self._ec2path,
                region=region)
            log.debug("EC2 connection has been successful.")

            if self._vpc:
                vpc_connection = boto.connect_vpc(
                    aws_access_key_id=self._access_key,
                    aws_secret_access_key=self._secret_key,
                    is_secure=self._secure,
                    host=self._ec2host,
                    port=self._ec2port,
                    path=self._ec2path,
                    region=region)
                log.debug("VPC connection has been successful.")

                for vpc in vpc_connection.get_all_vpcs():
                    log.debug("Checking whether %s matches %s/%s" %
                              (self._vpc, vpc.tags['Name'], vpc.id))
                    if self._vpc in [vpc.tags['Name'], vpc.id]:
                        self._vpc_id = vpc.id
                        if self._vpc != self._vpc_id:
                            log.debug("VPC %s matches %s" %
                                      (self._vpc, self._vpc_id))
                        break
                else:
                    raise VpcError('VPC %s does not exist.' % self._vpc)

            # list images to see if the connection works
            # images = self._ec2_connection.get_all_images()
            # log.debug("%d images found on cloud %s",
            #           len(images), self._ec2host)

        except Exception as e:
            log.error(
                "connection to ec2 could not be "
                "established: message=`%s`", str(e))
            raise

        self._ec2_connection, self._vpc_connection = (ec2_connection,
                                                      vpc_connection)
        return self._ec2_connection
コード例 #35
0
    def start_instance(self,
                       key_name,
                       public_key_path,
                       private_key_path,
                       security_group,
                       flavor,
                       image_id,
                       image_userdata,
                       username=None,
                       node_name=None,
                       network_ids=None,
                       price=None,
                       timeout=None,
                       boot_disk_device=None,
                       boot_disk_size=None,
                       boot_disk_type=None,
                       boot_disk_iops=None,
                       placement_group=None,
                       **kwargs):
        """Starts a new instance on the cloud using the given properties.
        The following tasks are done to start an instance:

        * establish a connection to the cloud web service
        * check ssh keypair and upload it if it does not yet exist. This is
          a locked process, since this function might be called in multiple
          threads and we only want the key to be stored once.
        * check if the security group exists
        * run the instance with the given properties, either as a regular
          instance or (when a price is set) through a spot request

        :param str key_name: name of the ssh key to connect
        :param str public_key_path: path to ssh public key
        :param str private_key_path: path to ssh private key
        :param str security_group: firewall rule definition to apply on the
                                   instance
        :param str flavor: machine type to use for the instance
        :param str image_id: image type (os) to use for the instance
        :param str image_userdata: command to execute after startup
        :param str username: username for the given ssh key, default None
                             (not used by this method)
        :param str node_name: value of the ``Name`` tag set on the instance
        :param str network_ids: comma-separated list of subnets; one network
                                interface is requested per subnet
        :param float price: Spot instance price (if 0, do not use spot
                            instances). Defaults to ``self.price``.
        :param timeout: Timeout (in seconds) waiting for spot instances;
                        only used if price > 0. A false value means wait
                        forever. Defaults to ``self.timeout``.
        :param str boot_disk_device: Root volume device path if not /dev/sda1
        :param str boot_disk_size: Target size, in GiB, for the root volume
        :param str boot_disk_type: Type of root volume (standard, gp2, io1)
        :param str boot_disk_iops: Provisioned IOPS for the root volume
        :param str placement_group: Enable low-latency networking between
                                    compute nodes.
        :param kwargs: extra keyword arguments are accepted and ignored

        :return: str - instance id of the started instance
        :raises ClusterError: if the error text mentions `TooManyInstances`
        :raises InstanceError: on any other failure while starting the
                               instance (including a spot request timeout)
        """
        connection = self._connect()

        log.debug("Checking keypair `%s`.", key_name)
        # the `_check_keypair` method has to be called within a lock,
        # since it will upload the key if it does not exist and if this
        # happens for every node at the same time ec2 will throw an error
        # message (see issue #79)
        with BotoCloudProvider.__node_start_lock:
            self._check_keypair(key_name, public_key_path, private_key_path)

        log.debug("Checking security group `%s`.", security_group)
        security_group_id = self._check_security_group(security_group)

        if network_ids:
            # one interface specification per requested subnet; the security
            # group is attached to the interfaces, so the top-level
            # `security_groups` list is left empty in this case
            interfaces = []
            for subnet in network_ids.split(','):
                subnet_id = self._check_subnet(subnet)

                interfaces.append(
                    boto.ec2.networkinterface.NetworkInterfaceSpecification(
                        subnet_id=subnet_id,
                        groups=[security_group_id],
                        associate_public_ip_address=self.request_floating_ip))
            interfaces = boto.ec2.networkinterface.NetworkInterfaceCollection(
                *interfaces)

            security_groups = []
        else:
            interfaces = None
            security_groups = [security_group]

        # get defaults for `price` and `timeout` from class instance
        if price is None:
            price = self.price
        if timeout is None:
            timeout = self.timeout

        # only build a block device mapping when a root volume size is given
        if boot_disk_size:
            dev_root = boto.ec2.blockdevicemapping.BlockDeviceType()
            dev_root.size = int(boot_disk_size)
            dev_root.delete_on_termination = True
            if boot_disk_type:
                dev_root.volume_type = boot_disk_type
            if boot_disk_iops:
                dev_root.iops = int(boot_disk_iops)
            bdm = boto.ec2.blockdevicemapping.BlockDeviceMapping()
            dev_name = boot_disk_device if boot_disk_device else "/dev/sda1"
            bdm[dev_name] = dev_root
        else:
            bdm = None

        try:
            # start spot instance if bid is specified
            if price:
                log.info("Requesting spot instance with price `%s` ...", price)
                request = connection.request_spot_instances(
                    price,
                    image_id,
                    key_name=key_name,
                    security_groups=security_groups,
                    instance_type=flavor,
                    user_data=image_userdata,
                    network_interfaces=interfaces,
                    placement_group=placement_group,
                    block_device_map=bdm,
                    instance_profile_name=self._instance_profile)[-1]

                # wait until spot request is fulfilled (will wait
                # forever if no timeout is given)
                start_time = time.time()
                timeout = (float(timeout) if timeout else 0)
                log.info(
                    "Waiting for spot instance (will time out in %d seconds) ...",
                    timeout)
                while request.status.code != 'fulfilled':
                    if timeout and time.time() - start_time > timeout:
                        request.cancel()
                        raise RuntimeError('spot instance timed out')
                    time.sleep(self.POLL_INTERVAL)
                    # update request status
                    request = connection.get_all_spot_instance_requests(
                        request_ids=request.id)[-1]
            else:
                reservation = connection.run_instances(
                    image_id,
                    key_name=key_name,
                    security_groups=security_groups,
                    instance_type=flavor,
                    user_data=image_userdata,
                    network_interfaces=interfaces,
                    placement_group=placement_group,
                    block_device_map=bdm,
                    instance_profile_name=self._instance_profile)
        except Exception as ex:
            log.error("Error starting instance: %s", ex)
            # Look for the marker in the error *text*: an exception object
            # is not a container, so `in ex` fails with TypeError on
            # Python 3 and never does a substring match on Python 2.
            if "TooManyInstances" in str(ex):
                raise ClusterError(ex)
            else:
                raise InstanceError(ex)
        if price:
            # a fulfilled spot request only carries the instance id
            vm = connection.get_only_instances(
                instance_ids=[request.instance_id])[-1]
        else:
            vm = reservation.instances[-1]
        vm.add_tag("Name", node_name)

        # cache instance object locally for faster access later on
        self._instances[vm.id] = vm

        return vm.id
コード例 #36
0
    def start_instance(self,
                       key_name,
                       public_key_path,
                       private_key_path,
                       security_group,
                       flavor,
                       image_id,
                       image_userdata,
                       username=None,
                       node_name=None,
                       network_ids=None,
                       **kwargs):
        """Starts a new instance on the cloud using the given properties.
        The following tasks are done to start an instance:

        * establish a connection to the cloud web service
        * check ssh keypair and upload it if it does not yet exist. This is
          a locked process, since this function might be called in multiple
          threads and we only want the key to be stored once.
        * check if the security group exists
        * run the instance with the given properties

        :param str key_name: name of the ssh key to connect
        :param str public_key_path: path to ssh public key
        :param str private_key_path: path to ssh private key
        :param str security_group: firewall rule definition to apply on the
                                   instance
        :param str flavor: machine type to use for the instance
        :param str image_id: image type (os) to use for the instance
        :param str image_userdata: command to execute after startup
        :param str username: username for the given ssh key, default None
                             (not used by this method)
        :param str node_name: value of the ``Name`` tag set on the instance
        :param str network_ids: comma-separated list of subnets; one network
                                interface is requested per subnet
        :param kwargs: extra keyword arguments are accepted and ignored

        :return: str - instance id of the started instance
        :raises ClusterError: if the error text mentions `TooManyInstances`
        :raises InstanceError: on any other failure while starting the
                               instance
        """
        connection = self._connect()

        log.debug("Checking keypair `%s`.", key_name)
        # the `_check_keypair` method has to be called within a lock,
        # since it will upload the key if it does not exist and if this
        # happens for every node at the same time ec2 will throw an error
        # message (see issue #79)
        with BotoCloudProvider.__node_start_lock:
            self._check_keypair(key_name, public_key_path, private_key_path)

        log.debug("Checking security group `%s`.", security_group)
        security_group_id = self._check_security_group(security_group)

        if network_ids:
            # one interface specification per requested subnet; the security
            # group is attached to the interfaces, so the top-level
            # `security_groups` list is left empty in this case
            interfaces = []
            for subnet in network_ids.split(','):
                subnet_id = self._check_subnet(subnet)

                interfaces.append(
                    ec2.networkinterface.NetworkInterfaceSpecification(
                        subnet_id=subnet_id,
                        groups=[security_group_id],
                        associate_public_ip_address=self.request_floating_ip))
            interfaces = ec2.networkinterface.NetworkInterfaceCollection(
                *interfaces)

            security_groups = []
        else:
            interfaces = None
            security_groups = [security_group]

        try:
            reservation = connection.run_instances(
                image_id,
                key_name=key_name,
                security_groups=security_groups,
                instance_type=flavor,
                user_data=image_userdata,
                network_interfaces=interfaces)
        except Exception as ex:
            log.error("Error starting instance: %s", ex)
            # Look for the marker in the error *text*: an exception object
            # is not a container, so `in ex` fails with TypeError on
            # Python 3 and never does a substring match on Python 2.
            if "TooManyInstances" in str(ex):
                raise ClusterError(ex)
            else:
                raise InstanceError(ex)

        vm = reservation.instances[-1]
        vm.add_tag("Name", node_name)

        # cache instance object locally for faster access later on
        self._instances[vm.id] = vm

        return vm.id
コード例 #37
0
    def start_instance(self,
                       key_name,
                       public_key_path,
                       private_key_path,
                       security_group,
                       flavor,
                       image_id,
                       image_userdata,
                       username=None,
                       node_name=None,
                       network_ids=None,
                       **kwargs):
        """Starts a new instance on the cloud using the given properties.
        The following tasks are done to start an instance:

        * check ssh keypair and upload it if it does not yet exist. This is
          a locked process, since this function might be called in multiple
          threads and we only want the key to be stored once.
        * check if the security group exists
        * check that the requested image and flavor exist
        * run the instance with the given properties

        :param str key_name: name of the ssh key to connect
        :param str public_key_path: path to ssh public key
        :param str private_key_path: path to ssh private key
        :param str security_group: firewall rule definition to apply on the
                                   instance
        :param str flavor: machine type to use for the instance
        :param str image_id: image type (os) to use for the instance
        :param str image_userdata: command to execute after startup
        :param str username: username for the given ssh key, default None
                             (not used by this method)
        :param str node_name: name given to the new server
        :param str network_ids: comma-separated network ids to attach the
                                instance to; when empty, the built-in
                                default network is used
        :param kwargs: extra keyword arguments are accepted and ignored

        :return: str - instance id of the started instance
        :raises ImageError: if no image has the ID `image_id`
        :raises FlavorError: if no flavor is named `flavor`
        """

        log.debug("Checking keypair `%s` ...", key_name)
        with OpenStackCloudProvider.__node_start_lock:
            self._check_keypair(key_name, public_key_path, private_key_path)

        log.debug("Checking security group `%s` ...", security_group)
        self._check_security_group(security_group)

        # Check if the image id is present.
        images = self._get_images()
        if image_id not in [img.id for img in images]:
            raise ImageError(
                "No image found with ID `{0}` in project `{1}` of cloud {2}".
                format(image_id, self._os_tenant_name, self._os_auth_url))

        # Check if the flavor exists
        flavors = [fl for fl in self._get_flavors() if fl.name == flavor]
        if not flavors:
            raise FlavorError(
                "No flavor found with name `{0}` in project `{1}` of cloud {2}"
                .format(flavor, self._os_tenant_name, self._os_auth_url))
        flavor = flavors[0]

        # Honor the caller-supplied networks; blank entries (e.g. from a
        # trailing comma) are dropped.
        net_ids = [netid.strip()
                   for netid in (network_ids or '').split(',')
                   if netid.strip()]
        if net_ids:
            log.debug("Specifying networks for node %s: %s",
                      node_name, ', '.join(net_ids))
        else:
            # No network requested: keep the previously hard-coded network
            # so that existing callers see unchanged behavior.
            net_ids = ['10a92c55-bbc5-4713-94c0-175bf66ea489']
        nics = [{'net-id': net_id, 'v4-fixed-ip': ''} for net_id in net_ids]

        vm = self.client.servers.create(node_name,
                                        image_id,
                                        flavor,
                                        key_name=key_name,
                                        security_groups=[security_group],
                                        userdata=image_userdata,
                                        nics=nics)

        # cache instance object locally, keyed by instance id
        self._instances[vm.id] = vm

        return vm.id