def start_instance(self, key_name, public_key_path, private_key_path,
                   security_group, flavor, image_id, image_userdata,
                   username=None):
    """
    Start an instance on the configured cloud provider.

    The SSH key pair and the security group are checked (through
    `_check_keypair` and `_check_security_group`) before the instance
    is requested.

    :param str key_name: name of the SSH key pair
    :param str public_key_path: path to the SSH public key
    :param str private_key_path: path to the SSH private key
    :param str security_group: name of the security group to apply
    :param str flavor: instance type to request
    :param str image_id: ID of the image to boot
    :param str image_userdata: user data handed to the instance
    :param str username: accepted for interface compatibility;
        not used by this implementation
    :return: ID of the started instance
    :raises ClusterError: if the error text mentions ``TooManyInstances``
    :raises InstanceError: on any other error while starting the instance
    """
    connection = self._connect()

    log.debug("Checking keypair `%s`.", key_name)
    self._check_keypair(key_name, public_key_path, private_key_path)
    log.debug("Checking security group `%s`.", security_group)
    self._check_security_group(security_group)

    try:
        reservation = connection.run_instances(
            image_id,
            key_name=key_name,
            security_groups=[security_group],
            instance_type=flavor,
            user_data=image_userdata)
    except Exception as ex:
        log.error("Error starting instance: %s", ex)
        # Search the error *text*: an exception object is not a string,
        # so `"..." in ex` does not perform a substring match.
        if "TooManyInstances" in str(ex):
            raise ClusterError(ex)
        raise InstanceError(ex)

    # Only one instance is requested, so the reservation's last
    # instance is the one that was just started.
    return reservation.instances[-1].id
def _validate_and_convert(cfgtree, evict_on_error=True):
    """
    Validate `cfgtree` against `SCHEMA` and return the converted tree.

    Only sections named in `SCHEMA` and present in `cfgtree` are
    processed.  Every stanza is run through the section's schema;
    ``cloud`` stanzas are further processed by
    `_validate_cloud_section` and ``cluster`` stanzas are checked by
    `_validate_node_group_names`.

    :param cfgtree: mapping of section name to a mapping of
        stanza name to raw properties
    :param bool evict_on_error: when true, a stanza that fails
        validation is dropped from the result; errors are logged
        in either case
    :return: dict with the same section/stanza layout holding the
        validated values
    """
    objtree = {}
    for section, model in SCHEMA.iteritems():
        if section not in cfgtree:
            continue
        converted = objtree[section] = {}
        for name, properties in cfgtree[section].iteritems():
            log.debug("Checking section `%s/%s` ...", section, name)
            try:
                converted[name] = Schema(model).validate(properties)
                if section == 'cloud':
                    # further checks for cloud providers
                    converted[name] = _validate_cloud_section(converted[name])
                elif section == 'cluster':
                    # node name patterns in clusters must conform to RFC952
                    _validate_node_group_names(converted[name])
            except (SchemaError, ValueError) as err:
                log.error("In section `%s/%s`: %s", section, name, err)
                if not evict_on_error:
                    continue
                log.error(
                    "Dropping configuration section `%s/%s`"
                    " because of the above errors", section, name)
                # the entry is already present when the failure came
                # from one of the follow-up checks rather than from
                # the schema validation itself
                converted.pop(name, None)
    return objtree
def _get_os_config_value(thing, value, varnames, default=_NO_DEFAULT):
    """
    Resolve an OpenStack setting from the environment or the config file.

    The environment variables in `varnames` are tried in order; the
    first one that is set wins, even over `value`.  A `FutureWarning`
    is issued when the environment value differs from a configured
    `value`.  When no variable is set, a truthy `value` is returned,
    then `default` if one was given.

    :param str thing: human-readable name of the setting (for messages)
    :param value: value read from the config file, or ``None``
    :param varnames: non-empty sequence of environment variable names
    :param default: fallback value; omit it to make the setting mandatory
    :raises RuntimeError: if nothing is set and no default was given
    """
    assert varnames, "List of env variable names cannot be empty"

    for varname in varnames:
        from_env = os.getenv(varname, None)
        if from_env is None:
            continue
        overrides_config = (value is not None and from_env != value)
        if overrides_config:
            warn("OpenStack {thing} is present both in the environment"
                 " and the config file. Environment variable {varname}"
                 " takes precedence, but this may change in the future."
                 .format(thing=thing, varname=varname),
                 FutureWarning)
        else:
            log.debug('OpenStack %s taken from env variable %s',
                      thing, varname)
        return from_env

    if value:
        return value
    if default is not _NO_DEFAULT:
        return default
    # first variable name is preferred; others are for
    # backwards-compatibility only
    raise RuntimeError(
        "There is no default value for OpenStack {0};"
        " please specify one in the config file"
        " or using environment variable {1}."
        .format(thing, varnames[0]))
def _connect(self):
    """
    Return the EC2 connection, creating and caching it on first use.

    :return: :py:class:`boto.ec2.connection.EC2Connection`
    :raises: whatever exception the connection attempt raised
        (logged, then re-raised unchanged)
    """
    # reuse the cached connection if there is one
    if self._connection:
        return self._connection

    try:
        log.debug("Connecting to ec2 host %s", self._ec2host)
        endpoint = ec2.regioninfo.RegionInfo(
            name=self._region_name, endpoint=self._ec2host)
        settings = dict(
            aws_access_key_id=self._access_key,
            aws_secret_access_key=self._secret_key,
            is_secure=self._secure,
            host=self._ec2host,
            port=self._ec2port,
            path=self._ec2path,
            region=endpoint,
        )
        self._connection = boto.connect_ec2(**settings)
        log.debug("Connection has been successful.")
    except Exception as e:
        log.error("connection to cloud could not be "
                  "established: message=`%s`", str(e))
        raise

    return self._connection
def stop(self, force=False, wait=False):
    """
    Terminate all VMs in this cluster and delete its saved data.

    :param bool force: remove the cluster's saved data even if
        not all nodes could be stopped
    :param bool wait: passed through to `_stop_all_nodes`
    """
    log.debug("Stopping cluster `%s` ...", self.name)
    failed = self._stop_all_nodes(wait)

    if not failed:
        self._delete_saved_data()
        return

    if not force:
        # keep the cluster record so `stop` can be retried later
        self.repository.save_or_update(self)
        log.warning(
            "Not all cluster nodes have been terminated."
            " Fix errors above and re-run `elasticluster stop %s`",
            self.name)
        return

    self._delete_saved_data()
    log.warning(
        "Not all cluster nodes have been terminated."
        " However, as requested, data about the cluster"
        " has been removed from local storage.")
def _start_node(node):
    """
    Start a single node, unless it is already running.

    :param node: node to start; must provide `name`, `is_alive()`
        and `start()`
    :return: ``True`` if the node is already alive or was started;
        the caught `KeypairError` instance if starting failed because
        of a key pair problem; ``None`` if starting failed for any
        other reason (the error is logged)
    """
    log.debug("_start_node: working on node %s", node.name)
    # TODO: the following check is not optimal yet. When a
    # node is still in a starting state,
    # it will start another node here,
    # since the `is_alive` method will only check for
    # running nodes (see issue #13)
    if node.is_alive():
        log.info("Not starting node %s which is "
                 "already up&running.", node.name)
        return True

    try:
        node.start()
        log.info("_start_node: node has been started")
        return True
    except KeypairError as e:
        # hand the error object back to the caller instead of raising
        return e
    except Exception as e:
        log.error("could not start node `%s` for reason "
                  "`%s`", node.name, e)
        return None
def _allocate_address(self, instance):
    """
    Assign a public IP address to the given instance.

    An address not currently bound to any instance is reused if one
    exists; otherwise a new address is allocated.

    :param instance: instance to assign the address to
    :type instance: :py:class:`boto.ec2.instance.Reservation`
    :return: the public IP address, or ``None`` if a new address
        could not be allocated (the error is logged)
    """
    connection = self._connect()

    for address in connection.get_all_addresses():
        if not address.instance_id:
            # free address, use it
            instance.use_ip(address)
            log.debug("Assigning ip address `%s` to instance `%s`",
                      address.public_ip, instance.id)
            return address.public_ip

    # no free address available: allocate a new one
    try:
        address = connection.allocate_address()
        instance.use_ip(address)
        return address.public_ip
    except Exception as ex:
        log.error("Unable to allocate a public IP address"
                  " to instance `%s`: %s", instance.id, ex)
        return None
def stop(self, force=False):
    """
    Stop every node of this cluster and delete the cluster storage.

    If some nodes could not be stopped, the cluster is kept (and
    re-saved) unless `force` is true.

    :param bool force: delete the cluster from storage even if
        some nodes are left
    """
    for node in self.get_all_nodes():
        try:
            node.stop()
            self.nodes[node.type].remove(node)
        except:
            # Boto does not always raises an `Exception` class!
            log.error("could not stop instance `%s`, it might "
                      "already be down.", node.instance_id)

    leftover = self.get_all_nodes()
    if leftover and not force:
        log.warning("Not all instances have been terminated. "
                    "Please rerun the `elasticluster stop %s`", self.name)
        self._storage.dump_cluster(self)
        return

    if leftover:
        log.warning("Not all instances have been terminated. However, "
                    "as requested, the cluster has been force-removed.")
    else:
        log.debug("Removing cluster %s.", self.name)
    self._setup_provider.cleanup()
    self._storage.delete_cluster(self.name)
def __prepare_key_pair(self, key_name, private_key_path, public_key_path, password):
    """
    Make sure a key pair named `key_name` exists on the cloud provider.

    Nothing is done when `key_name` is empty or the provider already
    lists a key pair with that name.  Otherwise the key is taken from,
    in order of preference: the public key file, a ``.pem`` private
    key file, or a key pair newly created through the driver and
    stored as ``<key_name>.pem`` under `self.storage_path`.

    :param str key_name: name of the key pair
    :param str private_key_path: path to a ``.pem`` private key, or empty
    :param str public_key_path: path to a public key file, or empty
    :param password: passed on to the PEM import helper
    :raises KeypairError: if the public key cannot be uploaded, or
        the private key file is not a ``.pem`` file
    """
    if not key_name:
        log.warning('user_key_name has not been defined, assuming password-based authentication')
        return

    if any(k.name == key_name for k in self.driver.list_key_pairs()):
        log.info('Key pair `%s` already exists, skipping import.', key_name)
        return

    if public_key_path:
        log.debug("importing public key from file %s ...", public_key_path)
        if not self.driver.import_key_pair_from_file(
                name=key_name,
                key_file_path=os.path.expandvars(os.path.expanduser(public_key_path))):
            raise KeypairError(
                'Could not upload public key {p}'
                .format(p=public_key_path))
    elif private_key_path:
        if not private_key_path.endswith('.pem'):
            raise KeypairError(
                'can only work with .pem private keys,'
                ' derive public key and set user_key_public')
        log.debug("deriving and importing public key from private key")
        self.__import_pem(key_name, private_key_path, password)
    else:
        pem_file_path = os.path.join(self.storage_path, key_name + '.pem')
        if not os.path.exists(pem_file_path):
            # The file holds private key material: create it readable
            # and writable by the owner only, instead of relying on
            # the process umask.
            fd = os.open(pem_file_path,
                         os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
            with os.fdopen(fd, 'w') as new_key_file:
                new_key_file.write(
                    self.driver.create_key_pair(name=key_name))
        self.__import_pem(key_name, pem_file_path, password)
def __init__(self, driver_name, storage_path=None, **options):
    """
    Instantiate the libcloud driver named `driver_name`.

    Options listed for the driver in `self.provider_args` are
    mandatory; they are removed from `options` and passed to the
    driver positionally.  All remaining options are passed as
    keyword arguments.

    :param str driver_name: libcloud provider name (case-insensitive)
    :param str storage_path: stored on the instance as `storage_path`
    :param options: driver-specific settings
    :raises ValueError: if a mandatory option is missing, or
        libcloud's `Provider` has no attribute for `driver_name`
    """
    self.storage_path = storage_path
    driver_name = driver_name.lower()

    try:
        required = self.provider_args[driver_name]
        if not set(required).issubset(options):
            raise ValueError(
                'Cloud provider {0} requires all of {1} to be set'
                .format(driver_name, ' '.join(required)))
        positional = [options.pop(arg) for arg in required]
    except KeyError:
        # no required args?!
        positional = []

    # fix for openstack
    needs_forced_auth_url = (
        'auth_url' in options and 'ex_force_auth_url' not in options)
    if needs_forced_auth_url:
        options['ex_force_auth_url'] = options['auth_url'].rsplit('/', 1)[0]

    try:
        provider_name = getattr(Provider, driver_name.upper())
    except AttributeError:
        raise ValueError(
            "No libcloud driver for provider {name}"
            .format(name=driver_name))

    driver_class = get_driver(provider_name)
    log.debug(
        "Initializing libcloud driver `%s` ...",
        driver_class.__name__)
    self.driver = driver_class(*positional, **options)
def _stop_all_nodes(self, wait=False):
    """
    Terminate all cluster nodes and return the number of failures.

    Nodes without an instance ID are simply removed from the cluster.
    A node whose instance cannot be found is logged as already
    terminated and is not counted as a failure.

    :param bool wait: passed through to each node's `stop` method
    :return: number of nodes that could not be stopped
    """
    failures = 0
    for node in self.get_all_nodes():
        if node.instance_id:
            try:
                node.stop(wait)
                self.nodes[node.kind].remove(node)
                log.debug(
                    "Removed node `%s` from cluster `%s`",
                    node.name, self.name)
            except InstanceNotFoundError:
                log.info(
                    "Node `%s` (instance ID `%s`) was not found;"
                    " assuming it has already been terminated.",
                    node.name, node.instance_id)
            except Exception as err:
                failures += 1
                log.error(
                    "Could not stop node `%s` (instance ID `%s`): %s %s",
                    node.name, node.instance_id, err, err.__class__)
        else:
            log.warning(
                "Node `%s` has no instance ID."
                " Assuming it did not start correctly,"
                " so removing it anyway from the cluster.",
                node.name)
            self.nodes[node.kind].remove(node)
    return failures
def _write_extra_vars(self, cluster, filename='extra_vars.yml'):
    """
    Write the cluster's variables to a YAML file for Ansible.

    The file contains a single top-level ``elasticluster`` key whose
    value merges the cluster's variables, its cloud provider's
    variables (key ``cloud``), per-node variables keyed by node name
    (key ``nodes``) and the current working directory (key
    ``output_dir``).  Entries under an ``extra`` key are merged into
    their parent dictionary.

    :param cluster: cluster whose variables are exported
    :param str filename: path of the file to write
    :return: `filename`
    """
    # build dict of "extra vars"
    # XXX: we should not repeat here names of attributes that
    # should not be exported... it would be better to use a simple
    # naming convention (e.g., omit whatever starts with `_`)
    extra_vars = cluster.to_vars_dict()
    extra_vars.update(extra_vars.pop('extra', {}))
    extra_vars['cloud'] = cluster.cloud_provider.to_vars_dict()

    nodes = extra_vars.pop('nodes')
    extra_vars['nodes'] = {}
    for instances in nodes.values():
        for node in instances:
            node_vars = node.to_vars_dict()
            node_vars.update(node_vars.pop('extra', {}))
            extra_vars['nodes'][node.name] = node_vars
    extra_vars['output_dir'] = os.getcwd()

    # The variables may contain passwords (which is why the output
    # file is made private below), so only the destination is logged,
    # never the values themselves.
    log.debug("Writing extra vars to file %s", filename)
    with open(filename, 'w') as output:
        # ensure output file is not readable to other users,
        # as it may contain passwords
        os.fchmod(output.fileno(), 0o600)
        # dump variables in YAML format for Ansible to read
        yaml.dump({'elasticluster': extra_vars}, output)
    return filename
def _find_vpc_by_name(self, vpc_name):
    """
    Look up a VPC by ID or by the value of its ``Name`` tag.

    :param str vpc_name: VPC ID or VPC name
    :return: tuple `(vpc_connection, vpc_id)` for the first match
    :raises VpcError: if no VPC matches `vpc_name`
    """
    vpc_connection = boto.vpc.connect_to_region(
        self._region_name,
        aws_access_key_id=self._access_key,
        aws_secret_access_key=self._secret_key,
        is_secure=self._secure,
        host=self._ec2host,
        port=self._ec2port,
        path=self._ec2path,
    )
    log.debug("VPC connection has been successful.")

    for vpc in vpc_connection.get_all_vpcs():
        identifiers = [vpc.id]
        if 'Name' in vpc.tags:
            identifiers.append(vpc.tags['Name'])
        if vpc_name not in identifiers:
            continue
        if vpc_name != vpc.id:
            # then `vpc_name` is the VPC name
            log.debug("VPC `%s` has ID `%s`", vpc_name, vpc.id)
        return (vpc_connection, vpc.id)

    raise VpcError('Cannot find VPC `{0}`.'.format(vpc_name))
def __init_keystone_session_v2(self, check=False):
    """
    Create and return a session object using Keystone API v2.

    :param bool check: when true, try the session out by listing
        flavors through a Nova client before returning it
    :return: the session object, or ``None`` if the check failed
    """
    from keystoneauth1 import loading as keystone_loading

    password_loader = keystone_loading.get_plugin_loader('password')
    auth = password_loader.load_from_options(
        auth_url=self._os_auth_url,
        username=self._os_username,
        password=self._os_password,
        project_name=self._os_tenant_name,
    )
    sess = keystoneauth1.session.Session(auth=auth, verify=self._os_cacert)

    if check:
        log.debug("Checking that Keystone API v2 session works...")
        try:
            # an invalid session makes the following raise
            probe = nova_client.Client(
                self._compute_api_version,
                session=sess, cacert=self._os_cacert)
            probe.flavors.list()
        except keystoneauth1.exceptions.NotFound as err:
            log.warning("Creating Keystone v2 session failed: %s", err)
            return None
        except keystoneauth1.exceptions.ClientException as err:
            log.error("OpenStack server rejected request (likely configuration error?): %s", err)
            return None  # FIXME: should we be raising an error instead?

    # reaching this point means the session was accepted (or not checked)
    log.info("Using Keystone API v2 session to authenticate to OpenStack")
    return sess
def _allocate_address_nova(self, instance, network_ids):
    """
    Assign a floating IP address to the given instance, using the
    OpenStack Compute ('Nova') API.

    A floating IP without a fixed IP attached is reused if one
    exists; otherwise a new one is requested.

    :param instance: instance to assign the address to
    :param list network_ids: List of IDs (as strings) of networks
        where to request allocation of the floating IP.  **Ignored**
        (only used by the corresponding Neutron API function).
    :return: public ip address
    :raises RuntimeError: if no floating IP could be obtained
    """
    self._init_os_api()
    with OpenStackCloudProvider.__node_start_lock:
        # Use the `novaclient` API (works with python-novaclient <8.0.0)
        free_ips = [ip for ip in self.nova_client.floating_ips.list()
                    if not ip.fixed_ip]
        if free_ips:
            ip = free_ips.pop()
        else:
            log.debug("Trying to allocate a new floating IP ...")
            ip = self.nova_client.floating_ips.create()
        if not ip:
            # the message must use `instance.id`: there is no
            # `instance_id` name in this scope
            raise RuntimeError(
                "Could not allocate floating IP for VM {0}"
                .format(instance.id))
        instance.add_floating_ip(ip)
    return ip.ip
def _check_security_groups(self, names):
    """
    Raise an exception if any of the named security groups does not exist.

    The group list is read through the Nova client; if that raises
    `AttributeError`, the Neutron client is used instead.

    :param List[str] names: List of security group names
    :return: ``True`` if all groups exist
    :raises: `SecurityGroupError` if a group does not exist
    """
    log.debug("Checking existence of security group(s) %s ...", names)
    try:
        # python-novaclient < 8.0.0
        existing = set(
            sg.name for sg in self.nova_client.security_groups.list())
    except AttributeError:
        listing = self.neutron_client.list_security_groups()
        existing = set(sg[u'name'] for sg in listing['security_groups'])

    # TODO: We should be able to create the security group if it
    # doesn't exist and at least add a rule to accept ssh access.
    # Also, we should be able to add new rules to a security group
    # if needed.
    missing = set(names).difference(existing)
    if not missing:
        return True
    raise SecurityGroupError(
        "Security group(s) `{0}` do not exist"
        .format(', '.join(missing)))
def execute(self):
    """
    Replace the current process with an `ssh` session to a cluster node.

    The target is the node named by `self.params.ssh_to` if given,
    otherwise the cluster's frontend node.  Returns without doing
    anything (after logging) if the cluster cannot be loaded; exits
    with status 1 if the node cannot be reached.

    :raises ValueError: if `self.params.ssh_to` names no cluster node
    """
    creator = make_creator(self.params.config,
                           storage_path=self.params.storage)
    cluster_name = self.params.cluster
    try:
        cluster = creator.load_cluster(cluster_name)
        cluster.update()
    except (ClusterNotFound, ConfigurationError) as ex:
        log.error("Setting up cluster %s: %s\n", cluster_name, ex)
        return

    target_name = self.params.ssh_to
    if not target_name:
        frontend = cluster.get_frontend_node()
    else:
        try:
            by_name = {n.name: n for n in cluster.get_all_nodes()}
            frontend = by_name[target_name]
        except KeyError:
            raise ValueError(
                "Hostname %s not found in cluster %s" % (self.params.ssh_to, cluster_name))

    try:
        if not frontend.preferred_ip:
            # connecting once makes sure the node is reachable and
            # records the value of `preferred_ip`, which is then saved
            ssh = frontend.connect(keyfile=cluster.known_hosts_file)
            if ssh:
                ssh.close()
            cluster.repository.save_or_update(cluster)
    except NodeNotFound as ex:
        log.error("Unable to connect to the frontend node: %s", str(ex))
        sys.exit(1)

    host = frontend.connection_ip()

    # check for nonstandard port, either IPv4 or IPv6
    addr, port = host, str(SSH_PORT)
    if ':' in host:
        match = IPV6_RE.match(host)
        if match:
            parts = match.groups()
            addr, port = parts[0], parts[1]
        else:
            addr, _, port = host.partition(':')

    username = frontend.image_user
    knownhostsfile = cluster.known_hosts_file or '/dev/null'
    ssh_cmdline = [
        "ssh",
        "-i", frontend.user_key_private,
        "-o", "UserKnownHostsFile=%s" % knownhostsfile,
        "-o", "StrictHostKeyChecking=yes",
        "-p", port,
        '%s@%s' % (username, addr),
    ]
    ssh_cmdline.extend(self.params.ssh_args)
    log.debug("Running command `%s`", ' '.join(ssh_cmdline))
    os.execlp("ssh", *ssh_cmdline)
def _connect(self):
    """
    Connect to the EC2 cloud provider and, if a VPC is configured,
    resolve it to a VPC ID.

    The EC2 and VPC connections are cached on the instance; the VPC
    connection is ``None`` when no VPC is configured.

    :return: :py:class:`boto.ec2.connection.EC2Connection`
    :raises VpcError: if the configured VPC matches neither the ID nor
        the ``Name`` tag of any existing VPC
    :raises: any other connection error is logged and re-raised
    """
    # check for existing connection
    if self._ec2_connection:
        return self._ec2_connection

    # Must be bound even when no VPC is configured, because it is
    # stored on the instance unconditionally below.
    vpc_connection = None
    try:
        log.debug("Connecting to ec2 host %s", self._ec2host)
        region = ec2.regioninfo.RegionInfo(name=self._region_name,
                                           endpoint=self._ec2host)

        # connect to webservice
        ec2_connection = boto.connect_ec2(
            aws_access_key_id=self._access_key,
            aws_secret_access_key=self._secret_key,
            is_secure=self._secure,
            host=self._ec2host,
            port=self._ec2port,
            path=self._ec2path,
            region=region)
        log.debug("EC2 connection has been successful.")

        if self._vpc:
            vpc_connection = boto.connect_vpc(
                aws_access_key_id=self._access_key,
                aws_secret_access_key=self._secret_key,
                is_secure=self._secure,
                host=self._ec2host,
                port=self._ec2port,
                path=self._ec2path,
                region=region)
            log.debug("VPC connection has been successful.")

            for vpc in vpc_connection.get_all_vpcs():
                # a VPC need not carry a `Name` tag
                vpc_name = vpc.tags.get('Name')
                log.debug("Checking whether %s matches %s/%s",
                          self._vpc, vpc_name, vpc.id)
                if self._vpc in [vpc_name, vpc.id]:
                    self._vpc_id = vpc.id
                    if self._vpc != self._vpc_id:
                        log.debug("VPC %s matches %s",
                                  self._vpc, self._vpc_id)
                    break
            else:
                raise VpcError('VPC %s does not exist.' % self._vpc)
    except Exception as e:
        log.error("connection to ec2 could not be "
                  "established: message=`%s`", str(e))
        raise

    self._ec2_connection, self._vpc_connection = (
        ec2_connection, vpc_connection)
    return self._ec2_connection
def start(self):
    """
    Start an instance for this node through the cloud provider.

    This method is non-blocking: it returns as soon as the cloud
    provider has returned the instance id, which is stored in
    `self.instance_id`.
    """
    log.info("Starting node %s.", self.name)
    # `start_instance` takes the private key path as its third
    # positional argument, right after the public key path.
    # NOTE(review): assumes this node exposes `user_key_private`
    # next to `user_key_public` -- confirm against the class.
    self.instance_id = self._cloud_provider.start_instance(
        self.user_key_name, self.user_key_public, self.user_key_private,
        self.security_group, self.flavor, self.image,
        self.image_userdata)
    log.debug("Node %s has instance_id: `%s`", self.name,
              self.instance_id)
def update_config(self, cluster_config, login_config):
    """
    Update the current configuration.

    This method is usually called after loading a `Cluster` instance
    from persistent storage.  Not all fields are updated, only those
    that can be changed safely: at present just ``ssh_to``, which is
    taken from `cluster_config`.

    :param cluster_config: new cluster configuration values
    :param login_config: not used by this method
    """
    previous = self.__update_option(cluster_config, 'ssh_to', 'ssh_to')
    if not previous:
        return
    log.debug("Attribute 'ssh_to' updated: %s -> %s",
              previous, self.ssh_to)
def start_instance(self, key_name, public_key_path, private_key_path,
                   security_group, flavor, image_id, image_userdata,
                   username=None, node_name=None, **kwargs):
    """
    Start a new server with the given properties and return its ID.

    Steps performed:

    * check the SSH key pair (under the class-wide node start lock,
      since this method may run in several threads at once)
    * check the security group
    * verify that the image ID and the flavor name exist
    * create the server and remember it in `self._instances`

    :param str key_name: name of the ssh key to connect
    :param str public_key_path: path to ssh public key
    :param str private_key_path: path to ssh private key
    :param str security_group: firewall rule definition to apply
        on the instance
    :param str flavor: machine type to use for the instance
    :param str image_id: image type (os) to use for the instance
    :param str image_userdata: command to execute after startup
    :param str username: username for the given ssh key, default None
    :param str node_name: name given to the new server
    :return: str - instance id of the started instance
    :raises ImageError: if no image has ID `image_id`
    :raises FlavorError: if no flavor is named `flavor`
    """
    log.debug("Checking keypair `%s`.", key_name)
    with OpenStackCloudProvider.__node_start_lock:
        self._check_keypair(key_name, public_key_path, private_key_path)

    log.debug("Checking security group `%s`.", security_group)
    self._check_security_group(security_group)

    # the image must be known to the cloud
    known_image_ids = [img.id for img in self._get_images()]
    if image_id not in known_image_ids:
        raise ImageError("No image found with id '%s' on cloud "
                         "%s" % (image_id, self._os_auth_url))

    # ... and so must the flavor; the first one with that name is used
    candidates = [fl for fl in self._get_flavors() if fl.name == flavor]
    if not candidates:
        raise FlavorError("No flavor found with name %s on cloud "
                          "%s" % (flavor, self._os_auth_url))
    flavor = candidates[0]

    vm = self.client.servers.create(
        node_name, image_id, flavor,
        key_name=key_name,
        security_groups=[security_group],
        userdata=image_userdata)
    self._instances[vm.id] = vm
    return vm.id
def execute(self):
    """
    Replace the current process with `self.command` connected to a node.

    The target node is chosen by `cluster.get_ssh_to_node()`.  Returns
    (after logging) if the cluster cannot be loaded; exits with status
    1 if the node cannot be reached or has no known IP address.
    """
    creator = make_creator(self.params.config,
                           storage_path=self.params.storage)
    cluster_name = self.params.cluster
    try:
        cluster = creator.load_cluster(cluster_name)
    except (ClusterNotFound, ConfigurationError) as ex:
        log.error("Setting up cluster %s: %s", cluster_name, ex)
        return

    # XXX: the default value of `self.params.ssh_to` should = the
    # default value for `ssh_to` in `Cluster.get_ssh_to_node()`
    frontend = cluster.get_ssh_to_node(self.params.ssh_to)

    log.debug("Updating the ip addresses of `%s`.", frontend.name)
    frontend.update_ips()

    try:
        if not frontend.preferred_ip:
            # connecting once makes sure the node is reachable and
            # records the value of `preferred_ip`, which is then saved
            ssh = frontend.connect(keyfile=cluster.known_hosts_file)
            if ssh:
                ssh.close()
            cluster.repository.save_or_update(cluster)
    except NodeNotFound as ex:
        log.error("Unable to connect to the frontend node: %s", ex)
        sys.exit(1)

    # now delegate real connection to `ssh`
    host = frontend.connection_ip()
    if not host:
        log.error("No IP address known for node %s", frontend.name)
        sys.exit(1)
    addr, port = parse_ip_address_and_port(host)
    username = frontend.image_user
    knownhostsfile = cluster.known_hosts_file or '/dev/null'

    cmdline = [
        self.command,
        "-i", frontend.user_key_private,
        "-o", "UserKnownHostsFile={0}".format(knownhostsfile),
        "-o", "StrictHostKeyChecking=yes",
        "-o", "Port={0:d}".format(port),
        '%s@%s' % (username, addr),
    ]
    if cluster.ssh_proxy_command:
        proxy_command = expand_ssh_proxy_command(
            cluster.ssh_proxy_command, username, addr, port)
        # the proxy option goes right after the command name
        cmdline[1:1] = ['-o', 'ProxyCommand=' + proxy_command]
    cmdline.extend(self.params.cmds)

    log.debug("Running command `%s`", ' '.join(cmdline))
    os.execlp(self.command, *cmdline)
def _connect(self):
    """
    Return the EC2 connection, creating and caching it on first use.

    If a VPC is configured it is resolved through `_find_vpc_by_name`
    and the resulting connection and ID are stored as well; otherwise
    both are set to ``None``.

    :return: :py:class:`boto.ec2.connection.EC2Connection`
    :raises CloudProviderError: if the connection has no host name
    :raises: any other error is logged and re-raised
    """
    # reuse the cached connection if there is one
    if self._ec2_connection:
        return self._ec2_connection

    try:
        log.debug("Connecting to EC2 endpoint %s", self._ec2host)

        ec2_connection = boto.ec2.connect_to_region(
            self._region_name,
            aws_access_key_id=self._access_key,
            aws_secret_access_key=self._secret_key,
            is_secure=self._secure,
            host=self._ec2host,
            port=self._ec2port,
            path=self._ec2path,
        )
        # With the loose setting `BOTO_USE_ENDPOINT_HEURISTICS`
        # which is necessary to work around issue #592, Boto will
        # now accept *any* string as an AWS region name;
        # furthermore, it *always* returns a connection object --
        # so the only way to check that we are not going to run
        # into trouble is to check that there *is* a valid host
        # name on the other end of the connection.
        if not ec2_connection.host:
            raise CloudProviderError(
                "Cannot establish connection to EC2 region {0}"
                .format(self._region_name))
        log.debug("EC2 connection has been successful.")

        if self._vpc:
            vpc_connection, self._vpc_id = (
                self._find_vpc_by_name(self._vpc))
        else:
            vpc_connection = None
            self._vpc_id = None
    except Exception as err:
        log.error("Error connecting to EC2: %s", err)
        raise

    self._ec2_connection = ec2_connection
    self._vpc_connection = vpc_connection
    return self._ec2_connection
def start(self):
    """
    Start this node on the cloud with its stored instance properties.

    The call is non-blocking: it returns as soon as the cloud provider
    has handed back the instance id, which is stored in
    `self.instance_id`.  Use `is_alive` and `update_ips` afterwards to
    learn more about the state of the node.
    """
    log.info("Starting node %s.", self.name)
    # the instance is named after both the cluster and the node
    full_name = "%s-%s" % (self.cluster_name, self.name)
    self.instance_id = self._cloud_provider.start_instance(
        self.user_key_name,
        self.user_key_public,
        self.user_key_private,
        self.security_group,
        self.flavor,
        self.image_id,
        self.image_userdata,
        username=self.image_user,
        node_name=full_name,
        **self.extra)
    log.debug("Node %s has instance_id: `%s`",
              self.name, self.instance_id)
def connect(self):
    """
    Connect to the node via ssh using the paramiko library.

    The address to connect to is the one returned by
    `connection_ip()`.  Only socket-level errors are handled here;
    any other exception raised while connecting propagates.

    :return: :py:class:`paramiko.SSHClient` - ssh connection or None on
             failure
    """
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(IgnorePolicy())
    remote_ip = self.connection_ip()
    if not self.ip_public:
        log.debug("Instance id '%s' has no public ip, using private IP "
                  "'%s' for connecting", self.instance_id,
                  self.ip_private)
    try:
        log.debug("Trying to connect to host %s (%s)",
                  self.name, remote_ip)
        ssh.connect(remote_ip,
                    username=self.image_user,
                    allow_agent=True,
                    key_filename=self.user_key_private,
                    timeout=Node.connection_timeout)
        log.debug("Connection to %s succeded!", remote_ip)
        return ssh
    except socket.error as ex:
        log.debug("Host %s (%s) not reachable: %s.",
                  self.name, remote_ip, ex)
        # do not leave the unused client object open
        ssh.close()
        return None
def create_setup_provider(self, cluster_template, name=None):
    """
    Create the setup provider for the given cluster template.

    Options ending in ``_groups`` define, per node kind, the
    comma-separated list of groups the kind belongs to.  Options
    starting with ``<kind>_var_`` or ``global_var_`` (in either the
    ``setup`` or the ``cluster`` part of the template) become
    variables for that node kind.

    :param str cluster_template: template of the cluster
    :param str name: name of the cluster to read configuration properties
    :return: instance of the setup provider class selected by the
        ``provider`` option
    :raises ConfigurationError: if the ``provider`` option does not
        name a known setup provider
    """
    template_conf = self.cluster_conf[cluster_template]
    # Work on a copy: entries are added to and removed from `conf`
    # below, and the stored template must stay intact so that later
    # calls still see e.g. `playbook_path`.
    conf = dict(template_conf['setup'])
    conf['general_conf'] = self.general_conf.copy()
    if name:
        conf['cluster_name'] = name
    conf_login = template_conf['login']

    provider_name = conf.get('provider')
    if provider_name not in Configurator.setup_providers_map:
        raise ConfigurationError(
            "Invalid value `%s` for `setup_provider` in configuration "
            "file." % provider_name)

    storage_path = self.general_conf['storage_path']
    playbook_path = conf.pop('playbook_path', None)

    # `k[:-7]` strips the `_groups` suffix, leaving the node kind
    groups = dict((k[:-7], v.split(','))
                  for k, v in conf.items() if k.endswith('_groups'))

    environment = dict()
    for nodekind in groups:
        # Environment variables parsing
        environment[nodekind] = dict()
        all_options = (list(conf.items())
                       + list(template_conf['cluster'].items()))
        for key, value in all_options:
            # Set both group and global variables
            for prefix in ["%s_var_" % nodekind, "global_var_"]:
                if key.startswith(prefix):
                    # strip only the leading prefix, not every
                    # occurrence of it inside the key
                    var = key[len(prefix):]
                    environment[nodekind][var] = value
                    log.debug("setting variable %s=%s for node kind %s",
                              var, value, nodekind)

    provider = Configurator.setup_providers_map[provider_name]
    return provider(groups,
                    playbook_path=playbook_path,
                    environment_vars=environment,
                    storage_path=storage_path,
                    sudo=conf_login['image_sudo'],
                    sudo_user=conf_login['image_user_sudo'],
                    **conf)
def _init_az_api(self):
    """
    Initialise client objects for talking to Azure API.

    This is in a separate function so to be called by ``__init__``
    and ``__setstate__``.  The clients are created only if
    `self._resource_client` is still ``None``; the check and the
    creation happen while holding the instance lock.
    """
    with self.__lock:
        if self._resource_client is not None:
            # already initialised
            return

        log.debug("Making Azure `ServicePrincipalcredentials` object"
                  " with tenant=%r, client_id=%r, secret=%r ...",
                  self.tenant_id, self.client_id,
                  ('<redacted>' if self.secret else None))
        credentials = ServicePrincipalCredentials(
            tenant=self.tenant_id,
            client_id=self.client_id,
            secret=self.secret,
        )
        subscription = self.subscription_id

        log.debug("Initializing Azure `ComputeManagementclient` ...")
        self._compute_client = ComputeManagementClient(
            credentials, subscription)

        log.debug("Initializing Azure `NetworkManagementclient` ...")
        self._network_client = NetworkManagementClient(
            credentials, subscription)

        log.debug("Initializing Azure `ResourceManagementclient` ...")
        self._resource_client = ResourceManagementClient(
            credentials, subscription)

        log.info("Azure API clients initialized.")
def is_alive(self):
    """
    Check whether this node is up and running in the cloud.

    Only the status reported by the cloud provider is checked, so a
    node may be reported as running before it is ready for ssh.

    :return: ``False`` if the node has no instance id or the status
        could not be retrieved (the error is logged and ignored);
        otherwise the value returned by the cloud provider's
        `is_instance_running`
    """
    if not self.instance_id:
        return False

    running = False
    try:
        log.debug("Getting information for instance %s",
                  self.instance_id)
        running = self._cloud_provider.is_instance_running(
            self.instance_id)
    except Exception as ex:
        log.debug("Ignoring error while looking for vm id %s: %s",
                  self.instance_id, str(ex))
    # hand the result back to the caller
    return running
def is_alive(self):
    """
    Check whether this node is up and running in the cloud.

    Only the status reported by the cloud provider is checked, so a
    node may be reported as running before it is ready for ssh.
    When the node is running, its IP addresses are refreshed through
    `update_ips`.

    :return: ``False`` if the node has no instance id or the status
        could not be retrieved; otherwise the value returned by the
        cloud provider's `is_instance_running`
    """
    if not self.instance_id:
        return False

    running = False
    try:
        log.debug("Getting information for instance %s",
                  self.instance_id)
        running = self._cloud_provider.is_instance_running(
            self.instance_id)
    except Exception as ex:
        log.debug("Ignoring error while looking for vm id %s: %s",
                  self.instance_id, str(ex))

    if not running:
        log.debug("node `%s` (instance id `%s`) still building...",
                  self.name, self.instance_id)
    else:
        log.debug("node `%s` (instance id %s) is up and running",
                  self.name, self.instance_id)
        self.update_ips()
    return running
def start_instance(self, key_name, public_key_path, private_key_path,
                   security_group, flavor, image_id, image_userdata,
                   username=None, node_name=None, **kwargs):
    """Starts a new instance on the cloud using the given properties.
    The following tasks are done to start an instance:

    * establish a connection to the cloud web service
    * check ssh keypair and upload it if it does not yet exist. This is
      a locked process, since this function might be called in multiple
      threads and we only want the key to be stored once.
    * check if the security group exists
    * run the instance with the given properties

    :param str key_name: name of the ssh key to connect
    :param str public_key_path: path to ssh public key
    :param str private_key_path: path to ssh private key
    :param str security_group: firewall rule definition to apply on the
                               instance
    :param str flavor: machine type to use for the instance
    :param str image_id: image type (os) to use for the instance
    :param str image_userdata: command to execute after startup
    :param str username: username for the given ssh key, default None
    :param str node_name: accepted for interface compatibility;
                          not used by this implementation

    :return: str - instance id of the started instance
    :raises ClusterError: if the error text mentions ``TooManyInstances``
    :raises InstanceError: on any other error while starting the instance
    """
    connection = self._connect()

    log.debug("Checking keypair `%s`.", key_name)
    # the `_check_keypair` method has to be called within a lock,
    # since it will upload the key if it does not exist and if this
    # happens for every node at the same time ec2 will throw an error
    # message (see issue #79)
    with BotoCloudProvider.__node_start_lock:
        self._check_keypair(key_name, public_key_path, private_key_path)

    log.debug("Checking security group `%s`.", security_group)
    self._check_security_group(security_group)

    try:
        reservation = connection.run_instances(
            image_id,
            key_name=key_name,
            security_groups=[security_group],
            instance_type=flavor,
            user_data=image_userdata)
    except Exception as ex:
        log.error("Error starting instance: %s", ex)
        # Search the error *text*: an exception object is not a string,
        # so `"..." in ex` does not perform a substring match.
        if "TooManyInstances" in str(ex):
            raise ClusterError(ex)
        raise InstanceError(ex)

    # Only one instance is requested, so the reservation's last
    # instance is the one that was just started.
    return reservation.instances[-1].id
def _build_inventory(self, cluster):
    """
    Write the Ansible inventory file for `cluster` and return its path.

    Nodes without a preferred IP address, and nodes whose kind is not
    listed in `self.groups`, are skipped with a warning.  Each
    remaining node is listed under every group of its kind.

    :param cluster: cluster to build inventory for
    :type cluster: :py:class:`elasticluster.cluster.Cluster`
    :return: path of the inventory file, or ``None`` if there was
        nothing to write
    """
    inventory_data = defaultdict(list)

    for node in cluster.get_all_nodes():
        if node.preferred_ip is None:
            log.warning(
                "Ignoring node `{0}`: No IP address."
                .format(node.name))
            continue
        if node.kind not in self.groups:
            # FIXME: should this raise a `ConfigurationError` instead?
            log.warning(
                "Ignoring node `{0}`:"
                " Node kind `{1}` not defined in cluster!"
                .format(node.name, node.kind))
            continue

        ip_addr, port = parse_ip_address_and_port(node.preferred_ip)
        host_vars = ['ansible_user=%s' % node.image_user]
        if port != 22:
            host_vars.append('ansible_port=%s' % port)
        if node.kind in self.environment:
            for k, v in self.environment[node.kind].items():
                host_vars.append('%s=%s' % (k, v))

        entry = (node.name, ip_addr, ' '.join(host_vars))
        for group in self.groups[node.kind]:
            inventory_data[group].append(entry)

    if not inventory_data:
        log.info("No inventory file was created.")
        return None

    # create a temporary file to pass to ansible, since the
    # api is not stable yet...
    if self._storage_path_tmp:
        if not self._storage_path:
            self._storage_path = tempfile.mkdtemp()
        elasticluster.log.warning(
            "Writing inventory file to tmp dir `%s`", self._storage_path)

    inventory_path = os.path.join(
        self._storage_path, (cluster.name + '.inventory'))
    log.debug("Writing Ansible inventory to file `%s` ...", inventory_path)
    with open(inventory_path, 'w+') as inventory_file:
        for section, hosts in inventory_data.items():
            # Ansible throws an error "argument of type 'NoneType' is not
            # iterable" if a section is empty, so only sections that
            # have hosts are written
            if not hosts:
                continue
            inventory_file.write("\n[" + section + "]\n")
            for host in hosts:
                inventory_file.write(
                    "{0} ansible_host={1} {2}\n".format(*host))
    return inventory_path
def start_instance(self,
                   # these are common to any
                   # CloudProvider.start_instance() call
                   key_name, public_key_path, private_key_path,
                   security_group, flavor, image_id, image_userdata,
                   username=None,
                   # these params are specific to the
                   # GoogleCloudProvider
                   node_name=None,
                   boot_disk_type='pd-standard',
                   boot_disk_size=10,
                   tags=None,
                   scheduling=None,
                   accelerator_count=0,
                   accelerator_type='default',
                   allow_project_ssh_keys=True,
                   min_cpu_platform=None,
                   **kwargs):
    """
    Starts a new instance with the given properties and returns
    the instance id.

    :param str key_name: name of the ssh key to connect
    :param str public_key_path: path to ssh public key
    :param str private_key_path: path to ssh private key
    :param str security_group: firewall rule definition to apply on the
                               instance
    :param str flavor: machine type to use for the instance
    :param str image_id: image type (os) to use for the instance;
      either a full ``http(s)://`` URL, or an image name starting with
      one of the prefixes in ``IMAGE_NAME_SHORTCUTS``
    :param str image_userdata: command to execute after startup
    :param str username: username for the given ssh key, default None
    :param str node_name: name of the instance; lowercased and with
      ``_`` replaced by ``-`` to form the instance id.  If not given,
      a random ``elasticluster-<uuid>`` name is generated.
    :param str boot_disk_type: disk type name for the boot disk
    :param int boot_disk_size: size of the boot disk, in GB
    :param str|Sequence tags: "Tags" to label the instance.

      Can be either a single string (individual tags are comma-separated),
      or a sequence of strings (each string being a single tag).

    :param str scheduling: scheduling option to use for the instance
                           ("preemptible")
    :param int accelerator_count:
      Number of accelerators (e.g., GPUs) to make available in instance
    :param str accelerator_type:
      Type of accelerator to request.  Only used if
      ``accelerator_count`` is > 0.  Can be one of:

      * Full URL specifying an accelerator type valid for the zone and
        project VMs are being created in.  For example,
        ``https://www.googleapis.com/compute/v1/projects/[PROJECT_ID]/zones/[ZONE]/acceleratorTypes/[ACCELERATOR_TYPE]``

      * An accelerator type name (any string which is not a valid URL).
        This is internally prefixed with the string
        ``https://www.googleapis.com/compute/v1/projects/[PROJECT_ID]/zones/[ZONE]/acceleratorTypes/``
        to form a full URL.

    :param bool allow_project_ssh_keys:
      When ``True`` (default), SSH login is allowed to a node
      using any of the project-wide SSH keys (if they are
      defined).  When ``False``, only the SSH key specified by
      ElastiCluster config's ``[login/*]`` section will be allowed
      to log in (instance-level key).
    :param str min_cpu_platform: require CPUs of this type or better
      (e.g., "Intel Skylake").  Sent with the request whenever it is
      not ``None``.

    :return: str - instance id of the started instance
    :raises InstanceError: on unknown image shortcut or scheduling
      option, or if the instance creation request fails
    :raises TypeError: if `tags` is neither a string nor a sequence
    """
    # construct URLs
    project_url = '%s%s' % (GCE_URL, self._project_id)
    machine_type_url = '%s/zones/%s/machineTypes/%s' \
                       % (project_url, self._zone, flavor)
    boot_disk_type_url = '%s/zones/%s/diskTypes/%s' \
                         % (project_url, self._zone, boot_disk_type)
    # FIXME: `conf.py` should ensure that `boot_disk_size` has the right
    # type, so there would be no need to convert here
    boot_disk_size_gb = int(boot_disk_size)
    network_url = '%s/global/networks/%s' % (project_url, self._network)

    if image_id.startswith(('http://', 'https://')):
        image_url = image_id
    else:
        # allow image shortcuts (see docstring for IMAGE_NAME_SHORTCUTS)
        for prefix, os_cloud in self.IMAGE_NAME_SHORTCUTS.items():
            if image_id.startswith(prefix + '-'):
                image_url = '%s%s/global/images/%s' % (
                    GCE_URL, os_cloud, image_id)
                break
        else:
            raise InstanceError(
                "Unknown image name shortcut '{0}',"
                " please use the full `https://...` self-link URL."
                .format(image_id))

    scheduling_option = {}
    if scheduling == 'preemptible':
        scheduling_option['preemptible'] = True
    elif scheduling is not None:
        raise InstanceError("Unknown scheduling option: '%s'" % scheduling)

    if isinstance(tags, types.StringTypes):
        tags = tags.split(',')
    elif isinstance(tags, collections.Sequence):
        # ok, nothing to do
        pass
    elif tags is not None:
        raise TypeError(
            "The `tags` argument to `gce.start_instance`"
            " should be a string or a list, got {T} instead"
            .format(T=type(tags)))

    with open(public_key_path, 'r') as f:
        public_key_content = f.read()

    compute_metadata = [
        {
            "key": "ssh-keys",
            "value": "%s:%s" % (username, public_key_content),
        },
        {
            "key": "block-project-ssh-keys",
            "value": (not allow_project_ssh_keys),
        },
    ]
    if image_userdata:
        compute_metadata.append({
            "key": "startup-script",
            "value": image_userdata,
        })

    # construct the request body
    if node_name:
        # GCE doesn't allow "_"
        instance_id = node_name.lower().replace('_', '-')
    else:
        instance_id = 'elasticluster-%s' % uuid.uuid4()

    instance = {
        'name': instance_id,
        'machineType': machine_type_url,
        'tags': {
            'items': tags,
        },
        'scheduling': scheduling_option,
        'disks': [{
            'autoDelete': 'true',
            'boot': 'true',
            'type': 'PERSISTENT',
            'initializeParams': {
                'diskName': "%s-disk" % instance_id,
                'diskType': boot_disk_type_url,
                'diskSizeGb': boot_disk_size_gb,
                'sourceImage': image_url,
            },
        }],
        'networkInterfaces': [{
            'accessConfigs': [{
                'type': 'ONE_TO_ONE_NAT',
                'name': 'External NAT',
            }],
            'network': network_url,
        }],
        'serviceAccounts': [{
            'email': self._email,
            'scopes': GCE_DEFAULT_SCOPES,
        }],
        "metadata": {
            "kind": "compute#metadata",
            "items": compute_metadata,
        },
    }

    if min_cpu_platform is not None:
        instance['minCpuPlatform'] = min_cpu_platform

    # add accelerators/GPUs if requested
    if accelerator_count > 0:
        if accelerator_type.startswith(('https://', 'http://')):
            # use URL as-is
            accelerator_type_url = accelerator_type
        else:
            accelerator_type_url = (
                'https://www.googleapis.com/compute/{api_version}/'
                'projects/{project_id}/zones/{zone}/'
                'acceleratorTypes/{accelerator_type}'
                .format(
                    api_version=GCE_API_VERSION,
                    project_id=self._project_id,
                    zone=self._zone,
                    accelerator_type=accelerator_type
                ))
        log.debug(
            "VM instance `%s`:"
            " Requesting %d accelerator%s of type '%s'",
            instance_id, accelerator_count,
            ('s' if accelerator_count > 1 else ''),
            accelerator_type_url)
        instance['guestAccelerators'] = [
            {
                'acceleratorCount': accelerator_count,
                'acceleratorType': accelerator_type_url,
            }
        ]
        # no live migration with GPUs,
        # see: https://cloud.google.com/compute/docs/gpus#restrictions
        instance['scheduling']['onHostMaintenance'] = 'TERMINATE'
        # Preemptible VMs cannot be automatically restarted: GCE rejects
        # requests that set both `preemptible` and `automaticRestart`,
        # so only ask for automatic restart on standard instances.
        instance['scheduling']['automaticRestart'] = (
            not scheduling_option.get('preemptible', False))

    # create the instance
    gce = self._connect()
    request = gce.instances().insert(
        project=self._project_id, body=instance, zone=self._zone)
    try:
        response = self._execute_request(request)
        response = self._wait_until_done(response)
        self._check_response(response)
        return instance_id
    except (HttpError, CloudProviderError) as e:
        log.error("Error creating instance `%s`", e)
        raise InstanceError("Error creating instance `%s`" % e)
def start_instance(self, key_name, public_key_path, private_key_path,
                   security_group, flavor, image_id, image_userdata,
                   username='******', node_name=None, **extra):
    """
    Deploy a new Azure VM and return the pair identifying it.

    The Azure resource group and the network are both named after the
    cluster, which is taken to be the part of `node_name` before its
    first dash.  Resource group and network are created the first time
    they are needed; afterwards the VM template is deployed and this
    method waits for the deployment to finish.

    :param str key_name:
      **unused in Azure**, only present for interface compatibility
    :param str public_key_path:
      path to the ssh public key whose content is passed to the VM
      template (together with `username`)
    :param str private_key_path:
      **unused in Azure**, only present for interface compatibility
    :param str security_group:
      name passed as network security group to the templates; a
      warning is issued for any value other than ``default``, and
      ``<cluster name>-secgroup`` is used if it is empty
    :param str flavor:
      machine type (VM size) to use for the instance
    :param str image_id:
      disk image to use for the instance;
      has the form *publisher/offer/sku/version*
      (e.g., ``canonical/ubuntuserver/16.04.0-LTS/latest``)
    :param str image_userdata:
      command to execute after startup, **currently unused**
      (a warning is issued if it is set)
    :param str username:
      administrator user name passed to the VM template
    :param str node_name:
      name of the VM; must contain a dash

    :return: list ``[cluster_name, node_name]``
    """
    self._init_az_api()

    # Only parameters that are actually meaningful per cloud get a
    # warning; the key-related ones come from a `[login/*]` section,
    # which can be shared across different cloud providers.
    if security_group and security_group != 'default':
        warn("Setting `security_group` is currently not supported"
             " in the Azure cloud; VMs will all be attached to"
             " a network security group named after the cluster name.")
    if image_userdata:
        warn("Parameter `image_userdata` is currently not supported"
             " in the Azure cloud and will be ignored.")

    # `Node.cluster_name` is not passed down here, so recover the
    # cluster name from the node name: it is the substring before
    # the leftmost dash
    cluster_name, _rest = node_name.split('-', 1)

    with self.__lock:
        if cluster_name not in self._resource_groups_created:
            self._resource_client.resource_groups.create_or_update(
                cluster_name, {'location': self.location})
            self._resource_groups_created.add(cluster_name)

    with open(public_key_path, 'r') as keyfile:
        ssh_public_key = keyfile.read()

    publisher, offer, sku, version = self._split_image_id(image_id)

    if not security_group:
        security_group = (cluster_name + '-secgroup')

    # the network (and its subnet) share the cluster's name
    network_name = cluster_name
    network_settings = {
        'networkSecurityGroupName': {'value': security_group},
        'subnetName': {'value': network_name},
    }
    with self.__lock:
        if network_name not in self._networks_created:
            log.debug("Creating network `%s` in Azure ...", network_name)
            net_deployment = \
                self._resource_client.deployments.create_or_update(
                    cluster_name, network_name, {
                        'mode': DeploymentMode.incremental,
                        'template': _NET_TEMPLATE,
                        'parameters': network_settings,
                    })
            net_deployment.wait()
            self._networks_created.add(network_name)

    storage_account = self._make_storage_account_name(
        cluster_name, node_name)
    vm_settings = [
        ('adminUserName', username),
        ('imagePublisher', publisher),    # e.g., 'canonical'
        ('imageOffer', offer),            # e.g., ubuntuserver
        ('imageSku', sku),                # e.g., '16.04.0-LTS'
        ('imageVersion', version),        # e.g., 'latest'
        ('networkSecurityGroupName', security_group),
        ('sshKeyData', ssh_public_key),
        ('storageAccountName', storage_account),
        ('subnetName', cluster_name),
        ('vmName', node_name),
        ('vmSize', flavor),
    ]
    # template parameters are wrapped as `{'value': ...}` dictionaries
    vm_parameters = dict(
        (key, {'value': setting}) for key, setting in vm_settings)

    log.debug("Deploying `%s` VM template to Azure ...", node_name)
    vm_deployment = self._resource_client.deployments.create_or_update(
        cluster_name, node_name, {
            'mode': DeploymentMode.incremental,
            'template': _VM_TEMPLATE,
            'parameters': vm_parameters,
        })
    vm_deployment.wait()

    # both the resource group name and the VM name are needed to
    # uniquely identify a VM, hence the composite instance id
    return [cluster_name, node_name]
def _connect(self):
    """Connects to the ec2 cloud provider.

    An already-established connection is reused.  Otherwise a new EC2
    connection is opened and, if a VPC is configured in `self._vpc`,
    a VPC connection as well; the configured VPC is then looked up by
    ``Name`` tag or by id and its id stored in `self._vpc_id`.

    :return: :py:class:`boto.ec2.connection.EC2Connection`
    :raises VpcError: if the configured VPC matches no existing VPC
    :raises: Generic exception on error
    """
    # check for existing connection
    if self._ec2_connection:
        return self._ec2_connection

    vpc_connection = None
    try:
        log.debug("Connecting to ec2 host %s", self._ec2host)
        region = ec2.regioninfo.RegionInfo(name=self._region_name,
                                           endpoint=self._ec2host)

        # EC2 and VPC connections take the very same arguments
        connection_args = dict(
            aws_access_key_id=self._access_key,
            aws_secret_access_key=self._secret_key,
            is_secure=self._secure,
            host=self._ec2host,
            port=self._ec2port,
            path=self._ec2path,
            region=region)

        # connect to webservice
        ec2_connection = boto.connect_ec2(**connection_args)
        log.debug("EC2 connection has been successful.")

        if self._vpc:
            vpc_connection = boto.connect_vpc(**connection_args)
            log.debug("VPC connection has been successful.")

            for vpc in vpc_connection.get_all_vpcs():
                # not every VPC carries a `Name` tag, so do not
                # fail on the ones that have none
                vpc_name = vpc.tags.get('Name')
                log.debug("Checking whether %s matches %s/%s",
                          self._vpc, vpc_name, vpc.id)
                if self._vpc in (vpc_name, vpc.id):
                    self._vpc_id = vpc.id
                    if self._vpc != self._vpc_id:
                        log.debug("VPC %s matches %s",
                                  self._vpc, self._vpc_id)
                    break
            else:
                raise VpcError('VPC %s does not exist.' % self._vpc)

    except Exception as e:
        log.error("connection to ec2 could not be "
                  "established: message=`%s`", str(e))
        raise

    # only record the connections once everything has succeeded
    self._ec2_connection, self._vpc_connection = (
        ec2_connection, vpc_connection)
    return self._ec2_connection
def start_instance(self, key_name, public_key_path, private_key_path,
                   security_group, flavor, image_id, image_userdata,
                   username=None, node_name=None, network_ids=None,
                   price=None, timeout=None,
                   boot_disk_device=None, boot_disk_size=None,
                   boot_disk_type=None, boot_disk_iops=None,
                   placement_group=None, **kwargs):
    """Starts a new instance on the cloud using the given properties.

    The following tasks are done to start an instance:

    * establish a connection to the cloud web service
    * check ssh keypair and upload it if it does not yet exist. This is
      a locked process, since this function might be called in multiple
      threads and we only want the key to be stored once.
    * check if the security group exists
    * run the instance with the given properties

    :param str key_name: name of the ssh key to connect
    :param str public_key_path: path to ssh public key
    :param str private_key_path: path to ssh private key
    :param str security_group: firewall rule definition to apply on the
                               instance
    :param str flavor: machine type to use for the instance
    :param str image_id: image type (os) to use for the instance
    :param str image_userdata: command to execute after startup
    :param str username: username for the given ssh key, default None
    :param str node_name: value of the ``Name`` tag set on the instance
    :param str network_ids: comma-separated list of subnets; if given,
      one network interface per subnet is requested
    :param float price: Spot instance price (if 0, do not use spot
      instances).  Defaults to `self.price`.
    :param int timeout: Timeout (in seconds) waiting for spot instances;
      only used if price > 0.  Defaults to `self.timeout`.
    :param str boot_disk_device: Root volume device path if not /dev/sda1
    :param str boot_disk_size: Target size, in GiB, for the root volume
    :param str boot_disk_type: Type of root volume (standard, gp2, io1)
    :param str boot_disk_iops: Provisioned IOPS for the root volume
    :param str placement_group: Enable low-latency networking between
                                compute nodes.

    :return: str - instance id of the started instance
    :raises ClusterError: if the error reported while starting the
                          instance mentions ``TooManyInstances``
    :raises InstanceError: on any other error while starting the
                           instance (including spot request time-out)
    """
    connection = self._connect()

    log.debug("Checking keypair `%s`.", key_name)
    # the `_check_keypair` method has to be called within a lock,
    # since it will upload the key if it does not exist and if this
    # happens for every node at the same time ec2 will throw an error
    # message (see issue #79)
    with BotoCloudProvider.__node_start_lock:
        self._check_keypair(key_name, public_key_path, private_key_path)

    log.debug("Checking security group `%s`.", security_group)
    security_group_id = self._check_security_group(security_group)

    if network_ids:
        interfaces = []
        for subnet in network_ids.split(','):
            subnet_id = self._check_subnet(subnet)
            interfaces.append(
                boto.ec2.networkinterface.NetworkInterfaceSpecification(
                    subnet_id=subnet_id, groups=[security_group_id],
                    associate_public_ip_address=self.request_floating_ip))
        interfaces = boto.ec2.networkinterface.NetworkInterfaceCollection(
            *interfaces)
        # security groups are attached to the interfaces instead
        security_groups = []
    else:
        interfaces = None
        security_groups = [security_group]

    # get defaults for `price` and `timeout` from class instance
    if price is None:
        price = self.price
    if timeout is None:
        timeout = self.timeout

    if boot_disk_size:
        dev_root = boto.ec2.blockdevicemapping.BlockDeviceType()
        dev_root.size = int(boot_disk_size)
        dev_root.delete_on_termination = True
        if boot_disk_type:
            dev_root.volume_type = boot_disk_type
        if boot_disk_iops:
            dev_root.iops = int(boot_disk_iops)
        bdm = boto.ec2.blockdevicemapping.BlockDeviceMapping()
        dev_name = boot_disk_device if boot_disk_device else "/dev/sda1"
        bdm[dev_name] = dev_root
    else:
        bdm = None

    # spot and on-demand requests share all of these arguments
    launch_args = dict(
        key_name=key_name,
        security_groups=security_groups,
        instance_type=flavor,
        user_data=image_userdata,
        network_interfaces=interfaces,
        placement_group=placement_group,
        block_device_map=bdm,
        instance_profile_name=self._instance_profile)

    try:
        # start spot instance if bid is specified
        if price:
            log.info("Requesting spot instance with price `%s` ...", price)
            request = connection.request_spot_instances(
                price, image_id, **launch_args)[-1]

            # wait until spot request is fulfilled (will wait
            # forever if no timeout is given)
            start_time = time.time()
            timeout = (float(timeout) if timeout else 0)
            log.info(
                "Waiting for spot instance (will time out in %d seconds) ...",
                timeout)
            while request.status.code != 'fulfilled':
                if timeout and time.time() - start_time > timeout:
                    request.cancel()
                    raise RuntimeError('spot instance timed out')
                time.sleep(self.POLL_INTERVAL)
                # update request status
                request = connection.get_all_spot_instance_requests(
                    request_ids=request.id)[-1]
        else:
            reservation = connection.run_instances(image_id, **launch_args)
    except Exception as ex:
        log.error("Error starting instance: %s", ex)
        # An exception object is not a container: look for the error
        # code in its textual representation instead.
        if "TooManyInstances" in str(ex):
            raise ClusterError(ex)
        else:
            raise InstanceError(ex)

    if price:
        vm = connection.get_only_instances(
            instance_ids=[request.instance_id])[-1]
    else:
        vm = reservation.instances[-1]
    vm.add_tag("Name", node_name)

    # cache instance object locally for faster access later on
    self._instances[vm.id] = vm

    return vm.id
def start_instance(self, key_name, public_key_path, private_key_path,
                   security_group, flavor, image_id, image_userdata,
                   username=None, node_name=None, network_ids=None,
                   **kwargs):
    """Starts a new instance on the cloud using the given properties.

    The following tasks are done to start an instance:

    * establish a connection to the cloud web service
    * check ssh keypair and upload it if it does not yet exist. This is
      a locked process, since this function might be called in multiple
      threads and we only want the key to be stored once.
    * check if the security group exists
    * run the instance with the given properties

    :param str key_name: name of the ssh key to connect
    :param str public_key_path: path to ssh public key
    :param str private_key_path: path to ssh private key
    :param str security_group: firewall rule definition to apply on the
                               instance
    :param str flavor: machine type to use for the instance
    :param str image_id: image type (os) to use for the instance
    :param str image_userdata: command to execute after startup
    :param str username: username for the given ssh key, default None
    :param str node_name: value of the ``Name`` tag set on the instance
    :param str network_ids: comma-separated list of subnets; if given,
      one network interface per subnet is requested

    :return: str - instance id of the started instance
    :raises ClusterError: if the error reported while starting the
                          instance mentions ``TooManyInstances``
    :raises InstanceError: on any other error while starting the instance
    """
    connection = self._connect()

    log.debug("Checking keypair `%s`.", key_name)
    # the `_check_keypair` method has to be called within a lock,
    # since it will upload the key if it does not exist and if this
    # happens for every node at the same time ec2 will throw an error
    # message (see issue #79)
    with BotoCloudProvider.__node_start_lock:
        self._check_keypair(key_name, public_key_path, private_key_path)

    log.debug("Checking security group `%s`.", security_group)
    security_group_id = self._check_security_group(security_group)

    if network_ids:
        interfaces = []
        for subnet in network_ids.split(','):
            subnet_id = self._check_subnet(subnet)
            interfaces.append(
                ec2.networkinterface.NetworkInterfaceSpecification(
                    subnet_id=subnet_id, groups=[security_group_id],
                    associate_public_ip_address=self.request_floating_ip))
        interfaces = ec2.networkinterface.NetworkInterfaceCollection(
            *interfaces)
        # security groups are attached to the interfaces instead
        security_groups = []
    else:
        interfaces = None
        security_groups = [security_group]

    try:
        reservation = connection.run_instances(
            image_id,
            key_name=key_name,
            security_groups=security_groups,
            instance_type=flavor,
            user_data=image_userdata,
            network_interfaces=interfaces)
    except Exception as ex:
        log.error("Error starting instance: %s", ex)
        # An exception object is not a container: look for the error
        # code in its textual representation instead.
        if "TooManyInstances" in str(ex):
            raise ClusterError(ex)
        else:
            raise InstanceError(ex)

    vm = reservation.instances[-1]
    vm.add_tag("Name", node_name)

    # cache instance object locally for faster access later on
    self._instances[vm.id] = vm

    return vm.id
def start_instance(self, key_name, public_key_path, private_key_path,
                   security_group, flavor, image_id, image_userdata,
                   username=None, node_name=None, network_ids=None,
                   **kwargs):
    """Starts a new instance on the cloud using the given properties.

    The following tasks are done to start an instance:

    * check ssh keypair and upload it if it does not yet exist. This is
      a locked process, since this function might be called in multiple
      threads and we only want the key to be stored once.
    * check if the security group exists
    * check that the requested image and flavor exist
    * run the instance with the given properties

    :param str key_name: name of the ssh key to connect
    :param str public_key_path: path to ssh public key
    :param str private_key_path: path to ssh private key
    :param str security_group: firewall rule definition to apply on the
                               instance
    :param str flavor: machine type to use for the instance
    :param str image_id: image type (os) to use for the instance
    :param str image_userdata: command to execute after startup
    :param str username: username for the given ssh key, default None
    :param str node_name: name given to the new server
    :param str network_ids: comma-separated list of ids of the networks
      to attach the instance to.  If not given, the provider-wide
      `_os_network_ids` setting is used when present; failing that,
      the network that used to be hard-coded here is used.

    :return: str - instance id of the started instance
    :raises ImageError: if no image has id `image_id`
    :raises FlavorError: if no flavor is named `flavor`
    """
    log.debug("Checking keypair `%s` ...", key_name)
    with OpenStackCloudProvider.__node_start_lock:
        self._check_keypair(key_name, public_key_path, private_key_path)

    log.debug("Checking security group `%s` ...", security_group)
    self._check_security_group(security_group)

    # Check if the image id is present.
    if not any(img.id == image_id for img in self._get_images()):
        raise ImageError(
            "No image found with ID `{0}` in project `{1}` of cloud {2}"
            .format(image_id, self._os_tenant_name, self._os_auth_url))

    # Check if the flavor exists
    flavors = [fl for fl in self._get_flavors() if fl.name == flavor]
    if not flavors:
        raise FlavorError(
            "No flavor found with name `{0}` in project `{1}` of cloud {2}"
            .format(flavor, self._os_tenant_name, self._os_auth_url))
    flavor = flavors[0]

    # FIXME: site-specific network id that used to be hard-coded for
    # every instance; only kept as a last resort so that existing
    # setups which configure no network keep working.
    fallback_network_id = '10a92c55-bbc5-4713-94c0-175bf66ea489'

    # explicit argument first, then provider-wide setting, then fallback
    candidates = (
        network_ids,
        getattr(self, '_os_network_ids', None),
        fallback_network_id,
    )
    for candidate in candidates:
        net_ids = [netid.strip()
                   for netid in (candidate or '').split(',')
                   if netid.strip()]
        if net_ids:
            break
    nics = [{'net-id': netid, 'v4-fixed-ip': ''} for netid in net_ids]
    log.debug("Specifying networks for node %s: %s",
              node_name, ', '.join(net_ids))

    vm = self.client.servers.create(
        node_name, image_id, flavor,
        key_name=key_name,
        security_groups=[security_group],
        userdata=image_userdata,
        nics=nics)

    # cache instance object locally for faster access later on
    self._instances[vm.id] = vm

    return vm.id