def _start_instance(self, resource_handler, node_def):
    """
    Create the server for a node through ``self.conn.servers.create``.

    :param resource_handler: Only its ``name`` attribute is read here, for
        log messages.
    :param dict node_def: The resolved node definition to use.
    :returns: The server object returned by ``self.conn.servers.create``.
    :raises NodeCreationError: If server creation fails, or if deleting the
        server during rollback fails.
    :raises KeyboardInterrupt: Re-raised after rollback when an interrupt
        was flagged by the ``GracefulInterruptHandler``.

    :Remark: This is a "wet method", the VM will not be started
        if the instance is in debug mode (``dry_run``). The ``dry_run``
        check itself is not performed in this function -- presumably it is
        applied by the caller or a decorator; verify.
    """
    resource = node_def['resource']
    image_id = resource['image_id']
    flavor_name = resource['flavor_name']
    context = node_def.get('context', None)
    sec_groups = resource.get('security_groups', None)
    key_name = resource.get('key_name', None)
    server_name = resource.get('server_name', unique_vmname(node_def))
    network_id = resource.get('network_id', None)
    # Attach a NIC only when a network was explicitly requested.
    nics = (None if network_id is None
            else [{"net-id": network_id, "v4-fixed-ip": ''}])
    create_options = dict(security_groups=sec_groups,
                          key_name=key_name,
                          userdata=context,
                          nics=nics)

    log.debug(
        "[%s] Creating new server using image ID %r and flavor name %r",
        resource_handler.name, image_id, flavor_name)

    server = None
    interrupted = False
    try:
        # The interrupt flag is read only after create() has returned, so
        # the server handle is known before rollback starts.
        with GracefulInterruptHandler() as handler:
            log.debug('Server creation started for node %s...',
                      node_def['node_id'])
            server = self.conn.servers.create(server_name, image_id,
                                              flavor_name, **create_options)
            interrupted = handler.interrupted
            log.debug('Server creation finished for node %s: server: %r',
                      node_def['node_id'], server)
        if interrupted:
            log.debug(
                'Keyboard interrupt detected while VM was being created!')
            raise KeyboardInterrupt
    except KeyboardInterrupt:
        log.debug('Interrupting node creation!')
        if server is not None:
            log.debug('Rolling back...')
            try:
                self.conn.servers.delete(server)
            except Exception as ex:
                raise NodeCreationError(None, str(ex))
        raise
    except Exception as ex:
        raise NodeCreationError(None, str(ex))
    return server
def perform(self, resource_handler):
    """
    Create a node: clone the library drive, create a server on it and wait
    until the server reports a starting/started/running state.

    :param resource_handler: Passed through to the drive/server helpers;
        its ``name`` attribute is used in log messages.
    :returns: The server id returned by ``self._create_server``.
    :raises NodeCreationError: If the drive cannot be cloned or the server
        cannot be created.
    :raises KeyboardInterrupt: Re-raised after the server (if any) has been
        stopped and deleted.
    """
    log.debug("[%s] Creating node: %r", resource_handler.name,
              self.resolved_node_definition['name'])
    drv_id, srv_id = None, None
    try:
        libdrive_id = self.resolved_node_definition['resource']['libdrive_id']
        drv_id, errormsg = self._clone_drive(resource_handler, libdrive_id)
        if not drv_id:
            log.error(errormsg)
            raise NodeCreationError(None, errormsg)

        # Poll until the cloned drive becomes usable.
        while True:
            drive_state, errormsg = self._get_drive_status(
                resource_handler, drv_id)
            if drive_state == 'unmounted':
                break
            log.debug(
                "[%s] Waiting for cloned drive to enter unmounted state, currently %r",
                resource_handler.name, drive_state)
            time.sleep(wait_time_between_api_call_retries)

        srv_id, errormsg = self._create_server(resource_handler, drv_id)
        if not srv_id:
            log.error(errormsg)
            self._delete_drive(resource_handler, drv_id)
            raise NodeCreationError(None, errormsg)

        # Poll the server; (re)issue a start request whenever it is stopped.
        while True:
            server_state = get_server_status(resource_handler, srv_id)
            if server_state in ('starting', 'started', 'running'):
                break
            log.debug(
                "[%s] Server is in %s state. Waiting to enter starting state...",
                resource_handler.name, server_state)
            if server_state == 'stopped':
                started, errormsg = self._start_server(resource_handler,
                                                       srv_id)
                if not started:
                    log.debug(errormsg)
            time.sleep(wait_time_between_api_call_retries)
    except KeyboardInterrupt:
        log.info('Interrupting node creation! Rolling back. Please, stand by!')
        if srv_id:
            # Stop the server (unless it is already stopping), then delete it.
            while True:
                server_state = get_server_status(resource_handler, srv_id)
                if server_state in ('stopped', 'unknown'):
                    break
                log.debug("[%s] Server is in %s state.",
                          resource_handler.name, server_state)
                time.sleep(wait_time_between_api_call_retries)
                if server_state != 'stopping':
                    self._stop_server(resource_handler, srv_id)
            self._delete_server(resource_handler, srv_id)
        # Drive cleanup is disabled in the original code; kept for reference:
        # if drv_id:
        #     drv_st, _ = self._get_drive_status(resource_handler, drv_id)
        #     while drv_st not in ['unmounted','unknown']:
        #         log.debug("[%s] Drive is in %s state.",resource_handler.name, drv_st)
        #         time.sleep(wait_time_between_api_call_retries)
        #         drv_st, _ = self._get_drive_status(resource_handler, drv_id)
        #     self._delete_drive(resource_handler, drv_id)
        raise
    return srv_id
def setup_connection(endpoint, auth_data, resolved_node_definition):
    """
    Setup the connection to the Nova endpoint.

    :param endpoint: Authentication URL passed as ``auth_url``.
    :param auth_data: Mapping with the credentials. Without a ``type`` key
        it must hold ``username`` and ``password``; with
        ``type == 'application_credential'`` it must hold ``id`` and
        ``secret``.
    :param dict resolved_node_definition: Its ``resource`` section supplies
        the optional ``project_id``, ``user_domain_name`` (default
        ``'Default'``) and ``region_name``.
    :returns: A ``connection.Connection`` built on the authenticated session.
    :raises NodeCreationError: If ``type`` has any other value.
    """
    resource = resolved_node_definition['resource']
    project_id = resource.get('project_id', None)
    user_domain_name = resource.get('user_domain_name', 'Default')
    region_name = resource.get('region_name', None)

    auth_type = auth_data.get('type', None)
    if auth_type is None:
        # No explicit type: plain username/password authentication.
        auth = v3.Password(auth_url=endpoint,
                           username=auth_data['username'],
                           password=auth_data['password'],
                           project_id=project_id,
                           user_domain_name=user_domain_name)
    elif auth_type == 'application_credential':
        auth = v3.ApplicationCredential(
            auth_url=endpoint,
            application_credential_id=auth_data['id'],
            application_credential_secret=auth_data['secret'])
    else:
        raise NodeCreationError(
            None, 'Unknown authentication type provided: "%s"' % auth_type)

    return connection.Connection(session=session.Session(auth=auth),
                                 region_name=region_name)
def __init__(self, endpoint, auth_data, name=None, dry_run=False, **config):
    """
    Store the endpoint and credentials after validating them.

    :param str endpoint: Authentication endpoint URL. If it does not end
        with an API version segment (``/v<digits>`` plus optional trailing
        slashes), ``v3`` is appended.
    :param auth_data: Credentials mapping. Accepted shapes:
        no ``type`` key with ``username`` and ``password``; or a ``type``
        listed in ``ALLOWED_AUTH_TYPES``, which additionally needs ``id``
        and ``secret`` when the type contains ``application_credential``.
    :param name: Handler name; defaults to the original ``endpoint``.
    :param bool dry_run: Stored as ``self.dry_run``.
    :param config: Remaining keyword arguments, stored as ``self.data``.
    :raises NodeCreationError: If the credentials are missing or malformed.
    """
    self.dry_run = dry_run
    # Check if endpoint includes API version (/v3, /v3/, etc.).
    # Raw string: the former non-raw pattern relied on the invalid escape
    # sequences "\/" and "\d", which newer Python versions warn about.
    if re.search(r'/v\d+/*$', endpoint) is None:
        # If no API version is included, assume v3
        if endpoint.endswith('/'):
            self.endpoint = '%sv3' % endpoint
        else:
            self.endpoint = '%s/v3' % endpoint
    else:
        self.endpoint = endpoint
    self.name = name if name else endpoint

    if not auth_data:
        invalid = True
    elif "type" not in auth_data:
        # Untyped credentials must be a username/password pair.
        invalid = ("username" not in auth_data) or \
            ("password" not in auth_data)
    else:
        auth_type = auth_data['type']
        # The allowed-type test comes first so the containment test below
        # is only reached for an accepted type value.
        invalid = (auth_type not in ALLOWED_AUTH_TYPES) or (
            ("application_credential" in auth_type) and
            (("id" not in auth_data) or ("secret" not in auth_data)))
    if invalid:
        # NOTE(review): the message embeds str(auth_data), which may expose
        # secrets in logs/tracebacks -- consider reporting only the keys.
        errormsg = "Cannot find credentials for \"" + endpoint + \
            "\". Found only: \"" + str(auth_data) + "\". Please, specify!"
        raise NodeCreationError(None, errormsg)
    self.auth_data = auth_data
    self.data = config
def perform(self, resource_handler):
    """
    Return the first address of the server that is not one of its
    floating IPs.

    :param resource_handler: Only its ``name`` attribute is read, for log
        messages.
    :returns: The private address (encoded as latin-1), or ``None`` when
        every address is empty or matches a floating IP of this server.
    :raises NodeCreationError: If the server cannot be fetched.
    """
    log.debug("[%s] Acquiring private IP address for %r",
              resource_handler.name, self.instance_data['node_id'])
    try:
        server = self.conn.servers.get(self.instance_data['instance_id'])
    except Exception as ex:
        raise NodeCreationError(None, str(ex))

    floating_ips = self.conn.floating_ips.list()
    networks = self.conn.servers.ips(server)
    for tenant in networks.keys():
        log.debug("[%s] networks[tenant]: %s", resource_handler.name,
                  networks[tenant])
        for addre in networks[tenant]:
            # NOTE(review): on Python 3 encode() yields bytes, which would
            # never compare equal to a str floating_ip.ip -- confirm the
            # targeted interpreter version.
            ip = addre['addr'].encode('latin-1')
            is_floating = any(
                floating_ip.instance_id == server.id and floating_ip.ip == ip
                for floating_ip in floating_ips)
            if ip != "" and not is_floating:
                log.debug("[%s] Private ip found: %s", resource_handler.name,
                          ip)
                return ip
    # The format string has one placeholder; the former extra argument made
    # logging fail while formatting this record.
    log.debug("[%s] Private ip not found.", resource_handler.name)
    return None
def _start_instance(self, resource_handler):
    """
    Start a CloudBroker instance.

    The ``description`` dict of the resolved node definition is updated in
    place (defaults for ``disable_autostop``/``isolated`` and, when a
    context exists, the base64 encoded cloud-init payload), serialised to
    XML and POSTed to ``<endpoint>/instances.xml``.

    :param resource_handler: Supplies ``name``, ``endpoint`` and
        ``auth_data``.
    :returns: Text of the first ``<id>`` element of the response document.
    :raises NodeCreationError: If the response status code is not 201.
    """
    log.debug("[%s] Creating CloudBroker instance...", resource_handler.name)
    descr = self.resolved_node_definition['resource']['description']
    for option in ('disable_autostop', 'isolated'):
        descr.setdefault(option, 'true')

    context = self.resolved_node_definition.get('context', None)
    if context is not None:
        descr['cloud-init'] = base64.b64encode(context)
        descr['cloud-init-b64'] = 'true'

    log.debug("[%s] XML to pass to CloudBroker: %s", resource_handler.name,
              dicttoxml(descr, custom_root='instance', attr_type=False))
    response = requests.post(
        resource_handler.endpoint + '/instances.xml',
        data=dicttoxml(descr, custom_root='instance', attr_type=False),
        auth=get_auth(resource_handler.auth_data),
        headers={'Content-Type': 'application/xml'})
    log.debug(
        '[%s] CloudBroker instance create response status code %d, response: %s',
        resource_handler.name, response.status_code, response.text)

    if response.status_code != 201:
        errormsg = '[{0}] Failed to create CloudBroker instance, request status code {1}, response: {2}'.format(
            resource_handler.name, response.status_code, response.text)
        log.debug(errormsg)
        raise NodeCreationError(None, errormsg)

    # Pull the instance id out of the returned XML document.
    root = xml.dom.minidom.parseString(response.text).documentElement
    instance_id = root.getElementsByTagName('id')[0].childNodes[0].data
    log.debug("[%s] CloudBroker instance started, internal id: %s",
              resource_handler.name, instance_id)
    return instance_id
def _allocate_floating_ip(self, resource_handler, server):
    """
    Associate an available floating IP with ``server``, retrying on failure.

    Nothing is done unless the resource section contains ``floating_ip`` or
    a ``floating_ip_pool``. Up to 60 attempts are made with 10 seconds
    between them. When no floating IP is available, or all attempts fail,
    the server is deleted.

    :param resource_handler: Only its ``name`` attribute is read.
    :param server: Server object; its ``id`` attribute is used.
    :raises NodeCreationError: If no floating IP is available or the
        association keeps failing.
    """
    resource = self.resolved_node_definition['resource']
    pool = resource.get('floating_ip_pool', None)
    if pool is None and 'floating_ip' not in resource:
        return

    flip_waiting = 10
    flip_attempts = 60
    for _attempt in range(flip_attempts):
        floating_ip = self.connopenstack.available_floating_ip(network=pool)
        if not floating_ip:
            if pool is None:
                error_msg = '[{0}] Cannot find unused floating ip address!'.format(
                    resource_handler.name)
            else:
                error_msg = '[{0}] Cannot find unused floating ip address in pool "{1}"!'.format(
                    resource_handler.name, pool)
            self.connopenstack.delete_server(server.id)
            raise NodeCreationError(None, error_msg)
        try:
            log.debug(
                "[%s] Try associating floating ip (%s) to server (%s)...",
                resource_handler.name, floating_ip.floating_ip_address,
                server.id)
            self.connopenstack.compute.add_floating_ip_to_server(
                server.id, floating_ip.floating_ip_address)
            break
        except Exception as e:
            log.debug(e)
            log.debug(
                "[%s] Associating floating ip (%s) to node failed. Retry after %i seconds...",
                resource_handler.name, floating_ip.floating_ip_address,
                flip_waiting)
            time.sleep(flip_waiting)
    else:
        # Every attempt failed: give up and remove the server.
        error_msg = '[{0}] Gave up associating floating ip to node! Could not get it in {1} seconds."'.format(
            resource_handler.name, flip_attempts * flip_waiting)
        log.error(error_msg)
        self.connopenstack.delete_server(server.id)
        raise NodeCreationError(None, error_msg)
    return
def __init__(self, endpoint, auth_data, name=None, dry_run=False, **config):
    """
    Validate the credentials and store the handler settings.

    :param endpoint: Service endpoint; also the default ``name``.
    :param auth_data: Mapping that must contain ``email`` and ``password``.
    :param name: Handler name; falls back to ``endpoint`` when falsy.
    :param bool dry_run: When true, ``endpoint`` and ``auth_data`` are
        stored as ``None``.
    :param config: Accepted but not used here.
    :raises NodeCreationError: If ``auth_data`` is empty or lacks a key.
    """
    self.dry_run = dry_run
    self.name = name or endpoint

    required_keys = ("email", "password")
    if not auth_data or any(key not in auth_data for key in required_keys):
        errormsg = 'Cannot find credentials for "' + endpoint + '". Please, specify!'
        log.debug(errormsg)
        raise NodeCreationError(None, errormsg)

    if dry_run:
        self.endpoint = None
        self.auth_data = None
    else:
        self.endpoint = endpoint
        self.auth_data = auth_data
def __init__(self, endpoint, auth_data, name=None, dry_run=False, **config):
    """
    Validate the credentials and store the handler settings.

    :param endpoint: Service endpoint; also the default ``name``.
    :param auth_data: Mapping holding either ``username`` and ``password``,
        or ``type`` and ``proxy``.
    :param name: Handler name; falls back to ``endpoint`` when falsy.
    :param bool dry_run: Stored as ``self.dry_run``.
    :param config: Remaining keyword arguments, stored as ``self.data``.
    :raises NodeCreationError: If neither credential pair is complete.
    """
    self.dry_run = dry_run
    self.name = name or endpoint
    self.endpoint = endpoint

    # bool(auth_data) short-circuits so an empty/None value is never probed.
    credentials_ok = bool(auth_data) and (
        ("username" in auth_data and "password" in auth_data) or
        ("type" in auth_data and "proxy" in auth_data))
    if not credentials_ok:
        errormsg = 'Cannot find credentials for "' + endpoint + '". Please, specify!'
        raise NodeCreationError(None, errormsg)

    self.auth_data = auth_data
    self.data = config
def perform(self, resource_handler):
    """
    Return the first address of the server whose ``OS-EXT-IPS:type`` is
    ``fixed``.

    :param resource_handler: Only its ``name`` attribute is read, for log
        messages.
    :returns: The ``addr`` value of the first fixed address, or ``None``
        if the server has none.
    :raises NodeCreationError: If the server cannot be fetched.
    """
    log.debug("[%s] Acquiring private IP address for %r",
              resource_handler.name, self.instance_data['node_id'])
    try:
        server = self.connopenstack.get_server(
            self.instance_data['instance_id'])
    except Exception as ex:
        raise NodeCreationError(None, str(ex))

    addresses = server.addresses
    fixed_addresses = (entry['addr']
                       for network in addresses
                       for entry in addresses[network]
                       if entry['OS-EXT-IPS:type'] == 'fixed')
    # Return on the first hit; the generator is not consumed any further.
    for private_ip in fixed_addresses:
        return private_ip

    log.debug("[%s] Private ip not found.", resource_handler.name)
    return None
def perform(self, resource_handler):
    """
    Return an IP address of the server, preferring a floating IP.

    :param resource_handler: Only its ``name`` attribute is read, for log
        messages.
    :returns: The ``ip`` of the first floating IP assigned to the server;
        otherwise the first listed network address (encoded as latin-1);
        ``None`` if there is neither.
    :raises NodeCreationError: If the server cannot be fetched.
    """
    log.debug("[%s] Acquiring IP address for %r", resource_handler.name,
              self.instance_data['node_id'])
    try:
        server = self.conn.servers.get(self.instance_data['instance_id'])
    except Exception as ex:
        raise NodeCreationError(None, str(ex))

    # First choice: a floating IP bound to this server.
    own_floating_ips = (candidate
                        for candidate in self.conn.floating_ips.list()
                        if candidate.instance_id == server.id)
    for candidate in own_floating_ips:
        return candidate.ip

    # Fallback: the very first address reported for any network.
    networks = self.conn.servers.ips(server)
    for tenant in networks.keys():
        for entry in networks[tenant]:
            return entry['addr'].encode('latin-1')
    return None
def perform(self, resource_handler):
    """
    Fetch the server and translate its status through ``STATE_MAPPING``.

    :param resource_handler: Only its ``name`` attribute is read, for log
        messages.
    :returns: The ``STATE_MAPPING`` entry for the server status.
    :raises NodeCreationError: If the server cannot be fetched.
    :raises NotImplementedError: If the status has no mapping entry.
    """
    log.debug("[%s] Acquiring node state %r", resource_handler.name,
              self.instance_data['node_id'])
    try:
        server = self.conn.servers.get(self.instance_data['instance_id'])
    except Exception as ex:
        raise NodeCreationError(None, str(ex))

    nova_state = server.status
    try:
        mapped_state = STATE_MAPPING[nova_state]
    except KeyError:
        raise NotImplementedError('Unknown Nova state', nova_state)

    log.debug("[%s] Done; nova_state=%r; status=%r", resource_handler.name,
              nova_state, mapped_state)
    return mapped_state
def __init__(self, endpoint, regionname, auth_data, name=None, dry_run=False, **config):
    """
    Validate the credentials and store the handler settings.

    :param endpoint: Service endpoint; also the default ``name``.
    :param regionname: Stored as ``self.regionname``.
    :param auth_data: Mapping that must contain ``accesskey`` and
        ``secretkey``.
    :param name: Handler name; falls back to ``endpoint`` when falsy.
    :param bool dry_run: Stored as ``self.dry_run``.
    :param config: Accepted but not used here.
    :raises NodeCreationError: If ``auth_data`` is empty or lacks a key.
    """
    self.dry_run = dry_run
    self.name = name or endpoint
    self.endpoint = endpoint
    self.regionname = regionname

    required_keys = ("accesskey", "secretkey")
    if not auth_data or any(key not in auth_data for key in required_keys):
        errormsg = 'Cannot find credentials for "' + endpoint + '". Please, specify!'
        log.debug(errormsg)
        raise NodeCreationError(None, errormsg)

    self.auth_data = auth_data
def perform(self, resource_handler):
    """
    Terminate a VM instance.

    Does nothing when ``self.instance_data`` has no (truthy)
    ``instance_id``; otherwise delegates to ``self._delete_vms``.

    :param resource_handler: Passed to ``_delete_vms``; its ``name``
        attribute is used in log messages.
    :raises NodeCreationError: If ``_delete_vms`` raises.
    """
    instance_id = self.instance_data.get('instance_id')
    if instance_id:
        log.debug("[%s] Dropping node %r", resource_handler.name,
                  self.instance_data['node_id'])
        try:
            self._delete_vms(resource_handler, instance_id)
        except Exception as ex:
            raise NodeCreationError(None, str(ex))
        log.debug("[%s] Done", resource_handler.name)
def __init__(self, endpoint, auth_data, name=None, dry_run=False, **config):
    """
    Validate the credentials and store the handler settings.

    :param endpoint: Service endpoint; also the default ``name``.
    :param auth_data: Mapping that must contain ``subscription_id``,
        ``tenant_id``, ``client_id`` and ``client_secret``; each is also
        copied to an attribute of the same name.
    :param name: Handler name; falls back to ``endpoint`` when falsy.
    :param bool dry_run: Stored as ``self.dry_run``.
    :param config: Accepted but not used here.
    :raises NodeCreationError: If ``auth_data`` is empty or lacks a key.
    """
    self.endpoint = endpoint

    required_keys = ("subscription_id", "tenant_id", "client_id",
                     "client_secret")
    if not auth_data or any(key not in auth_data for key in required_keys):
        errormsg = 'Cannot find credentials for "' + endpoint + '". Please, specify!'
        log.debug(errormsg)
        raise NodeCreationError(None, errormsg)

    self.auth_data = auth_data
    self.subscription_id = auth_data['subscription_id']
    self.tenant_id = auth_data['tenant_id']
    self.client_id = auth_data['client_id']
    self.client_secret = auth_data['client_secret']
    self.dry_run = dry_run
    self.name = name or endpoint
def perform(self, resource_handler):
    """
    Create a node: start the server, then allocate its floating IP.

    :param resource_handler: Passed to the helpers; its ``name`` attribute
        is used in log messages.
    :returns: The ``id`` of the created server.
    :raises NodeCreationError: If deleting the server during rollback fails.
    :raises KeyboardInterrupt: Re-raised after the rollback.
    """
    log.debug("[%s] Creating node: %r", resource_handler.name,
              self.resolved_node_definition['name'])
    server = None
    try:
        server = self._start_instance(resource_handler,
                                      self.resolved_node_definition)
        log.debug("[%s] Server instance created, id: %r",
                  resource_handler.name, server.id)
        self._allocate_floating_ip(resource_handler, server)
    except KeyboardInterrupt:
        # Only roll back when the server was actually created.
        if server is not None:
            try:
                log.debug(
                    'Interrupting node creation! Rolling back. Please, stand by!'
                )
                self.conn.servers.delete(server)
            except Exception as ex:
                raise NodeCreationError(None, str(ex))
        raise
    return server.id
def perform(self, resource_handler):
    """
    Create a node: clone the library drive, create a server on it and keep
    requesting a start until it succeeds or the server reports a
    starting/started/running status.

    :param resource_handler: Passed through to the drive/server helpers;
        its ``name`` attribute is used in log messages.
    :returns: The server id returned by ``self._create_server``.
    :raises NodeCreationError: If the drive cannot be cloned, does not end
        up ``unmounted``, or the server cannot be created.
    :raises KeyboardInterrupt: Re-raised after server and drive have been
        rolled back.
    """
    log.debug("[%s] Creating node: %r", resource_handler.name,
              self.resolved_node_definition['name'])
    drv_id, srv_id = None, None
    try:
        drv_id, errormsg = self._clone_drive(
            resource_handler,
            self.resolved_node_definition['resource']['libdrive_id'])
        if not drv_id:
            log.error(errormsg)
            raise NodeCreationError(None, errormsg)

        drv_st, errormsg = self._get_drive_status(resource_handler, drv_id)
        while drv_st == 'cloning_dst':
            log.debug(
                "[%s] Waiting for cloned drive to enter unmounted state, currently %r",
                resource_handler.name, drv_st)
            time.sleep(5)
            drv_st, errormsg = self._get_drive_status(
                resource_handler, drv_id)
        # Any final state other than 'unmounted' (including 'unknown') is
        # a failure.
        if drv_st != 'unmounted':
            log.error(errormsg)
            self._delete_drive(resource_handler, drv_id)
            raise NodeCreationError(None, errormsg)

        srv_id, errormsg = self._create_server(resource_handler, drv_id)
        if not srv_id:
            log.error(errormsg)
            self._delete_drive(resource_handler, drv_id)
            raise NodeCreationError(None, errormsg)

        ret = False
        while not ret:
            ret, errormsg = self._start_server(resource_handler, srv_id)
            if not ret:
                log.debug(errormsg)
                # Query state to check if previous api call had positive
                # effect
                json_data = get_server_json(resource_handler, srv_id)
                # The query may yield None; guard before reading 'status'
                # so the retry loop does not die with AttributeError.
                status = (json_data.get('status')
                          if json_data is not None else None)
                if status in ['starting', 'started', 'running']:
                    log.debug(
                        "Despite of failed server start, status of server is %s."
                        + "Considering action success.", status)
                    ret = True
                else:
                    log.debug("Result of state query: %s", status)
                    time.sleep(5)
    except KeyboardInterrupt:
        log.info('Interrupting node creation! Rolling back. Please, stand by!')
        if srv_id:
            # NOTE(review): get_server_json() may return None here as well,
            # and a status that never becomes 'stopped' keeps this loop
            # running -- left unchanged, confirm the intended handling.
            srv_st = get_server_json(resource_handler, srv_id)['status']
            while srv_st != 'stopped':
                log.debug("[%s] Server is in %s state.",
                          resource_handler.name, srv_st)
                time.sleep(5)
                self._stop_server(resource_handler, srv_id)
                srv_st = get_server_json(resource_handler, srv_id)['status']
            self._delete_server(resource_handler, srv_id)
        if drv_id:
            drv_st, _ = self._get_drive_status(resource_handler, drv_id)
            while drv_st not in ['unmounted', 'unknown']:
                log.debug("[%s] Drive is in %s state.",
                          resource_handler.name, drv_st)
                time.sleep(5)
                drv_st, _ = self._get_drive_status(resource_handler, drv_id)
            self._delete_drive(resource_handler, drv_id)
        raise
    return srv_id
def _start_container(self, resource_handler):
    """
    Create an Azure container group holding a single container.

    Steps, in order: create/update the resource group; build the resource
    requests (optionally with a GPU), ports and environment; build the
    container; prepare networking for a ``public`` or ``private``
    ``network_type``; create the container group.

    For private networking a virtual network and/or subnet is created when
    ``vnet_name``/``subnet_name`` are not given in ``self.res``, and a
    network profile is always created. Created resources are recorded in
    ``self.created_resources``.

    :param resource_handler: Only its ``name`` attribute is read, for the
        error message.
    :returns: The name of the container group.
    :raises NodeCreationError: If ``network_type`` is neither ``public``
        nor ``private`` (case-insensitive).
    """
    log.debug('Starting Azure ACI')
    res = self.res
    location = res['location'].lower()
    self.resource_client.resource_groups.create_or_update(
        res['resource_group'], {'location': res['location']})
    container_group_name = unique_vmname(self.node_def)
    network_type = res['network_type']
    network_profile = None

    # Resource requests; the GPU part is optional.
    request_args = {}
    if 'gpu_type' in res:
        gpu_count = res.get('gpu_count', 1)
        request_args['gpu'] = GpuResource(count=gpu_count,
                                          sku=res['gpu_type'])
    request_args['memory_in_gb'] = res['memory']
    request_args['cpu'] = res['cpu_cores']
    container_resource_requirements = ResourceRequirements(
        requests=ResourceRequests(**request_args))

    # Ports may be given as a number or as a "<port>/<protocol>" string.
    ports = []
    ipports = []
    for port_spec in res.get('ports', []):
        port, protocol = port_spec, 'TCP'
        if isinstance(port_spec, str) and '/' in port_spec:
            (port, protocol) = port_spec.split('/')
        port = int(port)
        ports.append(ContainerPort(port=port, protocol=protocol))
        ipports.append(Port(protocol=protocol, port=port))

    net_kind = network_type.lower()
    environment = []
    if net_kind == 'public':
        environment.append(
            EnvironmentVariable(name='_OCCOPUS_ALLOCATED_FQDN',
                                value='%s.%s.azurecontainer.io' %
                                (container_group_name, location)))
    for assignment in self.env:
        pieces = assignment.split('=', 1)
        # Entries without '=' are skipped.
        if len(pieces) == 2:
            environment.append(
                EnvironmentVariable(name=pieces[0], value=pieces[1]))

    container = Container(name=container_group_name,
                          image=res['image'],
                          resources=container_resource_requirements,
                          ports=ports,
                          command=self.command,
                          environment_variables=environment)

    if net_kind == 'public':
        group_ip_address = IpAddress(ports=ipports,
                                     dns_name_label=container_group_name,
                                     type='Public')
        self.vnet_name = None
    elif net_kind == 'private':
        resource_group = res['resource_group']
        create_vnet = res.get('vnet_name', None) is None
        if create_vnet:
            vnet_name = unique_vmname(self.node_def) + '-vnet'
        else:
            vnet_name = res['vnet_name']
        self.vnet_name = vnet_name
        create_subnet = res.get('subnet_name', None) is None
        if create_subnet:
            subnet_name = unique_vmname(self.node_def) + '-subnet'
        else:
            subnet_name = res['subnet_name']
        network_profile_name = unique_vmname(self.node_def) + '-netprofile'

        if create_vnet:
            log.debug('Creating vnet')
            vnet_operation = self.network_client.virtual_networks.create_or_update(
                resource_group, vnet_name, {
                    'location': location,
                    'address_space': {
                        'address_prefixes': ['10.0.0.0/16']
                    }
                })
            vnet_operation.wait()
            self.created_resources['virtual_network'] = vnet_name
            log.debug('Created vnet')

        if create_subnet:
            # Create Subnet
            log.debug('Creating Subnet')
            delegation_service = "Microsoft.ContainerInstance/containerGroups"
            subnet = Subnet(name=subnet_name,
                            location=location,
                            address_prefix='10.0.0.0/24',
                            delegations=[
                                Delegation(name=delegation_service,
                                           service_name=delegation_service)
                            ])
            subnet_info = self.network_client.subnets.create_or_update(
                resource_group, vnet_name, subnet_name, subnet).result()
            self.created_resources['subnet'] = subnet_name
            log.debug('Creatied Subnet')
        else:
            subnet_info = self.network_client.subnets.get(
                resource_group, vnet_name, subnet_name)

        default_network_profile_name = "aci-network-profile-{}-{}".format(
            vnet_name, subnet_name)
        network_profile_ops = self.network_client.network_profiles
        profile_definition = NetworkProfile(
            name=default_network_profile_name,
            location=location,
            container_network_interface_configurations=[
                ContainerNetworkInterfaceConfiguration(
                    name="eth0",
                    ip_configurations=[
                        IPConfigurationProfile(name="ipconfigprofile",
                                               subnet=subnet_info)
                    ])
            ])
        network_profile = network_profile_ops.create_or_update(
            resource_group, network_profile_name,
            profile_definition).result()
        group_ip_address = IpAddress(ports=ipports, type='Private')
    else:
        errormsg = '[{0}] Network type "{1}" is not supported. Please use either "Public" or "Private"'.format(
            resource_handler.name, network_type)
        log.debug(errormsg)
        raise NodeCreationError(None, errormsg)

    cg_network_profile = None
    if network_profile:
        cg_network_profile = ContainerGroupNetworkProfile(
            id=network_profile.id)
        self.created_resources['network_profile'] = network_profile_name

    group = ContainerGroup(location=location,
                           containers=[container],
                           os_type=res['os_type'],
                           ip_address=group_ip_address,
                           network_profile=cg_network_profile)
    # Create the container group
    self.aci_client.container_groups.create_or_update(
        res['resource_group'], container_group_name, group)
    return container_group_name
def _allocate_floating_ip(self, resource_handler, server):
    """
    Associate a randomly chosen unused floating IP with ``server``.

    Nothing is done unless the resource section contains ``floating_ip`` or
    a ``floating_ip_pool``. Up to 60 attempts are made with 5 seconds
    between them. After each association the floating IP list is read again
    to check that an address is really bound to this server. When no unused
    address exists, or all attempts fail, the server is deleted.

    :param resource_handler: Only its ``name`` attribute is read.
    :param server: Server object; ``id`` and ``add_floating_ip`` are used.
    :raises NodeCreationError: If no unused floating IP exists or the
        association keeps failing.
    """
    resource = self.resolved_node_definition['resource']
    pool = resource.get('floating_ip_pool', None)
    if pool is None and 'floating_ip' not in resource:
        return

    flip_waiting = 5
    flip_attempts = 60
    for attempts in range(1, flip_attempts + 1):
        unused_ips = []
        for addr in self.conn.floating_ips.list():
            if addr.instance_id is None and (not pool or pool == addr.pool):
                unused_ips.append(addr)
        if not unused_ips:
            if pool is None:
                error_msg = '[{0}] Cannot find unused floating ip address!'.format(
                    resource_handler.name)
            else:
                error_msg = '[{0}] Cannot find unused floating ip address in pool "{1}"!'.format(
                    resource_handler.name, pool)
            server = self.conn.servers.get(server.id)
            self.conn.servers.delete(server)
            raise NodeCreationError(None, error_msg)
        log.debug("[%s] List of unused floating ips: %s",
                  resource_handler.name, str([ip.ip for ip in unused_ips]))
        floating_ip = random.choice(unused_ips)
        try:
            log.debug(
                "[%s] Try associating floating ip (%s) to server (%s)...",
                resource_handler.name, floating_ip.ip, server.id)
            server.add_floating_ip(floating_ip)
            # Random pause before verifying the allocation.
            time.sleep(random.randint(1, 5))
            flips = self.conn.floating_ips.list()
            log.debug("[%s] List of floating IPs: %s", resource_handler.name,
                      flips)
            myallocation = [
                addr for addr in flips if addr.instance_id == server.id
            ]
            if myallocation:
                log.debug("ALLOCATION seemt to succeed: %r", myallocation[0])
            else:
                log.debug(
                    "SOMEONE took my ip meanwhile I was allocating it!")
                raise Exception
            log.debug(
                "[%s] Associating floating ip (%s) to node: success. Took %i seconds.",
                resource_handler.name, floating_ip.ip,
                (attempts - 1) * flip_waiting)
            break
        except Exception as e:
            log.debug(e)
            log.debug(
                "[%s] Associating floating ip (%s) to node failed. Retry after %i seconds...",
                resource_handler.name, floating_ip.ip, flip_waiting)
            time.sleep(flip_waiting)
    else:
        # Every attempt failed: give up and remove the server.
        error_msg = '[{0}] Gave up associating floating ip to node! Could not get it in {1} seconds."'.format(
            resource_handler.name, flip_attempts * flip_waiting)
        log.error(error_msg)
        server = self.conn.servers.get(server.id)
        self.conn.servers.delete(server)
        raise NodeCreationError(None, error_msg)
    return