class EC2Cluster(cluster_tools.ICluster):
    """Cluster connector for EC2-style clouds reached through boto."""

    # Maps the cloud's instance state names to the scheduler's VM states.
    VM_STATES = {
        "running": "Running",
        "pending": "Starting",
        "shutting-down": "Shutdown",
        "terminated": "Shutdown",
        "error": "Error",
    }

    # Value returned by methods of this class to signal failure.
    ERROR = 1

    # Single fallback instance type, used when no per-cloud entry matches.
    DEFAULT_INSTANCE_TYPE = (config.default_VMInstanceType
                             if config.default_VMInstanceType else "m1.small")
    # Per-cloud default instance types, parsed from the configuration.
    DEFAULT_INSTANCE_TYPE_LIST = _attr_list_to_dict(
        config.default_VMInstanceTypeList)

    def _get_connection(self):
        """Get a boto connection object to this cluster.

        The region name is the first entry of ``self.regions`` when that
        list is non-empty, otherwise the cluster name. Only the cloud types
        "AmazonEC2" and "Eucalyptus" are handled here.

        :return: a boto EC2 connection, or None when the cloud type is not
            handled or boto raised EC2ResponseError while connecting.
        """
        connection = None

        if len(self.regions) > 0:
            region_name = self.regions[0]
        else:
            region_name = self.name

        if self.cloud_type == "AmazonEC2":
            try:
                region = boto.ec2.regioninfo.RegionInfo(
                    name=region_name, endpoint=self.network_address)
                connection = boto.connect_ec2(
                    aws_access_key_id=self.access_key_id,
                    aws_secret_access_key=self.secret_access_key,
                    region=region)
                log.verbose("Created a connection to Amazon EC2")
            except boto.exception.EC2ResponseError as e:
                log.error("Couldn't connect to Amazon EC2 because: %s" %
                          e.error_message)

        elif self.cloud_type == "Eucalyptus":
            try:
                region = boto.ec2.regioninfo.RegionInfo(
                    name=region_name, endpoint=self.network_address)
                connection = boto.connect_ec2(
                    aws_access_key_id=self.access_key_id,
                    aws_secret_access_key=self.secret_access_key,
                    is_secure=self.secure_connection,
                    region=region,
                    port=8773,
                    path="/services/Eucalyptus",
                )
                log.verbose("Created a connection to Eucalyptus (%s)" %
                            self.name)
            except boto.exception.EC2ResponseError as e:
                log.error("Couldn't connect to Eucalyptus EC2 because: %s" %
                          e.error_message)

        # Hand the connection (or None on failure) back to the caller, as
        # the docstring promises; callers use the result directly.
        return connection

def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, vm_image, vm_mem, vm_cores, vm_storage, customization=None, pre_customization=None, vm_keepalive=0, instance_type="", maximum_price=0, job_per_core=False, securitygroup=[], key_name="", use_cloud_init=False, extra_userdata=[]): """Attempt to boot a new VM on the cluster.""" use_cloud_init = use_cloud_init or config_val.getboolean( 'global', 'use_cloud_init') log.verbose("Trying to boot %s on %s" % (vm_type, self.network_address)) if len(securitygroup) != 0: sec_group = [] for group in securitygroup: if group in self.security_groups: sec_group.append(group) if len(sec_group) == 0: log.debug( "No matching security groups - trying default config") sec_group = self.security_groups else: sec_group = self.security_groups try: if self.name in vm_image.keys(): vm_ami = vm_image[self.name] else: vm_ami = vm_image[self.network_address] except: log.debug("No AMI for %s, trying default", self.network_address) #try: # vm_ami = vm_image["default"] #except: #log.debug("No given default - trying global defaults") try: vm_default_ami = _attr_list_to_dict( config_val.get('job', 'default_VMAMI')) if self.name in vm_default_ami.keys(): vm_ami = vm_default_ami[self.name] else: vm_ami = vm_default_ami[self.network_address] except: try: vm_ami = vm_default_ami["default"] except: log.exception("Can't find a suitable AMI") self.failed_image_set.add(vm_ami) return try: if self.name in instance_type.keys(): i_type = instance_type[self.name] elif self.network_address in instance_type.keys(): i_type = instance_type[self.network_address] else: i_type = instance_type["default"] except: log.debug("No instance type for %s, trying default", self.name) #try: # i_type = instance_type["default"] #except: # if isinstance(instance_type, str): # i_type = instance_type # else: try: if self.name in self.DEFAULT_INSTANCE_TYPE_LIST.keys(): i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.name] else: i_type = self.DEFAULT_INSTANCE_TYPE_LIST[ 
self.network_address] except: log.debug( "No default instance type found for %s, trying single default", self.network_address) i_type = self.DEFAULT_INSTANCE_TYPE instance_type = i_type if key_name is None: key_name = self.key_name if customization: user_data = cloud_init_util.build_write_files_cloud_init( customization) else: user_data = "" if pre_customization: user_data = cloud_init_util.inject_customizations( pre_customization, user_data) elif use_cloud_init: user_data = cloud_init_util.inject_customizations([], user_data)[0] if len(extra_userdata) > 0: # need to use the multi-mime type functions user_data = cloud_init_util.build_multi_mime_message( [(user_data, 'cloud-config', 'cloud_conf.yaml')], extra_userdata) if not user_data: log.error("Problem building cloud-config user data.") return self.ERROR if self.cloud_type == "AmazonEC2" and vm_networkassoc != "public": log.debug( "You requested '%s' networking, but EC2 only supports 'public'", vm_networkassoc) addressing_type = "public" else: addressing_type = vm_networkassoc try: connection = self._get_connection() image = None if self.cloud_type != "Eucalyptus": image = connection.get_image(vm_ami) else: #HACK: for some reason Eucalyptus won't respond properly to # get_image("whateverimg"). 
Use a linear search until # this is fixed # This is Eucalyptus bug #495670 # https://bugs.launchpad.net/eucalyptus/+bug/495670 images = connection.get_all_images() for potential_match in images: if potential_match.id == vm_ami: image = potential_match break # Compress the user data to try and get under the limit user_data = utilities.gzip_userdata(user_data) if image: # don't request a spot instance if maximum_price is 0 or self.cloud_type == "OpenStack": try: reservation = image.run( 1, 1, key_name=key_name, addressing_type=addressing_type, user_data=user_data, placement=self.placement_zone, security_groups=sec_group, instance_type=instance_type) instance_id = reservation.instances[0].id log.debug("Booted VM %s", instance_id) except boto.exception.EC2ResponseError, e: log.exception( "There was a problem creating an EC2 instance: %s", e) return self.ERROR except Exception, e: log.exception( "There was an unexpected problem creating an EC2 instance: %s", e) return self.ERROR
def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc,
              vm_image, vm_mem, vm_cores, vm_storage, customization=None,
              vm_keepalive=0, instance_type="", job_per_core=False,
              securitygroup=[], key_name="", pre_customization=None,
              use_cloud_init=False, extra_userdata=[]):
    """Create a VM on OpenStack.

    :param vm_name: name recorded on the scheduler's VM object.
    :param vm_type: VM type recorded on the VM object.
    :param vm_user: user recorded on the VM object.
    :param vm_networkassoc: network name to attach; when empty the first
        entry of ``self.network_pools`` is tried instead.
    :param vm_image: mapping of cluster name / network address / 'default'
        to an image name or uuid.
    :param vm_mem: memory recorded on the VM object.
    :param vm_cores: core count recorded on the VM object.
    :param vm_storage: storage recorded on the VM object.
    :param customization: optional customization turned into user data.
    :param vm_keepalive: keep-alive for the VM; when falsy the cluster's
        ``keep_alive`` is used if set.
    :param instance_type: mapping of cluster name / network address /
        'default' to a flavor name or uuid; anything without ``keys()``
        (such as the default "") falls through to the configured defaults.
    :param job_per_core: recorded on the VM object.
    :param securitygroup: requested security groups; only those also in
        ``self.security_groups`` are used.
    :param key_name: key pair name; dropped when nova does not know it.
    :param pre_customization: optional items placed ahead of the user data.
    :param use_cloud_init: build cloud-init user data (also enabled by
        ``config.use_cloud_init``).
    :param extra_userdata: extra parts for a multi-part mime message.
    :return: 0 on success; ``self.ERROR`` when the name cannot be
        generated, the instance is not created or resource checkout fails;
        -4 when the nova endpoint is not found; None when no image or
        flavor could be resolved.
    """
    import novaclient.exceptions

    use_cloud_init = use_cloud_init or config.use_cloud_init
    nova = self._get_creds_nova()

    # Keep only the requested groups this cloud is configured with.
    if len(securitygroup) != 0:
        sec_group = [group for group in securitygroup
                     if group in self.security_groups]
        if len(sec_group) == 0:
            log.debug("No defined security groups for job - trying default value from cloud_resources.conf")
            sec_group = self.security_groups
    else:
        sec_group = self.security_groups
    log.debug("Using security group: %s" % str(sec_group))

    if key_name:
        # Drop a key name that nova does not know about.
        if not nova.keypairs.findall(name=key_name):
            key_name = ""
    else:
        key_name = self.key_name if self.key_name else ""

    # Build the user data handed to the instance.
    if customization:
        if not use_cloud_init:
            user_data = nimbus_xml.ws_optional(customization)
        else:
            user_data = cloud_init_util.build_write_files_cloud_init(customization)
    else:
        user_data = ""

    if pre_customization:
        if not use_cloud_init:
            for item in pre_customization:
                user_data = '\n'.join([item, user_data])
        else:
            user_data = cloud_init_util.inject_customizations(pre_customization, user_data)
    elif use_cloud_init:
        user_data = cloud_init_util.inject_customizations([], user_data)

    if len(extra_userdata) > 0:
        # need to use the multi-mime type functions
        user_data = cloud_init_util.build_multi_mime_message(
            [(user_data, 'cloud-config', 'cloud_conf.yaml')], extra_userdata)

    # Compress the user data to try and get under the limit
    user_data = utilities.gzip_userdata(user_data)

    # Resolve the image name: job-supplied mapping first, then the
    # configured defaults.
    try:
        if self.name in vm_image.keys():
            image = vm_image[self.name]
        elif self.network_address in vm_image.keys():
            image = vm_image[self.network_address]
        else:
            image = vm_image['default']
    except Exception:
        try:
            vm_default_ami = _attr_list_to_dict(config.default_VMAMI)
            if self.name in vm_default_ami.keys():
                image = vm_default_ami[self.name]
            else:
                image = vm_default_ami[self.network_address]
        except Exception:
            try:
                # Also fails (and is reported below) when the defaults
                # could not be parsed at all.
                image = vm_default_ami["default"]
            except Exception:
                log.exception("Can't find a suitable AMI")
                return

    # Look the image up by name first, then treat the value as a uuid.
    try:
        imageobj = nova.images.find(name=image)
    except novaclient.exceptions.EndpointNotFound:
        log.error("Endpoint not found, are your region settings correct for %s" % self.name)
        return -4
    except Exception as e:
        log.warning("Exception occurred while trying to fetch image via name: %s %s" % (image, e))
        try:
            imageobj = nova.images.get(image)
            log.debug("Got image via uuid: %s" % image)
        except novaclient.exceptions.EndpointNotFound:
            log.error("Endpoint not found, are your region settings correct for %s" % self.name)
            return -4
        except Exception as e:
            log.exception("Unable to fetch image via uuid: %s %s" % (image, e))
            self.failed_image_set.add(image)
            return

    # Resolve the flavor name the same way as the image name.
    try:
        if self.name in instance_type.keys():
            i_type = instance_type[self.name]
        elif self.network_address in instance_type.keys():
            i_type = instance_type[self.network_address]
        else:
            i_type = instance_type['default']
    except Exception:
        log.debug("No instance type for %s, trying default" % self.network_address)
        try:
            if self.name in self.DEFAULT_INSTANCE_TYPE_LIST.keys():
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.name]
            else:
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.network_address]
        except Exception:
            log.debug("No default instance type found for %s, trying single default" % self.network_address)
            i_type = self.DEFAULT_INSTANCE_TYPE

    try:
        flavor = nova.flavors.find(name=i_type)
    except Exception as e:
        log.warning("Exception occurred while trying to get flavor by name: %s - will attempt to use name value as a uuid." % e)
        try:
            flavor = nova.flavors.get(i_type)
            log.debug("Got flavor via uuid: %s" % i_type)
        except Exception as ex:
            log.error("Exception occurred trying to get flavor by uuid: %s" % ex)
            return
    self.flavor_set.add(flavor)

    # find the network id to use if more than one network
    if vm_networkassoc:
        network_name = vm_networkassoc
    elif self.network_pools and len(self.network_pools) > 0:
        network_name = self.network_pools[0]
    else:
        network_name = None

    netid = []
    if network_name:
        network = self._find_network(network_name)
        if network:
            netid = [{'net-id': network.id}]
        else:
            log.debug("Unable to find network named: %s on %s" % (network_name, self.name))

    # Need to get the rotating hostname from the google code to use for here.
    name = self._generate_next_name()
    if not name:
        # The original format string ended in a bare '%', which raised
        # ValueError instead of logging.
        log.debug("Unable to generate name for %s" % self.name)
        return self.ERROR

    instance = None
    try:
        instance = nova.servers.create(name=name, image=imageobj,
                                       flavor=flavor, key_name=key_name,
                                       availability_zone=self.placement_zone,
                                       nics=netid, userdata=user_data,
                                       security_groups=sec_group)
    except novaclient.exceptions.OverLimit as e:
        log.info("Unable to create VM without exceeded quota on %s: %s" % (self.name, e.message))
    except Exception as e:
        log.error("Unhandled exception while creating vm on %s: %s" % (self.name, e))

    if not instance:
        log.debug("Failed to create instance on %s" % self.name)
        return self.ERROR

    instance_id = instance.id
    # if job didn't set a keep_alive use the clouds default
    if not vm_keepalive and self.keep_alive:
        vm_keepalive = self.keep_alive

    new_vm = cluster_tools.VM(name=vm_name, id=instance_id, vmtype=vm_type,
                              user=vm_user,
                              clusteraddr=self.network_address,
                              hostname=''.join([name, self.vm_domain_name]),
                              cloudtype=self.cloud_type,
                              network=vm_networkassoc,
                              image=vm_image, flavor=flavor.name,
                              memory=vm_mem, cpucores=vm_cores,
                              storage=vm_storage,
                              keep_alive=vm_keepalive,
                              job_per_core=job_per_core)

    try:
        self.resource_checkout(new_vm)
        log.info("Launching 1 VM: %s on %s under tenant: %s" % (instance_id, self.name, self.tenant_name))
    except Exception:
        log.error("Unexpected Error checking out resources when creating a VM. Programming error?")
        self.vm_destroy(new_vm, reason="Failed Resource checkout")
        return self.ERROR

    self.vms.append(new_vm)
    return 0
class GoogleComputeEngineCluster(cluster_tools.ICluster):
    """
    Connector class for Google Compute Engine.
    """

    # Maps GCE instance status strings to the scheduler's VM states.
    VM_STATES = {
        "RUNNING": "Running",
        "Starting": "Starting",
        "shutting-down": "Shutdown",
        "terminated": "Shutdown",
        "PROVISIONING": "Provisioning",
        "error": "Error",
    }

    GCE_SCOPE = 'https://www.googleapis.com/auth/compute'

    API_VERSION = 'v1'
    GCE_URL = 'https://www.googleapis.com/compute/%s/projects/' % (API_VERSION)

    DEFAULT_ZONE = 'us-central1-b'  # will need to be option in job
    DEFAULT_MACHINE_TYPE = 'n1-standard-1'  # option specified in job config
    DEFAULT_INSTANCE_TYPE_LIST = _attr_list_to_dict(
        config.config_options.get('job', 'default_VMInstanceTypeList'))
    DEFAULT_IMAGE = 'cloudscheduler-centos-9'
    DEFAULT_ROOT_PD_NAME = 'hepgc-uvic-root-pd'

    DEFAULT_NETWORK = 'default'  # job option setup
    DEFAULT_SERVICE_EMAIL = 'default'
    DEFAULT_SCOPES = ['https://www.googleapis.com/auth/devstorage.full_control',
                      'https://www.googleapis.com/auth/compute']

    def __init__(self, name="Dummy Cluster", host="localhost",
                 cloud_type="Dummy", memory=[], max_vm_mem=-1, networks=[],
                 vm_slots=0, cpu_cores=0, storage=0, boot_timeout=None,
                 auth_dat_file=None, secret_file=None, security_group=None,
                 project_id=None, enabled=True, priority=0,
                 total_cpu_cores=-1, keep_alive=0,):
        """
        Google Compute constructor

        Runs the OAuth 2.0 flow, builds the compute service object and then
        calls the ICluster constructor. When ``project_id`` is falsy the
        constructor returns early, before any of those steps.

        :param name: forwarded to ICluster.__init__
        :param host: forwarded to ICluster.__init__
        :param cloud_type: forwarded to ICluster.__init__
        :param memory: forwarded to ICluster.__init__
        :param max_vm_mem: forwarded to ICluster.__init__
        :param networks: forwarded to ICluster.__init__
        :param vm_slots: forwarded to ICluster.__init__
        :param cpu_cores: forwarded to ICluster.__init__
        :param storage: forwarded to ICluster.__init__
        :param boot_timeout: forwarded to ICluster.__init__
        :param auth_dat_file: path given to Storage() for stored credentials
        :param secret_file: path given to flow_from_clientsecrets()
        :param security_group: stored on the instance as security_group
        :param project_id: GCE project id used to build the project URL
        :param enabled: forwarded to ICluster.__init__
        :param priority: forwarded to ICluster.__init__
        :param total_cpu_cores: stored on the instance
        :param keep_alive: default keep-alive, stored on the instance and
            forwarded to ICluster.__init__
        """
        log.debug("Init GCE cores %s, storage %s", cpu_cores, storage)
        self.gce_hostname_prefix = 'gce-cs-vm'
        self.security_group = security_group
        self.auth_dat_file_path = auth_dat_file
        self.secret_file_path = secret_file
        self.project_id = project_id
        self.total_cpu_cores = total_cpu_cores
        self.keep_alive = keep_alive
        if not project_id:
            return None

        # Perform OAuth 2.0 authorization.
        flow = flow_from_clientsecrets(self.secret_file_path,
                                       scope=self.GCE_SCOPE)
        auth_storage = Storage(self.auth_dat_file_path)
        credentials = auth_storage.get()

        if credentials is None or credentials.invalid:
            credentials = run_flow(flow, auth_storage)
        http = httplib2.Http()
        self.auth_http = credentials.authorize(http)

        # Build service object
        self.gce_service = build('compute', self.API_VERSION)
        self.project_url = '%s%s' % (self.GCE_URL, self.project_id)

        # Call super class's init. The caller's keep_alive is forwarded; the
        # previous hard-coded 0 discarded the argument.
        # NOTE(review): ICluster.__init__ is not visible here - confirm it
        # accepts and stores keep_alive as expected.
        cluster_tools.ICluster.__init__(self, name=name, host=host,
                                        cloud_type=cloud_type,
                                        memory=memory,
                                        max_vm_mem=max_vm_mem,
                                        networks=networks,
                                        vm_slots=vm_slots,
                                        cpu_cores=cpu_cores,
                                        storage=storage,
                                        boot_timeout=boot_timeout,
                                        enabled=enabled, priority=priority,
                                        keep_alive=keep_alive,)

    def vm_create(self, vm_type, vm_user, vm_networkassoc, vm_image, vm_mem,
                  vm_cores, vm_storage, customization=None, vm_keepalive=0,
                  instance_type="", job_per_core=False,
                  pre_customization=None, use_cloud_init=False,
                  extra_userdata=[]):
        """
        Create a VM on GCE.

        A root persistent disk is created first from the snapshot named by
        the resolved image, then the instance is inserted with that disk.

        :param vm_type: recorded on the VM object
        :param vm_user: recorded on the VM object
        :param vm_networkassoc: recorded on the VM object
        :param vm_image: mapping of network address / cluster name /
            "default" to a snapshot name
        :param vm_mem: recorded on the VM object
        :param vm_cores: recorded on the VM object
        :param vm_storage: size in GB of the root disk; recorded on the VM
        :param customization: optional customization turned into cloud-init
            user data
        :param vm_keepalive: keep-alive for the VM; when falsy the
            cluster's keep_alive is used if set
        :param instance_type: mapping of cluster name / network address to
            a machine type
        :param job_per_core: recorded on the VM object
        :param pre_customization: optional items injected into the user data
        :param use_cloud_init: also enabled by the global configuration
        :param extra_userdata: extra parts for a multi-part mime message
        :return: 0 on success; 1 when no user data could be built;
            self.ERROR when resource checkout fails; None when the image
            cannot be resolved, the disk cannot be created or the insert
            call yields no target id
        """
        # Resolve the image. vm_ami stays None both when the lookup raises
        # and when neither key is present, so the "default" entry is tried
        # in either case (previously the second case left vm_ami unbound).
        vm_ami = None
        try:
            if self.network_address in vm_image.keys():
                vm_ami = vm_image[self.network_address]
            elif self.name in vm_image.keys():
                vm_ami = vm_image[self.name]
        except Exception:
            pass
        if vm_ami is None:
            log.debug("No AMI for %s, trying default", self.network_address)
            try:
                vm_ami = vm_image["default"]
            except Exception:
                log.exception("Can't find a suitable AMI")
                return

        # Resolve the machine type: job mapping, configured per-cloud
        # defaults, then the class-wide default.
        try:
            if self.name in instance_type.keys():
                i_type = instance_type[self.name]
            else:
                i_type = instance_type[self.network_address]
        except Exception:
            log.debug("No instance type for %s, trying default", self.network_address)
            try:
                if self.name in self.DEFAULT_INSTANCE_TYPE_LIST.keys():
                    i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.name]
                else:
                    i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.network_address]
            except Exception:
                log.debug("No default instance type found for %s, trying single default", self.network_address)
                i_type = self.DEFAULT_MACHINE_TYPE
        vm_instance_type = i_type

        if vm_image:
            vm_image_name = vm_ami
        else:
            vm_image_name = self.DEFAULT_IMAGE

        # Ensures the VM's Root Disks are Unique
        self.DEFAULT_ROOT_PD_NAME = '%s-%s' % ('hepgc-uvic-root-pd', self.generate_next_instance_name())

        # Construct URLs
        disk_url = '%s%s/zones/%s/disks/%s' % (self.GCE_URL, self.project_id, self.DEFAULT_ZONE, self.DEFAULT_ROOT_PD_NAME)
        machine_type_url = '%s/zones/%s/machineTypes/%s' % (
            self.project_url, self.DEFAULT_ZONE, vm_instance_type)
        network_url = '%s/global/networks/%s' % (self.project_url, self.DEFAULT_NETWORK)

        # Construct the request body
        disk = {
            'name': self.DEFAULT_ROOT_PD_NAME,
            'sourceSnapshot': 'https://www.googleapis.com/compute/v1/projects/atlasgce/global/snapshots/%s' % vm_image_name,
            'sizeGb': vm_storage
        }

        # Create the root pd
        try:
            request = self.gce_service.disks().insert(project=self.project_id, body=disk, zone=self.DEFAULT_ZONE)
            response = request.execute(http=self.auth_http)
            response = self._blocking_call(self.gce_service, self.auth_http, response)
        except Exception:
            log.exception('Error Trying to create disk, one already exists ... returning ')
            return

        use_cloud_init = use_cloud_init or config.config_options.getboolean('global', 'use_cloud_init')
        if customization:
            user_data = cloud_init_util.build_write_files_cloud_init(customization)
        else:
            user_data = ""

        if pre_customization:
            user_data = cloud_init_util.inject_customizations(pre_customization, user_data)
        elif use_cloud_init:
            user_data = cloud_init_util.inject_customizations([], user_data)[0]
        if len(extra_userdata) > 0:
            # need to use the multi-mime type functions
            user_data = cloud_init_util.build_multi_mime_message([(user_data, 'cloud-config')], extra_userdata)
            if not user_data:
                log.error("Problem building cloud-config user data.")
                return 1

        # Compress the user data to try and get under the limit
        user_data = utilities.gzip_userdata(user_data)

        next_instance_name = self.generate_next_instance_name()

        instance = {
            'name': next_instance_name,
            'machineType': machine_type_url,
            'disks': [{
                'source': disk_url,
                'boot': 'true',
                'autoDelete': 'true',
                'type': 'PERSISTENT',
            }],
            'networkInterfaces': [{
                'accessConfigs': [{
                    'type': 'ONE_TO_ONE_NAT',
                    'name': 'External NAT'
                }],
                'network': network_url
            }],
            'serviceAccounts': [{
                'email': self.DEFAULT_SERVICE_EMAIL,
                'scopes': self.DEFAULT_SCOPES
            }],
            'metadata': {
                'items': [
                    {
                        'key': 'user-data',
                        'value': user_data,
                    },
                ]
            }
        }

        # Create the instance
        response = None
        request = self.gce_service.instances().insert(
            project=self.project_id, body=instance, zone=self.DEFAULT_ZONE)
        try:
            response = request.execute(http=self.auth_http)
            response = self._blocking_call(self.gce_service, self.auth_http, response)
        except Exception as e:
            log.error("Error creating VM on gce: %s", e)

        # Without a target id there is no instance to track.
        if not response or 'targetId' not in response:
            return
        target_id = response['targetId']

        # if job didn't set a keep alive use the clouds default
        if not vm_keepalive and self.keep_alive:
            vm_keepalive = self.keep_alive

        new_vm = cluster_tools.VM(name=next_instance_name, vmtype=vm_type,
                                  user=vm_user,
                                  clusteraddr=self.network_address,
                                  id=target_id, cloudtype=self.cloud_type,
                                  network=vm_networkassoc,
                                  hostname=self.construct_hostname(next_instance_name),
                                  image=vm_image, flavor=vm_instance_type,
                                  memory=vm_mem, cpucores=vm_cores,
                                  storage=vm_storage,
                                  keep_alive=vm_keepalive,
                                  job_per_core=job_per_core)

        try:
            self.resource_checkout(new_vm)
        except Exception:
            log.exception("Unexpected Error checking out resources when creating a VM. Programming error?")
            self.vm_destroy(new_vm, reason="Failed Resource checkout", return_resources=False)
            return self.ERROR

        self.vms.append(new_vm)
        return 0
def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, vm_cpuarch,
              vm_image, vm_mem, vm_cores, vm_storage, customization=None,
              vm_keepalive=0, instance_type="", maximum_price=0,
              job_per_core=False, securitygroup=[], key_name=""):
    """Attempt to boot a new VM on the cluster.

    Boots a regular instance when ``maximum_price`` is 0, otherwise
    requests a spot instance priced at ``maximum_price / 100``.

    :param vm_name: name recorded on the VM object.
    :param vm_type: VM type recorded on the VM object.
    :param vm_user: user recorded on the VM object.
    :param vm_networkassoc: requested addressing type; forced to "public"
        when the cloud type is "AmazonEC2".
    :param vm_cpuarch: CPU architecture recorded on the VM object.
    :param vm_image: mapping of cluster name / network address to an AMI.
    :param vm_mem: memory; must match an entry found by find_mementry().
    :param vm_cores: core count recorded on the VM object.
    :param vm_storage: storage recorded on the VM object.
    :param customization: optional customization turned into user data.
    :param vm_keepalive: keep-alive recorded on the VM object.
    :param instance_type: mapping of cluster name / network address to an
        instance type; other values fall through to the defaults.
    :param maximum_price: spot price ceiling; 0 means no spot request.
    :param job_per_core: recorded on the VM object.
    :param securitygroup: requested security groups; only those also in
        ``self.security_groups`` are used.
    :param key_name: key pair name; None selects the cluster's key name.
    :return: 0 on success, ``self.ERROR`` on failure, None when no AMI
        could be resolved.
    """
    log.verbose("Trying to boot %s on %s" % (vm_type, self.network_address))

    # Keep only the requested groups this cloud is configured with; fall
    # back to what is in cloud_resources.conf rather than adding "default".
    if len(securitygroup) != 0:
        sec_group = [group for group in securitygroup
                     if group in self.security_groups]
        if len(sec_group) == 0:
            log.warning("No matching security groups - trying default config")
            sec_group = self.security_groups
    else:
        sec_group = self.security_groups

    # Resolve the AMI: job mapping first, then the configured defaults.
    try:
        if self.name in vm_image.keys():
            vm_ami = vm_image[self.name]
        else:
            vm_ami = vm_image[self.network_address]
    except Exception:
        log.debug("No AMI for %s, trying default" % self.network_address)
        try:
            vm_default_ami = _attr_list_to_dict(config.default_VMAMI)
            if self.name in vm_default_ami.keys():
                vm_ami = vm_default_ami[self.name]
            else:
                vm_ami = vm_default_ami[self.network_address]
        except Exception:
            try:
                vm_ami = vm_default_ami["default"]
            except Exception:
                log.exception("Can't find a suitable AMI")
                return

    # Resolve the instance type the same way.
    try:
        if self.name in instance_type.keys():
            i_type = instance_type[self.name]
        else:
            i_type = instance_type[self.network_address]
    except Exception:
        log.debug("No instance type for %s, trying default" % self.network_address)
        try:
            if self.name in self.DEFAULT_INSTANCE_TYPE_LIST.keys():
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.name]
            else:
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.network_address]
        except Exception:
            log.debug("No default instance type found for %s, trying single default" % self.network_address)
            i_type = self.DEFAULT_INSTANCE_TYPE
    instance_type = i_type

    if key_name is None:
        key_name = self.key_name

    if customization:
        user_data = nimbus_xml.ws_optional(customization)
    else:
        user_data = ""

    if self.cloud_type == "AmazonEC2" and vm_networkassoc != "public":
        log.debug("You requested '%s' networking, but EC2 only supports 'public'" % vm_networkassoc)
        addressing_type = "public"
    else:
        addressing_type = vm_networkassoc

    # Only set when a spot request is made.
    spot_id = None

    try:
        connection = self._get_connection()
        image = None
        if self.cloud_type != "Eucalyptus":
            image = connection.get_image(vm_ami)
        else:
            # HACK: for some reason Eucalyptus won't respond properly to
            #       get_image("whateverimg"). Use a linear search until
            #       this is fixed
            # This is Eucalyptus bug #495670
            # https://bugs.launchpad.net/eucalyptus/+bug/495670
            for potential_match in connection.get_all_images():
                if potential_match.id == vm_ami:
                    image = potential_match
                    break

        if not image:
            log.error("Couldn't find image %s on %s" % (vm_image, self.name))
            return self.ERROR

        # Compare by value: identity comparison against an int literal is
        # not reliable.
        if maximum_price == 0:
            # don't request a spot instance
            try:
                reservation = image.run(1, 1, key_name=key_name,
                                        addressing_type=addressing_type,
                                        user_data=user_data,
                                        placement=self.placement_zone,
                                        security_groups=sec_group,
                                        instance_type=instance_type)
                instance_id = reservation.instances[0].id
                log.debug("Booted VM %s" % instance_id)
            except Exception:
                log.exception("There was a problem creating an EC2 instance...")
                return self.ERROR
        else:
            # get a spot instance of no more than maximum_price
            try:
                price_in_dollars = str(float(maximum_price) / 100)
                # Uses the local sec_group computed above; the previous
                # self.sec_group does not exist and its AttributeError was
                # misreported as an unsupported boto version.
                reservation = connection.request_spot_instances(
                    price_in_dollars,
                    image.id,
                    key_name=key_name,
                    user_data=user_data,
                    placement=self.placement_zone,
                    addressing_type=addressing_type,
                    security_groups=sec_group,
                    instance_type=instance_type)
                spot_id = str(reservation[0].id)
                instance_id = ""
                log.debug("Reserved instance %s at no more than %s" % (spot_id, price_in_dollars))
            except AttributeError:
                log.exception("Your version of boto doesn't seem to support "
                              "spot instances. You need at least 1.9")
                return self.ERROR
            except Exception:
                log.exception("Problem creating an EC2 spot instance...")
                return self.ERROR

    except Exception:
        log.exception("Problem creating EC2 instance on on %s" % self.name)
        return self.ERROR

    vm_mementry = self.find_mementry(vm_mem)
    if vm_mementry < 0:
        # TODO: this is kind of pointless with EC2...
        log.debug("Cluster memory list has no sufficient memory " +
                  "entries (Not supposed to happen). Returning error.")
        return self.ERROR
    log.verbose("vm_create - Memory entry found in given cluster: %d" % vm_mementry)

    new_vm = cluster_tools.VM(name=vm_name, id=instance_id, vmtype=vm_type,
                              user=vm_user,
                              clusteraddr=self.network_address,
                              cloudtype=self.cloud_type,
                              network=vm_networkassoc,
                              cpuarch=vm_cpuarch,
                              image=vm_image,
                              memory=vm_mem, mementry=vm_mementry,
                              cpucores=vm_cores, storage=vm_storage,
                              keep_alive=vm_keepalive,
                              job_per_core=job_per_core)

    if spot_id is not None:
        new_vm.spot_id = spot_id
    else:
        log.verbose("No spot ID to add to VM %s" % instance_id)

    try:
        self.resource_checkout(new_vm)
    except Exception:
        log.exception("Unexpected Error checking out resources when creating a VM. Programming error?")
        self.vm_destroy(new_vm, reason="Failed Resource checkout")
        return self.ERROR

    self.vms.append(new_vm)
    return 0
def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc,
              vm_image, vm_mem, vm_cores, vm_storage, customization=None,
              pre_customization=None, vm_keepalive=0, instance_type="",
              maximum_price=0, job_per_core=False, securitygroup=[],
              key_name="", use_cloud_init=False, extra_userdata=[]):
    """Attempt to boot a new VM on the cluster.

    Launches exactly one instance through the client's ``run_instances``
    call and registers it with the cluster.

    :param vm_name: name recorded on the VM object.
    :param vm_type: VM type recorded on the VM object.
    :param vm_user: user recorded on the VM object.
    :param vm_networkassoc: network recorded on the VM object.
    :param vm_image: mapping of cluster name / network address to an AMI.
    :param vm_mem: memory recorded on the VM object.
    :param vm_cores: core count recorded on the VM object.
    :param vm_storage: storage recorded on the VM object.
    :param customization: optional customization turned into user data.
    :param pre_customization: optional items placed ahead of the user data.
    :param vm_keepalive: keep-alive for the VM; when falsy the cluster's
        ``keep_alive`` is used if set.
    :param instance_type: mapping of cluster name / network address /
        "default" to an instance type; other values fall through to the
        configured defaults.
    :param maximum_price: accepted but not used by this implementation.
    :param job_per_core: recorded on the VM object.
    :param securitygroup: requested security groups; only those also in
        ``self.security_groups`` are used.
    :param key_name: key pair name; empty or None selects the cluster's.
    :param use_cloud_init: build cloud-init user data (also enabled by
        ``config.use_cloud_init``).
    :param extra_userdata: extra parts for a multi-part mime message.
    :return: 0 on success, ``self.ERROR`` on failure, None when no AMI
        could be resolved.
    """
    use_cloud_init = use_cloud_init or config.use_cloud_init
    log.verbose("Trying to boot %s on %s" % (vm_type, self.network_address))

    # Keep only the requested groups this cloud is configured with; fall
    # back to what is in cloud_resources.conf rather than adding "default".
    if len(securitygroup) != 0:
        sec_group = [group for group in securitygroup
                     if group in self.security_groups]
        if len(sec_group) == 0:
            log.debug("No matching security groups - trying default config")
            sec_group = self.security_groups
    else:
        sec_group = self.security_groups

    # Resolve the AMI: job mapping first, then the configured defaults.
    try:
        if self.name in vm_image.keys():
            vm_ami = vm_image[self.name]
        else:
            vm_ami = vm_image[self.network_address]
    except Exception:
        log.debug("No AMI for %s, trying default" % self.network_address)
        try:
            vm_default_ami = _attr_list_to_dict(config.default_VMAMI)
            if self.name in vm_default_ami.keys():
                vm_ami = vm_default_ami[self.name]
            else:
                vm_ami = vm_default_ami[self.network_address]
        except Exception:
            try:
                vm_ami = vm_default_ami["default"]
            except Exception:
                # No AMI was resolved, so there is nothing to record in
                # failed_image_set; the previous add(vm_ami) here always
                # raised because vm_ami was never bound on this path.
                log.exception("Can't find a suitable AMI")
                return

    # Resolve the instance type the same way.
    try:
        if self.name in instance_type.keys():
            i_type = instance_type[self.name]
        elif self.network_address in instance_type.keys():
            i_type = instance_type[self.network_address]
        else:
            i_type = instance_type["default"]
    except Exception:
        log.debug("No instance type for %s, trying default" % self.network_address)
        try:
            if self.name in self.DEFAULT_INSTANCE_TYPE_LIST.keys():
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.name]
            else:
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.network_address]
        except Exception:
            log.debug("No default instance type found for %s, trying single default" % self.network_address)
            i_type = self.DEFAULT_INSTANCE_TYPE
    instance_type = i_type

    if key_name == "" or key_name is None:
        key_name = self.key_name if self.key_name else ""

    # Build the user data handed to the instance.
    if customization:
        if not use_cloud_init:
            user_data = nimbus_xml.ws_optional(customization)
        else:
            user_data = cloud_init_util.build_write_files_cloud_init(customization)
    else:
        user_data = ""

    if pre_customization:
        if not use_cloud_init:
            for item in pre_customization:
                user_data = '\n'.join([item, user_data])
        else:
            # NOTE(review): the branch below indexes the result with [0]
            # but this one does not - confirm which form
            # inject_customizations expects.
            user_data = cloud_init_util.inject_customizations(pre_customization, user_data)
    elif use_cloud_init:
        user_data = cloud_init_util.inject_customizations([], user_data)[0]

    if len(extra_userdata) > 0:
        # need to use the multi-mime type functions
        user_data = cloud_init_util.build_multi_mime_message(
            [(user_data, 'cloud-config', 'cloud_conf.yaml')], extra_userdata)

    # The addressing type is not passed to run_instances; only the notice
    # is kept.
    if self.cloud_type == "AmazonEC2" and vm_networkassoc != "public":
        log.debug("You requested '%s' networking, but EC2 only supports 'public'" % vm_networkassoc)

    user_data = utilities.gzip_userdata(user_data)

    try:
        client = self._get_connection()
        # will need to figure out how PlacementGroups will work still
        resp = client.run_instances(ImageId=vm_ami, MinCount=1, MaxCount=1,
                                    InstanceType=instance_type,
                                    UserData=user_data, KeyName=key_name,
                                    SecurityGroups=sec_group)
    except Exception as e:
        log.error("Problem creating instance %s" % e.__dict__)
        return self.ERROR

    # if job didn't set a keep_alive use the clouds default
    if not vm_keepalive and self.keep_alive:
        vm_keepalive = self.keep_alive

    if 'Instances' not in resp:
        return self.ERROR
    new_vm_id = resp['Instances'][0]['InstanceId']

    new_vm = cluster_tools.VM(name=vm_name, id=new_vm_id, vmtype=vm_type,
                              user=vm_user,
                              clusteraddr=self.network_address,
                              cloudtype=self.cloud_type,
                              network=vm_networkassoc,
                              image=vm_ami, flavor=instance_type,
                              memory=vm_mem, cpucores=vm_cores,
                              storage=vm_storage,
                              keep_alive=vm_keepalive,
                              job_per_core=job_per_core)

    try:
        self.resource_checkout(new_vm)
    except Exception:
        log.exception("Unexpected Error checking out resources when creating a VM. Programming error?")
        self.vm_destroy(new_vm, reason="Failed Resource checkout", return_resources=False)
        return self.ERROR

    self.vms.append(new_vm)
    return 0
class GoogleComputeEngineCluster(cluster_tools.ICluster):
    """Cluster connector that boots VMs through the Google Compute Engine API.

    Instances are inserted into DEFAULT_ZONE of the configured project and
    attached to DEFAULT_NETWORK. Requests are authorised with OAuth 2.0
    credentials loaded from (or stored into) the auth data file.
    """

    GCE_SCOPE = 'https://www.googleapis.com/auth/compute'
    API_VERSION = 'v1beta15'
    GCE_URL = 'https://www.googleapis.com/compute/%s/projects/' % (API_VERSION)

    DEFAULT_ZONE = 'us-central1-a'  # will need to be option in job
    DEFAULT_MACHINE_TYPE = 'n1-standard-1-d'  # option specified in job config
    DEFAULT_INSTANCE_TYPE_LIST = _attr_list_to_dict(
        config.default_VMInstanceTypeList)
    DEFAULT_IMAGE = 'condorimagebase'
    DEFAULT_NETWORK = 'default'  # job option setup
    DEFAULT_SERVICE_EMAIL = 'default'
    DEFAULT_SCOPES = [
        'https://www.googleapis.com/auth/devstorage.full_control',
        'https://www.googleapis.com/auth/compute'
    ]

    def __init__(self, name="Dummy Cluster", host="localhost",
                 cloud_type="Dummy", memory=[], max_vm_mem=-1, cpu_archs=[],
                 networks=[], vm_slots=0, cpu_cores=0, storage=0,
                 hypervisor='xen', boot_timeout=None, auth_dat_file=None,
                 secret_file=None, security_group=None, project_id=None):
        """Authorise against GCE and initialise the generic cluster state.

        auth_dat_file  -- path of the OAuth credential storage file
        secret_file    -- path of the OAuth client-secrets file
        security_group -- stored on the instance; not used by this class
        project_id     -- GCE project the instances are created in

        The remaining arguments are forwarded unchanged to
        cluster_tools.ICluster.__init__.

        NOTE: when project_id is falsy the constructor returns early. No
        authorisation is performed and ICluster.__init__ is NOT called, so
        the object is only partially initialised.
        """
        self.gce_hostname_prefix = 'gce-cs-vm'
        self.gce_hostname_counter = 0
        self.security_group = security_group
        self.auth_dat_file_path = auth_dat_file
        self.secret_file_path = secret_file
        self.project_id = project_id
        if not project_id:
            return

        # Perform OAuth 2.0 authorization.
        flow = flow_from_clientsecrets(self.secret_file_path,
                                       scope=self.GCE_SCOPE)
        auth_storage = Storage(self.auth_dat_file_path)
        credentials = auth_storage.get()
        if credentials is None or credentials.invalid:
            # No usable stored credentials: run the flow to obtain new ones.
            credentials = run(flow, auth_storage)
        http = httplib2.Http()
        self.auth_http = credentials.authorize(http)

        # Build service object
        self.gce_service = build('compute', self.API_VERSION)
        self.project_url = self.GCE_URL + self.project_id

        # Call super class's init
        cluster_tools.ICluster.__init__(
            self, name=name, host=host, cloud_type=cloud_type, memory=memory,
            max_vm_mem=max_vm_mem, cpu_archs=cpu_archs, networks=networks,
            vm_slots=vm_slots, cpu_cores=cpu_cores, storage=storage,
            hypervisor=hypervisor, boot_timeout=boot_timeout)

    def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc,
                  vm_cpuarch, vm_image, vm_mem, vm_cores, vm_storage,
                  customization=None, vm_keepalive=0, instance_type="",
                  maximum_price=0, job_per_core=False, securitygroup=[]):
        """Insert one instance on GCE and register it with this cluster.

        vm_image      -- mapping of cluster address (or "default") to image
        instance_type -- mapping of cluster name/address to machine type;
                         any other value falls back to
                         DEFAULT_INSTANCE_TYPE_LIST, then
                         DEFAULT_MACHINE_TYPE
        customization -- passed to nimbus_xml.ws_optional to build the
                         'user-data' metadata item

        vm_name, maximum_price and securitygroup are accepted but not used
        here; the VM is registered under a generated instance name.

        Returns 0 on success, self.ERROR when no memory entry fits or the
        resource checkout fails, and None when no image can be resolved or
        GCE returns no 'targetId'.
        """
        # Resolve the image: per-cluster entry first, then "default".
        try:
            vm_ami = vm_image[self.network_address]
        except Exception:
            log.debug("No AMI for %s, trying default" % self.network_address)
            try:
                vm_ami = vm_image["default"]
            except Exception:
                log.exception("Can't find a suitable AMI")
                return

        # Resolve the machine type: job value, configured list, class default.
        # instance_type defaults to "" (no .keys()), which lands in the
        # fallback branch on purpose.
        try:
            if self.name in instance_type.keys():
                i_type = instance_type[self.name]
            else:
                i_type = instance_type[self.network_address]
        except Exception:
            log.debug("No instance type for %s, trying default" %
                      self.network_address)
            try:
                if self.name in self.DEFAULT_INSTANCE_TYPE_LIST.keys():
                    i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.name]
                else:
                    i_type = self.DEFAULT_INSTANCE_TYPE_LIST[
                        self.network_address]
            except Exception:
                log.debug(
                    "No default instance type found for %s, trying single default"
                    % self.network_address)
                i_type = self.DEFAULT_MACHINE_TYPE

        if vm_image:
            vm_image_name = vm_ami
        else:
            vm_image_name = self.DEFAULT_IMAGE

        # Construct URLs
        image_url = '%s%s/global/images/%s' % (self.GCE_URL, self.project_id,
                                               vm_image_name)
        # Fix: this used the undefined name vm_instance_type (NameError on
        # every call); the resolved machine type is i_type.
        machine_type_url = '%s/zones/%s/machineTypes/%s' % (
            self.project_url, self.DEFAULT_ZONE, i_type)
        network_url = '%s/global/networks/%s' % (self.project_url,
                                                 self.DEFAULT_NETWORK)

        if customization:
            user_data = nimbus_xml.ws_optional(customization)
        else:
            user_data = ""

        next_instance_name = self.generate_next_instance_name()

        # Construct the request body
        instance = {
            'name': next_instance_name,
            'machineType': machine_type_url,
            'image': image_url,
            'networkInterfaces': [{
                'accessConfigs': [{
                    'type': 'ONE_TO_ONE_NAT',
                    'name': 'External NAT'
                }],
                'network': network_url
            }],
            'serviceAccounts': [{
                'email': self.DEFAULT_SERVICE_EMAIL,
                'scopes': self.DEFAULT_SCOPES
            }],
            'metadata': {
                'items': [
                    {
                        'key': 'user-data',
                        'value': user_data,
                    },
                ]
            }
        }

        # Create the instance
        response = None
        request = self.gce_service.instances().insert(
            project=self.project_id, body=instance, zone=self.DEFAULT_ZONE)
        try:
            response = request.execute(self.auth_http)
            response = self._blocking_call(self.gce_service, self.auth_http,
                                           response)
        except Exception as e:
            log.error("Error creating VM on gce: %s" % e)

        # Without a targetId there is nothing to track.
        if not response or 'targetId' not in response:
            return
        target_id = response['targetId']

        vm_mementry = self.find_mementry(vm_mem)
        if vm_mementry < 0:
            # TODO: this is kind of pointless with EC2..., but the resource
            # code depends on it
            log.debug("Cluster memory list has no sufficient memory " +
                      "entries (Not supposed to happen). Returning error.")
            return self.ERROR

        new_vm = cluster_tools.VM(
            name=next_instance_name, vmtype=vm_type, user=vm_user,
            clusteraddr=self.network_address, id=target_id,
            cloudtype=self.cloud_type, network=vm_networkassoc,
            hostname=self.construct_hostname(next_instance_name),
            cpuarch=vm_cpuarch, image=vm_image, mementry=vm_mementry,
            memory=vm_mem, cpucores=vm_cores, storage=vm_storage,
            keep_alive=vm_keepalive, job_per_core=job_per_core)

        try:
            self.resource_checkout(new_vm)
        except Exception:
            log.exception(
                "Unexpected Error checking out resources when creating a VM. Programming error?"
            )
            self.vm_destroy(new_vm, reason="Failed Resource checkout")
            return self.ERROR

        self.vms.append(new_vm)
        return 0
def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, vm_image,
              vm_mem, vm_cores, vm_storage, customization=None,
              vm_keepalive=0, instance_type="", job_per_core=False,
              securitygroup=None, key_name="", pre_customization=None,
              use_cloud_init=False, extra_userdata=None):
    """Boot a single server through nova and track it on this cluster.

    The security groups, key pair, user data, image, flavor and network
    are resolved in that order before the server is created.

    Returns 0 on success and self.ERROR when the multi-part user data
    comes back empty, no hostname can be generated, the server cannot be
    created, or the resource checkout fails. Returns -4 when nova reports
    EndpointNotFound while fetching the image. Returns None when no image
    or flavor can be resolved, or when fetching the image fails for any
    other reason.
    """
    import novaclient.exceptions

    use_cloud_init = use_cloud_init or config_val.getboolean(
        'global', 'use_cloud_init')
    nova = self._get_creds_nova_updated()

    # Keep only the requested groups this cloud knows about; fall back to
    # the cloud's own list when nothing was requested or nothing matched.
    sec_group = self.security_groups
    if securitygroup:
        sec_group = [grp for grp in securitygroup
                     if grp in self.security_groups]
        if not sec_group:
            log.debug("No defined security groups for job - "
                      "trying default value from cloud_resources.conf")
            sec_group = self.security_groups
    log.debug("Using security group: %s", str(sec_group))

    # A requested key pair nova does not list is replaced by "".
    if not key_name or len(key_name) == 0:
        key_name = self.key_name or ""
    elif not nova.keypairs.findall(name=key_name):
        key_name = ""

    # Assemble the user data.
    user_data = ""
    if customization:
        user_data = cloud_init_util.build_write_files_cloud_init(
            customization)
    if pre_customization:
        if use_cloud_init:
            user_data = cloud_init_util.inject_customizations(
                pre_customization, user_data)
        else:
            for entry in pre_customization:
                user_data = '\n'.join([entry, user_data])
    elif use_cloud_init:
        user_data = cloud_init_util.inject_customizations([], user_data)
    if extra_userdata:
        # need to use the multi-mime type functions
        user_data = cloud_init_util.build_multi_mime_message(
            [(user_data, 'cloud-config', 'cloud_conf.yaml')], extra_userdata)
        if not user_data:
            log.error("Problem building cloud-config user data.")
            return self.ERROR

    # Compress the user data to try and get under the limit
    user_data = utilities.gzip_userdata(user_data)

    # Image name: job mapping first, then the configured default mapping.
    # The innermost lookup deliberately relies on the bare except to cover
    # the case where vm_default_ami was never bound.
    try:
        if self.name in vm_image.keys():
            image = vm_image[self.name]
        elif self.network_address in vm_image.keys():
            image = vm_image[self.network_address]
        else:
            image = vm_image['default']
    except:
        try:
            vm_default_ami = _attr_list_to_dict(
                config_val.get('job', 'default_VMAMI'))
            if self.name in vm_default_ami.keys():
                image = vm_default_ami[self.name]
            else:
                image = vm_default_ami[self.network_address]
        except:
            try:
                image = vm_default_ami["default"]
            except:
                log.exception("Can't find a suitable AMI")
                return

    try:
        imageobj = nova.glance.find_image(image)
    except novaclient.exceptions.EndpointNotFound:
        log.error(
            "Endpoint not found, are your region settings correct for %s",
            self.name)
        return -4
    except Exception as e:
        log.warning(
            "Exception occurred while trying to fetch image: %s %s",
            image, e)
        self.failed_image_set.add(image)
        return

    # Flavor name: job mapping, configured list, then the single default.
    try:
        if self.name in instance_type.keys():
            i_type = instance_type[self.name]
        elif self.network_address in instance_type.keys():
            i_type = instance_type[self.network_address]
        else:
            i_type = instance_type['default']
    except:
        log.debug("No instance type for %s, trying default",
                  self.network_address)
        try:
            if self.name in self.DEFAULT_INSTANCE_TYPE_LIST.keys():
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.name]
            else:
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[
                    self.network_address]
        except:
            log.debug(
                "No default instance type found for %s, trying single default",
                self.network_address)
            i_type = self.DEFAULT_INSTANCE_TYPE

    # Look the flavor up by name, then retry treating the value as an id.
    try:
        flavor = nova.flavors.find(name=i_type)
    except Exception as e:
        log.warning(
            "Exception occurred while trying to get flavor by name:"
            " %s - will attempt to use name value as a uuid.", e)
        try:
            flavor = nova.flavors.get(i_type)
            log.debug("Got flavor via uuid: %s", i_type)
        except Exception as ex:
            log.error(
                "Exception occurred trying to get flavor by uuid: %s", ex)
            return
    self.flavor_set.add(flavor)

    def nics_for(net_name):
        """Build the nics list for one network name (or uuid)."""
        network = self._find_network(net_name)
        if network:
            return [{'net-id': network.id}]
        log.debug("Unable to find network named: %s on %s",
                  net_name, self.name)
        if len(net_name.split('-')) == 5:  # uuid
            return [{'net-id': net_name}]
        return []

    # find the network id to use if more than one network
    if vm_networkassoc:
        netid = nics_for(vm_networkassoc)
    elif self.network_pools and len(self.network_pools) > 0:
        netid = nics_for(self.network_pools[0])
    else:
        netid = []

    # Need to get the rotating hostname from the google code to use for here.
    name = self._generate_next_name()
    if not name:
        log.debug("Unable to generate name for %s", self.name)
        return self.ERROR

    instance = None
    try:
        instance = nova.servers.create(
            name=name, image=imageobj, flavor=flavor, key_name=key_name,
            availability_zone=self.placement_zone, nics=netid,
            userdata=user_data, security_groups=sec_group)
    except novaclient.exceptions.OverLimit as e:
        log.info(
            "Unable to create VM without exceeded quota on %s: %s",
            self.name, e.message)
    except Exception as e:
        log.error("Unhandled exception while creating vm on %s: %s",
                  self.name, e)

    if not instance:
        log.debug("Failed to create instance on %s", self.name)
        return self.ERROR

    instance_id = instance.id
    # if job didn't set a keep_alive use the clouds default
    if not vm_keepalive and self.keep_alive:
        vm_keepalive = self.keep_alive

    new_vm = cluster_tools.VM(
        name=vm_name, id=instance_id, vmtype=vm_type, user=vm_user,
        clusteraddr=self.network_address, hostname=name,
        cloudtype=self.cloud_type, network=vm_networkassoc, image=vm_image,
        flavor=flavor.name, memory=vm_mem, cpucores=vm_cores,
        storage=vm_storage, keep_alive=vm_keepalive,
        job_per_core=job_per_core)

    try:
        self.resource_checkout(new_vm)
        log.info("Launching 1 VM: %s on %s under tenant: %s",
                 instance_id, self.name, self.tenant_name)
    except:
        log.error(
            "Unexpected Error checking out resources creating VM. Programming error?"
        )
        self.vm_destroy(new_vm, reason="Failed Resource checkout",
                        return_resources=False)
        return self.ERROR

    self.vms.append(new_vm)
    return 0
class BotoCluster(cluster_tools.ICluster):
    """Cluster connector for EC2-style clouds (AmazonEC2 / Eucalyptus)."""

    # Maps the cloud's instance state names to the scheduler's VM states.
    VM_STATES = {
        "running": "Running",
        "pending": "Starting",
        "shutting-down": "Shutdown",
        "terminated": "Shutdown",
        "error": "Error",
    }

    ERROR = 1
    DEFAULT_INSTANCE_TYPE = config_val.get('job', 'default_VMInstanceType') \
        if config_val.get('job', 'default_VMInstanceType') else "m1.small"
    DEFAULT_INSTANCE_TYPE_LIST = _attr_list_to_dict(
        config_val.get('job', 'default_VMInstanceTypeList'))

    def __init__(self, name="Dummy Cluster", host="localhost",
                 cloud_type="Dummy", memory=[], max_vm_mem=-1, networks=[],
                 vm_slots=0, cpu_cores=0, storage=0, access_key_id=None,
                 secret_access_key=None, security_group=None, key_name="",
                 boot_timeout=None, secure_connection="", regions="",
                 reverse_dns_lookup=False, placement_zone=None, enabled=True,
                 priority=0, keep_alive=0, port=8773):
        """Initialise the generic cluster state and store the credentials.

        security_group     -- list of group names; ["default"] when falsy
        secure_connection  -- stored as True only for the strings 'True',
                              'true' or 'TRUE'
        reverse_dns_lookup -- same string-to-bool rule as secure_connection

        Missing credentials are logged as an error but do not abort
        construction.
        """
        # Call super class's init
        cluster_tools.ICluster.__init__(
            self, name=name, host=host, cloud_type=cloud_type, memory=memory,
            max_vm_mem=max_vm_mem, networks=networks, vm_slots=vm_slots,
            cpu_cores=cpu_cores, storage=storage, boot_timeout=boot_timeout,
            enabled=enabled, priority=priority, keep_alive=keep_alive)

        if not security_group:
            security_group = ["default"]
        self.security_groups = security_group

        if not access_key_id or not secret_access_key:
            log.error("Cannot connect to cluster %s "
                      "because you haven't specified an access_key_id or "
                      "a secret_access_key", self.name)

        self.access_key_id = access_key_id
        self.secret_access_key = secret_access_key
        self.key_name = key_name
        self.secure_connection = secure_connection in ['True', 'true', 'TRUE']
        self.total_cpu_cores = -1
        self.regions = regions
        self.reverse_dns_lookup = reverse_dns_lookup in ['True', 'true', 'TRUE']
        self.placement_zone = placement_zone
        self.port = port

    def _get_connection(self):
        """Return a connection object for this cluster, or None.

        NOTE: no connection is constructed for any cloud type, so this
        method currently always returns None. The AmazonEC2 branch only logs
        that it is not implemented; the Eucalyptus branch only logs.
        """
        connection = None

        if self.cloud_type == "AmazonEC2":
            log.debug("Not Implemented use the boto 2 version for AmazonEC2.")
        elif self.cloud_type == "Eucalyptus":
            # NOTE(review): message kept as-is, but nothing is connected here.
            log.verbose("Created a connection to Eucalyptus (%s)", self.name)

        return connection
def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, vm_cpuarch,
              vm_image, vm_mem, vm_cores, vm_storage, customization=None,
              vm_keepalive=0, instance_type="", job_per_core=False,
              securitygroup=[], key_name=""):
    """Create a VM on OpenStack and register it with this cluster.

    vm_image      -- mapping of cluster name/address to image; falls back to
                     the configured default_VMAMI mapping
    instance_type -- mapping of cluster name/address to flavor; any other
                     value falls back to DEFAULT_INSTANCE_TYPE_LIST, then
                     DEFAULT_INSTANCE_TYPE
    key_name      -- key pair name; replaced by "" when nova does not list it

    customization and securitygroup are accepted but not used here.

    Returns 0 on success, self.ERROR when no memory entry fits, the server
    cannot be created or the resource checkout fails, and None when no
    image can be resolved.
    """
    nova = self._get_creds_nova()

    # Truthiness check so key_name=None no longer crashes on len().
    if key_name and not nova.keypairs.findall(name=key_name):
        key_name = ""

    # Image: cluster name, cluster address, then the configured defaults.
    # The innermost lookup also covers vm_default_ami never being bound
    # (NameError is an Exception).
    try:
        image = vm_image[self.name]
    except Exception:
        try:
            image = vm_image[self.network_address]
        except Exception:
            try:
                vm_default_ami = _attr_list_to_dict(config.default_VMAMI)
                if self.name in vm_default_ami.keys():
                    image = vm_default_ami[self.name]
                else:
                    image = vm_default_ami[self.network_address]
            except Exception:
                try:
                    image = vm_default_ami["default"]
                except Exception:
                    log.exception("Can't find a suitable AMI")
                    return

    # Flavor: job mapping, configured list, then the single default.
    try:
        if self.name in instance_type.keys():
            i_type = instance_type[self.name]
        else:
            i_type = instance_type[self.network_address]
    except Exception:
        log.debug("No instance type for %s, trying default" %
                  self.network_address)
        try:
            if self.name in self.DEFAULT_INSTANCE_TYPE_LIST.keys():
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.name]
            else:
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[
                    self.network_address]
        except Exception:
            log.debug(
                "No default instance type found for %s, trying single default"
                % self.network_address)
            i_type = self.DEFAULT_INSTANCE_TYPE

    # Check the memory entry BEFORE booting: previously the server was
    # created first, so a failed check left an untracked instance running.
    vm_mementry = self.find_mementry(vm_mem)
    if vm_mementry < 0:
        # TODO: this is kind of pointless with EC2...
        log.debug("Cluster memory list has no sufficient memory " +
                  "entries (Not supposed to happen). Returning error.")
        return self.ERROR
    log.verbose("vm_create - Memory entry found in given cluster: %d" %
                vm_mementry)

    # novaclient's servers.create() requires a server name; the original
    # call omitted it.
    try:
        instance = nova.servers.create(name=vm_name, image=image,
                                       flavor=i_type, key_name=key_name)
    except Exception as e:
        log.error("Unhandled exception while creating vm on %s: %s" %
                  (self.name, e))
        return self.ERROR
    instance_id = instance.id

    new_vm = cluster_tools.VM(
        name=vm_name, id=instance_id, vmtype=vm_type, user=vm_user,
        clusteraddr=self.network_address, cloudtype=self.cloud_type,
        network=vm_networkassoc, cpuarch=vm_cpuarch, image=vm_image,
        memory=vm_mem, mementry=vm_mementry, cpucores=vm_cores,
        storage=vm_storage, keep_alive=vm_keepalive,
        job_per_core=job_per_core)

    try:
        self.resource_checkout(new_vm)
    except Exception:
        log.exception(
            "Unexpected Error checking out resources when creating a VM. Programming error?"
        )
        self.vm_destroy(new_vm, reason="Failed Resource checkout")
        return self.ERROR

    self.vms.append(new_vm)
    return 0
def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, vm_image,
              vm_mem, vm_cores, vm_storage, customization=None,
              pre_customization=None, vm_keepalive=0, instance_type="",
              job_per_core=False, securitygroup=[], key_name="",
              use_cloud_init=False, extra_userdata=[]):
    """Attempt to boot a new VM on the cluster.

    vm_image       -- mapping of cluster name/address to image id; falls
                      back to the configured default_VMAMI mapping
    instance_type  -- mapping of cluster name/address/"default" to instance
                      type; any other value falls back to
                      DEFAULT_INSTANCE_TYPE_LIST, then DEFAULT_INSTANCE_TYPE
    securitygroup  -- requested groups; only those in self.security_groups
                      are used, otherwise the cluster's own list
    key_name       -- key pair name; self.key_name (or "") when empty/None
    use_cloud_init -- OR-ed with the boolean config option
                      global/use_cloud_init

    Returns 0 on success, self.ERROR when the instance cannot be created,
    the response holds no instance, or the resource checkout fails, and
    None when no image can be resolved.
    """
    # getboolean, not get: get() returns a string, and "False" is truthy.
    use_cloud_init = use_cloud_init or config_val.getboolean(
        'global', 'use_cloud_init')
    log.verbose("Trying to boot %s on %s", vm_type, self.network_address)

    # Keep only requested groups this cluster knows; else use its own list.
    sec_group = self.security_groups
    if securitygroup:
        sec_group = [group for group in securitygroup
                     if group in self.security_groups]
        if not sec_group:
            log.debug("No matching security groups - trying default config")
            sec_group = self.security_groups

    # Image: job mapping first, then the configured default mapping.
    try:
        if self.name in vm_image.keys():
            vm_ami = vm_image[self.name]
        else:
            vm_ami = vm_image[self.network_address]
    except Exception:
        log.debug("No AMI for %s, trying default", self.network_address)
        try:
            vm_default_ami = _attr_list_to_dict(
                config_val.get('job', 'default_VMAMI'))
            if self.name in vm_default_ami.keys():
                vm_ami = vm_default_ami[self.name]
            else:
                vm_ami = vm_default_ami[self.network_address]
        except Exception:
            try:
                vm_ami = vm_default_ami["default"]
            except Exception:
                # Fix: this path used to call failed_image_set.add(vm_ami)
                # while vm_ami was unbound, raising UnboundLocalError.
                log.exception("Can't find a suitable AMI")
                return None

    # Instance type: job mapping, configured list, then the single default.
    try:
        if self.name in instance_type.keys():
            i_type = instance_type[self.name]
        elif self.network_address in instance_type.keys():
            i_type = instance_type[self.network_address]
        else:
            i_type = instance_type["default"]
    except Exception:
        log.debug("No instance type for %s, trying default",
                  self.network_address)
        try:
            if self.name in self.DEFAULT_INSTANCE_TYPE_LIST.keys():
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.name]
            else:
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[
                    self.network_address]
        except Exception:
            log.debug("No default instance type found for %s, "
                      "trying single default", self.network_address)
            i_type = self.DEFAULT_INSTANCE_TYPE

    if not key_name:
        key_name = self.key_name if self.key_name else ""

    # Assemble the user data.
    if customization:
        if not use_cloud_init:
            user_data = nimbus_xml.ws_optional(customization)
        else:
            user_data = cloud_init_util.build_write_files_cloud_init(
                customization)
    else:
        user_data = ""

    if pre_customization:
        if not use_cloud_init:
            for item in pre_customization:
                user_data = '\n'.join([item, user_data])
        else:
            user_data = cloud_init_util.inject_customizations(
                pre_customization, user_data)
    elif use_cloud_init:
        # NOTE(review): only this branch indexes the result with [0]; the
        # branch above uses the return value directly. Confirm which form
        # inject_customizations() actually returns.
        user_data = cloud_init_util.inject_customizations([], user_data)[0]

    if extra_userdata:
        # need to use the multi-mime type functions
        user_data = cloud_init_util.build_multi_mime_message(
            [(user_data, 'cloud-config', 'cloud_conf.yaml')], extra_userdata)

    if "AmazonEC2" == self.cloud_type and vm_networkassoc != "public":
        # Informational only: the addressing type is not sent in the request.
        log.debug("You requested '%s' networking, but EC2 only supports "
                  "'public'", vm_networkassoc)

    user_data = utilities.gzip_userdata(user_data)

    try:
        client = self._get_connection()
        resp = client.run_instances(ImageId=vm_ami, MinCount=1, MaxCount=1,
                                    InstanceType=i_type, UserData=user_data,
                                    KeyName=key_name,
                                    SecurityGroups=sec_group)
        # will need to figure out how PlacementGroups will work
        # probably just be Placement={'AvailabilityZone': placement_zone}
    except Exception as e:
        log.error("Problem creating instance %s", e.__dict__)
        return self.ERROR

    # if job didn't set a keep_alive use the clouds default
    if not vm_keepalive and self.keep_alive:
        vm_keepalive = self.keep_alive

    # A missing or empty 'Instances' list means nothing was booted.
    instances = resp.get('Instances')
    if not instances:
        return self.ERROR
    new_vm_id = instances[0]['InstanceId']

    new_vm = cluster_tools.VM(
        name=vm_name, id=new_vm_id, vmtype=vm_type, user=vm_user,
        clusteraddr=self.network_address, cloudtype=self.cloud_type,
        network=vm_networkassoc, image=vm_ami, flavor=i_type,
        memory=vm_mem, cpucores=vm_cores, storage=vm_storage,
        keep_alive=vm_keepalive, job_per_core=job_per_core)

    try:
        self.resource_checkout(new_vm)
    except Exception:
        log.exception("Unexpected Error checking out resources when "
                      "creating a VM. Programming error?")
        self.vm_destroy(new_vm, reason="Failed Resource checkout",
                        return_resources=False)
        return self.ERROR

    self.vms.append(new_vm)
    return 0
class AzureCluster(cluster_tools.ICluster):
    """ Connector class for Microsoft's Azure cloud service. """

    ERROR = 1
    DEFAULT_INSTANCE_TYPE = config_val.get('job', 'default_VMInstanceType') if \
        config_val.get('job', 'default_VMInstanceType') else "m1.small"
    DEFAULT_INSTANCE_TYPE_LIST = _attr_list_to_dict(
        config_val.get('job', 'default_VMInstanceTypeList'))
    AZURE_SERVICE_NAME = "CloudSchedulerService"

    # Maps Azure role instance statuses to the scheduler's VM states.
    VM_STATES = {
        "Active": "Starting",
        "BusyRole": "Starting",
        "CreatingRole": "Starting",
        "CreatingVM": "Starting",
        "Deleting": "Shutdown",
        "DeletingVM": "Shutdown",
        "Deploying": "Starting",
        "Error": "Error",
        "Preparing": "Starting",
        "Provisioning": "Starting",
        "ProvisioningFailed": "Error",
        "ProvisioningTimeout": "Error",
        "ReadyRole": "Running",
        "RestartingRole": "Restarting",
        "RoleStateUnknown": "Starting",
        "Running": "Running",
        "StartingVM": "Starting",
        "StartingRole": "Starting",
        "StoppedDeallocated": "Stopped",
        "StoppedVM": "Stopped",
        "StoppingRole": "StopRole",
        "StoppingVM": "Shutdown",
        "Suspended": "Suspended",
        "Suspending": "Suspending",
        "Unknown": "Error",
    }

    def __init__(self, name="Dummy Cluster", cloud_type="Dummy", memory=None,
                 max_vm_mem=-1, networks=None, vm_slots=0, cpu_cores=0,
                 storage=0, security_group=None, username=None, password=None,
                 tenant_name=None, auth_url=None, key_name=None,
                 boot_timeout=None, secure_connection="", regions="",
                 placement_zone=None, enabled=True, priority=0, keycert=None,
                 keep_alive=0, blob_url="", service_name=None):
        """Initialise the generic cluster state and the Azure settings.

        username/password -- credentials set on each created VM
        tenant_name, keycert -- passed to ServiceManagementService when a
                             connection is opened
        blob_url          -- prefix of the OS disk media link
        service_name      -- hosted service name; AZURE_SERVICE_NAME when
                             falsy
        secure_connection -- stored as True only for 'True'/'true'/'TRUE'

        Exits the process with status 1 when the azure package cannot be
        imported.
        """
        # Call super class's init
        cluster_tools.ICluster.__init__(
            self, name=name, host="azure.microsoft.com",
            cloud_type=cloud_type, memory=memory, max_vm_mem=max_vm_mem,
            networks=networks, vm_slots=vm_slots, cpu_cores=cpu_cores,
            storage=storage, boot_timeout=boot_timeout, enabled=enabled,
            priority=priority, keep_alive=keep_alive)

        # Availability check only: these imports bind names local to
        # __init__, so methods that use the SDK import it themselves.
        try:
            import azure
            import azure.servicemanagement
        except ImportError:
            print("Unable to import azure-mgmt, unable to use Azure cloudtypes")
            sys.exit(1)

        if not security_group:
            security_group = ["default"]
        self.security_groups = security_group
        self.username = username if username else ""
        self.password = password if password else ""
        self.tenant_name = tenant_name if tenant_name else ""
        self.auth_url = auth_url if auth_url else ""
        self.key_name = key_name if key_name else ""
        self.secure_connection = secure_connection in ['True', 'true', 'TRUE']
        self.total_cpu_cores = -1
        self.regions = regions
        self.placement_zone = placement_zone
        self.keycert = keycert
        self.blob_url = blob_url
        self.count = 0  # offset used to hand out per-VM public SSH ports
        self.azure_service_name = service_name if service_name \
            else self.AZURE_SERVICE_NAME

    def __getstate__(self):
        """Override to work with pickle module."""
        state = cluster_tools.ICluster.__getstate__(self)
        return state

    def __setstate__(self, state):
        """Override to work with pickle module."""
        cluster_tools.ICluster.__setstate__(self, state)

    def vm_create(self, vm_name, vm_type, vm_user, vm_image, vm_mem, vm_cores,
                  vm_storage, customization=None, vm_keepalive=0,
                  instance_type={}, job_per_core=False,
                  pre_customization=None, extra_userdata=None):
        """ Create a VM on Azure.

        vm_image      -- mapping of cluster name/address/"default" to image;
                         falls back to the configured default_VMAMI mapping
        instance_type -- mapping of cluster name/address/"default" to role
                         size; falls back to DEFAULT_INSTANCE_TYPE_LIST,
                         then DEFAULT_INSTANCE_TYPE

        The hosted service is created first when its name is still
        available. The VM is added as a new deployment when this cluster
        tracks no VMs, otherwise as a role of the existing deployment.

        Returns 0 on success, self.ERROR on any failure, and None when no
        image can be resolved.
        """
        # Function-scope import: __init__'s import does not bind `azure`
        # for other methods.
        import azure.servicemanagement

        # Cloud-init is always used on Azure.
        if customization:
            user_data = cloud_init_util.build_write_files_cloud_init(
                customization)
        else:
            user_data = ""
        user_data = cloud_init_util.inject_customizations(
            pre_customization if pre_customization else [], user_data)
        if extra_userdata:
            # need to use the multi-mime type functions
            user_data = cloud_init_util.build_multi_mime_message(
                [(user_data, 'cloud-config', 'cloud_conf.yaml')],
                extra_userdata)
            if not user_data:
                log.error("Problem building cloud-config user data.")
                return self.ERROR

        # Compress the user data to try and get under the limit
        user_data = utilities.gzip_userdata(user_data)

        # Image: job mapping first, then the configured default mapping.
        try:
            if self.name in vm_image.keys():
                image = vm_image[self.name]
            elif self.network_address in vm_image.keys():
                image = vm_image[self.network_address]
            else:
                image = vm_image['default']
        except Exception:
            try:
                vm_default_ami = _attr_list_to_dict(
                    config_val.get('job', 'default_VMAMI'))
                if self.name in vm_default_ami.keys():
                    image = vm_default_ami[self.name]
                else:
                    image = vm_default_ami[self.network_address]
            except Exception:
                try:
                    image = vm_default_ami["default"]
                except Exception:
                    log.exception("Can't find a suitable AMI")
                    return

        # Role size: job mapping, configured list, then the single default.
        try:
            if self.name in instance_type.keys():
                i_type = instance_type[self.name]
            elif self.network_address in instance_type.keys():
                i_type = instance_type[self.network_address]
            else:
                i_type = instance_type['default']
        except Exception:
            log.debug("No instance type for %s, trying default",
                      self.network_address)
            try:
                if self.name in self.DEFAULT_INSTANCE_TYPE_LIST.keys():
                    i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.name]
                else:
                    i_type = self.DEFAULT_INSTANCE_TYPE_LIST[
                        self.network_address]
            except Exception:
                log.debug("No default instance type found for %s, "
                          "trying single default", self.network_address)
                i_type = self.DEFAULT_INSTANCE_TYPE

        name = self._generate_next_name()
        if not name:
            log.debug("Unable to generate name for %s", self.name)
            return self.ERROR

        req = None
        sms = self._get_service_connection()
        try:
            conf_set = azure.servicemanagement.LinuxConfigurationSet(
                host_name=name, user_name=self.username,
                user_password=self.password,
                disable_ssh_password_authentication=False,
                custom_data=user_data)
            net_set = azure.servicemanagement.ConfigurationSet()

            # Each VM gets its own public SSH port mapped to local port 22.
            vm_ssh_port = 20000 + self.count
            net_set.input_endpoints.input_endpoints.append(
                azure.servicemanagement.ConfigurationSetInputEndpoint(
                    name='SSH', protocol='TCP', port=vm_ssh_port,
                    local_port=22))
            self.count += 1
            if self.count > 15000:
                self.count = 0

            os_hd = azure.servicemanagement.OSVirtualHardDisk(
                image, self.blob_url + name)

            # Name still available means the hosted service does not exist.
            res = sms.check_hosted_service_name_availability(
                self.azure_service_name)
            if res.result:
                req = sms.create_hosted_service(self.azure_service_name,
                                                self.azure_service_name,
                                                location=self.regions[0])
                sms.wait_for_operation_status(req.request_id)

            if len(self.vms) == 0:
                req = sms.create_virtual_machine_deployment(
                    service_name=self.azure_service_name,
                    deployment_name=self.azure_service_name,
                    deployment_slot='production', role_name=name, label=name,
                    system_config=conf_set, network_config=net_set,
                    os_virtual_hard_disk=os_hd, role_size=i_type)
            else:
                req = sms.add_role(
                    service_name=self.azure_service_name,
                    deployment_name=self.azure_service_name, role_name=name,
                    system_config=conf_set, network_config=net_set,
                    os_virtual_hard_disk=os_hd, role_size=i_type)

            try:
                sms.wait_for_operation_status(req.request_id)
            except Exception as e:
                # Prefer the SDK's nested error message, but fall back to
                # the exception itself when that structure is absent.
                result = getattr(e, 'result', None)
                error = getattr(result, 'error', None)
                detail = getattr(error, 'message', None) or e
                log.error("Problem creating VM on Azure: %s", detail)
                return self.ERROR
        except Exception as e:
            log.error("Unhandled exception while creating vm on %s: %s",
                      self.name, e)
            return self.ERROR

        if not req:
            log.debug("Failed to create instance on %s", self.name)
            return self.ERROR

        # if job didn't set a keep_alive use the clouds default
        if not vm_keepalive and self.keep_alive:
            vm_keepalive = self.keep_alive

        new_vm = cluster_tools.VM(
            name=vm_name, id=name, vmtype=vm_type, user=vm_user,
            clusteraddr=self.network_address, hostname=name,
            cloudtype=self.cloud_type, network=None, image=vm_image,
            flavor=i_type, memory=vm_mem, cpucores=vm_cores,
            storage=vm_storage, keep_alive=vm_keepalive,
            job_per_core=job_per_core, ssh_port=vm_ssh_port)

        try:
            self.resource_checkout(new_vm)
            log.info("Launching 1 VM: %s on %s", name, self.name)
        except Exception:
            log.error("Unexpected Error checking out resources when "
                      "creating a VM. Programming error?")
            self.vm_destroy(new_vm, reason="Failed Resource checkout",
                            return_resources=False)
            return self.ERROR

        self.vms.append(new_vm)
        return 0

    def vm_destroy(self, vm, return_resources=True, reason=""):
        """ Destroy a VM on Azure.

        Deletes the VM's role. When Azure reports it is the only role
        present, the whole hosted service is deleted instead. "Not found"
        style errors are logged and the VM is still dropped from this
        cluster's list.

        Returns 0 when the VM was removed from the list, 1 otherwise.
        """
        log.info("Destroying VM: %s Name: %s on %s tenant: %s Reason: %s",
                 vm.id, vm.hostname, self.name, self.tenant_name, reason)
        try:
            azure_conn = self._get_service_connection()
            req = azure_conn.delete_role(self.azure_service_name,
                                         self.azure_service_name, vm.id, True)
            azure_conn.wait_for_operation_status(req.request_id)
        except Exception as e:
            log.debug("Problem destroying VM on Azure: %s", e)
            try:
                # e.message exists only on Python 2; fall back to str(e).
                message = getattr(e, 'message', None) or str(e)
                if "only role present" in message:
                    try:
                        azure_conn = self._get_service_connection()
                        req = azure_conn.delete_hosted_service(
                            self.azure_service_name, True)
                        azure_conn.wait_for_operation_status(req.request_id)
                    except Exception as service_err:
                        log.error("Problem deleteing the CS Azure service: %s",
                                  service_err)
                        return 1
                elif "hosted service name is invalid" in message or \
                        'does not exist' in message or \
                        "not found in the currently deployed service" in message:
                    log.error("Invalid service name on %s : %s, dropping from CS",
                              self.name, e)
                elif "Not Found" in message:
                    log.error("VM %s not found on azure, may already be "
                              "destroyed, dropping from CS", vm.id)
                else:
                    log.error("Unhandled exception while destroying VM on %s : %s",
                              self.name, e)
                    return 1
            except Exception:
                log.error("Failed to log exception properly?")
                return 1

        # Delete references to this VM
        try:
            if return_resources and vm.return_resources:
                self.resource_return(vm)
            with self.vms_lock:
                self.vms.remove(vm)
            log.info("VM %s removed from %s list", vm.id, self.name)
            if config_val.get('global', 'monitor_url') is not None:
                self._report_monitor(vm)
        except Exception as e:
            log.error("Error removing vm from list: %s", e)
            return 1

        return 0

    def vm_poll(self, vm):
        """ Query Azure for status information of VMs.

        Looks for the role instance whose role_name equals vm.id in the
        hosted service's deployments and updates vm.status from it.
        Statuses missing from VM_STATES are stored as reported. The status
        becomes VM_STATES['Error'] when the service cannot be queried, has
        no deployments, or holds no matching role.

        Returns the updated vm.status.
        """
        azure_conn = self._get_service_connection()
        try:
            vm_info = azure_conn.get_hosted_service_properties(
                self.azure_service_name, True)
        except Exception as e:
            log.error("Unable to find service with name: %s on Azure. %s",
                      self.azure_service_name, e)
            vm.status = self.VM_STATES['Error']
            return vm.status

        if vm_info and len(vm_info.deployments) == 0:
            log.debug("No VMs running on service: %s, skipping.",
                      vm_info.service_name)
            vm.status = self.VM_STATES['Error']
            return vm.status

        instance = None
        if vm_info:
            for deployment in vm_info.deployments:
                for role in deployment.role_instance_list.role_instances:
                    if role.role_name == vm.id:
                        instance = role
                        break
                if instance is not None:
                    break
            if instance is None:
                log.debug("Unable to find VM: %s on Azure", vm.id)

        with self.vms_lock:
            if instance:
                # One mapping for both the comparison and the assignment, so
                # an unmapped status does not register as a change on every
                # poll.
                new_status = self.VM_STATES.get(instance.instance_status,
                                                instance.instance_status)
                if vm.status != new_status:
                    vm.last_state_change = int(time.time())
                    log.debug("VM: %s on %s. Changed from %s to %s.",
                              vm.id, self.name, vm.status, new_status)
                vm.status = new_status
            else:
                vm.status = self.VM_STATES['Error']

        return vm.status

    def _get_service_connection(self):
        """Return a new ServiceManagementService for this tenant/keycert."""
        # Function-scope import: __init__'s import does not bind `azure`
        # for other methods.
        import azure.servicemanagement
        return azure.servicemanagement.ServiceManagementService(
            self.tenant_name, self.keycert)
def vm_create(self, vm_name, vm_type, vm_user, vm_image, vm_mem, vm_cores, vm_storage,
              customization=None, vm_keepalive=0, instance_type=None, job_per_core=False,
              pre_customization=None, extra_userdata=None):
    """Create a VM on Azure.

    Builds gzip-compressed cloud-init user data, selects the image and role
    size for this cluster, then either creates the first deployment of the
    hosted service or adds a role to it, and records the new VM.

    :param vm_name: name recorded on the new VM entry.
    :param vm_type: VM type recorded on the new VM entry.
    :param vm_user: user recorded on the new VM entry.
    :param vm_image: mapping keyed by cluster name, network address or
        'default' giving the image to boot.
    :param vm_mem: memory recorded on the new VM entry.
    :param vm_cores: core count recorded on the new VM entry.
    :param vm_storage: storage recorded on the new VM entry.
    :param customization: when set, turned into cloud-init write_files data.
    :param vm_keepalive: keep-alive for the VM; the cluster's ``keep_alive``
        is used when this is falsy.
    :param instance_type: mapping keyed like ``vm_image`` giving the role
        size; anything unusable falls back to the configured defaults.
    :param job_per_core: recorded on the new VM entry.
    :param pre_customization: customizations injected into the cloud-config.
    :param extra_userdata: when non-empty, the cloud-config is wrapped in a
        multi-part MIME message together with these parts.
    :return: 0 on success, ``self.ERROR`` (or 1 when waiting for the Azure
        operation fails) on error, ``None`` when no image can be determined.
    """
    if customization:
        user_data = cloud_init_util.build_write_files_cloud_init(customization)
    else:
        user_data = ""
    # Azure VMs are always configured through cloud-init, so customizations
    # are injected even when the job supplied none.
    user_data = cloud_init_util.inject_customizations(
        pre_customization if pre_customization else [], user_data)
    if extra_userdata:
        # need to use the multi-mime type functions
        user_data = cloud_init_util.build_multi_mime_message(
            [(user_data, 'cloud-config', 'cloud_conf.yaml')], extra_userdata)
        if not user_data:
            log.error("Problem building cloud-config user data.")
            return self.ERROR
    # Compress the user data to try and get under the limit
    user_data = utilities.gzip_userdata(user_data)

    # Image: job-supplied mapping first, then the configured defaults.
    try:
        if self.name in vm_image:
            image = vm_image[self.name]
        elif self.network_address in vm_image:
            image = vm_image[self.network_address]
        else:
            image = vm_image['default']
    except Exception:
        vm_default_ami = {}
        try:
            vm_default_ami = _attr_list_to_dict(config_val.get('job', 'default_VMAMI'))
            if self.name in vm_default_ami:
                image = vm_default_ami[self.name]
            else:
                image = vm_default_ami[self.network_address]
        except Exception:
            try:
                image = vm_default_ami["default"]
            except Exception:
                log.exception("Can't find a suitable AMI")
                return None

    # Role size: same lookup order, ending at the single global default.
    try:
        if self.name in instance_type:
            i_type = instance_type[self.name]
        elif self.network_address in instance_type:
            i_type = instance_type[self.network_address]
        else:
            i_type = instance_type['default']
    except Exception:
        # Also taken when instance_type is None or not a mapping.
        log.debug("No instance type for %s, trying default", self.network_address)
        try:
            if self.name in self.DEFAULT_INSTANCE_TYPE_LIST:
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.name]
            else:
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.network_address]
        except Exception:
            log.debug("No default instance type found for %s, trying single default",
                      self.network_address)
            i_type = self.DEFAULT_INSTANCE_TYPE

    name = self._generate_next_name()
    if not name:
        log.debug("Unable to generate name for %s", self.name)
        return self.ERROR

    req = None
    sms = self._get_service_connection()
    try:
        conf_set = azure.servicemanagement.LinuxConfigurationSet(
            host_name=name, user_name=self.username, user_password=self.password,
            disable_ssh_password_authentication=False, custom_data=user_data)
        net_set = azure.servicemanagement.ConfigurationSet()
        # Each VM gets its own public SSH port, taken from a rolling counter.
        vm_ssh_port = 20000 + self.count
        net_set.input_endpoints.input_endpoints.append(
            azure.servicemanagement.ConfigurationSetInputEndpoint(
                name='SSH', protocol='TCP', port=vm_ssh_port, local_port=22))
        self.count += 1
        if self.count > 15000:
            self.count = 0
        os_hd = azure.servicemanagement.OSVirtualHardDisk(image, self.blob_url + name)

        # An available service name means the hosted service does not exist yet.
        res = sms.check_hosted_service_name_availability(self.azure_service_name)
        if res.result:
            req = sms.create_hosted_service(self.azure_service_name,
                                            self.azure_service_name,
                                            location=self.regions[0])
            sms.wait_for_operation_status(req.request_id)

        if len(self.vms) == 0:
            # First VM of this cluster: create the deployment itself.
            req = sms.create_virtual_machine_deployment(
                service_name=self.azure_service_name,
                deployment_name=self.azure_service_name,
                deployment_slot='production', role_name=name, label=name,
                system_config=conf_set, network_config=net_set,
                os_virtual_hard_disk=os_hd, role_size=i_type)
        else:
            req = sms.add_role(
                service_name=self.azure_service_name,
                deployment_name=self.azure_service_name,
                role_name=name, system_config=conf_set, network_config=net_set,
                os_virtual_hard_disk=os_hd, role_size=i_type)

        request_id = req.request_id
        try:
            sms.wait_for_operation_status(request_id)
        except Exception as e:
            # Not every exception carries an Azure result; fall back to the
            # exception itself so the real cause is never lost.
            try:
                detail = e.result.error.message
            except AttributeError:
                detail = e
            log.error("Problem creating VM on Azure: %s", detail)
            return 1
    except Exception as e:
        log.error("Unhandled exception while creating vm on %s: %s", self.name, e)
        return self.ERROR

    if not req:
        log.debug("Failed to create instance on %s", self.name)
        return self.ERROR

    # if job didn't set a keep_alive use the clouds default
    if not vm_keepalive and self.keep_alive:
        vm_keepalive = self.keep_alive
    new_vm = cluster_tools.VM(name=vm_name, id=name, vmtype=vm_type, user=vm_user,
                              clusteraddr=self.network_address,
                              hostname=name,
                              cloudtype=self.cloud_type, network=None,
                              image=vm_image, flavor=i_type,
                              memory=vm_mem, cpucores=vm_cores, storage=vm_storage,
                              keep_alive=vm_keepalive, job_per_core=job_per_core,
                              ssh_port=vm_ssh_port)
    try:
        self.resource_checkout(new_vm)
        log.info("Launching 1 VM: %s on %s", name, self.name)
    except Exception:
        log.error("Unexpected Error checking out resources when creating a VM. "
                  "Programming error?")
        self.vm_destroy(new_vm, reason="Failed Resource checkout", return_resources=False)
        return self.ERROR
    self.vms.append(new_vm)
    return 0
def vm_create(self, vm_name, vm_type, vm_user, vm_image, vm_mem, vm_cores, vm_storage,
              customization=None, vm_keepalive=0, instance_type={}, job_per_core=False,
              pre_customization=None, extra_userdata=[]):
    """Boot one VM on Azure and register it with this cluster.

    Steps: assemble and gzip the cloud-init user data, resolve the image and
    role size for this cluster, create the deployment (first VM) or add a
    role to it, then check out resources and append the VM to ``self.vms``.

    Returns 0 on success, ``self.ERROR`` (or 1 when the Azure operation
    fails while being waited on) on error, and ``None`` when no image can
    be resolved.
    """
    # ---- user data --------------------------------------------------------
    user_data = ""
    if customization:
        user_data = cloud_init_util.build_write_files_cloud_init(customization)
    # cloud-init is always used on Azure, so an empty customization list is
    # injected when the job gave none.
    if pre_customization:
        user_data = cloud_init_util.inject_customizations(pre_customization, user_data)
    else:
        user_data = cloud_init_util.inject_customizations([], user_data)
    if len(extra_userdata) > 0:
        # need to use the multi-mime type functions
        cloud_config_part = (user_data, 'cloud-config', 'cloud_conf.yaml')
        user_data = cloud_init_util.build_multi_mime_message([cloud_config_part],
                                                             extra_userdata)
        if not user_data:
            log.error("Problem building cloud-config user data.")
            return self.ERROR
    # Compress the user data to try and get under the limit
    user_data = utilities.gzip_userdata(user_data)

    # ---- image ------------------------------------------------------------
    try:
        job_images = vm_image.keys()
        if self.name in job_images:
            image_key = self.name
        elif self.network_address in job_images:
            image_key = self.network_address
        else:
            image_key = 'default'
        image = vm_image[image_key]
    except:
        try:
            vm_default_ami = _attr_list_to_dict(config.default_VMAMI)
            if self.name in vm_default_ami.keys():
                image_key = self.name
            else:
                image_key = self.network_address
            image = vm_default_ami[image_key]
        except:
            try:
                image = vm_default_ami["default"]
            except:
                log.exception("Can't find a suitable AMI")
                return

    # ---- role size --------------------------------------------------------
    try:
        job_types = instance_type.keys()
        if self.name in job_types:
            role_size = instance_type[self.name]
        elif self.network_address in job_types:
            role_size = instance_type[self.network_address]
        else:
            role_size = instance_type['default']
    except:
        log.debug("No instance type for %s, trying default" % self.network_address)
        try:
            configured = self.DEFAULT_INSTANCE_TYPE_LIST
            if self.name in configured.keys():
                role_size = configured[self.name]
            else:
                role_size = configured[self.network_address]
        except:
            log.debug("No default instance type found for %s, trying single default" % self.network_address)
            role_size = self.DEFAULT_INSTANCE_TYPE

    # ---- launch -----------------------------------------------------------
    name = self._generate_next_name()
    if not name:
        log.debug("Unable to generate name for %s" % self.name)
        return self.ERROR

    req = None
    service = self._get_service_connection()
    try:
        linux_config = azure.servicemanagement.LinuxConfigurationSet(
            host_name=name,
            user_name=self.username,
            user_password=self.password,
            disable_ssh_password_authentication=False,
            custom_data=user_data)
        network_config = azure.servicemanagement.ConfigurationSet()
        vm_ssh_port = 20000 + self.count
        ssh_endpoint = azure.servicemanagement.ConfigurationSetInputEndpoint(
            name='SSH', protocol='TCP', port=vm_ssh_port, local_port=22)
        network_config.input_endpoints.input_endpoints.append(ssh_endpoint)
        # Rolling counter that spreads VMs over distinct public SSH ports.
        self.count += 1
        if self.count > 15000:
            self.count = 0
        os_disk = azure.servicemanagement.OSVirtualHardDisk(image, self.blob_url + name)

        availability = service.check_hosted_service_name_availability(
            self.azure_service_name)
        if availability.result:
            req = service.create_hosted_service(self.azure_service_name,
                                                self.azure_service_name,
                                                location=self.regions[0])
            service.wait_for_operation_status(req.request_id)

        # Arguments common to creating a deployment and adding a role.
        role_kwargs = dict(service_name=self.azure_service_name,
                           deployment_name=self.azure_service_name,
                           role_name=name,
                           system_config=linux_config,
                           network_config=network_config,
                           os_virtual_hard_disk=os_disk,
                           role_size=role_size)
        if len(self.vms) == 0:
            req = service.create_virtual_machine_deployment(
                deployment_slot='production', label=name, **role_kwargs)
        else:
            req = service.add_role(**role_kwargs)
        try:
            service.wait_for_operation_status(req.request_id)
        except Exception as e:
            log.error("Problem creating VM on Azure: %s" % e.result.error.message)
            return 1
    except Exception as e:
        log.error("Unhandled exception while creating vm on %s: %s" % (self.name, e))
        return self.ERROR

    if not req:
        log.debug("Failed to create instance on %s" % self.name)
        return self.ERROR

    # if job didn't set a keep_alive use the clouds default
    if not vm_keepalive and self.keep_alive:
        vm_keepalive = self.keep_alive
    new_vm = cluster_tools.VM(
        name=vm_name, id=name, vmtype=vm_type, user=vm_user,
        clusteraddr=self.network_address, hostname=name,
        cloudtype=self.cloud_type, network=None,
        image=vm_image, flavor=role_size,
        memory=vm_mem, cpucores=vm_cores, storage=vm_storage,
        keep_alive=vm_keepalive, job_per_core=job_per_core,
        ssh_port=vm_ssh_port)
    try:
        self.resource_checkout(new_vm)
        log.info("Launching 1 VM: %s on %s" % (name, self.name))
    except:
        log.error("Unexpected Error checking out resources when creating a VM. Programming error?")
        self.vm_destroy(new_vm, reason="Failed Resource checkout", return_resources=False)
        return self.ERROR
    self.vms.append(new_vm)
    return 0
class OpenStackCluster(cluster_tools.ICluster):
    """Manage connections and VMs on an OpenStack cloud.

    A keystone session is opened when the cluster is constructed and again
    after unpickling (sessions are not pickled); nova clients are created
    from that session on demand.
    """

    ERROR = 1
    DEFAULT_INSTANCE_TYPE = config_val.get('job', 'default_VMInstanceType') or "m1.small"
    DEFAULT_INSTANCE_TYPE_LIST = \
        _attr_list_to_dict(config_val.get('job', 'default_VMInstanceTypeList'))
    # Maps nova server status strings to the VM states used by the scheduler.
    VM_STATES = {
        "BUILD": "Starting",
        "ACTIVE": "Running",
        "SHUTOFF": "Shutdown",
        "SUSPENDED": "Suspended",
        "STOPPED": "Stopped",
        "PAUSED": "Paused",
        "ERROR": "Error",
        "VERIFY_RESIZE": "Error",
    }

    def __init__(self, name="Dummy Cluster", cloud_type="Dummy", memory=None, max_vm_mem=-1,
                 networks=None, vm_slots=0, cpu_cores=0, storage=0, security_group=None,
                 username=None, password=None, tenant_name=None, auth_url=None,
                 key_name=None, boot_timeout=None, secure_connection="", regions="",
                 reverse_dns_lookup=False, placement_zone=None, enabled=True, priority=0,
                 cacert=None, keep_alive=0, user_domain_name=None,
                 project_domain_name=None):
        """Set up the cluster record and open a keystone session.

        ``memory`` and ``networks`` default to fresh empty lists.
        ``secure_connection`` and ``reverse_dns_lookup`` are treated as true
        only for the strings 'True', 'true' or 'TRUE'. The process exits if
        novaclient cannot be imported.
        """
        # Call super class's init
        cluster_tools.ICluster.__init__(
            self,
            name=name,
            host=auth_url,
            cloud_type=cloud_type,
            memory=[] if memory is None else memory,
            max_vm_mem=max_vm_mem,
            networks=[] if networks is None else networks,
            vm_slots=vm_slots,
            cpu_cores=cpu_cores,
            storage=storage,
            boot_timeout=boot_timeout,
            enabled=enabled,
            priority=priority,
            keep_alive=keep_alive,
        )
        # Fail fast: nothing in this class works without novaclient.
        try:
            import novaclient.v2.client as nvclient
            import novaclient.exceptions
        except Exception:
            print("Unable to import novaclient - cannot use native openstack cloudtypes")
            sys.exit(1)
        if not security_group:
            security_group = ["default"]
        self.security_groups = security_group
        self.username = username if username else ""
        self.password = password if password else ""
        self.tenant_name = tenant_name if tenant_name else ""
        self.auth_url = auth_url if auth_url else ""
        self.key_name = key_name if key_name else ""
        self.secure_connection = secure_connection in ['True', 'true', 'TRUE']
        self.total_cpu_cores = -1
        self.regions = regions
        self.reverse_dns_lookup = reverse_dns_lookup in ['True', 'true', 'TRUE']
        self.placement_zone = placement_zone
        self.flavor_set = set()
        self.cacert = cacert
        self.user_domain_name = user_domain_name \
            if user_domain_name is not None else "Default"
        self.project_domain_name = project_domain_name \
            if project_domain_name is not None else "Default"
        self.session = None
        try:
            self.session = self._open_keystone_session()
        except Exception:
            log.error("Error determining keystone version from auth url.")

    def __getstate__(self):
        """Override to work with pickle module.

        The flavor set and the keystone session are left out of the state.
        """
        state = cluster_tools.ICluster.__getstate__(self)
        # Remove each entry independently so a missing one cannot prevent the
        # other from being dropped.
        for transient in ('flavor_set', 'session'):
            try:
                del state[transient]
            except Exception:
                log.error("no %s to remove", transient)
        return state

    def __setstate__(self, state):
        """Override to work with pickle module.

        Recreates the flavor set and the keystone session that
        ``__getstate__`` left out.
        """
        cluster_tools.ICluster.__setstate__(self, state)
        self.flavor_set = set()
        # Always define the attribute, even when no session can be opened.
        self.session = None
        try:
            self.session = self._open_keystone_session()
        except Exception:
            log.error("Error determining keystone version from auth url")

    def _open_keystone_session(self):
        """Open a keystone session matching the version suffix of auth_url.

        The version is read from the last non-empty path component of
        ``self.auth_url`` (for example ``v2.0`` or ``v3``). Returns the
        session, or None for any other version or when the session could
        not be created. Raises when the URL carries no parsable version.
        """
        authsplit = self.auth_url.split('/')
        version_part = authsplit[-1] if len(authsplit[-1]) > 0 else authsplit[-2]
        version = int(float(version_part[1:]))
        if version == 2:
            return self._get_keystone_session()
        if version == 3:
            return self._get_keystone_session_v3()
        return None

    def _select_instance_type(self, instance_type):
        """Pick the flavor name for this cluster.

        Looks in the job's mapping by cluster name, network address and
        'default'; then in DEFAULT_INSTANCE_TYPE_LIST by name and address;
        finally falls back to DEFAULT_INSTANCE_TYPE.
        """
        try:
            if self.name in instance_type.keys():
                return instance_type[self.name]
            if self.network_address in instance_type.keys():
                return instance_type[self.network_address]
            return instance_type['default']
        except Exception:
            # Also taken when instance_type is not a mapping (default is "").
            log.debug("No instance type for %s, trying default",
                      self.network_address)
        try:
            if self.name in self.DEFAULT_INSTANCE_TYPE_LIST.keys():
                return self.DEFAULT_INSTANCE_TYPE_LIST[self.name]
            return self.DEFAULT_INSTANCE_TYPE_LIST[self.network_address]
        except Exception:
            log.debug(
                "No default instance type found for %s, trying single default",
                self.network_address)
        return self.DEFAULT_INSTANCE_TYPE

    def _resolve_nics(self, network_name):
        """Return the nics list for nova given a network name or uuid.

        Empty when the network is unknown and the value does not look like
        a uuid (five dash-separated parts).
        """
        network = self._find_network(network_name)
        if network:
            return [{'net-id': network.id}]
        log.debug("Unable to find network named: %s on %s",
                  network_name, self.name)
        if len(network_name.split('-')) == 5:  # uuid
            return [{'net-id': network_name}]
        return []

    def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc,
                  vm_image, vm_mem, vm_cores, vm_storage, customization=None,
                  vm_keepalive=0, instance_type="", job_per_core=False,
                  securitygroup=None, key_name="", pre_customization=None,
                  use_cloud_init=False, extra_userdata=None):
        """Create a VM on OpenStack.

        Returns 0 on success, ``self.ERROR`` when no nova client, name or
        instance could be obtained, -4 when the image endpoint is not found,
        and ``None`` when the image or flavor cannot be resolved.
        """
        import novaclient.exceptions
        use_cloud_init = use_cloud_init or config_val.getboolean(
            'global', 'use_cloud_init')
        nova = self._get_creds_nova_updated()
        if nova is None:
            # Without a client every later call would fail, and the image
            # would wrongly be recorded as a failed image.
            log.error("No nova client available for %s, cannot create VM",
                      self.name)
            return self.ERROR

        # Only security groups known to this cloud are usable.
        if securitygroup:
            sec_group = [group for group in securitygroup
                         if group in self.security_groups]
            if len(sec_group) == 0:
                log.debug("No defined security groups for job - "
                          "trying default value from cloud_resources.conf")
                sec_group = self.security_groups
        else:
            sec_group = self.security_groups
        log.debug("Using security group: %s", str(sec_group))

        if key_name and len(key_name) > 0:
            # Drop a job-supplied key pair the cloud does not know about.
            if not nova.keypairs.findall(name=key_name):
                key_name = ""
        else:
            key_name = self.key_name if self.key_name else ""

        if customization:
            user_data = cloud_init_util.build_write_files_cloud_init(
                customization)
        else:
            user_data = ""
        if pre_customization:
            if not use_cloud_init:
                for item in pre_customization:
                    user_data = '\n'.join([item, user_data])
            else:
                user_data = cloud_init_util.inject_customizations(
                    pre_customization, user_data)
        elif use_cloud_init:
            user_data = cloud_init_util.inject_customizations([], user_data)
        if extra_userdata:
            # need to use the multi-mime type functions
            user_data = cloud_init_util.build_multi_mime_message(
                [(user_data, 'cloud-config', 'cloud_conf.yaml')],
                extra_userdata)
            if not user_data:
                log.error("Problem building cloud-config user data.")
                return self.ERROR
        # Compress the user data to try and get under the limit
        user_data = utilities.gzip_userdata(user_data)

        # Image: job-supplied mapping first, then the configured defaults.
        try:
            if self.name in vm_image.keys():
                image = vm_image[self.name]
            elif self.network_address in vm_image.keys():
                image = vm_image[self.network_address]
            else:
                image = vm_image['default']
        except Exception:
            vm_default_ami = {}
            try:
                vm_default_ami = _attr_list_to_dict(
                    config_val.get('job', 'default_VMAMI'))
                if self.name in vm_default_ami.keys():
                    image = vm_default_ami[self.name]
                else:
                    image = vm_default_ami[self.network_address]
            except Exception:
                try:
                    image = vm_default_ami["default"]
                except Exception:
                    log.exception("Can't find a suitable AMI")
                    return None

        try:
            imageobj = nova.glance.find_image(image)
        except novaclient.exceptions.EndpointNotFound:
            log.error(
                "Endpoint not found, are your region settings correct for %s",
                self.name)
            return -4
        except Exception as e:
            log.warning(
                "Exception occurred while trying to fetch image: %s %s",
                image, e)
            self.failed_image_set.add(image)
            return None

        i_type = self._select_instance_type(instance_type)
        try:
            flavor = nova.flavors.find(name=i_type)
        except Exception as e:
            log.warning(
                "Exception occurred while trying to get flavor by name:"
                " %s - will attempt to use name value as a uuid.", e)
            try:
                flavor = nova.flavors.get(i_type)
                log.debug("Got flavor via uuid: %s", i_type)
            except Exception as ex:
                log.error(
                    "Exception occurred trying to get flavor by uuid: %s",
                    ex)
                return None
        self.flavor_set.add(flavor)

        # find the network id to use if more than one network
        if vm_networkassoc:
            netid = self._resolve_nics(vm_networkassoc)
        elif self.network_pools and len(self.network_pools) > 0:
            netid = self._resolve_nics(self.network_pools[0])
        else:
            netid = []

        # Need to get the rotating hostname from the google code to use for here.
        name = self._generate_next_name()
        if not name:
            log.debug("Unable to generate name for %s", self.name)
            return self.ERROR

        instance = None
        try:
            instance = nova.servers.create(
                name=name,
                image=imageobj,
                flavor=flavor,
                key_name=key_name,
                availability_zone=self.placement_zone,
                nics=netid,
                userdata=user_data,
                security_groups=sec_group)
        except novaclient.exceptions.OverLimit as e:
            log.info(
                "Unable to create VM without exceeded quota on %s: %s",
                self.name, e.message)
        except Exception as e:
            log.error("Unhandled exception while creating vm on %s: %s",
                      self.name, e)
        if not instance:
            log.debug("Failed to create instance on %s", self.name)
            return self.ERROR

        instance_id = instance.id
        # if job didn't set a keep_alive use the clouds default
        if not vm_keepalive and self.keep_alive:
            vm_keepalive = self.keep_alive

        new_vm = cluster_tools.VM(name=vm_name,
                                  id=instance_id,
                                  vmtype=vm_type,
                                  user=vm_user,
                                  clusteraddr=self.network_address,
                                  hostname=name,
                                  cloudtype=self.cloud_type,
                                  network=vm_networkassoc,
                                  image=vm_image,
                                  flavor=flavor.name,
                                  memory=vm_mem,
                                  cpucores=vm_cores,
                                  storage=vm_storage,
                                  keep_alive=vm_keepalive,
                                  job_per_core=job_per_core)
        try:
            self.resource_checkout(new_vm)
            log.info("Launching 1 VM: %s on %s under tenant: %s",
                     instance_id, self.name, self.tenant_name)
        except Exception:
            log.error(
                "Unexpected Error checking out resources creating VM. Programming error?"
            )
            self.vm_destroy(new_vm,
                            reason="Failed Resource checkout",
                            return_resources=False)
            return self.ERROR

        self.vms.append(new_vm)
        return 0

    def vm_destroy(self, vm, return_resources=True, reason=""):
        """Destroy a VM on OpenStack.

        A VM that nova no longer knows is still removed from the local list.
        Returns 0 on success and 1 on failure.
        """
        nova = self._get_creds_nova_updated()
        import novaclient.exceptions
        log.info("Destroying VM: %s Name: %s on %s tenant: %s Reason: %s",
                 vm.id, vm.hostname, self.name, self.tenant_name, reason)
        try:
            instance = nova.servers.get(vm.id)
            instance.delete()
        except novaclient.exceptions.NotFound:
            log.error("VM %s not found on %s: removing from CS", vm.id,
                      self.name)
        except Exception as e:
            # Also reached when no nova client could be created (nova is None).
            try:
                log.error("Unhandled exception while destroying VM on %s : %s",
                          self.name, e)
            except Exception:
                log.error("Failed to log exception properly?")
            return 1

        # Delete references to this VM
        try:
            if return_resources and vm.return_resources:
                self.resource_return(vm)
            with self.vms_lock:
                self.vms.remove(vm)
            if config_val.get('global', 'monitor_url'):
                self._report_monitor(vm)
        except Exception as e:
            log.error("Error removing vm from list: %s", e)
            return 1

        return 0

    def vm_poll(self, vm):
        """Query OpenStack for status information of VMs.

        Updates ``vm.status`` (and ``vm.last_state_change`` on a change) and
        returns the new status. A VM that cannot be fetched is set to the
        error state.
        """
        import novaclient.exceptions
        nova = self._get_creds_nova_updated()
        instance = None
        try:
            instance = nova.servers.get(vm.id)
        except novaclient.exceptions.NotFound as e:
            log.exception("VM %s not found on %s: %s", vm.id, self.name, e)
            vm.status = self.VM_STATES['ERROR']
        except Exception as e:
            try:
                log.error("Unexpected exception occurred polling vm %s: %s",
                          vm.id, e)
            except Exception:
                log.error("Failed to log exception properly: %s", vm.id)
        with self.vms_lock:
            if instance:
                # Statuses missing from VM_STATES are compared as "Starting"
                # here but stored verbatim below.
                reported = self.VM_STATES.get(instance.status, "Starting")
                if vm.status != reported:
                    vm.last_state_change = int(time.time())
                    log.debug("VM: %s on %s. Changed from %s to %s.", vm.id,
                              self.name, vm.status, reported)
                if instance.status in self.VM_STATES.keys():
                    vm.status = self.VM_STATES[instance.status]
                else:
                    vm.status = instance.status
            else:
                vm.status = self.VM_STATES['ERROR']
        return vm.status

    def _get_creds_nova(self):
        """Get an auth token to Nova.

        Returns a novaclient v2 client for the first region, or None when
        the client cannot be created. Exits the process if novaclient is
        not importable.
        """
        try:
            import novaclient.v2.client as nvclient
        except Exception as e:
            print("Unable to import novaclient - cannot use native openstack cloudtypes")
            print(e)
            sys.exit(1)
        try:
            client = nvclient.Client(session=self.session,
                                     region_name=self.regions[0],
                                     timeout=10)
        except Exception as e:
            log.error("Unable to create connection to %s: Reason: %s",
                      self.name, e)
            return None
        return client

    def _get_creds_nova_updated(self):
        """Get an auth token to Nova.

        Same as ``_get_creds_nova`` but through the version-dispatching
        ``novaclient.client.Client("2.0", ...)`` entry point.
        """
        try:
            from novaclient import client as nvclient
        except Exception as e:
            print("Unable to import novaclient - cannot use native openstack cloudtypes")
            print(e)
            sys.exit(1)
        try:
            return nvclient.Client("2.0",
                                   session=self.session,
                                   region_name=self.regions[0],
                                   timeout=10)
        except Exception as e:
            log.error("Unable to create connection to %s: Reason: %s",
                      self.name, e)
        return None

    def _get_keystone_session(self):
        """Get a session object to keystone with v2 url.

        Returns None (after logging the reason) when the keystone modules
        cannot be imported or the session cannot be built.
        """
        try:
            from keystoneauth1.identity import v2
            from keystoneauth1 import session
            auth = v2.Password(auth_url=self.auth_url,
                               username=self.username,
                               password=self.password,
                               tenant_name=self.tenant_name)
            sess = session.Session(auth=auth, verify=self.cacert)
        except Exception as e:
            log.error(
                "Problem importing keystone modules, and getting session: %s",
                e)
            return None
        log.debug("Session object for %s created", self.name)
        return sess

    def _get_keystone_session_v3(self):
        """Get a session object to keystone using v3.

        Returns None (after logging the reason) when the keystone modules
        cannot be imported or the session cannot be built.
        """
        try:
            from keystoneauth1.identity import v3
            from keystoneauth1 import session
            auth = v3.Password(auth_url=self.auth_url,
                               username=self.username,
                               password=self.password,
                               project_name=self.tenant_name,
                               project_domain_name=self.project_domain_name,
                               user_domain_name=self.user_domain_name)
            sess = session.Session(auth=auth, verify=self.cacert)
        except Exception as e:
            log.error(
                "Problem importing keystone modules, and getting session: %s",
                e)
            return None
        log.debug("Session object for %s created", self.name)
        return sess

    def _find_network(self, name):
        """
        Find a network on openstack given the name of network.
        :param name: str - name of network to look for.
        :return: openstack network obj, or None when the lookup fails.
        """
        nova = self._get_creds_nova_updated()
        network = None
        try:
            network = nova.neutron.find_network(name)
        except Exception as e:
            log.error("Unable to find network %s on %s Exception: %s", name,
                      self.name, e)
        return network
def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc,
              vm_image, vm_mem, vm_cores, vm_storage, customization=None,
              pre_customization=None, vm_keepalive=0, instance_type="",
              maximum_price=0, job_per_core=False, securitygroup=[],
              key_name="",use_cloud_init=False, extra_userdata=[]):
    """Attempt to boot a new VM on the cluster.

    Resolves security groups, AMI and instance type for this cluster,
    builds (optionally cloud-init) user data, looks the image up through
    the boto connection and runs one instance of it.

    Returns self.ERROR when running the instance fails and None when no
    AMI can be resolved.

    NOTE(review): the visible text of this definition stops after the
    on-demand launch; the rest (spot requests, VM bookkeeping, the handler
    of the outer try) is not in view here.
    """
    use_cloud_init = use_cloud_init or config.use_cloud_init
    log.verbose("Trying to boot %s on %s" % (vm_type, self.network_address))
    # Keep only the requested security groups that this cloud defines.
    if len(securitygroup) != 0:
        sec_group = []
        for group in securitygroup:
            if group in self.security_groups:
                sec_group.append(group)
        if len(sec_group) == 0:
            log.debug("No matching security groups - trying default config")
            sec_group = self.security_groups
            #sec_group.append("default") - don't just append default use what is in cloud_resources.conf for this cloud
    else:
        sec_group = self.security_groups

    # AMI: job mapping by cluster name / address, then configured defaults.
    try:
        if self.name in vm_image.keys():
            vm_ami = vm_image[self.name]
        else:
            vm_ami = vm_image[self.network_address]
    except:
        log.debug("No AMI for %s, trying default" % self.network_address)
        #try:
        #    vm_ami = vm_image["default"]
        #except:
            #log.debug("No given default - trying global defaults")
        try:
            vm_default_ami = _attr_list_to_dict(config.default_VMAMI)
            if self.name in vm_default_ami.keys():
                vm_ami = vm_default_ami[self.name]
            else:
                vm_ami = vm_default_ami[self.network_address]
        except:
            try:
                vm_ami = vm_default_ami["default"]
            except:
                log.exception("Can't find a suitable AMI")
                # NOTE(review): vm_ami has not been assigned on this path, so
                # the next line raises NameError instead of returning.
                self.failed_image_set.add(vm_ami)
                return

    # Instance type: same idea, ending at the single configured default.
    # With the default instance_type="" the .keys() call fails and the
    # except branch is taken.
    try:
        if self.name in instance_type.keys():
            i_type = instance_type[self.name]
        elif self.network_address in instance_type.keys():
            i_type = instance_type[self.network_address]
        else:
            i_type = instance_type["default"]
    except:
        log.debug("No instance type for %s, trying default" % self.network_address)
        #try:
        #    i_type = instance_type["default"]
        #except:
        #    if isinstance(instance_type, str):
        #        i_type = instance_type
        #    else:
        try:
            if self.name in self.DEFAULT_INSTANCE_TYPE_LIST.keys():
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.name]
            else:
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.network_address]
        except:
            log.debug("No default instance type found for %s, trying single default" % self.network_address)
            i_type = self.DEFAULT_INSTANCE_TYPE
    instance_type = i_type

    # NOTE(review): the default key_name is "", which this test does not
    # match; only an explicit None selects the cluster's key.
    if key_name == None:
        key_name = self.key_name
    if customization:
        if not use_cloud_init:
            user_data = nimbus_xml.ws_optional(customization)
        else:
            user_data = cloud_init_util.build_write_files_cloud_init(customization)
    else:
        user_data = ""
    if pre_customization:
        if not use_cloud_init:
            # Without cloud-init the items are simply prepended as lines.
            for item in pre_customization:
                user_data = '\n'.join([item, user_data])
        else:
            user_data = cloud_init_util.inject_customizations(pre_customization, user_data)
    elif use_cloud_init:
        # NOTE(review): only this call indexes the result with [0]; the call
        # just above uses the return value directly — confirm which is right.
        user_data = cloud_init_util.inject_customizations([], user_data)[0]
    if len(extra_userdata) > 0:
        # need to use the multi-mime type functions
        user_data = cloud_init_util.build_multi_mime_message([(user_data, 'cloud-config', 'cloud_conf.yaml')], extra_userdata)

    if "AmazonEC2" == self.cloud_type and vm_networkassoc != "public":
        log.debug("You requested '%s' networking, but EC2 only supports 'public'" % vm_networkassoc)
        addressing_type = "public"
    else:
        addressing_type = vm_networkassoc

    try:
        connection = self._get_connection()
        image = None
        if not "Eucalyptus" == self.cloud_type:
            image = connection.get_image(vm_ami)
        else: #HACK: for some reason Eucalyptus won't respond properly to
              #      get_image("whateverimg"). Use a linear search until
              #      this is fixed
              # This is Eucalyptus bug #495670
              # https://bugs.launchpad.net/eucalyptus/+bug/495670
            images = connection.get_all_images()
            for potential_match in images:
                if potential_match.id == vm_ami:
                    image = potential_match
                    break

        # Compress the user data to try and get under the limit
        user_data = utilities.gzip_userdata(user_data)

        if image:
            # NOTE(review): "is 0" is an identity test against an int literal;
            # a price of 0.0 or "0" does not take this branch.
            if maximum_price is 0 or self.cloud_type == "OpenStack": # don't request a spot instance
                try:
                    reservation = image.run(1,1, key_name=key_name,
                                            addressing_type=addressing_type,
                                            user_data=user_data,
                                            placement=self.placement_zone,
                                            security_groups=sec_group,
                                            instance_type=instance_type)
                    instance_id = reservation.instances[0].id
                    log.debug("Booted VM %s" % instance_id)
                except boto.exception.EC2ResponseError, e:
                    log.exception("There was a problem creating an EC2 instance: %s" % e)
                    return self.ERROR
                except Exception, e:
                    log.exception("There was an unexpected problem creating an EC2 instance: %s" % e)
                    return self.ERROR
    # NOTE(review): the text of this definition ends here — the try above has
    # no handler and "if image:" has no else in the visible source.
def vm_create(self, vm_name, vm_type, vm_user, vm_networkassoc, vm_cpuarch,
              vm_image, vm_mem, vm_cores, vm_storage, customization=None,
              vm_keepalive=0, instance_type="", maximum_price=0,
              job_per_core=False, securitygroup=None, key_name=""):
    """Attempt to boot a new VM on the cluster.

    Resolves security groups, AMI and instance type for this cluster, then
    either runs an on-demand instance (``maximum_price == 0``) or places a
    spot request capped at ``maximum_price`` cents, and records the VM.

    :param vm_name: name recorded on the new VM entry.
    :param vm_type: VM type recorded on the new VM entry.
    :param vm_user: user recorded on the new VM entry.
    :param vm_networkassoc: requested addressing type; forced to "public"
        on AmazonEC2.
    :param vm_cpuarch: CPU architecture recorded on the new VM entry.
    :param vm_image: mapping from cluster name or network address to AMI id.
    :param vm_mem: memory; also used to find the cluster's memory entry.
    :param vm_cores: core count recorded on the new VM entry.
    :param vm_storage: storage recorded on the new VM entry.
    :param customization: when set, converted to user data via nimbus_xml.
    :param vm_keepalive: keep-alive recorded on the new VM entry.
    :param instance_type: mapping from cluster name or network address to
        instance type; anything unusable falls back to configured defaults.
    :param maximum_price: spot price cap in cents; 0 means on-demand.
    :param job_per_core: recorded on the new VM entry.
    :param securitygroup: requested security groups; only those defined for
        this cloud are used.
    :param key_name: key pair name; None selects the cluster's key.
    :return: 0 on success, ``self.ERROR`` on failure, ``None`` when no AMI
        can be resolved.
    """
    log.verbose("Trying to boot %s on %s" % (vm_type, self.network_address))

    # Keep only the requested groups this cloud defines; with none left (or
    # none requested) use what cloud_resources.conf configures for it.
    if securitygroup:
        sec_group = [group for group in securitygroup
                     if group in self.security_groups]
        if not sec_group:
            log.warning(
                "No matching security groups - trying default config")
            sec_group = self.security_groups
    else:
        sec_group = self.security_groups

    # AMI: job mapping first, then the configured defaults.
    try:
        if self.name in vm_image.keys():
            vm_ami = vm_image[self.name]
        else:
            vm_ami = vm_image[self.network_address]
    except Exception:
        log.debug("No AMI for %s, trying default" % self.network_address)
        vm_default_ami = {}
        try:
            vm_default_ami = _attr_list_to_dict(config.default_VMAMI)
            if self.name in vm_default_ami.keys():
                vm_ami = vm_default_ami[self.name]
            else:
                vm_ami = vm_default_ami[self.network_address]
        except Exception:
            try:
                vm_ami = vm_default_ami["default"]
            except Exception:
                log.exception("Can't find a suitable AMI")
                return None

    # Instance type: job mapping, per-cluster defaults, single default.
    try:
        if self.name in instance_type.keys():
            i_type = instance_type[self.name]
        else:
            i_type = instance_type[self.network_address]
    except Exception:
        # Also taken when instance_type is not a mapping (default is "").
        log.debug("No instance type for %s, trying default" % self.network_address)
        try:
            if self.name in self.DEFAULT_INSTANCE_TYPE_LIST.keys():
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[self.name]
            else:
                i_type = self.DEFAULT_INSTANCE_TYPE_LIST[
                    self.network_address]
        except Exception:
            log.debug(
                "No default instance type found for %s, trying single default"
                % self.network_address)
            i_type = self.DEFAULT_INSTANCE_TYPE
    instance_type = i_type

    if key_name is None:
        key_name = self.key_name

    if customization:
        user_data = nimbus_xml.ws_optional(customization)
    else:
        user_data = ""

    if "AmazonEC2" == self.cloud_type and vm_networkassoc != "public":
        log.debug(
            "You requested '%s' networking, but EC2 only supports 'public'"
            % vm_networkassoc)
        addressing_type = "public"
    else:
        addressing_type = vm_networkassoc

    spot_id = None  # stays None for on-demand instances
    try:
        connection = self._get_connection()
        image = None
        if self.cloud_type != "Eucalyptus":
            image = connection.get_image(vm_ami)
        else:
            #HACK: for some reason Eucalyptus won't respond properly to
            #      get_image("whateverimg"). Use a linear search until
            #      this is fixed
            # This is Eucalyptus bug #495670
            # https://bugs.launchpad.net/eucalyptus/+bug/495670
            for potential_match in connection.get_all_images():
                if potential_match.id == vm_ami:
                    image = potential_match
                    break

        if not image:
            log.error("Couldn't find image %s on %s" %
                      (vm_image, self.name))
            return self.ERROR

        if maximum_price == 0:  # don't request a spot instance
            try:
                reservation = image.run(
                    1,
                    1,
                    key_name=key_name,
                    addressing_type=addressing_type,
                    user_data=user_data,
                    placement=self.placement_zone,
                    security_groups=sec_group,
                    instance_type=instance_type)
                instance_id = reservation.instances[0].id
                log.debug("Booted VM %s" % instance_id)
            except Exception:
                log.exception(
                    "There was a problem creating an EC2 instance...")
                return self.ERROR
        else:  # get a spot instance of no more than maximum_price
            try:
                price_in_dollars = str(float(maximum_price) / 100)
                # Use the locally resolved groups: there is no
                # self.sec_group attribute, and reading it raised an
                # AttributeError that was reported as a boto version problem.
                reservation = connection.request_spot_instances(
                    price_in_dollars,
                    image.id,
                    key_name=key_name,
                    user_data=user_data,
                    placement=self.placement_zone,
                    addressing_type=addressing_type,
                    security_groups=sec_group,
                    instance_type=instance_type)
                spot_id = str(reservation[0].id)
                instance_id = ""
                log.debug("Reserved instance %s at no more than %s" %
                          (spot_id, price_in_dollars))
            except AttributeError:
                log.exception("Your version of boto doesn't seem to support "\
                              "spot instances. You need at least 1.9")
                return self.ERROR
            except Exception:
                log.exception(
                    "Problem creating an EC2 spot instance...")
                return self.ERROR
    except Exception:
        log.exception("Problem creating EC2 instance on on %s" % self.name)
        return self.ERROR

    vm_mementry = self.find_mementry(vm_mem)
    if (vm_mementry < 0):
        #TODO: this is kind of pointless with EC2...
        log.debug("Cluster memory list has no sufficient memory " +\
                  "entries (Not supposed to happen). Returning error.")
        return self.ERROR
    log.verbose("vm_create - Memory entry found in given cluster: %d" %
                vm_mementry)
    new_vm = cluster_tools.VM(name=vm_name,
                              id=instance_id,
                              vmtype=vm_type,
                              user=vm_user,
                              clusteraddr=self.network_address,
                              cloudtype=self.cloud_type,
                              network=vm_networkassoc,
                              cpuarch=vm_cpuarch,
                              image=vm_image,
                              memory=vm_mem,
                              mementry=vm_mementry,
                              cpucores=vm_cores,
                              storage=vm_storage,
                              keep_alive=vm_keepalive,
                              job_per_core=job_per_core)

    if spot_id is not None:
        new_vm.spot_id = spot_id
    else:
        log.verbose("No spot ID to add to VM %s" % instance_id)

    try:
        self.resource_checkout(new_vm)
    except Exception:
        log.exception(
            "Unexpected Error checking out resources when creating a VM. Programming error?"
        )
        self.vm_destroy(new_vm, reason="Failed Resource checkout")
        return self.ERROR

    self.vms.append(new_vm)

    return 0