Example #1
0
    def _get_instance(self, vm_id):
        # contact EC2 API to get VM info
        try:
            reservations = self.conn.get_all_instances(instance_ids=[vm_id])
        except boto.exception.EC2ResponseError as err:
            # scrape actual error kind and message out of the
            # exception; we do this mostly for sensible logging, but
            # could be an actual improvement to Boto to provide
            # different exception classes based on the <Code>
            # element...
            # XXX: is there a more robust way of doing this?
            match = _BOTO_ERRMSG_RE.search(str(err))
            if match:
                raise UnrecoverableError(
                    "Error getting info on VM %s: EC2ResponseError/%s: %s" %
                    (vm_id, match.group('code'), match.group('message')),
                    do_log=True)
            else:
                # fall back to normal reporting...
                raise UnrecoverableError("Error getting VM %s: %s" %
                                         (vm_id, err),
                                         do_log=True)
        if not reservations:
            raise InstanceNotFound("No instance with id %s has been found." %
                                   vm_id)

        instances = dict(
            (i.id, i) for i in reservations[0].instances if reservations)
        if vm_id not in instances:
            raise UnrecoverableError("No instance with id %s has been found." %
                                     vm_id)

        return instances[vm_id]
Example #2
0
    def get_vm(self, vm_id, force_reload=False):
        """
        Look up and return the VM object identified by `vm_id`.

        Unless `force_reload` is true, an object already present in
        the local cache is returned as is.  In every other case the
        instance is fetched anew through `_get_instance`, stored in
        the cache, and its id is recorded among the known VM ids.
        """
        # serve the request from the cache whenever that is allowed
        use_cache = not force_reload
        if use_cache and vm_id in self._vm_cache:
            return self._vm_cache[vm_id]

        # XXX: should this be an `assert` instead?
        if not self.conn:
            raise UnrecoverableError(
                "No connection set for `VMPool('%s')`" % self.path)

        instance = self._get_instance(vm_id)
        if not hasattr(instance, 'preferred_ip'):
            # the preferred IP is read from the file named after the
            # instance id, located in this object's `path` directory
            ip_file = os.path.join(self.path, instance.id)
            instance.preferred_ip = gc3libs.utils.read_contents(ip_file)

        self._vm_cache[vm_id] = instance
        if vm_id not in self._vm_ids:
            # a VM id not seen before: record it and flag the change
            self._vm_ids.add(vm_id)
            self.changed = True
        return instance
Example #3
0
    def _setup_security_groups(self):
        """
        Check the current configuration and set up the security group
        if it does not exist.
        """
        self._connect()
        if not self.security_group_name:
            gc3libs.log.error("Group name in `security_group_name`"
                              " configuration option cannot be empty!")
            return

        try:
            self._get_security_group(self.security_group_name)
        except NotFound:
            try:
                gc3libs.log.info("Creating security group %s",
                                 self.security_group_name)

                self.client.security_groups.create(
                    self.security_group_name,
                    "GC3Pie_%s" % self.security_group_name)
            except Exception as ex:
                gc3libs.log.error("Error creating security group %s: %s",
                                  self.security_group_name, ex)
                raise UnrecoverableError(
                    "Error creating security group %s: %s"
                    % (self.security_group_name, ex))

            self._get_security_group(self.security_group_name)
Example #4
0
 def _import_keypair(self):
     """
     Upload the public key named in the configuration to the cloud,
     registering it as keypair `self.keypair_name`.

     Errors raised while reading the key file or importing the key
     are re-raised as `UnrecoverableError`; a failure to open the
     file propagates unchanged.
     """
     key_path = os.path.expanduser(self.public_key)
     with open(key_path) as key_file:
         try:
             public_key_text = key_file.read()
             new_key = self._conn.import_key_pair(
                 self.keypair_name, public_key_text)
             gc3libs.log.info(
                 "Successfully imported key `%s`"
                 " with fingerprint `%s` as keypair `%s`",
                 new_key.name, new_key.fingerprint, self.keypair_name)
         except Exception as err:
             raise UnrecoverableError("Error importing keypair %s: %s" %
                                      (self.keypair_name, err))
Example #5
0
 def _import_keypair(self):
     """
     Create a new keypair and import the public key defined in the
     configuration file.

     The key material is read from the file `self.public_key` (with
     `~` expanded) and registered under the name
     `self.keypair_name`; the keypair object fetched back from the
     server is returned.

     Errors raised while reading the file or talking to the server
     are re-raised as `UnrecoverableError`; a failure to open the
     file propagates unchanged.
     """
     # `with` closes the key file on success as well as on failure
     # (it used to be closed only when an exception occurred)
     with open(os.path.expanduser(self.public_key)) as fd:
         try:
             key_material = fd.read()
             self.client.keypairs.create(self.keypair_name, key_material)
             keypair = self.client.keypairs.get(self.keypair_name)
             gc3libs.log.info(
                 "Successfully imported key `%s` with fingerprint `%s`"
                 " as keypair `%s`" % (self.public_key,
                                       keypair.fingerprint,
                                       self.keypair_name))
             return keypair
         except Exception as ex:
             raise UnrecoverableError("Error importing keypair %s: %s"
                                      % (self.keypair_name, ex))
Example #6
0
    def _setup_security_groups(self):
        """
        Check the current configuration and set up the security group
        if it does not exist.
        """
        if not self.security_group_name:
            gc3libs.log.error("Group name in `security_group_name`"
                              " configuration option cannot be empty!")
            return
        security_groups = self._conn.get_all_security_groups()
        groups = dict((g.name, g) for g in security_groups)
        # Check if the security group exists already
        if self.security_group_name not in groups:
            try:
                gc3libs.log.info("Creating security group %s",
                                 self.security_group_name)
                security_group = self._conn.create_security_group(
                    self.security_group_name,
                    "GC3Pie_%s" % self.security_group_name)
            except Exception as ex:
                gc3libs.log.error("Error creating security group %s: %s",
                                  self.security_group_name, ex)
                raise UnrecoverableError(
                    "Error creating security group %s: %s" %
                    (self.security_group_name, ex))

            for rule in self.security_group_rules:
                try:
                    gc3libs.log.debug("Adding rule %s to security group %s.",
                                      rule, self.security_group_name)
                    security_group.authorize(**rule)
                except Exception as ex:
                    if gc3libs.error_ignored(
                            # context:
                            # - module
                            'ec2',
                            # - class
                            'EC2Lrms',
                            # - method
                            'setup_security_groups',
                            # - actual error class
                            ex.__class__.__name__,
                            # - additional keywords
                            'setup',
                            'security',
                            'network',
                            'cloud',
                    ):
                        gc3libs.log.info(
                            "Ignoring error adding rule %s"
                            " to security group %s: %s", rule,
                            self.security_group_name, ex)
                    else:
                        # propagate exception to caller
                        raise
        else:
            # Check if the security group has all the rules we want
            security_group = groups[self.security_group_name]
            current_rules = []
            for rule in security_group.rules:
                rule_dict = {
                    'ip_protocol': rule.ip_protocol,
                    'from_port': int(rule.from_port),
                    'to_port': int(rule.to_port),
                    'cidr_ip': str(rule.grants[0]),
                }
                current_rules.append(rule_dict)

            for new_rule in self.security_group_rules:
                if new_rule not in current_rules:
                    security_group.authorize(**new_rule)
Example #7
0
    def _create_instance(self, image_id, instance_type=None, user_data=None):
        """
        Start a new VM from image `image_id` and return it.

        `instance_type` and `user_data` are forwarded in the launch
        request only when given; otherwise they are left out of it.

        Before launching, the keypair is imported if the endpoint does
        not list it yet (and checked through `_have_keypair` if it
        does), and the security group is set up when a
        `security_group_name` is configured.  The new VM is added to
        the VM pool before being returned.

        Raises `UnrecoverableError` if the instance cannot be started.
        """
        self._connect()

        launch_args = {
            'key_name': self.keypair_name,
            'min_count': 1,
            'max_count': 1,
        }
        if instance_type:
            launch_args['instance_type'] = instance_type
        if user_data:
            launch_args['user_data'] = user_data

        # Make sure the desired keypair is available on the endpoint
        known_keypairs = dict(
            (kp.name, kp) for kp in self._conn.get_all_key_pairs())
        if self.keypair_name in known_keypairs:
            self._have_keypair(known_keypairs[self.keypair_name])
        else:
            gc3libs.log.info(
                "Keypair `%s` not found: creating it using public key `%s`" %
                (self.keypair_name, self.public_key))
            self._import_keypair()

        # Security groups are handled only when one is configured
        if 'security_group_name' in self:
            self._setup_security_groups()
            launch_args['security_groups'] = [self.security_group_name]

        gc3libs.log.debug("Create new VM using image id `%s`", image_id)
        try:
            reservation = self._conn.run_instances(image_id, **launch_args)
        except boto.exception.EC2ResponseError as api_error:
            # Try to extract the error code and message from the text
            # of the exception, for the sake of a more readable report.
            parsed = _BOTO_ERRMSG_RE.search(str(api_error))
            if not parsed:
                # nothing recognizable: report the exception as it is
                raise UnrecoverableError(
                    "Error starting instance: %s" % api_error)
            raise UnrecoverableError(
                "Error starting instance: EC2ResponseError/%s: %s" %
                (parsed.group('code'), parsed.group('message')))
        except Exception as other_error:
            raise UnrecoverableError(
                "Error starting instance: %s" % other_error)

        new_vm = reservation.instances[0]
        self._vmpool.add_vm(new_vm)
        gc3libs.log.info(
            "VM with id `%s` has been created and is in %s state.", new_vm.id,
            new_vm.state)
        return new_vm
Example #8
0
    def __init__(
            self,
            name,
            # parameters handed over to the `LRMS` base class
            architecture,
            max_cores,
            max_cores_per_job,
            max_memory_per_core,
            max_walltime,
            # parameters specific to the EC2 backend
            ec2_region,
            keypair_name,
            public_key,
            vm_auth,
            image_id=None,
            image_name=None,
            ec2_url=None,
            instance_type=None,
            auth=None,
            vm_pool_max_size=None,
            user_data=None,
            **extra_args):
        """
        Set up an EC2 resource from its configuration values.

        The generic resource parameters are passed on to
        `LRMS.__init__`; the remaining ones configure the EC2
        endpoint, the keypair and image used for VMs, and the
        arguments (`self.subresource_args`) kept for creating
        sub-resources.

        Raises `ConfigurationError` if `vm_pool_max_size` is not an
        integer, if `keypair_name` contains non-word characters, or if
        neither `image_id` nor `image_name` is given;
        `UnrecoverableError` if the sub-resource type is not one of
        `available_subresource_types`; and
        `gc3libs.exceptions.InvalidArgument` if no EC2 URL is given
        either as `ec2_url` or in the `EC2_URL` environment variable.
        """
        LRMS.__init__(self, name, architecture, max_cores, max_cores_per_job,
                      max_memory_per_core, max_walltime, auth, **extra_args)

        self.free_slots = int(max_cores)
        self.user_run = 0
        self.user_queued = 0
        self.queued = 0

        self.vm_pool_max_size = vm_pool_max_size
        if vm_pool_max_size is not None:
            try:
                self.vm_pool_max_size = int(vm_pool_max_size)
            except ValueError:
                raise ConfigurationError(
                    "Value for `vm_pool_max_size` must be an integer,"
                    " was %s instead." % vm_pool_max_size)

        # the sub-resource type is whatever follows the first `+`
        # in the resource type
        subresource_type = self.type.split('+', 1)[1]
        self.subresource_type = subresource_type
        if subresource_type not in available_subresource_types:
            raise UnrecoverableError("Invalid resource type: %s" % self.type)

        self.region = ec2_region

        # Mapping of job.execution._lrms_vm_id => LRMS
        self.subresources = {}

        credentials = self._auth_fn()
        self.ec2_access_key = credentials.ec2_access_key
        self.ec2_secret_key = credentials.ec2_secret_key

        # an explicit `ec2_url` argument wins over the environment
        endpoint = ec2_url if ec2_url is not None else os.getenv('EC2_URL')
        if endpoint is None:
            raise gc3libs.exceptions.InvalidArgument(
                "Cannot connect to the EC2 API:"
                " No 'EC2_URL' environment variable defined,"
                " and no 'ec2_url' argument passed to the EC2 backend.")
        self.ec2_url = gc3libs.url.Url(endpoint)

        # Keypair names can only contain alphanumeric chars!
        if re.match(r'.*\W.*', keypair_name):
            raise ConfigurationError(
                "Keypair name `%s` is invalid: keypair names can only contain "
                "alphanumeric chars: [a-zA-Z0-9_]" % keypair_name)
        self.keypair_name = keypair_name

        key_path = os.path.expandvars(public_key.strip())
        self.public_key = os.path.expanduser(key_path)

        self.image_id = image_id
        self.image_name = image_name
        self.instance_type = instance_type
        self._instance_type_specs = {}
        self.user_data = user_data

        self._parse_security_group()
        self._conn = None

        # `self.subresource_args` is used to create subresources
        self.subresource_args = extra_args
        subargs = self.subresource_args
        subargs['type'] = self.subresource_type
        for key in ('architecture',
                    'max_cores',
                    'max_cores_per_job',
                    'max_memory_per_core',
                    'max_walltime'):
            subargs[key] = self[key]

        # SSH-specific configuration
        subargs['transport'] = 'ssh'
        subargs['auth'] = vm_auth
        subargs['ssh_timeout'] = 7  # FIXME: hard-coded!
        subargs['ignore_ssh_host_keys'] = True
        # the key file is the public key path without its `.pub` suffix
        keyfile = self.public_key
        if keyfile.endswith('.pub'):
            keyfile = keyfile[:-len('.pub')]
        subargs['keyfile'] = keyfile

        # ShellcmdLrms by default trusts the configuration, instead of
        # checking the real amount of memory and number of cpus, but
        # we need the real values instead.
        if self.subresource_type == gc3libs.Default.SHELLCMD_LRMS:
            subargs['override'] = 'True'

        if not (image_name or image_id):
            raise ConfigurationError(
                "No `image_id` or `image_name` has been specified in the"
                " configuration file.")
Example #9
0
    def _create_instance(self, image_id, name='gc3pie-instance',
                         instance_type=None, user_data=None):
        """
        Start a new VM called `name` from image `image_id` and return it.

        The VM is created with type `instance_type`, falling back to
        `self.instance_type` when none is given.  `user_data`, if any,
        is forwarded to the server creation call.

        Before creating the VM, the keypair is imported if it cannot
        be found (and checked through `_have_keypair` if it can), and
        the security group is set up when a `security_group_name` is
        configured.  The new VM is added to the VM pool before being
        returned.

        Raises `UnrecoverableError` if the VM cannot be created.
        """
        create_args = {}
        if user_data:
            create_args['userdata'] = user_data

        # Make sure the desired keypair is available
        try:
            keypair = self._get_keypair(self.keypair_name)
        except NotFound:
            gc3libs.log.info(
                "Keypair `%s` not found: creating it using public key `%s`"
                % (self.keypair_name, self.public_key))
            self._import_keypair()
        else:
            self._have_keypair(keypair)

        flavor = instance_type or self.instance_type

        # Security groups are handled only when one is configured
        if 'security_group_name' in self:
            self._setup_security_groups()
            create_args['security_groups'] = [self.security_group_name]

        # One NIC is requested for each comma-separated network id
        if self.network_ids:
            nics = []
            for netid in self.network_ids.split(','):
                nics.append({'net-id': netid.strip(), 'v4-fixed-ip': ''})
            gc3libs.log.debug(
                "Specifying networks for vm %s: %s",
                name, ', '.join(nic['net-id'] for nic in nics))
        else:
            nics = None
        create_args['nics'] = nics

        gc3libs.log.debug("Create new VM using image id `%s`", image_id)
        try:
            new_vm = self.client.servers.create(
                name, image_id, flavor,
                key_name=self.keypair_name, **create_args)
        except Exception as err:
            # any failure is reported as unrecoverable, using the
            # text of the original exception as explanation
            raise UnrecoverableError("Error starting instance: %s" % err)

        self._vmpool.add_vm(new_vm)
        gc3libs.log.info(
            "VM with id `%s` has been created and is in %s state.",
            new_vm.id, new_vm.status)
        return new_vm
Example #10
0
    def __init__(self, name,
                 # these parameters are inherited from the `LRMS` class
                 architecture, max_cores, max_cores_per_job,
                 max_memory_per_core, max_walltime,
                 # these are specific of the OpenStackLrms class
                 keypair_name, public_key, vm_auth,
                 os_region=None, image_id=None, os_auth_url=None,
                 instance_type=None, auth=None,
                 vm_pool_max_size=None, user_data=None,
                 vm_os_overhead=gc3libs.Default.VM_OS_OVERHEAD,
                 # extra args are used to instantiate "sub-resources"
                 **extra_args):
        """
        Set up an OpenStack resource from its configuration values.

        The generic resource parameters are passed on to
        `LRMS.__init__`; the remaining ones configure the OpenStack
        endpoint and credentials, the keypair and image used for VMs,
        and the arguments (`self.subresource_args`) kept for creating
        sub-resources.  Extra arguments whose name ends in
        `_instance_type` are stored on this object and are not passed
        on to sub-resources.

        Raises `ConfigurationError` if `vm_pool_max_size` is not an
        integer, if `keypair_name` contains characters other than
        ASCII letters, digits and `_`, or if no `image_id` is given;
        `UnrecoverableError` if the sub-resource type is not one of
        `available_subresource_types`; and
        `gc3libs.exceptions.InvalidArgument` if no authentication URL
        is given either as `os_auth_url` or in the `OS_AUTH_URL`
        environment variable.
        """

        # Note: this creates attributes from key/value pairs given in the
        # `extra_args` parameters. In particular, the `self.type` attribute
        # (referenced below) is set in this chained constructor...
        LRMS.__init__(
            self, name,
            architecture, max_cores, max_cores_per_job,
            max_memory_per_core, max_walltime, auth, **extra_args)

        self.free_slots = int(max_cores)
        self.user_run = 0
        self.user_queued = 0
        self.queued = 0
        self._flavors = []
        self.vm_pool_max_size = vm_pool_max_size
        if vm_pool_max_size is not None:
            try:
                self.vm_pool_max_size = int(self.vm_pool_max_size)
            except ValueError:
                raise ConfigurationError(
                    "Value for `vm_pool_max_size` must be an integer,"
                    " was %s instead." % vm_pool_max_size)

        # pylint: disable=no-member
        self.subresource_type = self.type.split('+', 1)[1]
        if self.subresource_type not in available_subresource_types:
            raise UnrecoverableError("Invalid resource type: %s" % self.type)

        # Mapping of job.execution.instance_id => LRMS
        self.subresources = {}

        auth = self._auth_fn()
        if os_auth_url is None:
            os_auth_url = os.getenv('OS_AUTH_URL')
        if os_auth_url is None:
            raise gc3libs.exceptions.InvalidArgument(
                "Cannot connect to the OpenStack API:"
                " No 'OS_AUTH_URL' environment variable defined,"
                " and no 'os_auth_url' argument passed"
                " to the OpenStack backend.")
        # `os_auth_url` is known not to be `None` from here on, so no
        # second check is needed after storing it
        self.os_auth_url = os_auth_url
        self.os_username = auth.os_username
        self.os_password = auth.os_password
        self.os_tenant_name = auth.os_project_name
        self.os_region_name = os_region
        # Keypair names can only contain alphanumeric chars!
        if not set(keypair_name).issubset(set(ascii_letters + digits + '_')):
            raise ConfigurationError(
                "Keypair name `%s` is invalid: keypair names can only contain "
                "alphanumeric chars: [a-zA-Z0-9_]" % keypair_name)
        self.keypair_name = keypair_name
        self.public_key = os.path.expanduser(
            os.path.expandvars(public_key.strip()))
        self.image_id = image_id
        self.instance_type = instance_type
        self.user_data = user_data
        self.vm_os_overhead = gc3libs.quantity.Memory(vm_os_overhead)
        self._parse_security_group()
        self._conn = None

        # `*_instance_type` config items should be consumed here,
        # not in any sub-resource.  Iterate over a snapshot of the
        # items: removing keys from a dictionary while iterating over
        # its live `.items()` view raises `RuntimeError` on Python 3.
        for key, value in list(extra_args.items()):
            if key.endswith('_instance_type'):
                self[key] = value
                extra_args.pop(key)

        # `self.subresource_args` is used to create subresources
        self.subresource_args = extra_args
        self.subresource_args['type'] = self.subresource_type
        self.subresource_args['architecture'] = self['architecture']
        self.subresource_args['max_cores'] = self['max_cores']
        self.subresource_args['max_cores_per_job'] = self['max_cores_per_job']
        self.subresource_args['max_memory_per_core'] = \
            self['max_memory_per_core']
        self.subresource_args['max_walltime'] = self['max_walltime']
        # SSH-specific configuration
        self.subresource_args['transport'] = 'ssh'
        self.subresource_args['auth'] = vm_auth
        self.subresource_args['ssh_timeout'] = 7  # FIXME: hard-coded!
        self.subresource_args['ignore_ssh_host_keys'] = True
        # the key file is the public key path without its `.pub` suffix
        self.subresource_args['keyfile'] = self.public_key
        if self.subresource_args['keyfile'].endswith('.pub'):
            self.subresource_args['keyfile'] = \
              self.subresource_args['keyfile'][:-len('.pub')]
        # ShellcmdLrms by default trusts the configuration, instead of
        # checking the real amount of memory and number of cpus, but
        # we need the real values instead.
        if self.subresource_type == gc3libs.Default.SHELLCMD_LRMS:
            self.subresource_args['override'] = 'True'

        if image_id is None:
            raise ConfigurationError(
                "No `image_id` specified in the configuration file.")

        # "Connect" to the cloud (connection is actually performed
        # only when needed by the `Client` class).

        self.client = self._new_client()

        # Set up the VMPool persistent class. This has been delayed
        # until here because otherwise self._conn is None
        pooldir = os.path.join(os.path.expandvars(OpenStackLrms.RESOURCE_DIR),
                               'vmpool', self.name)
        self._vmpool = OpenStackVMPool(pooldir, self.client)