Example #1
class LoadBalancingPolicy(base.Policy):
    """Policy for load balancing among members of a cluster.

    This policy is expected to be enforced before or after the membership of a
    cluster is changed. We need to refresh the load-balancer associated with
    the cluster (which could be created by the policy) when these actions are
    performed.
    """
    VERSION = '1.0'

    PRIORITY = 500

    TARGET = [
        ('AFTER', consts.CLUSTER_ADD_NODES),
        ('AFTER', consts.CLUSTER_SCALE_OUT),
        ('AFTER', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
    ]

    PROFILE_TYPE = [
        'os.nova.server-1.0',
    ]

    KEYS = (
        POOL,
        VIP,
        HEALTH_MONITOR,
    ) = (
        'pool',
        'vip',
        'health_monitor',
    )

    _POOL_KEYS = (
        POOL_PROTOCOL,
        POOL_PROTOCOL_PORT,
        POOL_SUBNET,
        POOL_LB_METHOD,
        POOL_ADMIN_STATE_UP,
        POOL_SESSION_PERSISTENCE,
    ) = (
        'protocol',
        'protocol_port',
        'subnet',
        'lb_method',
        'admin_state_up',
        'session_persistence',
    )

    PROTOCOLS = (
        HTTP,
        HTTPS,
        TCP,
    ) = (
        'HTTP',
        'HTTPS',
        'TCP',
    )

    LB_METHODS = (
        ROUND_ROBIN,
        LEAST_CONNECTIONS,
        SOURCE_IP,
    ) = (
        'ROUND_ROBIN',
        'LEAST_CONNECTIONS',
        'SOURCE_IP',
    )

    # NOTE: TCP, HTTP and HTTPS are re-bound here to the same string values
    # already defined in PROTOCOLS, so the shadowing is harmless.
    HEALTH_MONITOR_TYPES = (
        PING,
        TCP,
        HTTP,
        HTTPS,
    ) = (
        'PING',
        'TCP',
        'HTTP',
        'HTTPS',
    )

    HTTP_METHODS = (
        GET,
        POST,
        PUT,
        DELETE,
    ) = (
        'GET',
        'POST',
        'PUT',
        'DELETE',
    )

    _VIP_KEYS = (
        VIP_SUBNET,
        VIP_ADDRESS,
        VIP_CONNECTION_LIMIT,
        VIP_PROTOCOL,
        VIP_PROTOCOL_PORT,
        VIP_ADMIN_STATE_UP,
    ) = (
        'subnet',
        'address',
        'connection_limit',
        'protocol',
        'protocol_port',
        'admin_state_up',
    )

    HEALTH_MONITOR_KEYS = (
        HM_TYPE,
        HM_DELAY,
        HM_TIMEOUT,
        HM_MAX_RETRIES,
        HM_ADMIN_STATE_UP,
        HM_HTTP_METHOD,
        HM_URL_PATH,
        HM_EXPECTED_CODES,
    ) = (
        'type',
        'delay',
        'timeout',
        'max_retries',
        'admin_state_up',
        'http_method',
        'url_path',
        'expected_codes',
    )

    _SESSION_PERSISTENCE_KEYS = (
        PERSISTENCE_TYPE,
        COOKIE_NAME,
    ) = (
        'type',
        'cookie_name',
    )

    PERSISTENCE_TYPES = (
        PERSIST_SOURCE_IP,
        PERSIST_HTTP_COOKIE,
        PERSIST_APP_COOKIE,
    ) = (
        'SOURCE_IP',
        'HTTP_COOKIE',
        'APP_COOKIE',
    )

    properties_schema = {
        POOL:
        schema.Map(
            _('LB pool properties.'),
            schema={
                POOL_PROTOCOL:
                schema.String(
                    _('Protocol used for load balancing.'),
                    constraints=[
                        constraints.AllowedValues(PROTOCOLS),
                    ],
                    default=HTTP,
                ),
                POOL_PROTOCOL_PORT:
                schema.Integer(
                    _('Port on which servers are running on the nodes.'),
                    default=80,
                ),
                POOL_SUBNET:
                schema.String(
                    _('Name or ID of subnet for the port on which nodes can '
                      'be connected.'),
                    required=True,
                ),
                POOL_LB_METHOD:
                schema.String(
                    _('Load balancing algorithm.'),
                    constraints=[
                        constraints.AllowedValues(LB_METHODS),
                    ],
                    default=ROUND_ROBIN,
                ),
                POOL_ADMIN_STATE_UP:
                schema.Boolean(
                    _('Administrative state of the pool.'),
                    default=True,
                ),
                POOL_SESSION_PERSISTENCE:
                schema.Map(
                    _('Session persistence configuration.'),
                    schema={
                        PERSISTENCE_TYPE:
                        schema.String(
                            _('Type of session persistence implementation.'),
                            constraints=[
                                constraints.AllowedValues(PERSISTENCE_TYPES),
                            ],
                        ),
                        COOKIE_NAME:
                        schema.String(
                            _('Name of cookie if type set to APP_COOKIE.'), ),
                    },
                    default={},
                ),
            },
        ),
        VIP:
        schema.Map(
            _('VIP address and port of the pool.'),
            schema={
                VIP_SUBNET:
                schema.String(
                    _('Name or ID of Subnet on which the VIP address will be '
                      'allocated.'),
                    required=True,
                ),
                VIP_ADDRESS:
                schema.String(
                    _('IP address of the VIP.'),
                    default=None,
                ),
                VIP_CONNECTION_LIMIT:
                schema.Integer(
                    _('Maximum number of connections per second allowed for '
                      'this VIP.'),
                    default=-1,
                ),
                VIP_PROTOCOL:
                schema.String(
                    _('Protocol used for VIP.'),
                    constraints=[
                        constraints.AllowedValues(PROTOCOLS),
                    ],
                    default=HTTP,
                ),
                VIP_PROTOCOL_PORT:
                schema.Integer(
                    _('TCP port to listen on.'),
                    default=80,
                ),
                VIP_ADMIN_STATE_UP:
                schema.Boolean(
                    _('Administrative state of the VIP.'),
                    default=True,
                ),
            },
        ),
        HEALTH_MONITOR:
        schema.Map(
            _('Health monitor for loadbalancer.'),
            schema={
                HM_TYPE:
                schema.String(
                    _('The type of probe sent by the load balancer to verify '
                      'the member state.'),
                    constraints=[
                        constraints.AllowedValues(HEALTH_MONITOR_TYPES),
                    ],
                    default=PING,
                ),
                HM_DELAY:
                schema.Integer(
                    _('The amount of time in seconds between sending '
                      'probes to members.'),
                    default=10,
                ),
                HM_TIMEOUT:
                schema.Integer(
                    _('The maximum time in seconds that a monitor waits to '
                      'connect before it times out.'),
                    default=5,
                ),
                HM_MAX_RETRIES:
                schema.Integer(
                    _('The number of allowed connection failures before '
                      'changing the status of the member to INACTIVE.'),
                    default=3,
                ),
                HM_ADMIN_STATE_UP:
                schema.Boolean(
                    _('Administrative state of the health monitor.'),
                    default=True,
                ),
                HM_HTTP_METHOD:
                schema.String(
                    _('The HTTP method that the monitor uses for requests.'),
                    constraints=[
                        constraints.AllowedValues(HTTP_METHODS),
                    ],
                ),
                HM_URL_PATH:
                schema.String(
                    _('The HTTP path of the request sent by the monitor to '
                      'test the health of a member.'), ),
                HM_EXPECTED_CODES:
                schema.String(
                    _('Expected HTTP codes for a passing HTTP(S) monitor.'), ),
            },
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(LoadBalancingPolicy, self).__init__(name, spec, **kwargs)

        self.pool_spec = self.properties.get(self.POOL, {})
        self.vip_spec = self.properties.get(self.VIP, {})
        self.hm_spec = self.properties.get(self.HEALTH_MONITOR, None)
        self.validate()
        self.lb = None

    def validate(self):
        super(LoadBalancingPolicy, self).validate()

        # validate that the subnets exist
        # subnet = self.nc.subnet_get(vip[self.VIP_SUBNET])

    def attach(self, cluster):
        """Routine to be invoked when policy is to be attached to a cluster.

        :param cluster: The target cluster to be attached to;
        :returns: When the operation was successful, returns a tuple (True,
                  message); otherwise, returns a tuple (False, error).
        """
        res, data = super(LoadBalancingPolicy, self).attach(cluster)
        if res is False:
            return False, data

        nodes = node_mod.Node.load_all(oslo_context.get_current(),
                                       cluster_id=cluster.id)

        params = self._build_conn_params(cluster)
        lb_driver = driver_base.SenlinDriver().loadbalancing(params)

        res, data = lb_driver.lb_create(self.vip_spec, self.pool_spec,
                                        self.hm_spec)
        if res is False:
            return False, data

        port = self.pool_spec.get(self.POOL_PROTOCOL_PORT)
        subnet = self.pool_spec.get(self.POOL_SUBNET)

        for node in nodes:
            member_id = lb_driver.member_add(node, data['loadbalancer'],
                                             data['pool'], port, subnet)
            if member_id is None:
                # When failed in adding member, remove all lb resources that
                # were created and return the failure reason.
                # TODO(anyone): May need to "roll-back" changes caused by any
                # successful member_add() calls.
                lb_driver.lb_delete(**data)
                return False, 'Failed in adding node into lb pool'

            node.data.update({'lb_member': member_id})
            node.store(oslo_context.get_current())

        cluster_data_lb = cluster.data.get('loadbalancers', {})
        cluster_data_lb[self.id] = {'vip_address': data.pop('vip_address')}
        cluster.data['loadbalancers'] = cluster_data_lb

        policy_data = self._build_policy_data(data)

        return True, policy_data

    def detach(self, cluster):
        """Routine to be called when the policy is detached from a cluster.

        :param cluster: The cluster from which the policy is to be detached.
        :returns: When the operation was successful, returns a tuple of
            (True, data) where the data contains references to the resources
            created; otherwise returns a tuple of (False, err) where the err
            contains an error message.
        """
        reason = _('LB resources deletion succeeded.')
        params = self._build_conn_params(cluster)
        lb_driver = driver_base.SenlinDriver().loadbalancing(params)

        cp = cluster_policy.ClusterPolicy.load(oslo_context.get_current(),
                                               cluster.id, self.id)

        policy_data = self._extract_policy_data(cp.data)
        if policy_data is None:
            return True, reason

        res, reason = lb_driver.lb_delete(**policy_data)
        if res is False:
            return False, reason

        nodes = node_mod.Node.load_all(oslo_context.get_current(),
                                       cluster_id=cluster.id)
        for node in nodes:
            if 'lb_member' in node.data:
                node.data.pop('lb_member')
                node.store(oslo_context.get_current())

        lb_data = cluster.data.get('loadbalancers', {})
        if lb_data and isinstance(lb_data, dict):
            lb_data.pop(self.id, None)
            if lb_data:
                cluster.data['loadbalancers'] = lb_data
            else:
                cluster.data.pop('loadbalancers')

        return True, reason

    def _get_delete_candidates(self, cluster_id, action):
        deletion = action.data.get('deletion', None)
        # No 'deletion' field in action.data, which means that no scaling
        # policy or deletion policy is attached.
        candidates = None
        if deletion is None:
            if action.action == consts.CLUSTER_DEL_NODES:
                # Get candidates from action.input
                candidates = action.inputs.get('candidates', [])
                count = len(candidates)
            elif action.action == consts.CLUSTER_RESIZE:
                # Calculate deletion count based on action input
                db_cluster = db_api.cluster_get(action.context, cluster_id)
                scaleutils.parse_resize_params(action, db_cluster)
                if 'deletion' not in action.data:
                    return []
                else:
                    count = action.data['deletion']['count']
            else:  # action.action == consts.CLUSTER_SCALE_IN
                count = 1
        else:
            count = deletion.get('count', 0)
            candidates = deletion.get('candidates', None)

        # Still no candidates available, pick count of nodes randomly
        if candidates is None:
            if count == 0:
                return []
            nodes = db_api.node_get_all_by_cluster(action.context,
                                                   cluster_id=cluster_id)
            if count > len(nodes):
                count = len(nodes)
            candidates = scaleutils.nodes_by_random(nodes, count)
            deletion_data = action.data.get('deletion', {})
            deletion_data.update({
                'count': len(candidates),
                'candidates': candidates
            })
            action.data.update({'deletion': deletion_data})

        return candidates

    def pre_op(self, cluster_id, action):
        """Routine to be called before an action has been executed.

        For this particular policy, we take this chance to update the pool
        maintained by the load-balancer.

        :param cluster_id: The ID of the cluster on which a relevant action
            has been executed.
        :param action: The action object that triggered this operation.
        :returns: Nothing.
        """

        candidates = self._get_delete_candidates(cluster_id, action)
        if len(candidates) == 0:
            return

        db_cluster = db_api.cluster_get(action.context, cluster_id)
        params = self._build_conn_params(db_cluster)
        lb_driver = driver_base.SenlinDriver().loadbalancing(params)
        cp = cluster_policy.ClusterPolicy.load(action.context, cluster_id,
                                               self.id)
        policy_data = self._extract_policy_data(cp.data)
        lb_id = policy_data['loadbalancer']
        pool_id = policy_data['pool']

        # Remove nodes that will be deleted from lb pool
        for node_id in candidates:
            node = node_mod.Node.load(action.context, node_id=node_id)
            member_id = node.data.get('lb_member', None)
            if member_id is None:
                LOG.warning(_LW('Node %(n)s not found in lb pool %(p)s.'), {
                    'n': node_id,
                    'p': pool_id
                })
                continue

            res = lb_driver.member_remove(lb_id, pool_id, member_id)
            if res is not True:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = _('Failed in removing deleted '
                                          'node(s) from lb pool.')
                return

        return

    def post_op(self, cluster_id, action):
        """Routine to be called after an action has been executed.

        For this particular policy, we take this chance to update the pool
        maintained by the load-balancer.

        :param cluster_id: The ID of the cluster on which a relevant action
            has been executed.
        :param action: The action object that triggered this operation.
        :returns: Nothing.
        """

        # TODO(Yanyanhu): Need special handling for cross-az scenario
        # which is supported by Neutron lbaas.
        creation = action.data.get('creation', None)
        nodes_added = creation.get('nodes', []) if creation else []
        if len(nodes_added) == 0:
            return

        db_cluster = db_api.cluster_get(action.context, cluster_id)
        params = self._build_conn_params(db_cluster)
        lb_driver = driver_base.SenlinDriver().loadbalancing(params)
        cp = cluster_policy.ClusterPolicy.load(action.context, cluster_id,
                                               self.id)
        policy_data = self._extract_policy_data(cp.data)
        lb_id = policy_data['loadbalancer']
        pool_id = policy_data['pool']
        port = self.pool_spec.get(self.POOL_PROTOCOL_PORT)
        subnet = self.pool_spec.get(self.POOL_SUBNET)

        # Add new nodes to lb pool
        for node_id in nodes_added:
            node = node_mod.Node.load(action.context, node_id=node_id)
            member_id = node.data.get('lb_member', None)
            if member_id:
                LOG.warning(_LW('Node %(n)s already in lb pool %(p)s.'), {
                    'n': node_id,
                    'p': pool_id
                })
                continue

            member_id = lb_driver.member_add(node, lb_id, pool_id, port,
                                             subnet)
            if member_id is None:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = _('Failed in adding new node(s) '
                                          'into lb pool.')
                return

            node.data.update({'lb_member': member_id})
            node.store(action.context)

        return
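To make the schema above concrete, here is a minimal, hypothetical set of
spec properties that properties_schema would accept. The subnet names are
placeholders, and any key omitted here falls back to the default declared
in the schema:

lb_properties = {
    'pool': {
        'protocol': 'HTTP',
        'protocol_port': 80,
        'subnet': 'private-subnet',    # placeholder; required by the schema
        'lb_method': 'ROUND_ROBIN',
    },
    'vip': {
        'subnet': 'public-subnet',     # placeholder; required by the schema
        'protocol': 'HTTP',
        'protocol_port': 80,
    },
    'health_monitor': {
        'type': 'HTTP',
        'delay': 10,
        'timeout': 5,
        'max_retries': 3,
    },
}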
Example #2
    def test_validate_failed(self):
        sot = schema.List(schema=schema.String())

        ex = self.assertRaises(exc.ESchema, sot.validate, None)
        self.assertEqual("'None' is not a List", six.text_type(ex))
Example #3
class DeletionPolicy(base.Policy):
    """Policy for choosing victim node(s) from a cluster for deletion.

    This policy is enforced when nodes are to be removed from a cluster.
    It will yield an ordered list of candidates for deletion based on user
    specified criteria.
    """

    VERSION = '1.0'

    PRIORITY = 400

    KEYS = (
        CRITERIA,
        DESTROY_AFTER_DELETION,
        GRACE_PERIOD,
        REDUCE_DESIRED_CAPACITY,
    ) = (
        'criteria',
        'destroy_after_deletion',
        'grace_period',
        'reduce_desired_capacity',
    )

    CRITERIA_VALUES = (
        OLDEST_FIRST,
        OLDEST_PROFILE_FIRST,
        YOUNGEST_FIRST,
        RANDOM,
    ) = (
        'OLDEST_FIRST',
        'OLDEST_PROFILE_FIRST',
        'YOUNGEST_FIRST',
        'RANDOM',
    )

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_RESIZE),
    ]

    PROFILE_TYPE = ['ANY']

    properties_schema = {
        CRITERIA:
        schema.String(_('Criteria used in selecting candidates for deletion'),
                      default=RANDOM,
                      constraints=[
                          constraints.AllowedValues(CRITERIA_VALUES),
                      ]),
        DESTROY_AFTER_DELETION:
        schema.Boolean(
            _('Whether a node should be completely destroyed after '
              'deletion. Defaults to True.'),
            default=True,
        ),
        GRACE_PERIOD:
        schema.Integer(
            _('Number of seconds before real deletion happens.'),
            default=0,
        ),
        REDUCE_DESIRED_CAPACITY:
        schema.Boolean(
            _('Whether the desired capacity of the cluster should be '
              'reduced along with the deletion. Defaults to False.'),
            default=False,
        )
    }

    def __init__(self, name, spec, **kwargs):
        super(DeletionPolicy, self).__init__(name, spec, **kwargs)

        self.criteria = self.properties[self.CRITERIA]
        self.grace_period = self.properties[self.GRACE_PERIOD]
        self.destroy_after_deletion = self.properties[
            self.DESTROY_AFTER_DELETION]
        self.reduce_desired_capacity = self.properties[
            self.REDUCE_DESIRED_CAPACITY]

    def _victims_by_regions(self, cluster, regions):
        victims = []
        for region in sorted(regions.keys()):
            count = regions[region]
            nodes = cluster.nodes_by_region(region)
            if self.criteria == self.RANDOM:
                candidates = scaleutils.nodes_by_random(nodes, count)
            elif self.criteria == self.OLDEST_PROFILE_FIRST:
                candidates = scaleutils.nodes_by_profile_age(nodes, count)
            elif self.criteria == self.OLDEST_FIRST:
                candidates = scaleutils.nodes_by_age(nodes, count, True)
            else:
                candidates = scaleutils.nodes_by_age(nodes, count, False)

            victims.extend(candidates)

        return victims

    def _victims_by_zones(self, cluster, zones):
        victims = []
        for zone in sorted(zones.keys()):
            count = zones[zone]
            nodes = cluster.nodes_by_zone(zone)
            if self.criteria == self.RANDOM:
                candidates = scaleutils.nodes_by_random(nodes, count)
            elif self.criteria == self.OLDEST_PROFILE_FIRST:
                candidates = scaleutils.nodes_by_profile_age(nodes, count)
            elif self.criteria == self.OLDEST_FIRST:
                candidates = scaleutils.nodes_by_age(nodes, count, True)
            else:
                candidates = scaleutils.nodes_by_age(nodes, count, False)

            victims.extend(candidates)

        return victims

    def _update_action(self, action, victims):
        pd = action.data.get('deletion', {})
        pd['count'] = len(victims)
        pd['candidates'] = victims
        pd['destroy_after_deletion'] = self.destroy_after_deletion
        pd['grace_period'] = self.grace_period
        action.data.update({
            'status': base.CHECK_OK,
            'reason': _('Candidates generated'),
            'deletion': pd
        })
        action.store(action.context)

    def pre_op(self, cluster_id, action):
        """Choose victims that can be deleted.

        :param cluster_id: ID of the cluster to be handled.
        :param action: The action object that triggered this policy.
        """

        victims = action.inputs.get('candidates', [])
        if len(victims) > 0:
            self._update_action(action, victims)
            return

        db_cluster = None
        regions = None
        zones = None

        deletion = action.data.get('deletion', {})
        if deletion:
            # there are policy decisions
            count = deletion['count']
            regions = deletion.get('regions', None)
            zones = deletion.get('zones', None)
        # No policy decision, check action itself: SCALE_IN
        elif action.action == consts.CLUSTER_SCALE_IN:
            count = action.inputs.get('count', 1)

        # No policy decision, check action itself: RESIZE
        else:
            db_cluster = co.Cluster.get(action.context,
                                        cluster_id,
                                        project_safe=True)
            res = scaleutils.parse_resize_params(action, db_cluster)
            if res[0] == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = res[1]
                LOG.error(res[1])
                return

            if 'deletion' not in action.data:
                return
            count = action.data['deletion']['count']

        cluster = cm.Cluster.load(action.context,
                                  dbcluster=db_cluster,
                                  cluster_id=cluster_id)
        # Cross-region
        if regions:
            victims = self._victims_by_regions(cluster, regions)
            self._update_action(action, victims)
            return

        # Cross-AZ
        if zones:
            victims = self._victims_by_zones(cluster, zones)
            self._update_action(action, victims)
            return

        if count > len(cluster.nodes):
            count = len(cluster.nodes)

        if self.criteria == self.RANDOM:
            victims = scaleutils.nodes_by_random(cluster.nodes, count)
        elif self.criteria == self.OLDEST_PROFILE_FIRST:
            victims = scaleutils.nodes_by_profile_age(cluster.nodes, count)
        elif self.criteria == self.OLDEST_FIRST:
            victims = scaleutils.nodes_by_age(cluster.nodes, count, True)
        else:
            victims = scaleutils.nodes_by_age(cluster.nodes, count, False)

        self._update_action(action, victims)
        return
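As a hypothetical illustration, a spec matching the properties_schema above
could look like the following; every key has a default, so all of them are
optional:

deletion_properties = {
    'criteria': 'OLDEST_FIRST',
    'destroy_after_deletion': True,
    'grace_period': 60,
    'reduce_desired_capacity': False,
}

With this spec attached, pre_op sorts the cluster's nodes by age and records
the oldest ones as deletion candidates in action.data['deletion'].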
Example #4
    def test_basic(self):
        sot = schema.String('desc')

        self.assertEqual('String', sot['type'])
        self.assertEqual('desc', sot['description'])
Example #5
    def test_get_children(self):
        sot = schema.List('desc', schema=schema.String())

        res = sot._get_children(['v1', 'v2'], [0, 1])
        self.assertEqual(['v1', 'v2'], list(res))
Example #6
class Profile(object):
    '''Base class for profiles.'''

    KEYS = (
        TYPE,
        VERSION,
        PROPERTIES,
    ) = (
        'type',
        'version',
        'properties',
    )

    spec_schema = {
        TYPE:
        schema.String(
            _('Name of the profile type.'),
            required=True,
        ),
        VERSION:
        schema.String(
            _('Version number of the profile type.'),
            required=True,
        ),
        PROPERTIES:
        schema.Map(
            _('Properties for the profile.'),
            required=True,
        )
    }

    properties_schema = {}

    def __new__(cls, name, spec, **kwargs):
        """Create a new profile of the appropriate class.

        :param name: The name for the profile.
        :param spec: A dictionary containing the spec for the profile.
        :param kwargs: Keyword arguments for profile creation.
        :returns: An instance of a specific sub-class of Profile.
        """
        type_name, version = schema.get_spec_version(spec)
        type_str = "-".join([type_name, version])

        if cls != Profile:
            ProfileClass = cls
        else:
            ProfileClass = environment.global_env().get_profile(type_str)

        return super(Profile, cls).__new__(ProfileClass)

    def __init__(self, name, spec, **kwargs):
        """Initialize a profile instance.

        :param name: A string that specifies the name for the profile.
        :param spec: A dictionary containing the detailed profile spec.
        :param kwargs: Keyword arguments for initializing the profile.
        :returns: An instance of a specific sub-class of Profile.
        """

        type_name, version = schema.get_spec_version(spec)
        type_str = "-".join([type_name, version])

        self.name = name
        self.spec = spec

        self.id = kwargs.get('id', None)
        self.type = kwargs.get('type', type_str)

        self.user = kwargs.get('user')
        self.project = kwargs.get('project')
        self.domain = kwargs.get('domain')

        self.metadata = kwargs.get('metadata', {})

        self.created_at = kwargs.get('created_at', None)
        self.updated_at = kwargs.get('updated_at', None)

        self.spec_data = schema.Spec(self.spec_schema, self.spec)
        self.properties = schema.Spec(self.properties_schema,
                                      self.spec.get(self.PROPERTIES, {}))

        if not self.id:
            # new object needs a context dict
            self.context = self._init_context()
        else:
            self.context = kwargs.get('context')

    @classmethod
    def from_db_record(cls, record):
        '''Construct a profile object from database record.

        :param record: a DB Profile object that contains all required fields.
        '''
        kwargs = {
            'id': record.id,
            'type': record.type,
            'context': record.context,
            'user': record.user,
            'project': record.project,
            'domain': record.domain,
            'metadata': record.meta_data,
            'created_at': record.created_at,
            'updated_at': record.updated_at,
        }

        return cls(record.name, record.spec, **kwargs)

    @classmethod
    def load(cls, ctx, profile=None, profile_id=None, project_safe=True):
        '''Retrieve a profile object from database.'''
        if profile is None:
            profile = db_api.profile_get(ctx,
                                         profile_id,
                                         project_safe=project_safe)
            if profile is None:
                raise exception.ProfileNotFound(profile=profile_id)

        return cls.from_db_record(profile)

    @classmethod
    def load_all(cls,
                 ctx,
                 limit=None,
                 marker=None,
                 sort=None,
                 filters=None,
                 project_safe=True):
        """Retrieve all profiles from database."""

        records = db_api.profile_get_all(ctx,
                                         limit=limit,
                                         marker=marker,
                                         sort=sort,
                                         filters=filters,
                                         project_safe=project_safe)

        for record in records:
            yield cls.from_db_record(record)

    @classmethod
    def delete(cls, ctx, profile_id):
        db_api.profile_delete(ctx, profile_id)

    def store(self, ctx):
        '''Store the profile into database and return its ID.'''
        timestamp = timeutils.utcnow()

        values = {
            'name': self.name,
            'type': self.type,
            'context': self.context,
            'spec': self.spec,
            'user': self.user,
            'project': self.project,
            'domain': self.domain,
            'meta_data': self.metadata,
        }

        if self.id:
            self.updated_at = timestamp
            values['updated_at'] = timestamp
            db_api.profile_update(ctx, self.id, values)
        else:
            self.created_at = timestamp
            values['created_at'] = timestamp
            profile = db_api.profile_create(ctx, values)
            self.id = profile.id

        return self.id

    @classmethod
    def create_object(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_create(obj)

    @classmethod
    def check_object(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_check(obj)

    @classmethod
    def delete_object(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_delete(obj)

    @classmethod
    def update_object(cls, ctx, obj, new_profile_id=None, **params):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        new_profile = None
        if new_profile_id:
            new_profile = cls.load(ctx, profile_id=new_profile_id)
        return profile.do_update(obj, new_profile, **params)

    @classmethod
    def recover_object(cls, ctx, obj, **options):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_recover(obj, **options)

    @classmethod
    def get_details(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_get_details(obj)

    @classmethod
    def join_cluster(cls, ctx, obj, cluster_id):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_join(obj, cluster_id)

    @classmethod
    def leave_cluster(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_leave(obj)

    def validate(self):
        '''Validate the schema and the data provided.'''
        # general validation
        self.spec_data.validate()
        self.properties.validate()

        # TODO(Anyone): need to check the contents in self.CONTEXT

    @classmethod
    def get_schema(cls):
        return dict((name, dict(schema))
                    for name, schema in cls.properties_schema.items())

    def _init_context(self):
        profile_context = {}
        if self.CONTEXT in self.properties:
            profile_context = self.properties[self.CONTEXT] or {}

        ctx_dict = context.get_service_context(**profile_context)

        ctx_dict.pop('project_name', None)
        ctx_dict.pop('project_domain_name', None)

        return ctx_dict

    def _build_conn_params(self, user, project):
        """Build connection params for specific user and project.

        :param user: The ID of the user for which a trust will be used.
        :param project: The ID of the project for which a trust will be used.
        :returns: A dict containing the required parameters for connection
                  creation.
        """
        cred = db_api.cred_get(oslo_context.get_current(), user, project)
        if cred is None:
            raise exception.TrustNotFound(trustor=user)

        trust_id = cred.cred['openstack']['trust']

        # This is supposed to be trust-based authentication
        params = copy.deepcopy(self.context)
        params['trust_id'] = trust_id

        return params

    def do_create(self, obj):
        '''For subclass to override.'''

        return NotImplemented

    def do_delete(self, obj):
        '''For subclass to override.'''

        return NotImplemented

    def do_update(self, obj, new_profile, **params):
        '''For subclass to override.'''

        return NotImplemented

    def do_check(self, obj):
        '''For subclass to override.'''
        return NotImplemented

    def do_get_details(self, obj):
        '''For subclass to override.'''
        return NotImplemented

    def do_join(self, obj, cluster_id):
        '''For subclass to override to perform extra operations.'''
        return True

    def do_leave(self, obj):
        '''For subclass to override to perform extra operations.'''
        return True

    def do_rebuild(self, obj):
        '''For subclass to override.'''
        return NotImplemented

    def do_recover(self, obj, **options):
        '''For subclass to override.'''

        operation = options.get('operation', None)
        if operation and operation != 'RECREATE':
            return NotImplemented

        # NOTE: do_delete always returns a boolean
        res = self.do_delete(obj)

        if res:
            try:
                res = self.do_create(obj)
            except Exception as ex:
                LOG.exception(_('Failed at recovering obj: %s '),
                              six.text_type(ex))
                return False

        return res

    def to_dict(self):
        pb_dict = {
            'id': self.id,
            'name': self.name,
            'type': self.type,
            'user': self.user,
            'project': self.project,
            'domain': self.domain,
            'spec': self.spec,
            'metadata': self.metadata,
            'created_at': utils.format_time(self.created_at),
            'updated_at': utils.format_time(self.updated_at),
        }
        return pb_dict

    def validate_for_update(self, new_profile):
        non_updatables = []
        for (k, v) in new_profile.properties.items():
            if self.properties.get(k, None) != v:
                if not self.properties_schema[k].updatable:
                    non_updatables.append(k)

        if not non_updatables:
            return True

        msg = ", ".join(non_updatables)
        LOG.error(_LE("The following properties are not updatable: %s.") % msg)
        return False
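The base class dispatches through __new__ and expects subclasses to supply a
properties_schema plus the do_*() hooks. Below is a minimal sketch of that
pattern; the class name and the 'flavor' property are invented for
illustration:

class DummyProfile(Profile):
    '''A hypothetical minimal profile; not part of the original code.'''

    properties_schema = {
        'flavor': schema.String(_('Flavor name.'), required=True),
    }

    def do_create(self, obj):
        # A real profile would create the physical resource here and
        # return its ID; this sketch just returns a fake one.
        return 'fake-physical-id'

    def do_delete(self, obj):
        # Expected to return a boolean (see the note in do_recover).
        return True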
Example #7
class ZonePlacementPolicy(base.Policy):
    """Policy for placing members of a cluster across availability zones."""

    VERSION = '1.0'

    PRIORITY = 300

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
    ]

    PROFILE_TYPE = [
        'os.nova.server-1.0',
    ]

    KEYS = (ZONES, ) = ('zones', )

    _AZ_KEYS = (
        ZONE_NAME,
        ZONE_WEIGHT,
    ) = (
        'name',
        'weight',
    )

    properties_schema = {
        ZONES:
        schema.List(
            _('List of availability zones to choose from.'),
            schema=schema.Map(
                _('An availability zone as candidate.'),
                schema={
                    ZONE_NAME:
                    schema.String(_('Name of an availability zone.'), ),
                    ZONE_WEIGHT:
                    schema.Integer(
                        _('Weight of the availability zone (default is 100).'),
                        default=100,
                        required=False,
                    )
                },
            ),
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(ZonePlacementPolicy, self).__init__(name, spec, **kwargs)

        self._novaclient = None
        self.zones = dict((z[self.ZONE_NAME], z[self.ZONE_WEIGHT])
                          for z in self.properties.get(self.ZONES))

    def _nova(self, obj):
        """Construct nova client based on object.

        :param obj: Object for which the client is created. It is expected to
                    be None when retrieving an existing client. When creating
                    a client, it contains the user and project to be used.
        """
        if self._novaclient is not None:
            return self._novaclient

        params = self._build_conn_params(obj)
        self._novaclient = driver.SenlinDriver().compute(params)
        return self._novaclient

    def _create_plan(self, current, zones, count, expand):
        """Compute a placement plan based on the weights of AZs.

        :param current: Distribution of existing nodes.
        :returns: A dict that contains a placement plan.
        """
        # sort candidate zones by weight and convert them into a list
        candidates = sorted(zones.items(),
                            key=operator.itemgetter(1),
                            reverse=expand)

        sum_weight = sum(zones.values())
        if expand:
            total = count + sum(current.values())
        else:
            total = sum(current.values()) - count

        remain = count
        plan = dict.fromkeys(zones.keys(), 0)

        for i in range(len(zones)):
            zone = candidates[i][0]
            weight = candidates[i][1]
            q = total * weight / float(sum_weight)
            if expand:
                quota = int(math.ceil(q))
                headroom = quota - current[zone]
            else:
                quota = int(math.floor(q))
                headroom = current[zone] - quota

            if headroom <= 0:
                continue

            if headroom < remain:
                plan[zone] = headroom
                remain -= headroom
            else:
                plan[zone] = remain if remain > 0 else 0
                remain = 0
                break

        if remain > 0:
            return None

        # filter out zero values
        result = {}
        for z, c in plan.items():
            if c > 0:
                result[z] = c

        return result

    def _get_count(self, cluster_id, action):
        """Get number of nodes to create or delete.

        :param cluster_id: The ID of the target cluster.
        :param action: The action object which triggered this policy check.
        :return: An integer value which can be 1) positive - number of nodes
                 to create; 2) negative - number of nodes to delete; 3) 0 -
                 something went wrong and the policy check failed.
        """
        if action.action == consts.CLUSTER_RESIZE:
            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            elif action.data.get('creation', None):
                return action.data['creation']['count']

            db_cluster = co.Cluster.get(action.context, cluster_id)
            res = scaleutils.parse_resize_params(action, db_cluster)
            if res[0] == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = res[1]
                LOG.error(res[1])
                return 0

            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            else:
                return action.data['creation']['count']

        if action.action == consts.CLUSTER_SCALE_IN:
            pd = action.data.get('deletion', None)
            if pd is None:
                return -action.inputs.get('count', 1)
            else:
                return -pd.get('count', 1)

        # CLUSTER_SCALE_OUT: an action that inflates the cluster
        pd = action.data.get('creation', None)
        if pd is None:
            return action.inputs.get('count', 1)
        else:
            return pd.get('count', 1)

    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        """
        count = self._get_count(cluster_id, action)
        if count == 0:
            return

        expand = True
        if count < 0:
            expand = False
            count = -count

        cluster = cm.Cluster.load(action.context, cluster_id)

        nc = self._nova(cluster)
        zones_good = nc.validate_azs(self.zones.keys())
        if len(zones_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No usable availability zone found.')
            LOG.error(_LE('No usable availability zone found.'))
            return

        zones = {}
        for z, w in self.zones.items():
            if z in zones_good:
                zones[z] = w

        current = cluster.get_zone_distribution(action.context, zones.keys())
        result = self._create_plan(current, zones, count, expand)

        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error(_LE('There is no feasible plan to handle all nodes.'))
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['zones'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['zones'] = result
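The weighted arithmetic in _create_plan is easiest to follow with numbers.
The standalone sketch below replays the expansion branch for two invented
zones; all values are hypothetical:

import math

zones = {'AZ1': 100, 'AZ2': 50}    # name -> weight
current = {'AZ1': 2, 'AZ2': 2}     # existing node distribution
count = 3                          # nodes to add (expand=True)

total = count + sum(current.values())       # 7
sum_weight = sum(zones.values())            # 150
remain, plan = count, {}
# heaviest zone first, mirroring sorted(..., reverse=expand)
for name, weight in sorted(zones.items(), key=lambda z: z[1], reverse=True):
    quota = int(math.ceil(total * weight / float(sum_weight)))
    headroom = quota - current[name]        # AZ1: 5 - 2 = 3, AZ2: 3 - 2 = 1
    if headroom <= 0:
        continue
    plan[name] = min(headroom, remain)
    remain -= plan[name]
    if remain == 0:
        break

print(plan)    # {'AZ1': 3}: the heavier zone absorbs the whole request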
Example #8
class RegionPlacementPolicy(base.Policy):
    """Policy for placing members of a cluster across multiple regions."""

    VERSION = '1.0'

    PRIORITY = 200

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
    ]

    PROFILE_TYPE = ['ANY']

    KEYS = (REGIONS, ) = ('regions', )

    _REGION_KEYS = (
        REGION_NAME,
        REGION_WEIGHT,
        REGION_CAP,
    ) = (
        'name',
        'weight',
        'cap',
    )

    properties_schema = {
        REGIONS:
        schema.List(
            _('List of regions to choose from.'),
            schema=schema.Map(
                _('A region as a candidate.'),
                schema={
                    REGION_NAME:
                    schema.String(_('Name of a region.'), ),
                    REGION_WEIGHT:
                    schema.Integer(
                        _('Weight of the region. The default is 100.'),
                        default=100,
                    ),
                    REGION_CAP:
                    schema.Integer(
                        _('Maximum number of nodes in this region. The '
                          'default is -1 which means no cap set.'),
                        default=-1,
                    ),
                },
            ),
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(RegionPlacementPolicy, self).__init__(name, spec, **kwargs)

        self._keystoneclient = None
        regions = {}
        for r in self.properties.get(self.REGIONS):
            regions[r[self.REGION_NAME]] = {
                'weight': r[self.REGION_WEIGHT],
                'cap': r[self.REGION_CAP],
            }
        self.regions = regions

    def _keystone(self, obj):
        """Construct keystone client based on object.

        :param obj: Object for which the client is created. It is expected to
                    be None when retrieving an existing client. When creating
                    a client, it contains the user and project to be used.
        """
        if self._keystoneclient is not None:
            return self._keystoneclient
        params = self._build_conn_params(obj)
        self._keystoneclient = driver_base.SenlinDriver().identity(params)
        return self._keystoneclient

    def _create_plan(self, current, regions, count, expand):
        """Compute a placement plan based on the weights of regions.

        :param current: Distribution of existing nodes.
        :param regions: Usable regions for node creation.
        :param count: Number of nodes to create/delete in this plan.
        :param expand: True if the plan is for inflating the cluster, False
                       otherwise.

        :returns: A list of region names selected for the nodes.
        """
        # sort candidate regions by weight and convert them into a list
        candidates = sorted(regions.items(),
                            key=lambda x: x[1]['weight'],
                            reverse=expand)
        sum_weight = sum(r['weight'] for r in regions.values())
        if expand:
            total = count + sum(current.values())
        else:
            total = sum(current.values()) - count
        remain = count
        plan = dict.fromkeys(regions.keys(), 0)

        for i in range(len(candidates)):
            region = candidates[i]
            r_name = region[0]
            r_weight = region[1]['weight']
            r_cap = region[1]['cap']

            # maximum number of nodes on current region
            q = total * r_weight / float(sum_weight)
            if expand:
                quota = int(math.ceil(q))
                # respect the cap setting, if any
                if r_cap >= 0:
                    quota = min(quota, r_cap)
                headroom = quota - current[r_name]
            else:
                quota = int(math.floor(q))
                headroom = current[r_name] - quota

            if headroom <= 0:
                continue

            if headroom < remain:
                plan[r_name] = headroom
                remain -= headroom
            else:
                plan[r_name] = remain if remain > 0 else 0
                remain = 0
                break

        # we have leftovers
        if remain > 0:
            return None

        result = {}
        for reg, count in plan.items():
            if count > 0:
                result[reg] = count

        return result

    def _get_count(self, cluster_id, action):
        """Get number of nodes to create or delete.

        :param cluster_id: The ID of the target cluster.
        :param action: The action object which triggered this policy check.
        :return: An integer value which can be 1) positive - number of nodes
                 to create; 2) negative - number of nodes to delete; 3) 0 -
                 something went wrong and the policy check failed.
        """
        if action.action == consts.CLUSTER_RESIZE:
            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            elif action.data.get('creation', None):
                return action.data['creation']['count']

            db_cluster = db_api.cluster_get(action.context, cluster_id)
            res = scaleutils.parse_resize_params(action, db_cluster)
            if res[0] == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = res[1]
                LOG.error(res[1])
                return 0

            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            else:
                return action.data['creation']['count']

        if action.action == consts.CLUSTER_SCALE_IN:
            pd = action.data.get('deletion', None)
            if pd is None:
                return -action.inputs.get('count', 1)
            else:
                return -pd.get('count', 1)

        # CLUSTER_SCALE_OUT: an action that inflates the cluster
        pd = action.data.get('creation', None)
        if pd is None:
            return action.inputs.get('count', 1)
        else:
            return pd.get('count', 1)

    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        :returns: ``None``.
        """
        count = self._get_count(cluster_id, action)
        if count == 0:
            return

        expand = True
        if count < 0:
            expand = False
            count = -count

        cluster = cluster_mod.Cluster.load(action.context, cluster_id)

        kc = self._keystone(cluster)

        regions_good = kc.validate_regions(self.regions.keys())
        if len(regions_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No usable region found.')
            LOG.error(_LE('No usable region found.'))
            return

        regions = {}
        for r in self.regions.items():
            if r[0] in regions_good:
                regions[r[0]] = r[1]

        current_dist = cluster.get_region_distribution(regions_good)
        result = self._create_plan(current_dist, regions, count, expand)
        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error(_LE('There is no feasible plan to handle all nodes.'))
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['regions'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['regions'] = result
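The only structural difference from the zone policy is the per-region cap,
which clamps the quota before headroom is computed. Reusing the hypothetical
numbers from the previous example, but capping the heavier region at 4
nodes:

# R1: weight 100, cap 4; R2: weight 50, no cap; current = {R1: 2, R2: 2}
# Expanding by count=3 gives total=7 and sum_weight=150:
#   R1 quota = min(ceil(7 * 100 / 150), 4) = min(5, 4) = 4  -> headroom 2
#   R2 quota = ceil(7 * 50 / 150) = 3                       -> headroom 1
# R1 takes 2 nodes, R2 takes the remaining 1: plan = {'R1': 2, 'R2': 1}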
Example #9
class HealthPolicy(base.Policy):
    """Policy for health management of a cluster."""

    VERSION = '1.0'

    PRIORITY = 600

    TARGET = [
        ('BEFORE', consts.CLUSTER_CHECK),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_RECOVER),
        ('BEFORE', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.NODE_DELETE),
        ('AFTER', consts.CLUSTER_DEL_NODES),
        ('AFTER', consts.CLUSTER_SCALE_IN),
        ('AFTER', consts.CLUSTER_RESIZE),
        ('AFTER', consts.NODE_DELETE),
    ]

    # Should be ANY if profile provides health check support?
    PROFILE_TYPE = [
        'os.nova.server',
        'os.heat.stack',
    ]

    KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery')

    _DETECTION_KEYS = (
        DETECTION_TYPE,
        DETECTION_OPTIONS,
    ) = ('type', 'options')

    _DETECTION_OPTIONS = (DETECTION_INTERVAL, ) = ('interval', )

    _RECOVERY_KEYS = (RECOVERY_ACTIONS, RECOVERY_FENCING) = ('actions',
                                                             'fencing')

    RECOVERY_ACTION_VALUES = (
        REBUILD,
        RECREATE,
        # REBOOT, MIGRATE, EVACUATE,
    ) = (
        "REBUILD",
        "RECREATE",
        # 'REBOOT', 'MIGRATE', 'EVACUATE',
    )

    FENCING_OPTION_VALUES = (
        COMPUTE,
        # STORAGE, NETWORK,
    ) = (
        'COMPUTE',
        # 'STORAGE', 'NETWORK'
    )

    properties_schema = {
        DETECTION:
        schema.Map(
            _('Policy aspect for node failure detection.'),
            schema={
                DETECTION_TYPE:
                schema.String(
                    _('Type of node failure detection.'),
                    constraints=[
                        constraints.AllowedValues(consts.DETECTION_TYPES),
                    ],
                    required=True,
                ),
                DETECTION_OPTIONS:
                schema.Map(
                    schema={
                        DETECTION_INTERVAL:
                        schema.Integer(
                            _("Number of seconds between pollings. Only "
                              "required when type is 'NODE_STATUS_POLLING'."),
                            default=60,
                        ),
                    }),
            },
            required=True,
        ),
        RECOVERY:
        schema.Map(
            _('Policy aspect for node failure recovery.'),
            schema={
                RECOVERY_ACTIONS:
                schema.List(
                    _('List of actions to try for node recovery.'),
                    schema=schema.String(
                        _('Action to try for node recovery.'),
                        constraints=[
                            constraints.AllowedValues(RECOVERY_ACTION_VALUES),
                        ]),
                ),
                RECOVERY_FENCING:
                schema.List(
                    _('List of services to be fenced.'),
                    schema=schema.String(
                        _('Service to be fenced.'),
                        constraints=[
                            constraints.AllowedValues(FENCING_OPTION_VALUES),
                        ],
                    ),
                ),
            }),
    }

    def __init__(self, name, spec, **kwargs):
        super(HealthPolicy, self).__init__(name, spec, **kwargs)

        self.check_type = self.properties[self.DETECTION][self.DETECTION_TYPE]
        options = self.properties[self.DETECTION][self.DETECTION_OPTIONS]
        self.interval = options[self.DETECTION_INTERVAL]
        recover_settings = self.properties[self.RECOVERY]
        self.recover_actions = recover_settings[self.RECOVERY_ACTIONS]
        self.fencing_types = recover_settings[self.RECOVERY_FENCING]

    def attach(self, cluster):
        """"Hook for policy attach.

        Register the cluster for health management.

        :param cluster: The target cluster.
        :return: A tuple comprising execution result and policy data.
        """
        kwargs = {
            'check_type': self.check_type,
            'interval': self.interval,
            'params': {},
        }

        health_manager.register(cluster.id, engine_id=None, **kwargs)

        data = {
            'check_type': self.check_type,
            'interval': self.interval,
        }

        return True, self._build_policy_data(data)

    def detach(self, cluster):
        """Hook for policy detach.

        Unregister the cluster for health management.
        :param cluster: The target cluster.
        :returns: A tuple comprising the execution result and reason.
        """
        health_manager.unregister(cluster.id)
        return True, ''

    def pre_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the tasks of this routine is to disable the health policy if
        the action is a request that will shrink the cluster, since the
        policy might otherwise attempt to recover nodes that are about to be
        deleted.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.disable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.disable(cluster_id)
                return True

            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res, reason = scaleutils.parse_resize_params(
                action, db_cluster, current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.disable(cluster_id)
                return True

        pd = {
            'recover_action': self.recover_actions,
            'fencing': self.fencing_types,
        }
        action.data.update({'health': pd})
        action.store(action.context)

        return True

    def post_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the task for this routine is to re-enable health policy if the
        action is a request that will shrink the cluster thus the policy has
        been temporarily disabled.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.enable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.enable(cluster_id)
                return True

            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res, reason = scaleutils.parse_resize_params(
                action, db_cluster, current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.enable(cluster_id)
                return True

        return True
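
For reference, a minimal sketch of a spec that would satisfy the properties_schema above; the policy type name and the detection/recovery values are assumptions drawn from the consts lists referenced in the schema, not verified values.

# Hedged sketch: a spec dict shaped to match HealthPolicy.properties_schema.
# 'NODE_STATUS_POLLING' and 'RECREATE' are assumed members of
# consts.DETECTION_TYPES and RECOVERY_ACTION_VALUES respectively.
spec = {
    'type': 'senlin.policy.health',   # assumed policy type name
    'version': '1.0',
    'properties': {
        'detection': {
            'type': 'NODE_STATUS_POLLING',
            'options': {
                'interval': 120,      # overrides the default of 60 seconds
            },
        },
        'recovery': {
            'actions': ['RECREATE'],  # plain strings in this version
            'fencing': ['COMPUTE'],   # the only fencing option defined above
        },
    },
}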
Example No. 10
class TestSpec(base.SenlinTestCase):
    spec_schema = {
        'key1': schema.String('first key', default='value1'),
        'key2': schema.Integer('second key', required=True),
    }

    def test_init(self):
        data = {'key1': 'value1', 'key2': 2}
        sot = schema.Spec(self.spec_schema, data)

        self.assertEqual(self.spec_schema, sot._schema)
        self.assertEqual(data, sot._data)
        self.assertIsNone(sot._version)

    def test_init_with_version(self):
        data = {'key1': 'value1', 'key2': 2}
        sot = schema.Spec(self.spec_schema, data, version='1.2')

        self.assertEqual(self.spec_schema, sot._schema)
        self.assertEqual(data, sot._data)
        self.assertEqual('1.2', sot._version)

    def test_validate(self):
        data = {'key1': 'value1', 'key2': 2}
        sot = schema.Spec(self.spec_schema, data)
        res = sot.validate()
        self.assertIsNone(res)

        data1 = {'key2': 2}
        sot = schema.Spec(self.spec_schema, data1)
        res = sot.validate()
        self.assertIsNone(res)

    def test_validate_fail_unrecognizable_key(self):
        spec_schema = {
            'key1': schema.String('first key', default='value1'),
        }
        data = {'key1': 'value1', 'key2': 2}
        sot = schema.Spec(spec_schema, data, version='1.0')
        ex = self.assertRaises(exc.ESchema, sot.validate)

        self.assertIn("Unrecognizable spec item 'key2'",
                      six.text_type(ex.message))

    def test_validate_fail_value_type_incorrect(self):
        spec_schema = {
            'key1': schema.String('first key', default='value1'),
            'key2': schema.Integer('second key', required=True),
        }

        data = {'key1': 'value1', 'key2': 'abc'}
        spec = schema.Spec(spec_schema, data, version='1.0')
        ex = self.assertRaises(exc.ESchema, spec.validate)
        self.assertIn("The value 'abc' is not a valid Integer",
                      six.text_type(ex.message))

    def test_validate_version_good(self):
        spec_schema = {
            'type':
            schema.String('Type name', required=True),
            'version':
            schema.String('Version number', required=True),
            'key1':
            schema.String('first key', default='value1'),
            'key2':
            schema.Integer('second key',
                           required=True,
                           min_version='1.0',
                           max_version='1.2'),
        }

        data = {
            'key1': 'value1',
            'key2': 2,
            'type': 'test-type',
            'version': '1.0'
        }
        spec = schema.Spec(spec_schema, data)
        self.assertIsNone(spec.validate())

        data = {'key2': 2, 'type': 'test-type', 'version': '1.2'}
        spec = schema.Spec(spec_schema, data)
        self.assertIsNone(spec.validate())

    def test_validate_version_fail_unsupported_version(self):
        spec_schema = {
            'type': schema.String('Type name', required=True),
            'version': schema.String('Version number', required=True),
            'key1': schema.String('first key',
                                  default='value1',
                                  min_version='1.1'),
            'key2': schema.Integer('second key', required=True),
        }

        data = {
            'key1': 'value1',
            'key2': 2,
            'type': 'test-type',
            'version': '1.0'
        }
        spec = schema.Spec(spec_schema, data, version='1.0')
        ex = self.assertRaises(exc.ESchema, spec.validate)
        msg = 'key1 (min_version=1.1) is not supported by spec version 1.0.'
        self.assertIn(msg, six.text_type(ex.message))

    def test_validate_version_fail_version_over_max(self):
        spec_schema = {
            'type': schema.String('Type name', required=True),
            'version': schema.String('Version number', required=True),
            'key1': schema.String('first key',
                                  default='value1',
                                  max_version='2.0'),
            'key2': schema.Integer('second key', required=True),
        }

        data = {
            'key1': 'value1',
            'key2': 2,
            'type': 'test-type',
            'version': '3.0'
        }
        spec = schema.Spec(spec_schema, data, version='3.0')
        ex = self.assertRaises(exc.ESchema, spec.validate)
        msg = 'key1 (max_version=2.0) is not supported by spec version 3.0.'
        self.assertIn(msg, six.text_type(ex.message))

    def test_resolve_value(self):
        data = {'key2': 2}
        sot = schema.Spec(self.spec_schema, data, version='1.2')

        res = sot.resolve_value('key2')
        self.assertEqual(2, res)

        res = sot.resolve_value('key1')
        self.assertEqual('value1', res)

        ex = self.assertRaises(exc.ESchema, sot.resolve_value, 'key3')
        self.assertEqual("Invalid spec item: key3", six.text_type(ex))

    def test_resolve_value_required_key_missing(self):
        data = {'key1': 'value1'}
        sot = schema.Spec(self.spec_schema, data, version='1.0')

        ex = self.assertRaises(exc.ESchema, sot.resolve_value, 'key2')
        self.assertIn("Required spec item 'key2' not provided",
                      six.text_type(ex.message))

    def test___getitem__(self):
        data = {'key2': 2}
        sot = schema.Spec(self.spec_schema, data, version='1.2')

        res = sot['key1']
        self.assertEqual('value1', res)
        res = sot['key2']
        self.assertEqual(2, res)

    def test___len__(self):
        data = {'key2': 2}
        sot = schema.Spec(self.spec_schema, data, version='1.2')

        res = len(sot)
        self.assertEqual(2, res)

    def test___contains__(self):
        data = {'key2': 2}
        sot = schema.Spec(self.spec_schema, data, version='1.2')

        self.assertIn('key1', sot)
        self.assertIn('key2', sot)
        self.assertNotIn('key3', sot)

    def test__iter__(self):
        data = {'key2': 2}
        sot = schema.Spec(self.spec_schema, data, version='1.2')

        res = [k for k in iter(sot)]

        self.assertIn('key1', res)
        self.assertIn('key2', res)
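
Taken together, the tests above imply the following usage pattern for schema.Spec; this is a sketch mirroring the assertions, not an independent reference.

# Hedged sketch of schema.Spec behavior, mirroring the tests above.
spec_schema = {
    'key1': schema.String('first key', default='value1'),
    'key2': schema.Integer('second key', required=True),
}
sot = schema.Spec(spec_schema, {'key2': 2}, version='1.2')

sot.validate()                    # returns None when the data is acceptable
assert sot['key1'] == 'value1'    # a missing key resolves to its default
assert len(sot) == 2              # length reflects the schema keys
assert 'key1' in sot and 'key3' not in sot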
Example No. 11
class ZonePlacementPolicy(base.Policy):
    """Policy for placing members of a cluster across availability zones."""

    VERSION = '1.0'
    VERSIONS = {
        '1.0': [
            {
                'status': consts.EXPERIMENTAL,
                'since': '2016.04'
            },
            {
                'status': consts.SUPPORTED,
                'since': '2016.10'
            },
        ]
    }
    PRIORITY = 300

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.NODE_CREATE),
    ]

    PROFILE_TYPE = [
        'os.nova.server-1.0',
    ]

    KEYS = (ZONES, ) = ('zones', )

    _AZ_KEYS = (
        ZONE_NAME,
        ZONE_WEIGHT,
    ) = (
        'name',
        'weight',
    )

    properties_schema = {
        ZONES:
        schema.List(
            _('List of availability zones to choose from.'),
            schema=schema.Map(
                _('An availability zone as candidate.'),
                schema={
                    ZONE_NAME:
                    schema.String(_('Name of an availability zone.')),
                    ZONE_WEIGHT:
                    schema.Integer(
                        _('Weight of the availability zone (default is 100).'),
                        default=100,
                        required=False,
                    )
                },
            ),
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(ZonePlacementPolicy, self).__init__(name, spec, **kwargs)

        self.zones = dict((z[self.ZONE_NAME], z[self.ZONE_WEIGHT])
                          for z in self.properties.get(self.ZONES))

    def validate(self, context, validate_props=False):
        super(ZonePlacementPolicy, self).validate(context, validate_props)

        if not validate_props:
            return True

        nc = self.nova(context.user, context.project)
        input_azs = sorted(self.zones.keys())
        valid_azs = nc.validate_azs(input_azs)
        invalid_azs = sorted(set(input_azs) - set(valid_azs))
        if invalid_azs:
            msg = _("The specified %(key)s '%(value)s' could not be "
                    "found.") % {
                        'key': self.ZONE_NAME,
                        'value': list(invalid_azs)
                    }
            raise exc.InvalidSpec(message=msg)

        return True

    def _create_plan(self, current, zones, count, expand):
        """Compute a placement plan based on the weights of AZs.

        :param current: Distribution of existing nodes, keyed by zone name.
        :param zones: A dict mapping availability zone names to weights.
        :param count: Number of nodes to be created (or deleted).
        :param expand: A boolean indicating whether the cluster is expanding.
        :returns: A dict that contains a placement plan, or None if no
                  feasible plan can be computed.
        """
        # sort candidate zones by weight and convert them into a list
        candidates = sorted(zones.items(),
                            key=operator.itemgetter(1),
                            reverse=expand)

        sum_weight = sum(zones.values())
        if expand:
            total = count + sum(current.values())
        else:
            total = sum(current.values()) - count

        remain = count
        plan = dict.fromkeys(zones.keys(), 0)

        for i in range(len(zones)):
            zone = candidates[i][0]
            weight = candidates[i][1]
            q = total * weight / float(sum_weight)
            if expand:
                quota = int(math.ceil(q))
                headroom = quota - current[zone]
            else:
                quota = int(math.floor(q))
                headroom = current[zone] - quota

            if headroom <= 0:
                continue

            if headroom < remain:
                plan[zone] = headroom
                remain -= headroom
            else:
                plan[zone] = remain if remain > 0 else 0
                remain = 0
                break

        if remain > 0:
            return None

        # filter out zero values
        result = {}
        for z, c in plan.items():
            if c > 0:
                result[z] = c

        return result

    def _get_count(self, cluster_id, action):
        """Get number of nodes to create or delete.

        :param cluster_id: The ID of the target cluster.
        :param action: The action object which triggered this policy check.
        :return: An integer value which can be 1) positive - number of nodes
                 to create; 2) negative - number of nodes to delete; 3) 0 -
                 the policy does not apply or the check failed.
        """
        if action.action == consts.NODE_CREATE:
            # skip the policy if availability zone is specified in profile
            profile = action.node.rt['profile']
            if profile.properties[profile.AVAILABILITY_ZONE]:
                return 0
            return 1

        if action.action == consts.CLUSTER_RESIZE:
            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            elif action.data.get('creation', None):
                return action.data['creation']['count']

            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res = scaleutils.parse_resize_params(action, db_cluster, current)
            if res[0] == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = res[1]
                LOG.error(res[1])
                return 0

            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            else:
                return action.data['creation']['count']

        if action.action == consts.CLUSTER_SCALE_IN:
            pd = action.data.get('deletion', None)
            if pd is None:
                return -action.inputs.get('count', 1)
            else:
                return -pd.get('count', 1)

        # CLUSTER_SCALE_OUT: an action that inflates the cluster
        pd = action.data.get('creation', None)
        if pd is None:
            return action.inputs.get('count', 1)
        else:
            return pd.get('count', 1)

    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        """
        count = self._get_count(cluster_id, action)
        if count == 0:
            return

        expand = True
        if count < 0:
            expand = False
            count = -count

        cluster = cm.Cluster.load(action.context, cluster_id)

        nc = self.nova(cluster.user, cluster.project)
        zones_good = nc.validate_azs(self.zones.keys())
        if len(zones_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('None of the specified availability '
                                      'zones is valid.')
            LOG.error('None of the specified availability zones is valid.')
            return

        zones = {}
        for z, w in self.zones.items():
            if z in zones_good:
                zones[z] = w

        current = cluster.get_zone_distribution(action.context, zones.keys())
        result = self._create_plan(current, zones, count, expand)

        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error('There is no feasible plan to handle all nodes.')
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['zones'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['zones'] = result
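
The weight math in _create_plan can be illustrated with a standalone re-derivation; the sketch below simplifies the expand path only and is not the method itself.

import math

def plan_expand(current, zones, count):
    """Simplified re-derivation of _create_plan for the expand case."""
    total = count + sum(current.values())
    sum_weight = sum(zones.values())
    remain = count
    plan = {}
    # visit zones from the highest weight down, as the method above does
    for zone, weight in sorted(zones.items(), key=lambda z: z[1],
                               reverse=True):
        quota = int(math.ceil(total * weight / float(sum_weight)))
        headroom = quota - current[zone]
        if headroom <= 0:
            continue
        placed = min(headroom, remain)
        plan[zone] = placed
        remain -= placed
        if remain == 0:
            break
    return plan if remain == 0 else None

# With weights 100:50, roughly two thirds of total capacity goes to az1:
print(plan_expand({'az1': 2, 'az2': 1}, {'az1': 100, 'az2': 50}, 3))
# -> {'az1': 2, 'az2': 1}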
Example No. 12
    def test_validate(self):
        sot = schema.Map(schema={'foo': schema.String()})

        res = sot.validate({"foo": "bar"})

        self.assertIsNone(res)
Example No. 13
class DockerProfile(base.Profile):
    """Profile for a docker container."""

    _VALID_HOST_TYPES = [
        HOST_NOVA_SERVER,
        HOST_HEAT_STACK,
    ] = [
        "os.nova.server",
        "os.heat.stack",
    ]

    KEYS = (
        CONTEXT,
        IMAGE,
        NAME,
        COMMAND,
        HOST_NODE,
        HOST_CLUSTER,
        PORT,
    ) = (
        'context',
        'image',
        'name',
        'command',
        'host_node',
        'host_cluster',
        'port',
    )

    properties_schema = {
        CONTEXT:
        schema.Map(_('Customized security context for operating containers.')),
        IMAGE:
        schema.String(
            _('The image used to create a container.'),
            required=True,
        ),
        NAME:
        schema.String(_('The name of the container.')),
        COMMAND:
        schema.String(_('The command to run when container is started.')),
        PORT:
        schema.Integer(_('The port number used to connect to docker daemon.'),
                       default=2375),
        HOST_NODE:
        schema.String(_('The node on which container will be launched.')),
        HOST_CLUSTER:
        schema.String(_('The cluster on which container will be launched.')),
    }

    OPERATIONS = {}

    def __init__(self, type_name, name, **kwargs):
        super(DockerProfile, self).__init__(type_name, name, **kwargs)

        self._dockerclient = None
        self.container_id = None
        self.host = None
        self.cluster = None

    @classmethod
    def create(cls, ctx, name, spec, metadata=None):
        profile = super(DockerProfile, cls).create(ctx, name, spec, metadata)

        host_cluster = profile.properties.get(profile.HOST_CLUSTER, None)
        if host_cluster:
            db_api.cluster_add_dependents(ctx, host_cluster, profile.id)

        host_node = profile.properties.get(profile.HOST_NODE, None)
        if host_node:
            db_api.node_add_dependents(ctx, host_node, profile.id, 'profile')

        return profile

    @classmethod
    def delete(cls, ctx, profile_id):
        obj = cls.load(ctx, profile_id=profile_id)
        cluster_id = obj.properties.get(obj.HOST_CLUSTER, None)
        if cluster_id:
            db_api.cluster_remove_dependents(ctx, cluster_id, profile_id)

        node_id = obj.properties.get(obj.HOST_NODE, None)
        if node_id:
            db_api.node_remove_dependents(ctx, node_id, profile_id, 'profile')

        super(DockerProfile, cls).delete(ctx, profile_id)

    def docker(self, obj):
        """Construct docker client based on object.

        :param obj: Object for which the client is created. It is expected to
                    be None when retrieving an existing client. When creating
                    a client, it contains the user and project to be used.
        """
        if self._dockerclient is not None:
            return self._dockerclient

        host_node = self.properties.get(self.HOST_NODE, None)
        host_cluster = self.properties.get(self.HOST_CLUSTER, None)
        ctx = context.get_admin_context()
        self.host = self._get_host(ctx, host_node, host_cluster)

        # TODO(Anyone): Check node.data for per-node host selection
        host_type = self.host.rt['profile'].type_name
        if host_type not in self._VALID_HOST_TYPES:
            msg = _("Type of host node (%s) is not supported") % host_type
            raise exc.InternalError(message=msg)

        host_ip = self._get_host_ip(obj, self.host.physical_id, host_type)
        if host_ip is None:
            msg = _("Unable to determine the IP address of host node")
            raise exc.InternalError(message=msg)

        url = 'tcp://%(ip)s:%(port)d' % {
            'ip': host_ip,
            'port': self.properties[self.PORT]
        }
        self._dockerclient = docker_driver.DockerClient(url)
        return self._dockerclient

    def _get_host(self, ctx, host_node, host_cluster):
        """Determine which node to launch container on.

        :param ctx: An instance of the request context.
        :param host_node: The uuid of the hosting node.
        :param host_cluster: The uuid of the hosting cluster.
        """
        host = None
        if host_node is not None:
            try:
                host = node_mod.Node.load(ctx, node_id=host_node)
            except exc.ResourceNotFound as ex:
                msg = ex.enhance_msg('host', ex)
                raise exc.InternalError(message=msg)
            return host

        if host_cluster is not None:
            host = self._get_random_node(ctx, host_cluster)

        return host

    def _get_random_node(self, ctx, host_cluster):
        """Get a node randomly from the host cluster.

        :param ctx: An instance of the request context.
        :param host_cluster: The uuid of the hosting cluster.
        """

        self.cluster = None
        try:
            self.cluster = cluster.Cluster.load(ctx, cluster_id=host_cluster)
        except exc.ResourceNotFound as ex:
            msg = ex.enhance_msg('host', ex)
            raise exc.InternalError(message=msg)

        nodes = self.cluster.rt['nodes']
        if len(nodes) == 0:
            msg = _("The cluster (%s) contains no nodes") % host_cluster
            raise exc.InternalError(message=msg)

        good_nodes = [n for n in nodes if n.status == "ACTIVE"]
        if not good_nodes:
            msg = _("There are no active nodes running in the cluster "
                    "(%s)") % host_cluster
            raise exc.InternalError(message=msg)

        return good_nodes[random.randrange(len(good_nodes))]

    def _get_host_ip(self, obj, host_node, host_type):
        """Fetch the ip address of physical node.

        :param obj: The node object representing the container instance.
        :param host_node: The name or ID of the hosting node object.
        :param host_type: The type of the hosting node, which can be either a
                          nova server or a heat stack.
        :returns: The fixed IP address of the hosting node.
        """
        host_ip = None
        if host_type == self.HOST_NOVA_SERVER:
            server = self.compute(obj).server_get(host_node)
            private_addrs = server.addresses['private']
            for addr in private_addrs:
                if addr['version'] == 4 and addr['OS-EXT-IPS:type'] == 'fixed':
                    host_ip = addr['addr']
        elif host_type == self.HOST_HEAT_STACK:
            stack = self.orchestration(obj).stack_get(host_node)
            outputs = stack.outputs or {}
            if outputs:
                for output in outputs:
                    if output['output_key'] == 'fixed_ip':
                        host_ip = output['output_value']
                        break

            if not outputs or host_ip is None:
                msg = _("Output 'fixed_ip' is missing from the provided stack"
                        " node")
                raise exc.InternalError(message=msg)

        return host_ip

    def do_validate(self, obj):
        """Validate if the spec has provided valid configuration.

        :param obj: The node object.
        """
        cluster = self.properties[self.HOST_CLUSTER]
        node = self.properties[self.HOST_NODE]
        if all([cluster, node]):
            msg = _("Either '%(c)s' or '%(n)s' should be specified, but not "
                    "both.") % {
                        'c': self.HOST_CLUSTER,
                        'n': self.HOST_NODE
                    }
            raise exc.InvalidSpec(message=msg)

        if not any([cluster, node]):
            msg = _("Either '%(c)s' or '%(n)s' should be specified.") % {
                'c': self.HOST_CLUSTER,
                'n': self.HOST_NODE
            }
            raise exc.InvalidSpec(message=msg)

        if cluster:
            try:
                co.Cluster.find(self.context, cluster)
            except (exc.ResourceNotFound, exc.MultipleChoices):
                msg = _("The specified %(key)s '%(val)s' could not be found "
                        "or is not unique.") % {
                            'key': self.HOST_CLUSTER,
                            'val': cluster
                        }
                raise exc.InvalidSpec(message=msg)

        if node:
            try:
                no.Node.find(self.context, node)
            except (exc.ResourceNotFound, exc.MultipleChoices):
                msg = _("The specified %(key)s '%(val)s' could not be found "
                        "or is not unique.") % {
                            'key': self.HOST_NODE,
                            'val': node
                        }
                raise exc.InvalidSpec(message=msg)

    def do_create(self, obj):
        """Create a container instance using the given profile.

        :param obj: The node object for this container.
        :returns: ID of the container instance or ``None`` if driver fails.
        :raises: `EResourceCreation`
        """
        name = self.properties[self.NAME]
        if name is None:
            name = '-'.join([obj.name, utils.random_name()])

        params = {
            'image': self.properties[self.IMAGE],
            'name': name,
            'command': self.properties[self.COMMAND],
        }

        try:
            ctx = context.get_admin_context()
            dockerclient = self.docker(obj)
            db_api.node_add_dependents(ctx, self.host.id, obj.id)
            container = dockerclient.container_create(**params)
        except exc.InternalError as ex:
            raise exc.EResourceCreation(type='container',
                                        message=six.text_type(ex))

        self.container_id = container['Id'][:36]
        return self.container_id

    def do_delete(self, obj):
        """Delete a container node.

        :param obj: The node object representing the container.
        :returns: `None`
        """
        if not obj.physical_id:
            return

        try:
            self.docker(obj).container_delete(obj.physical_id)
        except exc.InternalError as ex:
            raise exc.EResourceDeletion(type='container',
                                        id=obj.physical_id,
                                        message=six.text_type(ex))
        ctx = context.get_admin_context()
        db_api.node_remove_dependents(ctx, self.host.id, obj.id)
        return
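
A spec consistent with the schema and the do_validate() rules above might look like this; the profile type name and the node identifier are assumptions for illustration.

# Hedged sketch: a docker profile spec. do_validate() above requires exactly
# one of host_node / host_cluster to be set. The type name is assumed.
spec = {
    'type': 'container.dockerinc.docker',
    'version': '1.0',
    'properties': {
        'image': 'nginx',             # required
        'name': 'web-1',              # optional; do_create generates a name
                                      # with a random suffix when omitted
        'command': None,
        'port': 2375,                 # docker daemon port (the default)
        'host_node': 'node-uuid',     # hypothetical node identifier
        # 'host_cluster': ...,        # must not be set together with host_node
    },
}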
Example No. 14
class HealthPolicy(base.Policy):
    """Policy for health management of a cluster."""

    VERSION = '1.0'
    VERSIONS = {'1.0': [{'status': consts.EXPERIMENTAL, 'since': '2017.02'}]}
    PRIORITY = 600

    TARGET = [
        ('BEFORE', consts.CLUSTER_RECOVER),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.NODE_DELETE),
        ('AFTER', consts.CLUSTER_DEL_NODES),
        ('AFTER', consts.CLUSTER_SCALE_IN),
        ('AFTER', consts.CLUSTER_RESIZE),
        ('AFTER', consts.NODE_DELETE),
    ]

    # Should be ANY if profile provides health check support?
    PROFILE_TYPE = [
        'os.nova.server',
        'os.heat.stack',
    ]

    KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery')

    _DETECTION_KEYS = (
        DETECTION_TYPE,
        DETECTION_OPTIONS,
    ) = ('type', 'options')

    _DETECTION_OPTIONS = (DETECTION_INTERVAL, ) = ('interval', )

    _RECOVERY_KEYS = (RECOVERY_ACTIONS, RECOVERY_FENCING) = ('actions',
                                                             'fencing')

    FENCING_OPTION_VALUES = (
        COMPUTE,
        # STORAGE, NETWORK,
    ) = (
        'COMPUTE',
        # 'STORAGE', 'NETWORK'
    )

    ACTION_KEYS = (
        ACTION_NAME,
        ACTION_PARAMS,
    ) = (
        'name',
        'params',
    )

    properties_schema = {
        DETECTION:
        schema.Map(
            _('Policy aspect for node failure detection.'),
            schema={
                DETECTION_TYPE:
                schema.String(
                    _('Type of node failure detection.'),
                    constraints=[
                        constraints.AllowedValues(consts.DETECTION_TYPES),
                    ],
                    required=True,
                ),
                DETECTION_OPTIONS:
                schema.Map(
                    schema={
                        DETECTION_INTERVAL:
                        schema.Integer(
                            _("Number of seconds between pollings. Only "
                              "required when type is 'NODE_STATUS_POLLING'."),
                            default=60,
                        ),
                    }),
            },
            required=True,
        ),
        RECOVERY:
        schema.Map(
            _('Policy aspect for node failure recovery.'),
            schema={
                RECOVERY_ACTIONS:
                schema.List(_('List of actions to try for node recovery.'),
                            schema=schema.Map(
                                _('Action to try for node recovery.'),
                                schema={
                                    ACTION_NAME:
                                    schema.String(
                                        _("Name of action to execute."),
                                        constraints=[
                                            constraints.AllowedValues(
                                                consts.RECOVERY_ACTIONS),
                                        ],
                                        required=True),
                                    ACTION_PARAMS:
                                    schema.Map(_("Parameters for the action")),
                                })),
                RECOVERY_FENCING:
                schema.List(
                    _('List of services to be fenced.'),
                    schema=schema.String(
                        _('Service to be fenced.'),
                        constraints=[
                            constraints.AllowedValues(FENCING_OPTION_VALUES),
                        ],
                        required=True,
                    ),
                ),
            }),
    }

    def __init__(self, name, spec, **kwargs):
        super(HealthPolicy, self).__init__(name, spec, **kwargs)

        self.check_type = self.properties[self.DETECTION][self.DETECTION_TYPE]
        options = self.properties[self.DETECTION][self.DETECTION_OPTIONS]
        self.interval = options[self.DETECTION_INTERVAL]
        recover_settings = self.properties[self.RECOVERY]
        self.recover_actions = recover_settings[self.RECOVERY_ACTIONS]
        self.fencing_types = recover_settings[self.RECOVERY_FENCING]

    def validate(self, context, validate_props=False):
        super(HealthPolicy, self).validate(context,
                                           validate_props=validate_props)

        if len(self.recover_actions) > 1:
            message = _(
                "Only one '%s' is supported for now.") % self.RECOVERY_ACTIONS
            raise exc.ESchema(message=message)

        # TODO(Qiming): Add detection of duplicated action names when
        # support to list of actions is implemented.

    def attach(self, cluster, enabled=True):
        """"Hook for policy attach.

        Register the cluster for health management.

        :param cluster: The cluster to which the policy is being attached.
        :param enabled: Whether the policy should be enabled when attached.
        :return: A tuple comprising execution result and policy data.
        """
        p_type = cluster.rt['profile'].type_name
        action_names = [a['name'] for a in self.recover_actions]
        if p_type != 'os.nova.server':
            if consts.RECOVER_REBUILD in action_names:
                err_msg = _("Recovery action REBUILD is only applicable to "
                            "os.nova.server clusters.")
                return False, err_msg

            if consts.RECOVER_REBOOT in action_names:
                err_msg = _("Recovery action REBOOT is only applicable to "
                            "os.nova.server clusters.")
                return False, err_msg

        kwargs = {
            'check_type': self.check_type,
            'interval': self.interval,
            'params': {},
            'enabled': enabled
        }

        health_manager.register(cluster.id, engine_id=None, **kwargs)

        data = {
            'check_type': self.check_type,
            'interval': self.interval,
        }

        return True, self._build_policy_data(data)

    def detach(self, cluster):
        """Hook for policy detach.

        Unregister the cluster for health management.
        :param cluster: The target cluster.
        :returns: A tuple comprising the execution result and reason.
        """
        health_manager.unregister(cluster.id)
        return True, ''

    def pre_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the tasks for this routine is to disable the health policy if
        the action is a request that will shrink the cluster, because the
        policy might otherwise attempt to recover nodes that are about to be
        deleted.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.disable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.disable(cluster_id)
                return True

            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res, reason = scaleutils.parse_resize_params(
                action, db_cluster, current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.disable(cluster_id)
                return True

        pd = {
            'recover_action': self.recover_actions,
            'fencing': self.fencing_types,
        }
        action.data.update({'health': pd})
        action.store(action.context)

        return True

    def post_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the task for this routine is to re-enable health policy if the
        action is a request that will shrink the cluster thus the policy has
        been temporarily disabled.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.enable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.enable(cluster_id)
                return True

            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res, reason = scaleutils.parse_resize_params(
                action, db_cluster, current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.enable(cluster_id)
                return True

        return True
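
Compared with the earlier HealthPolicy, recovery actions here are maps with a name and optional params, and validate() caps the list at one entry. A matching spec sketch follows; the type, detection, and action names are assumptions.

# Hedged sketch: each recovery action is now a map; validate() above rejects
# more than one action for the time being. 'NODE_STATUS_POLLING' and
# 'RECREATE' are assumed members of the consts lists referenced above.
spec = {
    'type': 'senlin.policy.health',   # assumed policy type name
    'version': '1.0',
    'properties': {
        'detection': {
            'type': 'NODE_STATUS_POLLING',
            'options': {'interval': 60},
        },
        'recovery': {
            'actions': [{'name': 'RECREATE', 'params': {}}],
            'fencing': ['COMPUTE'],
        },
    },
}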
Example No. 15
class DockerProfile(base.Profile):
    """Profile for a docker container."""
    VERSIONS = {'1.0': [{'status': consts.EXPERIMENTAL, 'since': '2017.02'}]}

    _VALID_HOST_TYPES = [
        HOST_NOVA_SERVER,
        HOST_HEAT_STACK,
    ] = [
        "os.nova.server",
        "os.heat.stack",
    ]

    KEYS = (
        CONTEXT,
        IMAGE,
        NAME,
        COMMAND,
        HOST_NODE,
        HOST_CLUSTER,
        PORT,
    ) = (
        'context',
        'image',
        'name',
        'command',
        'host_node',
        'host_cluster',
        'port',
    )

    properties_schema = {
        CONTEXT:
        schema.Map(_('Customized security context for operating containers.')),
        IMAGE:
        schema.String(
            _('The image used to create a container.'),
            required=True,
        ),
        NAME:
        schema.String(
            _('The name of the container.'),
            updatable=True,
        ),
        COMMAND:
        schema.String(_('The command to run when container is started.')),
        PORT:
        schema.Integer(_('The port number used to connect to docker daemon.'),
                       default=2375),
        HOST_NODE:
        schema.String(_('The node on which container will be launched.')),
        HOST_CLUSTER:
        schema.String(_('The cluster on which container will be launched.')),
    }

    OP_NAMES = (
        OP_RESTART,
        OP_PAUSE,
        OP_UNPAUSE,
    ) = (
        'restart',
        'pause',
        'unpause',
    )

    _RESTART_WAIT = (RESTART_WAIT,) = ('wait_time',)

    OPERATIONS = {
        OP_RESTART:
        schema.Operation(
            _("Restart a container."),
            schema={
                RESTART_WAIT:
                schema.IntegerParam(
                    _("Number of seconds to wait before killing the "
                      "container."))
            }),
        OP_PAUSE:
        schema.Operation(_("Pause a container.")),
        OP_UNPAUSE:
        schema.Operation(_("Unpause a container."))
    }

    def __init__(self, type_name, name, **kwargs):
        super(DockerProfile, self).__init__(type_name, name, **kwargs)

        self._dockerclient = None
        self.container_id = None
        self.host = None
        self.cluster = None

    @classmethod
    def create(cls, ctx, name, spec, metadata=None):
        profile = super(DockerProfile, cls).create(ctx, name, spec, metadata)

        host_cluster = profile.properties.get(profile.HOST_CLUSTER, None)
        if host_cluster:
            db_api.cluster_add_dependents(ctx, host_cluster, profile.id)

        host_node = profile.properties.get(profile.HOST_NODE, None)
        if host_node:
            db_api.node_add_dependents(ctx, host_node, profile.id, 'profile')

        return profile

    @classmethod
    def delete(cls, ctx, profile_id):
        obj = cls.load(ctx, profile_id=profile_id)
        cluster_id = obj.properties.get(obj.HOST_CLUSTER, None)
        if cluster_id:
            db_api.cluster_remove_dependents(ctx, cluster_id, profile_id)

        node_id = obj.properties.get(obj.HOST_NODE, None)
        if node_id:
            db_api.node_remove_dependents(ctx, node_id, profile_id, 'profile')

        super(DockerProfile, cls).delete(ctx, profile_id)

    def docker(self, obj):
        """Construct docker client based on object.

        :param obj: Object for which the client is created. It is expected to
                    be None when retrieving an existing client. When creating
                    a client, it contains the user and project to be used.
        """
        if self._dockerclient is not None:
            return self._dockerclient

        host_node = self.properties.get(self.HOST_NODE, None)
        host_cluster = self.properties.get(self.HOST_CLUSTER, None)
        ctx = context.get_admin_context()
        self.host = self._get_host(ctx, host_node, host_cluster)

        # TODO(Anyone): Check node.data for per-node host selection
        host_type = self.host.rt['profile'].type_name
        if host_type not in self._VALID_HOST_TYPES:
            msg = _("Type of host node (%s) is not supported") % host_type
            raise exc.InternalError(message=msg)

        host_ip = self._get_host_ip(obj, self.host.physical_id, host_type)
        if host_ip is None:
            msg = _("Unable to determine the IP address of host node")
            raise exc.InternalError(message=msg)

        url = 'tcp://%(ip)s:%(port)d' % {
            'ip': host_ip,
            'port': self.properties[self.PORT]
        }
        self._dockerclient = docker_driver.DockerClient(url)
        return self._dockerclient

    def _get_host(self, ctx, host_node, host_cluster):
        """Determine which node to launch container on.

        :param ctx: An instance of the request context.
        :param host_node: The uuid of the hosting node.
        :param host_cluster: The uuid of the hosting cluster.
        """
        host = None
        if host_node is not None:
            try:
                host = node_mod.Node.load(ctx, node_id=host_node)
            except exc.ResourceNotFound as ex:
                msg = ex.enhance_msg('host', ex)
                raise exc.InternalError(message=msg)
            return host

        if host_cluster is not None:
            host = self._get_random_node(ctx, host_cluster)

        return host

    def _get_random_node(self, ctx, host_cluster):
        """Get a node randomly from the host cluster.

        :param ctx: An instance of the request context.
        :param host_cluster: The uuid of the hosting cluster.
        """
        self.cluster = None
        try:
            self.cluster = cluster.Cluster.load(ctx, cluster_id=host_cluster)
        except exc.ResourceNotFound as ex:
            msg = ex.enhance_msg('host', ex)
            raise exc.InternalError(message=msg)

        filters = {consts.NODE_STATUS: consts.NS_ACTIVE}
        nodes = no.Node.get_all_by_cluster(ctx,
                                           cluster_id=host_cluster,
                                           filters=filters)
        if len(nodes) == 0:
            msg = _("The cluster (%s) contains no active nodes") % host_cluster
            raise exc.InternalError(message=msg)

        # TODO(anyone): Should pick a node by its load
        db_node = nodes[random.randrange(len(nodes))]
        return node_mod.Node.load(ctx, db_node=db_node)

    def _get_host_ip(self, obj, host_node, host_type):
        """Fetch the ip address of physical node.

        :param obj: The node object representing the container instance.
        :param host_node: The name or ID of the hosting node object.
        :param host_type: The type of the hosting node, which can be either a
                          nova server or a heat stack.
        :returns: The fixed IP address of the hosting node.
        """
        host_ip = None
        if host_type == self.HOST_NOVA_SERVER:
            server = self.compute(obj).server_get(host_node)
            private_addrs = server.addresses['private']
            for addr in private_addrs:
                if addr['version'] == 4 and addr['OS-EXT-IPS:type'] == 'fixed':
                    host_ip = addr['addr']
        elif host_type == self.HOST_HEAT_STACK:
            stack = self.orchestration(obj).stack_get(host_node)
            outputs = stack.outputs or {}
            if outputs:
                for output in outputs:
                    if output['output_key'] == 'fixed_ip':
                        host_ip = output['output_value']
                        break

            if not outputs or host_ip is None:
                msg = _("Output 'fixed_ip' is missing from the provided stack"
                        " node")
                raise exc.InternalError(message=msg)

        return host_ip

    def do_validate(self, obj):
        """Validate if the spec has provided valid configuration.

        :param obj: The node object.
        """
        cluster = self.properties[self.HOST_CLUSTER]
        node = self.properties[self.HOST_NODE]
        if all([cluster, node]):
            msg = _("Either '%(c)s' or '%(n)s' must be specified, but not "
                    "both.") % {
                        'c': self.HOST_CLUSTER,
                        'n': self.HOST_NODE
                    }
            raise exc.InvalidSpec(message=msg)

        if not any([cluster, node]):
            msg = _("Either '%(c)s' or '%(n)s' must be specified.") % {
                'c': self.HOST_CLUSTER,
                'n': self.HOST_NODE
            }
            raise exc.InvalidSpec(message=msg)

        if cluster:
            try:
                co.Cluster.find(self.context, cluster)
            except (exc.ResourceNotFound, exc.MultipleChoices):
                msg = _("The specified %(key)s '%(val)s' could not be found "
                        "or is not unique.") % {
                            'key': self.HOST_CLUSTER,
                            'val': cluster
                        }
                raise exc.InvalidSpec(message=msg)

        if node:
            try:
                no.Node.find(self.context, node)
            except (exc.ResourceNotFound, exc.MultipleChoices):
                msg = _("The specified %(key)s '%(val)s' could not be found "
                        "or is not unique.") % {
                            'key': self.HOST_NODE,
                            'val': node
                        }
                raise exc.InvalidSpec(message=msg)

    def do_create(self, obj):
        """Create a container instance using the given profile.

        :param obj: The node object for this container.
        :returns: ID of the container instance or ``None`` if driver fails.
        :raises: `EResourceCreation`
        """
        name = self.properties[self.NAME]
        if name is None:
            name = '-'.join([obj.name, utils.random_name()])

        params = {
            'image': self.properties[self.IMAGE],
            'name': name,
            'command': self.properties[self.COMMAND],
        }

        try:
            ctx = context.get_service_context(project=obj.project,
                                              user=obj.user)
            dockerclient = self.docker(obj)
            db_api.node_add_dependents(ctx, self.host.id, obj.id)
            container = dockerclient.container_create(**params)
            dockerclient.start(container['Id'])
        except exc.InternalError as ex:
            raise exc.EResourceCreation(type='container',
                                        message=six.text_type(ex))

        self.container_id = container['Id'][:36]
        return self.container_id

    def do_delete(self, obj):
        """Delete a container node.

        :param obj: The node object representing the container.
        :returns: `None`
        """
        if not obj.physical_id:
            return

        try:
            self.handle_stop(obj)
            self.docker(obj).container_delete(obj.physical_id)
        except exc.InternalError as ex:
            raise exc.EResourceDeletion(type='container',
                                        id=obj.physical_id,
                                        message=six.text_type(ex))
        ctx = context.get_admin_context()
        db_api.node_remove_dependents(ctx, self.host.id, obj.id)
        return

    def do_update(self, obj, new_profile=None, **params):
        """Perform update on the container.

        :param obj: the container to operate on
        :param new_profile: the new profile for the container.
        :param params: a dictionary of optional parameters.
        :returns: True if update was successful or False otherwise.
        :raises: `EResourceUpdate` if operation fails.
        """
        self.server_id = obj.physical_id
        if not self.server_id:
            return False

        if not new_profile:
            return False

        if not self.validate_for_update(new_profile):
            return False

        name_changed, new_name = self._check_container_name(obj, new_profile)
        if name_changed:
            self._update_name(obj, new_name)

        return True

    def _check_container_name(self, obj, profile):
        """Check if there is a new name to be assigned to the container.

        :param obj: The node object to operate on.
        :param profile: The new profile which may contain a name for
                        the container.
        :return: A tuple consisting of a boolean indicating whether the name
                 needs changing and the container name determined.
        """
        old_name = self.properties[self.NAME] or obj.name
        new_name = profile.properties[self.NAME] or obj.name
        if old_name == new_name:
            return False, new_name
        return True, new_name

    def _update_name(self, obj, new_name):
        try:
            self.docker(obj).rename(obj.physical_id, new_name)
        except exc.InternalError as ex:
            raise exc.EResourceUpdate(type='container',
                                      id=obj.physical_id,
                                      message=six.text_type(ex))

    def handle_reboot(self, obj, **options):
        """Handler for a reboot operation.

        :param obj: The node object representing the container.
        :returns: None
        """
        if not obj.physical_id:
            return

        if 'timeout' in options:
            params = {'timeout': options['timeout']}
        else:
            params = {}
        try:
            self.docker(obj).restart(obj.physical_id, **params)
        except exc.InternalError as ex:
            raise exc.EResourceOperation(type='container',
                                         id=obj.physical_id[:8],
                                         op='rebooting',
                                         message=six.text_type(ex))
        return

    def handle_pause(self, obj):
        """Handler for a pause operation.

        :param obj: The node object representing the container.
        :returns: None
        """
        if not obj.physical_id:
            return

        try:
            self.docker(obj).pause(obj.physical_id)
        except exc.InternalError as ex:
            raise exc.EResourceOperation(type='container',
                                         id=obj.physical_id[:8],
                                         op='pausing',
                                         message=six.text_type(ex))
        return

    def handle_unpause(self, obj):
        """Handler for an unpause operation.

        :param obj: The node object representing the container.
        :returns: None
        """
        if not obj.physical_id:
            return

        try:
            self.docker(obj).unpause(obj.physical_id)
        except exc.InternalError as ex:
            raise exc.EResourceOperation(type='container',
                                         id=obj.physical_id[:8],
                                         op='unpausing',
                                         message=six.text_type(ex))
        return

    def handle_stop(self, obj, **options):
        """Handler for the stop operation."""
        if not obj.physical_id:
            return
        timeout = options.get('timeout', None)
        if timeout:
            timeout = int(timeout)
        try:
            self.docker(obj).stop(obj.physical_id, timeout=timeout)
        except exc.InternalError as ex:
            raise exc.EResourceOperation(type='container',
                                         id=obj.physical_id[:8],
                                         op='stop',
                                         message=six.text_type(ex))
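
The operation handlers above can be driven directly. A short usage sketch, assuming profile is a DockerProfile instance and node is a node object with a physical_id (both hypothetical here):

# Hedged usage sketch: 'profile' and 'node' are assumed to exist.
profile.handle_stop(node, timeout=10)    # stop, waiting up to 10 seconds
profile.handle_pause(node)               # freeze the container's processes
profile.handle_unpause(node)             # resume them
profile.handle_reboot(node, timeout=5)   # restart after a 5 second grace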
Example No. 16
class Alarm(base.Trigger):

    # time constraints
    alarm_schema = {
        REPEAT:
        schema.Boolean(
            _('Whether the actions should be re-triggered on each evaluation '
              'cycle. Defaults to False.'),
            default=False,
        ),
        TIME_CONSTRAINTS:
        schema.List(schema=schema.Map(
            _('A map of time constraint settings.'),
            schema={
                NAME:
                schema.String(_('Name of the time constraint.')),
                TC_DESCRIPTION:
                schema.String(_('A description of the time constraint.')),
                TC_START:
                schema.String(
                    _('Start point of the time constraint, expressed as a '
                      'string in cron expression format.'),
                    required=True,
                ),
                TC_DURATION:
                schema.Integer(
                    _('How long the constraint should last, in seconds.'),
                    required=True,
                ),
                TC_TIMEZONE:
                schema.String(
                    _('Time zone of the constraint.'),
                    default='',
                ),
            },
        ))
    }

    def __init__(self, name, spec, **kwargs):
        super(Alarm, self).__init__(name, spec, **kwargs)

        self.alarm_properties = schema.Spec(self.alarm_schema, spec)
        self.namespace = 'default'
        self.rule = None

    def validate(self):
        # validate cron expression if specified
        if TIME_CONSTRAINTS in self.spec:
            tcs = self.alarm_properties[TIME_CONSTRAINTS]
            for tc in tcs:
                exp = tc.get(TC_START, '')
                try:
                    croniter.croniter(exp)
                except Exception as ex:
                    msg = _("Invalid cron expression specified for property "
                            "'%(property)s' (%(exp)s): %(ex)s") % {
                                'property': TC_START,
                                'exp': exp,
                                'ex': six.text_type(ex)
                            }
                    raise exc.InvalidSpec(message=msg)

                tz = tc.get(TC_TIMEZONE, '')
                try:
                    pytz.timezone(tz)
                except Exception as ex:
                    msg = _("Invalid timezone value specified for property "
                            "'%(property)s' (%(tz)s): %(ex)s") % {
                                'property': TC_TIMEZONE,
                                'tz': tz,
                                'ex': six.text_type(ex)
                            }
                    raise exc.InvalidSpec(message=msg)

    def create(self, ctx, **kwargs):
        """Create an alarm for a cluster.

        :param ctx: The request context.
        :param kwargs: Optional lists of actions for the alarm states
                       (ok_actions, alarm_actions, insufficient_data_actions).
        :returns: A dict containing properties of the alarm.
        """
        self.ok_actions = kwargs.get(OK_ACTIONS, [])
        self.alarm_actions = kwargs.get(ALARM_ACTIONS, [])
        self.insufficient_data_actions = kwargs.get(INSUFFICIENT_DATA_ACTIONS,
                                                    [])

        rule_name = self.namespace + '_rule'
        rule_data = dict((k, v) for k, v in self.rule.items())
        params = {
            NAME: self.name,
            DESCRIPTION: self.desc,
            TYPE: self.namespace,
            STATE: self.state,
            SEVERITY: self.severity,
            ENABLED: self.enabled,
            OK_ACTIONS: self.ok_actions,
            ALARM_ACTIONS: self.alarm_actions,
            INSUFFICIENT_DATA_ACTIONS: self.insufficient_data_actions,
            TIME_CONSTRAINTS: self.alarm_properties[TIME_CONSTRAINTS],
            REPEAT: self.alarm_properties[REPEAT],
            rule_name: rule_data,
        }

        try:
            cc = driver_base.SenlinDriver().telemetry(ctx.to_dict())
            alarm = cc.alarm_create(**params)
            self.physical_id = alarm.id
            self.store(ctx)
            return True, alarm.to_dict()
        except exc.SenlinException as ex:
            return False, six.text_type(ex)

    def delete(self, ctx, identifier):
        """Delete an alarm.

        :param identifier: This must be an alarm ID.
        """
        try:
            cc = driver_base.SenlinDriver().telemetry(ctx)
            res = cc.alarm_delete(identifier, True)
            return True, res
        except exc.InternalError as ex:
            return False, six.text_type(ex)

    def update(self, identifier, values):
        return NotImplemented
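The validate method above only checks the two free-form string fields that
the schema cannot verify by itself: the cron expression and the timezone. A
standalone sketch of those two checks (requires the croniter and pytz
packages; the key names are inlined as plain strings for illustration):

import croniter
import pytz


def validate_time_constraint(tc):
    # Reject malformed cron expressions, mirroring the TC_START check.
    try:
        croniter.croniter(tc.get('start', ''))
    except Exception as ex:
        raise ValueError("Invalid cron expression %r: %s"
                         % (tc.get('start'), ex))
    # Reject unknown timezones, mirroring the TC_TIMEZONE check.
    try:
        pytz.timezone(tc.get('timezone', 'UTC'))
    except Exception as ex:
        raise ValueError("Invalid timezone %r: %s" % (tc.get('timezone'), ex))


validate_time_constraint({'start': '0 23 * * *', 'timezone': 'Europe/Paris'})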
Example No. 17
class Profile(object):
    """Base class for profiles."""

    VERSIONS = {}

    KEYS = (
        TYPE,
        VERSION,
        PROPERTIES,
    ) = (
        'type',
        'version',
        'properties',
    )

    spec_schema = {
        TYPE:
        schema.String(
            _('Name of the profile type.'),
            required=True,
        ),
        VERSION:
        schema.String(
            _('Version number of the profile type.'),
            required=True,
        ),
        PROPERTIES:
        schema.Map(
            _('Properties for the profile.'),
            required=True,
        )
    }

    properties_schema = {}
    OPERATIONS = {}

    def __new__(cls, name, spec, **kwargs):
        """Create a new profile of the appropriate class.

        :param name: The name for the profile.
        :param spec: A dictionary containing the spec for the profile.
        :param kwargs: Keyword arguments for profile creation.
        :returns: An instance of a specific sub-class of Profile.
        """
        type_name, version = schema.get_spec_version(spec)
        type_str = "-".join([type_name, version])

        if cls != Profile:
            ProfileClass = cls
        else:
            ProfileClass = environment.global_env().get_profile(type_str)

        return super(Profile, cls).__new__(ProfileClass)

    def __init__(self, name, spec, **kwargs):
        """Initialize a profile instance.

        :param name: A string that specifies the name for the profile.
        :param spec: A dictionary containing the detailed profile spec.
        :param kwargs: Keyword arguments for initializing the profile.
        :returns: An instance of a specific sub-class of Profile.
        """

        type_name, version = schema.get_spec_version(spec)
        self.type_name = type_name
        self.version = version
        type_str = "-".join([type_name, version])

        self.name = name
        self.spec = spec

        self.id = kwargs.get('id', None)
        self.type = kwargs.get('type', type_str)

        self.user = kwargs.get('user')
        self.project = kwargs.get('project')
        self.domain = kwargs.get('domain')

        self.metadata = kwargs.get('metadata', {})

        self.created_at = kwargs.get('created_at', None)
        self.updated_at = kwargs.get('updated_at', None)

        self.spec_data = schema.Spec(self.spec_schema, self.spec)
        self.properties = schema.Spec(self.properties_schema,
                                      self.spec.get(self.PROPERTIES, {}),
                                      version)

        if not self.id:
            # new object needs a context dict
            self.context = self._init_context()
        else:
            self.context = kwargs.get('context')

        # initialize clients
        self._computeclient = None
        self._networkclient = None
        self._orchestrationclient = None

    @classmethod
    def _from_object(cls, profile):
        '''Construct a profile from profile object.

        :param profile: a profile object that contains all required fields.
        '''
        kwargs = {
            'id': profile.id,
            'type': profile.type,
            'context': profile.context,
            'user': profile.user,
            'project': profile.project,
            'domain': profile.domain,
            'metadata': profile.metadata,
            'created_at': profile.created_at,
            'updated_at': profile.updated_at,
        }

        return cls(profile.name, profile.spec, **kwargs)

    @classmethod
    def load(cls, ctx, profile=None, profile_id=None, project_safe=True):
        '''Retrieve a profile object from database.'''
        if profile is None:
            profile = po.Profile.get(ctx,
                                     profile_id,
                                     project_safe=project_safe)
            if profile is None:
                raise exc.ResourceNotFound(type='profile', id=profile_id)

        return cls._from_object(profile)

    @classmethod
    def create(cls, ctx, name, spec, metadata=None):
        """Create a profile object and validate it.

        :param ctx: The requesting context.
        :param name: The name for the profile object.
        :param spec: A dict containing the detailed spec.
        :param metadata: An optional dictionary specifying key-value pairs to
                         be associated with the profile.
        :returns: An instance of Profile.
        """
        if metadata is None:
            metadata = {}

        profile = None
        try:
            profile = cls(name,
                          spec,
                          metadata=metadata,
                          user=ctx.user,
                          project=ctx.project)
            profile.validate(True)
        except (exc.ResourceNotFound, exc.ESchema) as ex:
            error = _("Failed in creating profile %(name)s: %(error)s") % {
                "name": name,
                "error": six.text_type(ex)
            }
            raise exc.InvalidSpec(message=error)

        profile.store(ctx)

        return profile

    @classmethod
    def delete(cls, ctx, profile_id):
        po.Profile.delete(ctx, profile_id)

    def store(self, ctx):
        '''Store the profile into database and return its ID.'''
        timestamp = timeutils.utcnow(True)

        values = {
            'name': self.name,
            'type': self.type,
            'context': self.context,
            'spec': self.spec,
            'user': self.user,
            'project': self.project,
            'domain': self.domain,
            'meta_data': self.metadata,
        }

        if self.id:
            self.updated_at = timestamp
            values['updated_at'] = timestamp
            po.Profile.update(ctx, self.id, values)
        else:
            self.created_at = timestamp
            values['created_at'] = timestamp
            profile = po.Profile.create(ctx, values)
            self.id = profile.id

        return self.id

    @classmethod
    @profiler.trace('Profile.create_object', hide_args=False)
    def create_object(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_create(obj)

    @classmethod
    @profiler.trace('Profile.delete_object', hide_args=False)
    def delete_object(cls, ctx, obj, **params):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_delete(obj, **params)

    @classmethod
    @profiler.trace('Profile.update_object', hide_args=False)
    def update_object(cls, ctx, obj, new_profile_id=None, **params):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        new_profile = None
        if new_profile_id:
            new_profile = cls.load(ctx, profile_id=new_profile_id)
        return profile.do_update(obj, new_profile, **params)

    @classmethod
    @profiler.trace('Profile.get_details', hide_args=False)
    def get_details(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_get_details(obj)

    @classmethod
    @profiler.trace('Profile.adopt_node', hide_args=False)
    def adopt_node(cls, ctx, obj, type_name, overrides=None, snapshot=False):
        """Adopt a node.

        :param ctx: Request context.
        :param obj: A temporary node object.
        :param overrides: An optional parameter that specifies the set of
            properties to be overridden.
        :param snapshot: A boolean flag indicating whether a snapshot should
            be created before adopting the node.
        :returns: A dictionary containing the profile spec created from the
            specific node, or a dictionary containing error message.
        """
        parts = type_name.split("-")
        tmpspec = {"type": parts[0], "version": parts[1]}
        profile = cls("name", tmpspec)
        return profile.do_adopt(obj, overrides=overrides, snapshot=snapshot)

    @classmethod
    @profiler.trace('Profile.join_cluster', hide_args=False)
    def join_cluster(cls, ctx, obj, cluster_id):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_join(obj, cluster_id)

    @classmethod
    @profiler.trace('Profile.leave_cluster', hide_args=False)
    def leave_cluster(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_leave(obj)

    @classmethod
    @profiler.trace('Profile.check_object', hide_args=False)
    def check_object(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        try:
            return profile.do_check(obj)
        except exc.InternalError as ex:
            LOG.error(ex)
            return False

    @classmethod
    @profiler.trace('Profile.recover_object', hide_args=False)
    def recover_object(cls, ctx, obj, **options):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_recover(obj, **options)

    def validate(self, validate_props=False):
        """Validate the schema and the data provided."""
        # general validation
        self.spec_data.validate()
        self.properties.validate()

        ctx_dict = self.properties.get('context', {})
        if ctx_dict:
            argspec = inspect.getargspec(context.RequestContext.__init__)
            valid_keys = argspec.args
            bad_keys = [k for k in ctx_dict if k not in valid_keys]
            if bad_keys:
                msg = _("Some keys in 'context' are invalid: %s") % bad_keys
                raise exc.ESchema(message=msg)

        if validate_props:
            self.do_validate(obj=self)

    @classmethod
    def get_schema(cls):
        return dict((name, dict(schema))
                    for name, schema in cls.properties_schema.items())

    @classmethod
    def get_ops(cls):
        return dict(
            (name, dict(schema)) for name, schema in cls.OPERATIONS.items())

    def _init_context(self):
        profile_context = {}
        if self.CONTEXT in self.properties:
            profile_context = self.properties[self.CONTEXT] or {}

        ctx_dict = context.get_service_credentials(**profile_context)

        ctx_dict.pop('project_name', None)
        ctx_dict.pop('project_domain_name', None)

        return ctx_dict

    def _build_conn_params(self, user, project):
        """Build connection params for specific user and project.

        :param user: The ID of the user for which a trust will be used.
        :param project: The ID of the project for which a trust will be used.
        :returns: A dict containing the required parameters for connection
                  creation.
        """
        cred = co.Credential.get(oslo_context.get_current(), user, project)
        if cred is None:
            raise exc.TrustNotFound(trustor=user)

        trust_id = cred.cred['openstack']['trust']

        # This is supposed to be trust-based authentication
        params = copy.deepcopy(self.context)
        params['trust_id'] = trust_id

        return params

    def compute(self, obj):
        '''Construct compute client based on object.

        :param obj: Object for which the client is created. It is expected to
                    be None when retrieving an existing client. When creating
                    a client, it contains the user and project to be used.
        '''

        if self._computeclient is not None:
            return self._computeclient
        params = self._build_conn_params(obj.user, obj.project)
        self._computeclient = driver_base.SenlinDriver().compute(params)
        return self._computeclient

    def network(self, obj):
        """Construct network client based on object.

        :param obj: Object for which the client is created. It is expected to
                    be None when retrieving an existing client. When creating
                    a client, it contains the user and project to be used.
        """
        if self._networkclient is not None:
            return self._networkclient
        params = self._build_conn_params(obj.user, obj.project)
        self._networkclient = driver_base.SenlinDriver().network(params)
        return self._networkclient

    def orchestration(self, obj):
        """Construct orchestration client based on object.

        :param obj: Object for which the client is created. It is expected to
                    be None when retrieving an existing client. When creating
                    a client, it contains the user and project to be used.
        """
        if self._orchestrationclient is not None:
            return self._orchestrationclient
        params = self._build_conn_params(obj.user, obj.project)
        oc = driver_base.SenlinDriver().orchestration(params)
        self._orchestrationclient = oc
        return oc

    def do_create(self, obj):
        """For subclass to override."""
        raise NotImplementedError

    def do_delete(self, obj, **params):
        """For subclass to override."""
        raise NotImplementedError

    def do_update(self, obj, new_profile, **params):
        """For subclass to override."""
        LOG.warning("Update operation not supported.")
        return True

    def do_check(self, obj):
        """For subclass to override."""
        LOG.warning("Check operation not supported.")
        return True

    def do_get_details(self, obj):
        """For subclass to override."""
        LOG.warning("Get_details operation not supported.")
        return {}

    def do_adopt(self, obj, overrides=None, snapshot=False):
        """For subclass to overrid."""
        LOG.warning("Adopt operation not supported.")
        return {}

    def do_join(self, obj, cluster_id):
        """For subclass to override to perform extra operations."""
        LOG.warning("Join operation not specialized.")
        return True

    def do_leave(self, obj):
        """For subclass to override to perform extra operations."""
        LOG.warning("Leave operation not specialized.")
        return True

    def do_recover(self, obj, **options):
        """Default recover operation.

        This is provided as a fallback if a specific profile type does not
        override this method.

        :param obj: The node object to operate on.
        :param options: Keyword arguments for the recover operation.
        """
        operation = options.pop('operation', None)

        # The operation is a list of action names with optional parameters
        if operation and not isinstance(operation, six.string_types):
            operation = operation[0]

        if operation and operation['name'] != consts.RECOVER_RECREATE:
            LOG.error("Recover operation not supported: %s", operation)
            return False

        extra_params = options.get('params', {})
        fence_compute = extra_params.get('fence_compute', False)
        try:
            self.do_delete(obj, force=fence_compute)
        except exc.EResourceDeletion as ex:
            raise exc.EResourceOperation(op='recovering',
                                         type='node',
                                         id=obj.id,
                                         message=six.text_type(ex))
        res = None
        try:
            res = self.do_create(obj)
        except exc.EResourceCreation as ex:
            raise exc.EResourceOperation(op='recovering',
                                         type='node',
                                         id=obj.id,
                                         message=six.text_type(ex))
        return res

    def do_validate(self, obj):
        """For subclass to override."""
        LOG.warning("Validate operation not supported.")
        return True

    def to_dict(self):
        pb_dict = {
            'id': self.id,
            'name': self.name,
            'type': self.type,
            'user': self.user,
            'project': self.project,
            'domain': self.domain,
            'spec': self.spec,
            'metadata': self.metadata,
            'created_at': utils.isotime(self.created_at),
            'updated_at': utils.isotime(self.updated_at),
        }
        return pb_dict

    def validate_for_update(self, new_profile):
        non_updatables = []
        for (k, v) in new_profile.properties.items():
            if self.properties.get(k, None) != v:
                if not self.properties_schema[k].updatable:
                    non_updatables.append(k)

        if not non_updatables:
            return True

        msg = ", ".join(non_updatables)
        LOG.error("The following properties are not updatable: %s.", msg)
        return False
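Two details of this base class are worth calling out. First, __new__ turns the
spec's "type-version" string into a registry lookup so that Profile(name,
spec) returns an instance of the matching subclass; second, subclasses only
implement the do_*() hooks while loading, storing and client construction stay
in the base class. A self-contained sketch of the dispatch idea (the registry
dict below is illustrative; the real lookup goes through
environment.global_env()):

# Illustrative registry-based dispatch mirroring Profile.__new__ above.
_REGISTRY = {}


class BaseProfile(object):
    def __new__(cls, name, spec, **kwargs):
        type_str = "%s-%s" % (spec['type'], spec['version'])
        # Dispatch only when instantiated through the base class.
        target = _REGISTRY[type_str] if cls is BaseProfile else cls
        return super(BaseProfile, cls).__new__(target)

    def __init__(self, name, spec, **kwargs):
        self.name = name
        self.spec = spec


class DummyProfile(BaseProfile):
    def do_create(self, obj):
        return "physical-id-for-%s" % obj


_REGISTRY['dummy-1.0'] = DummyProfile

p = BaseProfile('web-profile', {'type': 'dummy', 'version': '1.0'})
assert isinstance(p, DummyProfile)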
Example No. 18
class ThresholdAlarm(Alarm):

    rule_schema = {
        METER_NAME:
        schema.String(
            _('Name of a meter to evaluate against.'),
            required=True,
        ),
        OPERATOR:
        schema.String(
            _('Comparison operator for evaluation.'),
            constraints=[
                constraints.AllowedValues(OPERATOR_VALUES),
            ],
            default=OP_EQUAL,
        ),
        THRESHOLD:
        schema.Number(_('Threshold for evaluation.'), required=True),
        PERIOD:
        schema.Integer(
            _('Length of every evaluation period in seconds.'),
            default=60,
        ),
        EVALUATIONS:
        schema.Integer(
            _('Number of periods to evaluate over.'),
            default=1,
        ),
        STATISTIC:
        schema.String(
            _('Statistics to evaluate. Must be one of %s; defaults to "avg".') %
            list(STATISTIC_VALUES),
            constraints=[
                constraints.AllowedValues(STATISTIC_VALUES),
            ],
            default=SV_AVG,
        ),
        QUERY:
        schema.List(
            _('The query to find the data for computing statistics.'),
            schema=schema.Map(
                schema={
                    Q_FIELD:
                    schema.String(
                        _('A field of a meter to query.'),
                        required=True,
                    ),
                    Q_OP:
                    schema.String(
                        _('An operator for meter comparison.'),
                        default='==',
                    ),
                    Q_VALUE:
                    schema.String(
                        _('A value for comparison.'),
                        required=True,
                    )
                }),
        )
    }

    def __init__(self, name, spec, **kwargs):
        super(ThresholdAlarm, self).__init__(name, spec, **kwargs)
        rule_spec = spec.get('rule', {})
        self.rule = schema.Spec(self.rule_schema, rule_spec)
        self.namespace = 'threshold'
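The alarm itself is evaluated by the telemetry service, but the intent of the
rule schema is easy to state in code: aggregate a meter's samples with the
chosen statistic over each period, then compare the result against the
threshold. A conceptual sketch (the operator and statistic spellings below are
assumptions based on the constant names; the real allowed values come from
OPERATOR_VALUES and STATISTIC_VALUES, which are defined elsewhere):

import operator

OPS = {'lt': operator.lt, 'le': operator.le, 'eq': operator.eq,
       'ne': operator.ne, 'ge': operator.ge, 'gt': operator.gt}
STATS = {'avg': lambda s: sum(s) / float(len(s)),
         'min': min, 'max': max, 'sum': sum, 'count': len}


def evaluate(samples, statistic, op, threshold):
    """Return True if the aggregated samples trip the threshold."""
    return OPS[op](STATS[statistic](samples), threshold)


# The average of one period's samples (80.0) exceeds the 75.0 threshold.
assert evaluate([70, 80, 90], 'avg', 'gt', 75.0)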
Example No. 19
class ScalingPolicy(base.Policy):
    """Policy for changing the size of a cluster.

    This policy is expected to be enforced before the node count of a cluster
    is changed.
    """

    VERSION = '1.0'

    PRIORITY = 100

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
    ]

    PROFILE_TYPE = [
        'ANY',
    ]

    KEYS = (
        EVENT,
        ADJUSTMENT,
    ) = (
        'event',
        'adjustment',
    )

    _SUPPORTED_EVENTS = (
        CLUSTER_SCALE_IN,
        CLUSTER_SCALE_OUT,
    ) = (
        consts.CLUSTER_SCALE_IN,
        consts.CLUSTER_SCALE_OUT,
    )

    _ADJUSTMENT_KEYS = (
        ADJUSTMENT_TYPE,
        ADJUSTMENT_NUMBER,
        MIN_STEP,
        BEST_EFFORT,
        COOLDOWN,
    ) = (
        'type',
        'number',
        'min_step',
        'best_effort',
        'cooldown',
    )

    properties_schema = {
        EVENT:
        schema.String(
            _('Event that will trigger this policy. Must be one of '
              'CLUSTER_SCALE_IN and CLUSTER_SCALE_OUT.'),
            constraints=[
                constraints.AllowedValues(_SUPPORTED_EVENTS),
            ],
            required=True,
        ),
        ADJUSTMENT:
        schema.Map(
            _('Detailed specification for scaling adjustments.'),
            schema={
                ADJUSTMENT_TYPE:
                schema.String(
                    _('Type of adjustment when scaling is triggered.'),
                    constraints=[
                        constraints.AllowedValues(consts.ADJUSTMENT_TYPES),
                    ],
                    default=consts.CHANGE_IN_CAPACITY,
                ),
                ADJUSTMENT_NUMBER:
                schema.Number(
                    _('A number specifying the amount of adjustment.'),
                    default=1,
                ),
                MIN_STEP:
                schema.Integer(
                    _('When adjustment type is set to "CHANGE_IN_PERCENTAGE",'
                      ' this specifies that the cluster size will be changed '
                      'by at least this number of nodes.'),
                    default=1,
                ),
                BEST_EFFORT:
                schema.Boolean(
                    _('Whether to do best-effort scaling when the new size '
                      'of the cluster would break the size limitation.'),
                    default=False,
                ),
                COOLDOWN:
                schema.Integer(
                    _('Number of seconds to hold the cluster for cool-down '
                      'before allowing cluster to be resized again.'),
                    default=0,
                ),
            }),
    }

    def __init__(self, name, spec, **kwargs):
        """Intialize a scaling policy object.

        :param name: Name for the policy object.
        :param spec: A dictionary containing the detailed specification for
                     the policy.
        :param \*\*kwargs: Other optional parameters for policy object
                           creation.
        :return: An object of `ScalingPolicy`.
        """
        super(ScalingPolicy, self).__init__(name, spec, **kwargs)

        self.singleton = False

        self.event = self.properties[self.EVENT]

        adjustment = self.properties[self.ADJUSTMENT]
        self.adjustment_type = adjustment[self.ADJUSTMENT_TYPE]
        self.adjustment_number = adjustment[self.ADJUSTMENT_NUMBER]
        self.adjustment_min_step = adjustment[self.MIN_STEP]

        self.best_effort = adjustment[self.BEST_EFFORT]
        self.cooldown = adjustment[self.COOLDOWN]

    def _calculate_adjustment_count(self, current_size):
        """Calculate adjustment count based on current_size.

        :param current_size: The current size of the target cluster.
        :return: The number of nodes to add or to remove.
        """

        if self.adjustment_type == consts.EXACT_CAPACITY:
            if self.event == consts.CLUSTER_SCALE_IN:
                count = current_size - self.adjustment_number
            else:
                count = self.adjustment_number - current_size
        elif self.adjustment_type == consts.CHANGE_IN_CAPACITY:
            count = self.adjustment_number
        else:  # consts.CHANGE_IN_PERCENTAGE:
            count = int((self.adjustment_number * current_size) / 100.0)
            if count < self.adjustment_min_step:
                count = self.adjustment_min_step

        return count

    def pre_op(self, cluster_id, action):
        """The hook function that is executed before the action.

        The checking result is stored in the ``data`` property of the action
        object rather than returned directly from the function.

        :param cluster_id: The ID of the target cluster.
        :param action: Action instance against which the policy is being
                       checked.
        :return: None.
        """

        # Use action input if count is provided
        count = action.inputs.get('count', None)
        current = no.Node.count_by_cluster(action.context, cluster_id)
        if count is None:
            # count not specified, calculate it
            count = self._calculate_adjustment_count(current)

        # Count must be positive value
        try:
            count = utils.parse_int_param('count', count, allow_zero=False)
        except exception.InvalidParameter:
            action.data.update({
                'status': base.CHECK_ERROR,
                'reason': _("Invalid count (%(c)s) for action '%(a)s'.") % {
                    'c': count,
                    'a': action.action
                }
            })
            action.store(action.context)
            return

        # Check size constraints
        cluster = db_api.cluster_get(action.context, cluster_id)
        if action.action == consts.CLUSTER_SCALE_IN:
            if self.best_effort:
                count = min(count, current - cluster.min_size)
            result = su.check_size_params(cluster,
                                          current - count,
                                          strict=not self.best_effort)
        else:
            if self.best_effort:
                count = min(count, cluster.max_size - current)
            result = su.check_size_params(cluster,
                                          current + count,
                                          strict=not self.best_effort)

        if result:
            # failed validation
            pd = {'status': base.CHECK_ERROR, 'reason': result}
        else:
            # passed validation
            pd = {
                'status': base.CHECK_OK,
                'reason': _('Scaling request validated.'),
            }
            if action.action == consts.CLUSTER_SCALE_IN:
                pd['deletion'] = {'count': count}
            else:
                pd['creation'] = {'count': count}

        action.data.update(pd)
        action.store(action.context)

        return

    def need_check(self, target, action):
        res = super(ScalingPolicy, self).need_check(target, action)
        if res:
            # Check if the action is expected by the policy
            res = (self.event == action.action)

        return res
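The arithmetic in _calculate_adjustment_count is easy to verify in isolation.
A standalone sketch with the consts values inlined as plain strings:

def calc_count(adj_type, number, min_step, current, scale_in):
    # Mirrors _calculate_adjustment_count above, with constants inlined.
    if adj_type == 'EXACT_CAPACITY':
        return current - number if scale_in else number - current
    if adj_type == 'CHANGE_IN_CAPACITY':
        return number
    # CHANGE_IN_PERCENTAGE: truncate toward zero, then enforce min_step.
    count = int((number * current) / 100.0)
    return max(count, min_step)


# 10% of a 15-node cluster is 1.5, truncated to 1, then raised to min_step 2.
assert calc_count('CHANGE_IN_PERCENTAGE', 10, 2, 15, scale_in=False) == 2
# Scaling in to an exact capacity of 3 from 5 nodes removes 2 nodes.
assert calc_count('EXACT_CAPACITY', 3, 1, 5, scale_in=True) == 2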
Example No. 20
class HealthPolicy(base.Policy):
    """Policy for health management of a cluster."""

    VERSION = '1.0'
    VERSIONS = {
        '1.0': [
            {
                'status': consts.EXPERIMENTAL,
                'since': '2017.02'
            },
            {
                'status': consts.SUPPORTED,
                'since': '2018.06'
            },
        ]
    }
    PRIORITY = 600

    TARGET = [
        ('BEFORE', consts.CLUSTER_RECOVER),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.NODE_DELETE),
        ('AFTER', consts.CLUSTER_DEL_NODES),
        ('AFTER', consts.CLUSTER_SCALE_IN),
        ('AFTER', consts.CLUSTER_RESIZE),
        ('AFTER', consts.NODE_DELETE),
    ]

    # Should this be 'ANY' when the profile type provides health check support?
    PROFILE_TYPE = [
        'os.nova.server',
        'os.heat.stack',
    ]

    KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery')

    _DETECTION_KEYS = (
        DETECTION_TYPE,
        DETECTION_OPTIONS,
    ) = ('type', 'options')

    _DETECTION_OPTIONS = (
        DETECTION_INTERVAL,
        POLL_URL,
        POLL_URL_SSL_VERIFY,
        POLL_URL_HEALTHY_RESPONSE,
        POLL_URL_RETRY_LIMIT,
        POLL_URL_RETRY_INTERVAL,
        NODE_UPDATE_TIMEOUT,
    ) = (
        'interval',
        'poll_url',
        'poll_url_ssl_verify',
        'poll_url_healthy_response',
        'poll_url_retry_limit',
        'poll_url_retry_interval',
        'node_update_timeout',
    )

    _RECOVERY_KEYS = (
        RECOVERY_ACTIONS,
        RECOVERY_FENCING,
        RECOVERY_DELETE_TIMEOUT,
        RECOVERY_FORCE_RECREATE,
    ) = (
        'actions',
        'fencing',
        'node_delete_timeout',
        'node_force_recreate',
    )

    FENCING_OPTION_VALUES = (
        COMPUTE,
        # STORAGE, NETWORK,
    ) = (
        'COMPUTE',
        # 'STORAGE', 'NETWORK'
    )

    ACTION_KEYS = (
        ACTION_NAME,
        ACTION_PARAMS,
    ) = (
        'name',
        'params',
    )

    properties_schema = {
        DETECTION:
        schema.Map(
            _('Policy aspect for node failure detection.'),
            schema={
                DETECTION_TYPE:
                schema.String(
                    _('Type of node failure detection.'),
                    constraints=[
                        constraints.AllowedValues(consts.DETECTION_TYPES),
                    ],
                    required=True,
                ),
                DETECTION_OPTIONS:
                schema.Map(schema={
                    DETECTION_INTERVAL:
                    schema.Integer(
                        _("Number of seconds between pollings. Only "
                          "required when type is 'NODE_STATUS_POLLING' or "
                          "'NODE_STATUS_POLL_URL'."),
                        default=60,
                    ),
                    POLL_URL:
                    schema.String(
                        _("URL to poll for node status. See documentation "
                          "for valid expansion parameters. Only required "
                          "when type is 'NODE_STATUS_POLL_URL'."),
                        default='',
                    ),
                    POLL_URL_SSL_VERIFY:
                    schema.Boolean(
                        _("Whether to verify SSL when calling URL to poll "
                          "for node status. Only required when type is "
                          "'NODE_STATUS_POLL_URL'."),
                        default=True,
                    ),
                    POLL_URL_HEALTHY_RESPONSE:
                    schema.String(
                        _("String pattern in the poll URL response body "
                          "that indicates a healthy node. "
                          "Required when type is 'NODE_STATUS_POLL_URL'."),
                        default='',
                    ),
                    POLL_URL_RETRY_LIMIT:
                    schema.Integer(
                        _("Number of times to retry URL polling when its "
                          "return body is missing "
                          "POLL_URL_HEALTHY_RESPONSE string before a node "
                          "is considered down. Required when type is "
                          "'NODE_STATUS_POLL_URL'."),
                        default=3,
                    ),
                    POLL_URL_RETRY_INTERVAL:
                    schema.Integer(
                        _("Number of seconds between URL polling retries "
                          "before a node is considered down. "
                          "Required when type is 'NODE_STATUS_POLL_URL'."),
                        default=3,
                    ),
                    NODE_UPDATE_TIMEOUT:
                    schema.Integer(
                        _("Number of seconds since last node update to "
                          "wait before checking node health. "
                          "Required when type is 'NODE_STATUS_POLL_URL'."),
                        default=300,
                    ),
                },
                           default={}),
            },
            required=True,
        ),
        RECOVERY:
        schema.Map(
            _('Policy aspect for node failure recovery.'),
            schema={
                RECOVERY_ACTIONS:
                schema.List(_('List of actions to try for node recovery.'),
                            schema=schema.Map(
                                _('Action to try for node recovery.'),
                                schema={
                                    ACTION_NAME:
                                    schema.String(
                                        _("Name of action to execute."),
                                        constraints=[
                                            constraints.AllowedValues(
                                                consts.RECOVERY_ACTIONS),
                                        ],
                                        required=True),
                                    ACTION_PARAMS:
                                    schema.Map(_("Parameters for the action")),
                                })),
                RECOVERY_FENCING:
                schema.List(
                    _('List of services to be fenced.'),
                    schema=schema.String(
                        _('Service to be fenced.'),
                        constraints=[
                            constraints.AllowedValues(FENCING_OPTION_VALUES),
                        ],
                        required=True,
                    ),
                ),
                RECOVERY_DELETE_TIMEOUT:
                schema.Integer(
                    _("Number of seconds to wait for node deletion to "
                      "finish and start node creation for recreate "
                      "recovery option. Required when type is "
                      "'NODE_STATUS_POLL_URL and recovery action "
                      "is RECREATE'."),
                    default=20,
                ),
                RECOVERY_FORCE_RECREATE:
                schema.Boolean(
                    _("Whether to create node even if node deletion "
                      "failed. Required when type is "
                      "'NODE_STATUS_POLL_URL' and action recovery "
                      "action is RECREATE."),
                    default=False,
                ),
            }),
    }

    def __init__(self, name, spec, **kwargs):
        super(HealthPolicy, self).__init__(name, spec, **kwargs)

        self.check_type = self.properties[self.DETECTION][self.DETECTION_TYPE]

        options = self.properties[self.DETECTION][self.DETECTION_OPTIONS]
        self.interval = options.get(self.DETECTION_INTERVAL, 60)
        self.poll_url = options.get(self.POLL_URL, '')
        self.poll_url_ssl_verify = options.get(self.POLL_URL_SSL_VERIFY, True)
        self.poll_url_healthy_response = options.get(
            self.POLL_URL_HEALTHY_RESPONSE, '')
        self.poll_url_retry_limit = options.get(self.POLL_URL_RETRY_LIMIT, '')
        self.poll_url_retry_interval = options.get(
            self.POLL_URL_RETRY_INTERVAL, '')
        self.node_update_timeout = options.get(self.NODE_UPDATE_TIMEOUT, 300)

        recover_settings = self.properties[self.RECOVERY]
        self.recover_actions = recover_settings[self.RECOVERY_ACTIONS]
        self.fencing_types = recover_settings[self.RECOVERY_FENCING]
        self.node_delete_timeout = recover_settings.get(
            self.RECOVERY_DELETE_TIMEOUT, None)
        self.node_force_recreate = recover_settings.get(
            self.RECOVERY_FORCE_RECREATE, False)

    def validate(self, context, validate_props=False):
        super(HealthPolicy, self).validate(context,
                                           validate_props=validate_props)

        if len(self.recover_actions) > 1:
            message = _(
                "Only one '%s' is supported for now.") % self.RECOVERY_ACTIONS
            raise exc.ESchema(message=message)

        if self.interval < cfg.CONF.health_check_interval_min:
            message = _("Specified interval of %(interval)d seconds has to be "
                        "larger than health_check_interval_min of "
                        "%(min_interval)d seconds set in configuration.") % {
                            "interval": self.interval,
                            "min_interval": cfg.CONF.health_check_interval_min
                        }
            raise exc.InvalidSpec(message=message)

        # TODO(Qiming): Add detection of duplicated action names when
        # support to list of actions is implemented.

    def attach(self, cluster, enabled=True):
        """"Hook for policy attach.

        Register the cluster for health management.

        :param cluster: The cluster to which the policy is being attached.
        :param enabled: Whether the attached cluster policy is enabled.
        :return: A tuple comprising execution result and policy data.
        """
        p_type = cluster.rt['profile'].type_name
        action_names = [a['name'] for a in self.recover_actions]
        if p_type != 'os.nova.server':
            if consts.RECOVER_REBUILD in action_names:
                err_msg = _("Recovery action REBUILD is only applicable to "
                            "os.nova.server clusters.")
                return False, err_msg

            if consts.RECOVER_REBOOT in action_names:
                err_msg = _("Recovery action REBOOT is only applicable to "
                            "os.nova.server clusters.")
                return False, err_msg

        kwargs = {
            'check_type': self.check_type,
            'interval': self.interval,
            'params': {
                'recover_action': self.recover_actions,
                'poll_url': self.poll_url,
                'poll_url_ssl_verify': self.poll_url_ssl_verify,
                'poll_url_healthy_response': self.poll_url_healthy_response,
                'poll_url_retry_limit': self.poll_url_retry_limit,
                'poll_url_retry_interval': self.poll_url_retry_interval,
                'node_update_timeout': self.node_update_timeout,
                'node_delete_timeout': self.node_delete_timeout,
                'node_force_recreate': self.node_force_recreate,
            },
            'enabled': enabled
        }

        health_manager.register(cluster.id, engine_id=None, **kwargs)

        data = {
            'check_type': self.check_type,
            'interval': self.interval,
            'poll_url': self.poll_url,
            'poll_url_ssl_verify': self.poll_url_ssl_verify,
            'poll_url_healthy_response': self.poll_url_healthy_response,
            'poll_url_retry_limit': self.poll_url_retry_limit,
            'poll_url_retry_interval': self.poll_url_retry_interval,
            'node_update_timeout': self.node_update_timeout,
            'node_delete_timeout': self.node_delete_timeout,
            'node_force_recreate': self.node_force_recreate,
        }

        return True, self._build_policy_data(data)

    def detach(self, cluster):
        """Hook for policy detach.

        Unregister the cluster for health management.
        :param cluster: The target cluster.
        :returns: A tuple comprising the execution result and reason.
        """
        health_manager.unregister(cluster.id)
        return True, ''

    def pre_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the tasks for this routine is to disable the health policy if
        the action is a request that will shrink the cluster, because the
        policy might otherwise attempt to recover nodes that are about to be
        deleted.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param dict args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.disable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.disable(cluster_id)
                return True

            cluster = action.entity
            current = len(cluster.nodes)
            res, reason = scaleutils.parse_resize_params(
                action, cluster, current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.disable(cluster_id)
                return True

        pd = {
            'recover_action': self.recover_actions,
            'fencing': self.fencing_types,
        }
        action.data.update({'health': pd})
        action.store(action.context)

        return True

    def post_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the task for this routine is to re-enable health policy if the
        action is a request that will shrink the cluster thus the policy has
        been temporarily disabled.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param dict args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.enable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.enable(cluster_id)
                return True

            cluster = action.entity
            current = len(cluster.nodes)
            res, reason = scaleutils.parse_resize_params(
                action, cluster, current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.enable(cluster_id)
                return True

        return True
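Putting the schema above together, a spec for this policy could look like the
following. Only key names and values that appear in the schema are used; the
policy type name in the envelope is an assumption for illustration:

# An illustrative health policy spec; 'senlin.policy.health' is assumed.
spec = {
    'type': 'senlin.policy.health',
    'version': '1.0',
    'properties': {
        'detection': {
            'type': 'NODE_STATUS_POLLING',
            'options': {
                'interval': 120,
            },
        },
        'recovery': {
            'actions': [
                {'name': 'RECREATE'},
            ],
            'fencing': ['COMPUTE'],
        },
    },
}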
Example No. 21
class ScalingPolicy(base.Policy):
    """Policy for changing the size of a cluster.

    This policy is expected to be enforced before the node count of a cluster
    is changed.
    """

    VERSION = '1.0'
    VERSIONS = {'1.0': [{'status': consts.SUPPORTED, 'since': '2016.04'}]}

    PRIORITY = 100

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
        ('AFTER', consts.CLUSTER_SCALE_IN),
        ('AFTER', consts.CLUSTER_SCALE_OUT),
    ]

    PROFILE_TYPE = [
        'ANY',
    ]

    KEYS = (
        EVENT,
        ADJUSTMENT,
    ) = (
        'event',
        'adjustment',
    )

    _SUPPORTED_EVENTS = (
        CLUSTER_SCALE_IN,
        CLUSTER_SCALE_OUT,
    ) = (
        consts.CLUSTER_SCALE_IN,
        consts.CLUSTER_SCALE_OUT,
    )

    _ADJUSTMENT_KEYS = (
        ADJUSTMENT_TYPE,
        ADJUSTMENT_NUMBER,
        MIN_STEP,
        BEST_EFFORT,
        COOLDOWN,
    ) = (
        'type',
        'number',
        'min_step',
        'best_effort',
        'cooldown',
    )

    properties_schema = {
        EVENT:
        schema.String(
            _('Event that will trigger this policy. Must be one of '
              'CLUSTER_SCALE_IN and CLUSTER_SCALE_OUT.'),
            constraints=[
                constraints.AllowedValues(_SUPPORTED_EVENTS),
            ],
            required=True,
        ),
        ADJUSTMENT:
        schema.Map(
            _('Detailed specification for scaling adjustments.'),
            schema={
                ADJUSTMENT_TYPE:
                schema.String(
                    _('Type of adjustment when scaling is triggered.'),
                    constraints=[
                        constraints.AllowedValues(consts.ADJUSTMENT_TYPES),
                    ],
                    default=consts.CHANGE_IN_CAPACITY,
                ),
                ADJUSTMENT_NUMBER:
                schema.Number(
                    _('A number specifying the amount of adjustment.'),
                    default=1,
                ),
                MIN_STEP:
                schema.Integer(
                    _('When adjustment type is set to "CHANGE_IN_PERCENTAGE",'
                      ' this specifies that the cluster size will be changed '
                      'by at least this number of nodes.'),
                    default=1,
                ),
                BEST_EFFORT:
                schema.Boolean(
                    _('Whether to do best-effort scaling when the new size '
                      'of the cluster would break the size limitation.'),
                    default=False,
                ),
                COOLDOWN:
                schema.Integer(
                    _('Number of seconds to hold the cluster for cool-down '
                      'before allowing cluster to be resized again.'),
                    default=0,
                ),
            }),
    }

    def __init__(self, name, spec, **kwargs):
        """Initialize a scaling policy object.

        :param name: Name for the policy object.
        :param spec: A dictionary containing the detailed specification for
                     the policy.
        :param dict kwargs: Other optional parameters for policy object
                            creation.
        :return: An object of `ScalingPolicy`.
        """
        super(ScalingPolicy, self).__init__(name, spec, **kwargs)

        self.singleton = False

        self.event = self.properties[self.EVENT]

        adjustment = self.properties[self.ADJUSTMENT]
        self.adjustment_type = adjustment[self.ADJUSTMENT_TYPE]
        self.adjustment_number = adjustment[self.ADJUSTMENT_NUMBER]
        self.adjustment_min_step = adjustment[self.MIN_STEP]

        self.best_effort = adjustment[self.BEST_EFFORT]
        self.cooldown = adjustment[self.COOLDOWN]

    def validate(self, context, validate_props=False):
        super(ScalingPolicy, self).validate(context, validate_props)

        if self.adjustment_number <= 0:
            msg = _("the 'number' for 'adjustment' must be > 0")
            raise exc.InvalidSpec(message=msg)

        if self.adjustment_min_step < 0:
            msg = _("the 'min_step' for 'adjustment' must be >= 0")
            raise exc.InvalidSpec(message=msg)

        if self.cooldown < 0:
            msg = _("the 'cooldown' for 'adjustment' must be >= 0")
            raise exc.InvalidSpec(message=msg)

    def _calculate_adjustment_count(self, current_size):
        """Calculate adjustment count based on current_size.

        :param current_size: The current size of the target cluster.
        :return: The number of nodes to add or to remove.
        """

        if self.adjustment_type == consts.EXACT_CAPACITY:
            if self.event == consts.CLUSTER_SCALE_IN:
                count = current_size - self.adjustment_number
            else:
                count = self.adjustment_number - current_size
        elif self.adjustment_type == consts.CHANGE_IN_CAPACITY:
            count = self.adjustment_number
        else:  # consts.CHANGE_IN_PERCENTAGE:
            count = int((self.adjustment_number * current_size) / 100.0)
            if count < self.adjustment_min_step:
                count = self.adjustment_min_step

        return count

    def pre_op(self, cluster_id, action):
        """The hook function that is executed before the action.

        The checking result is stored in the ``data`` property of the action
        object rather than returned directly from the function.

        :param cluster_id: The ID of the target cluster.
        :param action: Action instance against which the policy is being
                       checked.
        :return: None.
        """

        # check cooldown
        last_op = action.inputs.get('last_op', None)
        if last_op and not timeutils.is_older_than(last_op, self.cooldown):
            action.data.update({
                'status':
                base.CHECK_ERROR,
                'reason':
                _('Policy %s cooldown is still '
                  'in progress.') % self.id
            })
            action.store(action.context)
            return

        # Use action input if count is provided
        count_value = action.inputs.get('count', None)
        cluster = action.entity
        current = len(cluster.nodes)

        if count_value is None:
            # count not specified, calculate it
            count_value = self._calculate_adjustment_count(current)

        # Count must be positive value
        success, count = utils.get_positive_int(count_value)
        if not success:
            action.data.update({
                'status': base.CHECK_ERROR,
                'reason': _("Invalid count (%(c)s) for action '%(a)s'.") % {
                    'c': count_value,
                    'a': action.action
                }
            })
            action.store(action.context)
            return

        # Check size constraints
        max_size = cluster.max_size
        if max_size == -1:
            max_size = cfg.CONF.max_nodes_per_cluster
        if action.action == consts.CLUSTER_SCALE_IN:
            if self.best_effort:
                count = min(count, current - cluster.min_size)
            result = su.check_size_params(cluster,
                                          current - count,
                                          strict=not self.best_effort)
        else:
            if self.best_effort:
                count = min(count, max_size - current)
            result = su.check_size_params(cluster,
                                          current + count,
                                          strict=not self.best_effort)

        if result:
            # failed validation
            pd = {'status': base.CHECK_ERROR, 'reason': result}
        else:
            # passed validation
            pd = {
                'status': base.CHECK_OK,
                'reason': _('Scaling request validated.'),
            }
            if action.action == consts.CLUSTER_SCALE_IN:
                pd['deletion'] = {'count': count}
            else:
                pd['creation'] = {'count': count}

        action.data.update(pd)
        action.store(action.context)

        return

    def post_op(self, cluster_id, action):
        # update last_op for next cooldown check
        ts = timeutils.utcnow(True)
        cpo.ClusterPolicy.update(action.context, cluster_id, self.id,
                                 {'last_op': ts})

    def need_check(self, target, action):
        # check if target + action matches policy targets
        if not super(ScalingPolicy, self).need_check(target, action):
            return False

        if target == 'BEFORE':
            # Scaling policy BEFORE check should only be triggered if the
            # incoming action matches the specific policy event.
            # E.g. for scale-out policy the BEFORE check to select nodes for
            # termination should only run for scale-out actions.
            return self.event == action.action
        else:
            # Scaling policy AFTER check to reset cooldown timer should be
            # triggered for all supported policy events (both scale-in and
            # scale-out).  E.g. a scale-out policy should reset cooldown timer
            # whenever scale-out or scale-in action completes.
            return action.action in list(self._SUPPORTED_EVENTS)
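Compared with the earlier version of this policy (Example No. 19), this one
adds a cooldown gate in pre_op and resets the timer in post_op. The gate is
just a timestamp comparison; below is a standalone approximation of the
timeutils.is_older_than semantics used above:

import datetime


def cooldown_in_progress(last_op, cooldown_seconds, now=None):
    """True while fewer than cooldown_seconds have elapsed since last_op."""
    if not last_op:
        return False
    now = now or datetime.datetime.utcnow()
    return (now - last_op).total_seconds() <= cooldown_seconds


last = datetime.datetime.utcnow() - datetime.timedelta(seconds=30)
assert cooldown_in_progress(last, 300)      # still cooling down
assert not cooldown_in_progress(last, 10)   # cooldown already expired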
Example No. 22
class StackProfile(base.Profile):
    """Profile for an OpenStack Heat stack."""

    VERSIONS = {'1.0': [{'status': consts.SUPPORTED, 'since': '2016.04'}]}

    KEYS = (
        CONTEXT,
        TEMPLATE,
        TEMPLATE_URL,
        PARAMETERS,
        FILES,
        TIMEOUT,
        DISABLE_ROLLBACK,
        ENVIRONMENT,
    ) = (
        'context',
        'template',
        'template_url',
        'parameters',
        'files',
        'timeout',
        'disable_rollback',
        'environment',
    )

    properties_schema = {
        CONTEXT:
        schema.Map(
            _('A dictionary for specifying the customized context for '
              'stack operations'),
            default={},
        ),
        TEMPLATE:
        schema.Map(
            _('Heat stack template.'),
            default={},
            updatable=True,
        ),
        TEMPLATE_URL:
        schema.String(
            _('Heat stack template url.'),
            default='',
            updatable=True,
        ),
        PARAMETERS:
        schema.Map(
            _('Parameters to be passed to Heat for stack operations.'),
            default={},
            updatable=True,
        ),
        FILES:
        schema.Map(
            _('Contents of files referenced by the template, if any.'),
            default={},
            updatable=True,
        ),
        TIMEOUT:
        schema.Integer(
            _('An integer specifying the number of minutes after which a '
              'stack operation times out.'),
            updatable=True,
        ),
        DISABLE_ROLLBACK:
        schema.Boolean(
            _('A boolean specifying whether a stack operation can be '
              'rolled back.'),
            default=True,
            updatable=True,
        ),
        ENVIRONMENT:
        schema.Map(
            _('A map that specifies the environment used for stack '
              'operations.'),
            default={},
            updatable=True,
        )
    }

    OP_NAMES = (OP_ABANDON, ) = ('abandon', )

    OPERATIONS = {OP_ABANDON: schema.Map(_('Abandon a heat stack node.'), )}

    def __init__(self, type_name, name, **kwargs):
        super(StackProfile, self).__init__(type_name, name, **kwargs)
        self.stack_id = None

    def validate(self, validate_props=False):
        """Validate the schema and the data provided."""
        # general validation
        self.spec_data.validate()
        self.properties.validate()
        # validate template
        template = self.properties[self.TEMPLATE]
        template_url = self.properties[self.TEMPLATE_URL]
        if not template and not template_url:
            msg = _("Both template and template_url are not specified "
                    "for profile '%s'.") % self.name
            raise exc.InvalidSpec(message=msg)

        if validate_props:
            self.do_validate(obj=self)

    def do_validate(self, obj):
        """Validate the stack template used by a node.

        :param obj: Node object to operate.
        :returns: True if validation succeeds.
        :raises: `InvalidSpec` exception is raised if template is invalid.
        """
        kwargs = {
            'stack_name': utils.random_name(),
            'template': self.properties[self.TEMPLATE],
            'template_url': self.properties[self.TEMPLATE_URL],
            'parameters': self.properties[self.PARAMETERS],
            'files': self.properties[self.FILES],
            'environment': self.properties[self.ENVIRONMENT],
            'preview': True,
        }
        try:
            self.orchestration(obj).stack_create(**kwargs)
        except exc.InternalError as ex:
            msg = _('Failed in validating template: %s') % six.text_type(ex)
            raise exc.InvalidSpec(message=msg)

        return True

    def do_create(self, obj):
        """Create a heat stack using the given node object.

        :param obj: The node object to operate on.
        :returns: The UUID of the heat stack created.
        """
        tags = ["cluster_node_id=%s" % obj.id]
        if obj.cluster_id:
            tags.append('cluster_id=%s' % obj.cluster_id)
            tags.append('cluster_node_index=%s' % obj.index)
        kwargs = {
            'stack_name': obj.name + '-' + utils.random_name(8),
            'template': self.properties[self.TEMPLATE],
            'template_url': self.properties[self.TEMPLATE_URL],
            'timeout_mins': self.properties[self.TIMEOUT],
            'disable_rollback': self.properties[self.DISABLE_ROLLBACK],
            'parameters': self.properties[self.PARAMETERS],
            'files': self.properties[self.FILES],
            'environment': self.properties[self.ENVIRONMENT],
            'tags': ",".join(tags)
        }

        try:
            stack = self.orchestration(obj).stack_create(**kwargs)

            # A timeout of None means the 'default_action_timeout' is used;
            # it can be overridden by the TIMEOUT profile property.
            timeout = None
            if self.properties[self.TIMEOUT]:
                timeout = self.properties[self.TIMEOUT] * 60

            self.orchestration(obj).wait_for_stack(stack.id,
                                                   'CREATE_COMPLETE',
                                                   timeout=timeout)
            return stack.id
        except exc.InternalError as ex:
            raise exc.EResourceCreation(type='stack',
                                        message=six.text_type(ex))

    def do_delete(self, obj, **params):
        """Delete the physical stack behind the node object.

        :param obj: The node object to operate on.
        :param dict params: Optional keyword arguments for the delete
                            operation.
        :returns: True unless an exception is raised during deletion.
        :raises: `EResourceDeletion` if interaction with heat fails.
        """
        stack_id = obj.physical_id
        if not stack_id:
            return True

        ignore_missing = params.get('ignore_missing', True)
        try:
            self.orchestration(obj).stack_delete(stack_id, ignore_missing)
            self.orchestration(obj).wait_for_stack_delete(stack_id)
        except exc.InternalError as ex:
            raise exc.EResourceDeletion(type='stack',
                                        id=stack_id,
                                        message=six.text_type(ex))
        return True

    def do_update(self, obj, new_profile, **params):
        """Perform update on object.

        :param obj: the node object to operate on
        :param new_profile: the new profile used for updating
        :param params: other parameters for the update request.
        :returns: A boolean indicating whether the operation is successful.
        """
        self.stack_id = obj.physical_id
        if not self.stack_id:
            return False

        if not self.validate_for_update(new_profile):
            return False

        fields = {}
        new_template = new_profile.properties[new_profile.TEMPLATE]
        if new_template != self.properties[self.TEMPLATE]:
            fields['template'] = new_template

        new_params = new_profile.properties[new_profile.PARAMETERS]
        if new_params != self.properties[self.PARAMETERS]:
            fields['parameters'] = new_params

        new_timeout = new_profile.properties[new_profile.TIMEOUT]
        if new_timeout != self.properties[self.TIMEOUT]:
            fields['timeout_mins'] = new_timeout

        new_dr = new_profile.properties[new_profile.DISABLE_ROLLBACK]
        if new_dr != self.properties[self.DISABLE_ROLLBACK]:
            fields['disable_rollback'] = new_dr

        new_files = new_profile.properties[new_profile.FILES]
        if new_files != self.properties[self.FILES]:
            fields['files'] = new_files

        new_environment = new_profile.properties[new_profile.ENVIRONMENT]
        if new_environment != self.properties[self.ENVIRONMENT]:
            fields['environment'] = new_environment

        if not fields:
            return True

        try:
            hc = self.orchestration(obj)
            # A timeout of None means the 'default_action_timeout' is used;
            # it can be overridden by the TIMEOUT profile property.
            timeout = None
            if self.properties[self.TIMEOUT]:
                timeout = self.properties[self.TIMEOUT] * 60
            hc.stack_update(self.stack_id, **fields)
            hc.wait_for_stack(self.stack_id,
                              'UPDATE_COMPLETE',
                              timeout=timeout)
        except exc.InternalError as ex:
            raise exc.EResourceUpdate(type='stack',
                                      id=self.stack_id,
                                      message=six.text_type(ex))

        return True

    def do_check(self, obj):
        """Check stack status.

        :param obj: The node object to operate on.
        :returns: True if check succeeded, or False otherwise.
        """
        stack_id = obj.physical_id
        if stack_id is None:
            return False

        hc = self.orchestration(obj)
        try:
            # A timeout of None means the 'default_action_timeout' is used;
            # it can be overridden by the TIMEOUT profile property.
            timeout = None
            if self.properties[self.TIMEOUT]:
                timeout = self.properties[self.TIMEOUT] * 60
            hc.stack_check(stack_id)
            hc.wait_for_stack(stack_id, 'CHECK_COMPLETE', timeout=timeout)
        except exc.InternalError as ex:
            raise exc.EResourceOperation(op='checking',
                                         type='stack',
                                         id=stack_id,
                                         message=six.text_type(ex))

        return True

    def do_get_details(self, obj):
        if not obj.physical_id:
            return {}

        try:
            stack = self.orchestration(obj).stack_get(obj.physical_id)
            return stack.to_dict()
        except exc.InternalError as ex:
            return {'Error': {'code': ex.code, 'message': six.text_type(ex)}}

    def do_adopt(self, obj, overrides=None, snapshot=False):
        """Adopt an existing stack node for management.

        :param obj: A node object for this operation. It may be a puppet
            (stand-in) node that provides only 'user', 'project' and
            'physical_id' properties when doing a preview, or a real Node
            object for actual adoption.
        :param overrides: A dict containing the properties that will be
            overridden when generating a profile for the stack.
        :param snapshot: A boolean flag indicating whether the profile
            should attempt a snapshot operation before adopting the stack.
            Snapshot support is still a TODO (see below).

        :returns: A dict containing the spec created from the stack object or
            a dict containing error information if failure occurred.
        """
        driver = self.orchestration(obj)

        # TODO(Qiming): Add snapshot support
        # snapshot = driver.snapshot_create(...)

        try:
            stack = driver.stack_get(obj.physical_id)
            tmpl = driver.stack_get_template(obj.physical_id)
            env = driver.stack_get_environment(obj.physical_id)
            files = driver.stack_get_files(obj.physical_id)
        except exc.InternalError as ex:
            return {'Error': {'code': ex.code, 'message': six.text_type(ex)}}

        spec = {
            self.ENVIRONMENT:
            env.to_dict(),
            self.FILES:
            files,
            self.TEMPLATE:
            tmpl.to_dict(),
            self.PARAMETERS:
            {k: v for k, v in stack.parameters.items() if 'OS::' not in k},
            self.TIMEOUT:
            stack.timeout_mins,
            self.DISABLE_ROLLBACK:
            stack.is_rollback_disabled
        }
        if overrides:
            spec.update(overrides)

        return spec

    def _refresh_tags(self, current, node, add=False):
        """Refresh tag list.

        :param current: Current list of tags.
        :param node: The node object.
        :param add: Flag indicating whether new tags are added.
        :returns: (tags, updated) where tags contains a new list of tags and
                  updated indicates whether new tag list differs from the old
                  one.
        """
        tags = []
        for tag in current:
            if tag.startswith(('cluster_id=',
                               'cluster_node_id=',
                               'cluster_node_index=')):
                continue
            if tag.strip():
                tags.append(tag.strip())

        if add:
            tags.append('cluster_id=' + node.cluster_id)
            tags.append('cluster_node_id=' + node.id)
            tags.append('cluster_node_index=%s' % node.index)

        tag_str = ",".join(tags)
        return (tag_str, tags != current)
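
    # Illustration with hypothetical values: _refresh_tags(
    #     ['cluster_id=old', 'custom=keep'], node, add=True)
    # drops the stale cluster_* entries, keeps 'custom=keep', re-appends the
    # node's current cluster_id / cluster_node_id / cluster_node_index tags,
    # and reports updated=True because the list changed.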

    def do_join(self, obj, cluster_id):
        if not obj.physical_id:
            return False

        hc = self.orchestration(obj)
        try:
            stack = hc.stack_get(obj.physical_id)
            tags, updated = self._refresh_tags(stack.tags, obj, True)
            field = {'tags': tags}
            if updated:
                hc.stack_update(obj.physical_id, **field)
        except exc.InternalError as ex:
            LOG.error('Failed in updating stack tags: %s.', ex)
            return False

        return True

    def do_leave(self, obj):
        if not obj.physical_id:
            return False

        hc = self.orchestration(obj)
        try:
            stack = hc.stack_get(obj.physical_id)
            tags, updated = self._refresh_tags(stack.tags, obj, False)
            field = {'tags': tags}
            if updated:
                hc.stack_update(obj.physical_id, **field)
        except exc.InternalError as ex:
            LOG.error('Failed in updating stack tags: %s.', ex)
            return False

        return True

    def handle_abandon(self, obj, **options):
        """Handler for abandoning a heat stack node."""
        pass
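
A minimal usage sketch for the profile above, assuming the base Profile
class accepts the spec as a keyword argument; the type name, template body
and property values below are illustrative only:

spec = {
    'type': 'os.heat.stack',
    'version': '1.0',
    'properties': {
        # Either 'template' or 'template_url' must be set, otherwise
        # validate() raises InvalidSpec.
        'template': {
            'heat_template_version': '2016-04-08',
            'resources': {},
        },
        'parameters': {},
        'timeout': 60,            # minutes, per the TIMEOUT property
        'disable_rollback': True,
    },
}

profile = StackProfile('os.heat.stack-1.0', 'web-stack', spec=spec)
profile.validate()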
Example No. 23
    def test_invalid_constructor(self):
        self.assertRaises(exc.ESchema,
                          schema.String,
                          schema=schema.String('String'))
Example No. 24
class HealthPolicy(base.Policy):
    '''Policy for health management of a cluster.'''

    VERSION = '1.0'

    PRIORITY = 600

    TARGET = [
        ('BEFORE', consts.CLUSTER_CHECK),
        ('BEFORE', consts.CLUSTER_RECOVER),
    ]

    # TODO: should this be ANY if the profile provides health check support?
    PROFILE_TYPE = [
        'os.nova.server',
        'os.heat.stack',
    ]

    KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery')

    _DETECTION_KEYS = (
        DETECTION_TYPE,
        DETECTION_OPTIONS,
    ) = ('type', 'options')

    DETECTION_TYPES = (
        VM_LIFECYCLE_EVENTS,
        NODE_STATUS_POLLING,
        LB_STATUS_POLLING,
    ) = (
        'VM_LIFECYCLE_EVENTS',
        'NODE_STATUS_POLLING',
        'LB_STATUS_POLLING',
    )

    _DETECTION_OPTIONS = (DETECTION_INTERVAL, ) = ('interval', )

    _RECOVERY_KEYS = (RECOVERY_ACTIONS, RECOVERY_FENCING) = ('actions',
                                                             'fencing')

    RECOVERY_ACTION_VALUES = (REBOOT, REBUILD, MIGRATE, EVACUATE, RECREATE,
                              NOP) = (
                                  'REBOOT',
                                  'REBUILD',
                                  'MIGRATE',
                                  'EVACUATE',
                                  'RECREATE',
                                  'NOP',
                              )

    FENCING_OPTION_VALUES = (
        COMPUTE,
        STORAGE,
        NETWORK,
    ) = ('COMPUTE', 'STORAGE', 'NETWORK')

    properties_schema = {
        DETECTION:
        schema.Map(
            _('Policy aspect for node failure detection.'),
            schema={
                DETECTION_TYPE:
                schema.String(
                    _('Type of node failure detection.'),
                    constraints=[
                        constraints.AllowedValues(DETECTION_TYPES),
                    ],
                    required=True,
                ),
                DETECTION_OPTIONS:
                schema.Map(
                    schema={
                        DETECTION_INTERVAL:
                        schema.Integer(
                            _("Number of seconds between pollings. Only "
                              "required when type is 'NODE_STATUS_POLLING'."),
                            default=60,
                        ),
                    }),
            },
            required=True,
        ),
        RECOVERY:
        schema.Map(
            _('Policy aspect for node failure recovery.'),
            schema={
                RECOVERY_ACTIONS:
                schema.List(
                    _('List of actions to try for node recovery.'),
                    schema=schema.String(
                        _('Action to try for node recovery.'),
                        constraints=[
                            constraints.AllowedValues(RECOVERY_ACTION_VALUES),
                        ]),
                ),
                RECOVERY_FENCING:
                schema.List(
                    _('List of services to be fenced.'),
                    schema=schema.String(
                        _('Service to be fenced.'),
                        constraints=[
                            constraints.AllowedValues(FENCING_OPTION_VALUES),
                        ],
                    ),
                ),
            }),
    }

    def __init__(self, name, spec, **kwargs):
        super(HealthPolicy, self).__init__(name, spec, **kwargs)

        self.check_type = self.properties[self.DETECTION][self.DETECTION_TYPE]
        options = self.properties[self.DETECTION][self.DETECTION_OPTIONS]
        self.interval = options[self.DETECTION_INTERVAL]
        recover_settings = self.properties[self.RECOVERY]
        self.recover_actions = recover_settings[self.RECOVERY_ACTIONS]

    def attach(self, cluster):
        """"Hook for policy attach.

        Register the cluster for health management.
        """

        kwargs = {
            'check_type': self.check_type,
            'interval': self.interval,
            'params': {},
        }

        health_manager.register(cluster.id, engine_id=None, **kwargs)

        data = {
            'check_type': self.check_type,
            'interval': self.interval,
        }

        return True, self._build_policy_data(data)

    def detach(self, cluster):
        '''Hook for policy detach.

        Unregister the cluster for health management.
        '''

        health_manager.unregister(cluster.id)
        return True, ''

    def pre_op(self, cluster_id, action, **args):
        # Ignore actions that are not required to be processed at this stage
        if action.action != consts.CLUSTER_RECOVER:
            return True

        pd = {
            'recover_action': self.recover_actions[0],
        }
        action.data.update({'health': pd})
        action.store(action.context)

        return True

    def post_op(self, cluster_id, action, **args):
        # Ignore irrelevant action here
        if action.action not in (consts.CLUSTER_CHECK, consts.CLUSTER_RECOVER):
            return True

        # TODO(anyone): subscribe to vm-lifecycle-events for the specified VM
        #               or add vm to the list of VM status polling
        return True
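
For reference, a hypothetical spec that this policy would accept, following
the schema above (the 'senlin.policy.health' type name is an assumption):

spec = {
    'type': 'senlin.policy.health',
    'version': '1.0',
    'properties': {
        'detection': {
            'type': 'NODE_STATUS_POLLING',
            'options': {'interval': 120},   # poll every two minutes
        },
        'recovery': {
            'actions': ['REBUILD', 'RECREATE'],
            'fencing': ['COMPUTE'],
        },
    },
}

policy = HealthPolicy('poll-and-rebuild', spec)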
Example No. 25
    def test_validate(self):
        sot = schema.List(schema=schema.String())

        res = sot.validate(['abc', 'def'])

        self.assertIsNone(res)
Example No. 26
class Policy(object):
    '''Base class for policies.'''
    PROFILE_TYPE = ['ANY']

    KEYS = (
        TYPE, VERSION, DESCRIPTION, PROPERTIES,
    ) = (
        'type', 'version', 'description', 'properties',
    )

    spec_schema = {
        TYPE: schema.String(
            _('Name of the policy type.'),
            required=True,
        ),
        VERSION: schema.String(
            _('Version number of the policy type.'),
            required=True,
        ),
        DESCRIPTION: schema.String(
            _('A text description of policy.'),
            default='',
        ),
        PROPERTIES: schema.Map(
            _('Properties for the policy.'),
            required=True,
        )
    }

    properties_schema = {}

    def __new__(cls, name, spec, **kwargs):
        """Create a new policy of the appropriate class.

        :param name: The name for the policy.
        :param spec: A dictionary containing the spec for the policy.
        :param kwargs: Keyword arguments for policy creation.
        :returns: An instance of a specific sub-class of Policy.
        """
        type_name, version = schema.get_spec_version(spec)

        if cls != Policy:
            PolicyClass = cls
        else:
            PolicyClass = environment.global_env().get_policy(type_name)

        return super(Policy, cls).__new__(PolicyClass)

    def __init__(self, name, spec, **kwargs):
        """Initialize a policy instance.

        :param name: The name for the policy.
        :param spec: A dictionary containing the detailed policy spec.
        :param kwargs: Keyword arguments for initializing the policy.
        :returns: An instance of a specific sub-class of Policy.
        """

        type_name, version = schema.get_spec_version(spec)

        self.name = name
        self.spec = spec

        self.id = kwargs.get('id', None)
        self.type = kwargs.get('type', "%s-%s" % (type_name, version))
        self.user = kwargs.get('user')
        self.project = kwargs.get('project')
        self.domain = kwargs.get('domain')
        self.level = kwargs.get('level', SHOULD)
        self.cooldown = kwargs.get('cooldown', 0)
        self.data = kwargs.get('data', {})

        self.created_time = kwargs.get('created_time', None)
        self.updated_time = kwargs.get('updated_time', None)
        self.deleted_time = kwargs.get('deleted_time', None)

        self.spec_data = schema.Spec(self.spec_schema, spec)
        self.properties = schema.Spec(self.properties_schema,
                                      self.spec.get(self.PROPERTIES, {}))
        self.singleton = True

    @classmethod
    def _from_db_record(cls, record):
        '''Construct a policy object from a database record.'''

        kwargs = {
            'id': record.id,
            'type': record.type,
            'user': record.user,
            'project': record.project,
            'domain': record.domain,
            'level': record.level,
            'cooldown': record.cooldown,
            'created_time': record.created_time,
            'updated_time': record.updated_time,
            'deleted_time': record.deleted_time,
            'data': record.data,
        }

        return cls(record.name, record.spec, **kwargs)

    @classmethod
    def load(cls, context, policy_id=None, db_policy=None):
        """Retrieve and reconstruct a policy object from DB.

        :param context: DB context for object retrieval.
        :param policy_id: Optional parameter specifying the ID of policy.
        :param db_policy: Optional parameter referencing a policy DB object.
        :returns: An object of the proper policy class.
        """
        if db_policy is None:
            db_policy = db_api.policy_get(context, policy_id)
            if db_policy is None:
                raise exception.PolicyNotFound(policy=policy_id)

        return cls._from_db_record(db_policy)

    @classmethod
    def load_all(cls, context, limit=None, sort_keys=None, marker=None,
                 sort_dir=None, filters=None, show_deleted=False):
        '''Retrieve all policies from database.'''

        records = db_api.policy_get_all(context, limit=limit, marker=marker,
                                        sort_keys=sort_keys,
                                        sort_dir=sort_dir,
                                        filters=filters,
                                        show_deleted=show_deleted)

        for record in records:
            yield cls._from_db_record(record)

    @classmethod
    def delete(cls, context, policy_id):
        db_api.policy_delete(context, policy_id)

    def store(self, context):
        '''Store the policy object into database table.'''
        timestamp = timeutils.utcnow()

        values = {
            'name': self.name,
            'type': self.type,
            'user': self.user,
            'project': self.project,
            'domain': self.domain,
            'spec': self.spec,
            'level': self.level,
            'cooldown': self.cooldown,
            'data': self.data,
        }

        if self.id is not None:
            self.updated_time = timestamp
            values['updated_time'] = timestamp
            db_api.policy_update(context, self.id, values)
        else:
            self.created_time = timestamp
            values['created_time'] = timestamp
            policy = db_api.policy_create(context, values)
            self.id = policy.id

        return self.id

    def validate(self):
        '''Validate the schema and the data provided.'''
        self.spec_data.validate()
        self.properties.validate()

    @classmethod
    def get_schema(cls):
        return {name: dict(value)
                for name, value in cls.properties_schema.items()}

    def _build_policy_data(self, data):
        clsname = self.__class__.__name__
        version = self.VERSION
        result = {
            clsname: {
                'version': version,
                'data': data,
            }
        }
        return result

    def _extract_policy_data(self, policy_data):
        clsname = self.__class__.__name__
        if clsname not in policy_data:
            return None
        data = policy_data.get(clsname)
        if 'version' not in data or data['version'] != self.VERSION:
            return None

        return data.get('data', None)
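
    # Round-trip illustration: _build_policy_data({'k': 1}) produces
    # {'<ClassName>': {'version': VERSION, 'data': {'k': 1}}}, and
    # _extract_policy_data() applied to that dict returns {'k': 1} again,
    # or None on a class-name or version mismatch.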

    def attach(self, cluster):
        '''Method to be invoked before policy is attached to a cluster.

        :param cluster: the cluster to which the policy is being attached.
        :returns: (True, message) if the operation is successful, or (False,
                 error) otherwise.
        '''
        if self.PROFILE_TYPE == ['ANY']:
            return True, None

        profile = cluster.rt['profile']
        if profile.type not in self.PROFILE_TYPE:
            error = _('Policy not applicable on profile type: '
                      '%s') % profile.type
            return False, error

        return True, None

    def detach(self, cluster):
        '''Method to be invoked before policy is detached from a cluster.'''
        return True, None

    def pre_op(self, cluster_id, action):
        '''A method that will be invoked before an action execution.'''
        return

    def post_op(self, cluster_id, action):
        '''A method that will be invoked after an action execution.'''
        return

    def to_dict(self):
        def _fmt_time(value):
            return value and value.isoformat()

        pb_dict = {
            'id': self.id,
            'name': self.name,
            'type': self.type,
            'user': self.user,
            'project': self.project,
            'domain': self.domain,
            'spec': self.spec,
            'level': self.level,
            'cooldown': self.cooldown,
            'created_time': _fmt_time(self.created_time),
            'updated_time': _fmt_time(self.updated_time),
            'deleted_time': _fmt_time(self.deleted_time),
            'data': self.data,
        }
        return pb_dict

    def _build_conn_params(self, cluster):
        """Build trust-based connection parameters.

        :param cluster: the cluster for which the trust will be checked.
        """
        service_creds = senlin_context.get_service_context()
        params = {
            'username': service_creds.get('username'),
            'password': service_creds.get('password'),
            'auth_url': service_creds.get('auth_url'),
            'user_domain_name': service_creds.get('user_domain_name')
        }

        cred = db_api.cred_get(oslo_context.get_current(),
                               cluster.user, cluster.project)
        if cred is None:
            raise exception.TrustNotFound(trustor=cluster.user)
        params['trust_id'] = [cred.cred['openstack']['trust']]

        return params
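
Because __new__ dispatches on the spec's type name through the global
environment, a concrete policy only needs to override the hooks it cares
about. A minimal hypothetical subclass (the registration call in the
trailing comment is an assumption about the environment API):

class NopPolicy(Policy):
    '''A do-nothing policy, shown only to illustrate the hook contract.'''

    VERSION = '1.0'

    def pre_op(self, cluster_id, action):
        # Inspect or annotate the action before it executes.
        return

    def post_op(self, cluster_id, action):
        # Record results after the action has executed.
        return

# So that Policy(name, spec) can dispatch to NopPolicy, the class would be
# registered along the lines of (hypothetical call):
# environment.global_env().register_policy('senlin.policy.nop', NopPolicy)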
Example No. 27
    def test_get_children(self):
        sot = schema.Map('desc', schema={'foo': schema.String()})

        res = sot._get_children({'foo': 'bar'})

        self.assertEqual({'foo': 'bar'}, dict(res))
Example No. 28
class Trigger(object):

    KEYS = (
        TYPE,
        VERSION,
        RULE,
    ) = (
        'type',
        'version',
        'rule',
    )

    spec_schema = {
        TYPE:
        schema.String(
            _('Type name of the trigger type.'),
            required=True,
        ),
        VERSION:
        schema.String(
            _('Version number string of the trigger type.'),
            required=True,
        ),
        RULE:
        schema.Map(
            _('Rule collection for the trigger.'),
            required=True,
        )
    }

    def __new__(cls, name, spec, **kwargs):
        """Create a trigger instance based on its type and version.

        :param name: The name for the trigger.
        :param spec: A dictionary containing the spec for the trigger.
        :param kwargs: Keyword arguments for trigger creation.
        :returns: An instance of a specific sub-class of BaseTrigger.
        """
        type_name, version = schema.get_spec_version(spec)

        if cls != Trigger:
            TriggerClass = cls
        else:
            TriggerClass = environment.global_env().get_trigger(type_name)

        return super(Trigger, cls).__new__(TriggerClass)

    def __init__(self, name, spec, **kwargs):
        """Initialize a trigger instance.

        :param name: The name for the trigger.
        :param spec: A dictionary containing the detailed trigger spec.
        :param kwargs: Keyword arguments for initializing the trigger.
        :returns: An instance of a specific sub-class of BaseTrigger.
        """
        type_name, version = schema.get_spec_version(spec)

        self.type_name = type_name
        self.name = name
        self.id = kwargs.get('id', None)
        self.physical_id = kwargs.get('physical_id', None)
        self.desc = kwargs.get('desc', '')
        self.state = kwargs.get('state', INSUFFICIENT_DATA)
        self.enabled = kwargs.get('enabled', True)
        self.severity = kwargs.get('severity', S_LOW)
        self.links = kwargs.get('links', {})

        self.user = kwargs.get('user')
        self.project = kwargs.get('project')
        self.domain = kwargs.get('domain')
        self.created_time = kwargs.get('created_time', None)
        self.updated_time = kwargs.get('updated_time', None)
        self.deleted_time = kwargs.get('deleted_time', None)

        self.spec = spec
        self.spec_data = schema.Spec(self.spec_schema, spec)

    @classmethod
    def _from_db_record(cls, record):
        """Construct a trigger object from a database record."""

        kwargs = {
            'id': record.id,
            'physical_id': record.physical_id,
            'desc': record.desc,
            'state': record.state,
            'enabled': record.enabled,
            'severity': record.severity,
            'links': record.links,
            'user': record.user,
            'project': record.project,
            'domain': record.domain,
            'created_time': record.created_time,
            'updated_time': record.updated_time,
            'deleted_time': record.deleted_time,
        }

        return cls(record.name, record.spec, **kwargs)

    @classmethod
    def load(cls, ctx, trigger_id=None, db_trigger=None):
        """Retrieve and reconstruct a trigger object from DB.

        :param ctx: A request context for DB operations.
        :param trigger_id: The ID of a trigger for retrieval.
        :param db_trigger: A DB record for a trigger.
        """
        if db_trigger is None:
            db_trigger = db_api.trigger_get(ctx, trigger_id)
            if db_trigger is None:
                raise exception.TriggerNotFound(trigger=trigger_id)

        return cls._from_db_record(db_trigger)

    @classmethod
    def load_all(cls,
                 ctx,
                 limit=None,
                 marker=None,
                 sort_keys=None,
                 sort_dir=None,
                 filters=None,
                 project_safe=True,
                 show_deleted=False):
        """Retrieve all trigger objects from database.

        Optionally, you can use some parameters to fine-tune the query.

        :param ctx: A request context for DB operations.
        :param limit: Maximum number of records to return.
        :param marker: The ID of a last-seen record. Only records after this
                       ID value will be returned.
        :param sort_keys: A list of trigger properties for sorting.
        :param sort_dir: A string indicating the sorting direction. It can be
                         either `desc` for descending sorting or `asc` for
                         ascending sorting.
        :param filters: A map consisting of key-value pairs used to filter
                        the results.
        :param project_safe: A boolean indicating whether the query should
                             be restricted to the requester's project.
        :param show_deleted: A boolean indicating whether soft-deleted objects
                             should be included in the results.
        """
        records = db_api.trigger_get_all(ctx,
                                         limit=limit,
                                         marker=marker,
                                         sort_keys=sort_keys,
                                         sort_dir=sort_dir,
                                         filters=filters,
                                         project_safe=project_safe,
                                         show_deleted=show_deleted)

        for record in records:
            yield cls._from_db_record(record)

    @classmethod
    def delete(cls, ctx, trigger_id):
        """Deletes the specified trigger.

        :param ctx: The request context for DB operations.
        :param trigger_id: The unique ID of a trigger.
        """
        return db_api.trigger_delete(ctx, trigger_id)

    def store(self, ctx):
        """Store the trigger object into the database table.

        :param ctx: The request context for DB operations.
        """
        timestamp = timeutils.utcnow()

        values = {
            'name': self.name,
            'type': self.type_name,
            'desc': self.desc,
            'state': self.state,
            'enabled': self.enabled,
            'severity': self.severity,
            'links': self.links,
            'spec': self.spec,
        }

        if self.id is not None:
            self.updated_time = timestamp
            values['updated_time'] = timestamp
            db_api.trigger_update(ctx, self.id, values)
        else:
            self.created_time = timestamp
            values['created_time'] = timestamp
            values['user'] = ctx.user
            values['project'] = ctx.project
            values['domain'] = ctx.domain
            db_trigger = db_api.trigger_create(ctx, values)
            self.id = db_trigger.id

        return self.id

    def validate(self):
        """Validate the schema and the data provided."""
        self.spec_data.validate()
        # NOTE: the rule property is supposed to be assigned in subclasses.
        self.rule.validate()

    @classmethod
    def get_schema(cls):
        return {name: dict(value)
                for name, value in cls.spec_schema.items()}

    def to_dict(self):
        def _fmt_time(value):
            return value and value.isoformat()

        trigger_dict = {
            'id': self.id,
            'name': self.name,
            'type': self.type_name,
            'desc': self.desc,
            'state': self.state,
            'enabled': self.enabled,
            'severity': self.severity,
            'links': self.links,
            'spec': self.spec,
            'user': self.user,
            'project': self.project,
            'domain': self.domain,
            'created_time': _fmt_time(self.created_time),
            'updated_time': _fmt_time(self.updated_time),
            'deleted_time': _fmt_time(self.deleted_time),
        }
        return trigger_dict
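
As the NOTE in validate() says, subclasses are expected to assign the
'rule' property. A hypothetical subclass sketch showing where that happens
(the rule_schema contents are invented for illustration):

class ThresholdTrigger(Trigger):
    """Hypothetical trigger illustrating the 'rule' contract."""

    rule_schema = {
        'threshold': schema.Integer(
            _('Threshold value that fires the trigger.'),
            required=True,
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(ThresholdTrigger, self).__init__(name, spec, **kwargs)
        # validate() calls self.rule.validate(), so wrap the raw 'rule'
        # map from the spec in a schema.Spec built from rule_schema.
        self.rule = schema.Spec(self.rule_schema, spec.get(self.RULE, {}))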